unable to solve java.io.InvalidClassException: no valid constructor scala


I am stuck with java.io.InvalidClassException: no valid constructor using Scala. I read about this exception I found that when trying to deserialize an object of serialized class the no_arg constructor is fired and if this constructor is not existed this exception throws. And I already have this no-arg constructor in my code but still, have this exception.

Here is my class:

class KB(var UrlOwlFile: String, rdd: OWLAxiomsRDD, sparkSession: SparkSession) extends Serializable {


    var Url = UrlOwlFile
    var r = rdd
    var s = sparkSession

    var ontology: OWLOntology = initKB()
    var reasoner: OWLReasoner = _
    var hermit: Reasoner = _
    var manager: OWLOntologyManager = _
    var Concepts: RDD[OWLClass] = _
    var Roles: RDD[OWLObjectProperty] = _
    var dataFactory: OWLDataFactory = _
    var Examples: RDD[OWLIndividual] = _

    //  Date property: property, values and domains
    var dataPropertiesValue: RDD[RDD[OWLLiteral]] = _
    var Properties: RDD[OWLDataProperty] = _
    var domain: Array[Array[OWLIndividual]] = _
    var classifications: RDD[((OWLClassExpression, OWLIndividual), Int)]= _
    var newEle: ((OWLClassExpression, OWLIndividual), Int) = _
    var choiceDataP: Random = new Random(1)
    var choiceObjectP: Random = new Random(1)
    val d: Double = 0.3
    var generator: Random = new Random(2)

    def this() = {
      this("" , null, null)
      hermit = new Reasoner(null,  null)
    }

def getClassMembershipResult(testConcepts: Array[OWLClassExpression], negTestConcepts: Array[OWLClassExpression],
                                 examples: RDD[OWLIndividual]): RDD[((OWLClassExpression, OWLIndividual), Int)] = {

      println("\nClassifying all examples \n ------------ ")

      var flag: Boolean = false
      println("Processed concepts (" + testConcepts.size + "): \n")

      for (c <- 0 until testConcepts.size) {
        var p: Int = 0
        var n: Int = 0
        println("\nTest Concept number " + (c+1) + ": " + testConcepts(c))

        var c1 = examples.map{ x => (testConcepts(c),x)}
        var newEle1 = c1.map{x => (x, 0)}
        var c2 = newEle1.mapPartitions{data => data.map{ele => 
          if (getReasoner.isEntailed(getDataFactory.getOWLClassAssertionAxiom(testConcepts(c), ele._1._2))){
            newEle = (ele._1, +1)
            p = p + 1
          }
          else {
                if (!flag){
                  if (getReasoner.isEntailed(getDataFactory.getOWLClassAssertionAxiom(negTestConcepts(c), ele._1._2)))
                     newEle = (ele._1, -1)  
                }
                else
                     newEle = (ele._1, -1)  
                n = n + 1
              }
          println("\n Pos: " + p + "\t Neg: " + n)  

          newEle  
         }
        }

        classifications = c2
     }
      classifications.take(10).foreach(println(_))
      classifications
    }
    }
}

and here is the output:

18/03/23 11:10:09 ERROR Executor: Exception in task 0.0 in stage 36.0 (TID 44)
java.io.InvalidClassException: net.sansa_stack.ml.spark.classification.KB$KB$$anon$2; no valid constructor
    at java.io.ObjectStreamClass$ExceptionInfo.newInvalidClassException(ObjectStreamClass.java:157)
    at java.io.ObjectStreamClass.checkDeserialize(ObjectStreamClass.java:862)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2034)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
18/03/23 11:10:09 ERROR TaskSetManager: Task 0 in stage 36.0 failed 1 times; aborting job
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 36.0 failed 1 times, most recent failure: Lost task 0.0 in stage 36.0 (TID 44, localhost, executor driver): java.io.InvalidClassException: net.sansa_stack.ml.spark.classification.KB$KB$$anon$2; no valid constructor
    at java.io.ObjectStreamClass$ExceptionInfo.newInvalidClassException(ObjectStreamClass.java:157)
    at java.io.ObjectStreamClass.checkDeserialize(ObjectStreamClass.java:862)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2034)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1354)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1327)
    at net.sansa_stack.ml.spark.classification.KB$KB.getClassMembershipResult(KB.scala:226)
    at net.sansa_stack.ml.spark.classification.ClassMembership$ClassMembership.<init>(ClassMembership.scala:48)
    at net.sansa_stack.ml.spark.classification.TermDecisionTrees$.main(TermDecisionTrees.scala:47)
    at net.sansa_stack.ml.spark.classification.TermDecisionTrees.main(TermDecisionTrees.scala)
Caused by: java.io.InvalidClassException: net.sansa_stack.ml.spark.classification.KB$KB$$anon$2; no valid constructor
    at java.io.ObjectStreamClass$ExceptionInfo.newInvalidClassException(ObjectStreamClass.java:157)
    at java.io.ObjectStreamClass.checkDeserialize(ObjectStreamClass.java:862)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2034)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)


The error message says KB$KB$$anon$2 doesn't have a valid constructor, not KB. It looks like some anonymous class in code you don't show (or maybe one of the closures?).

This code is also problematic because

  1. you have var everywhere, which generally won't work as expected in Spark;

  2. your closures can easily end up with a reference to KB, which has a reference to SparkSession, RDDs and other things which can't be serialized.