I am using the Java Spark API to write a test application. I use a class that does not implement the Serializable interface. To make the application work, I use the Kryo serializer to serialize the class. But the problem I observed during debugging was that, during deserialization, the returned object comes back null and, in turn, throws a NullPointerException. It seems to be a problem with closure serialization going wrong somewhere, but I'm not sure. Since I'm new to this kind of serialization, I don't know where to start digging.
Here is the code I'm testing:
package org.apache.spark.examples;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class Test {
    static PrintWriter outputFileWriter;
    static FileWriter file;
    static JavaSparkContext ssc;

    public static void main(String[] args) {
        String inputFile = "/home/incubator-spark/examples/src/main/scala/org/apache/spark/examples/InputFile.txt";
        String master = "local";
        String jobName = "TestSerialization";
        String sparkHome = "/home/test/Spark_Installation/spark-0.7.0";
        String sparkJar = "/home/test/TestSerializationIssesInSpark/TestSparkSerIssueApp/target/TestSparkSerIssueApp-0.0.1-SNAPSHOT.jar";

        SparkConf conf = new SparkConf();
        conf.set("spark.closure.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.set("spark.kryo.registrator", "org.apache.spark.examples.MyRegistrator");

        if (master.equals("local")) {
            ssc = new JavaSparkContext("local", jobName, conf);
        } else {
            ssc = new JavaSparkContext(master, jobName, sparkHome, sparkJar);
        }

        JavaRDD<String> testData = ssc.textFile(inputFile).cache();
        final NotSerializableJavaClass notSerializableTestObject = new NotSerializableJavaClass("Hi ");

        @SuppressWarnings({ "serial", "unchecked" })
        JavaRDD<String> classificationResults = testData.map(
            new Function<String, String>() {
                @Override
                public String call(String inputRecord) throws Exception {
                    if (!inputRecord.isEmpty()) {
                        String result = "";
                        try {
                            FileWriter file = new FileWriter("/home/test/TestSerializationIssesInSpark/results/test_result_" + (int) (Math.random() * 100));
                            PrintWriter outputFile = new PrintWriter(file);
                            InetAddress ip;
                            ip = InetAddress.getLocalHost();
                            outputFile.println("IP of the server: " + ip);
                            result = notSerializableTestObject.testMethod(inputRecord);
                            outputFile.println("Result: " + result);
                            outputFile.flush();
                            outputFile.close();
                            file.close();
                        } catch (UnknownHostException e) {
                            e.printStackTrace();
                        } catch (IOException e1) {
                            e1.printStackTrace();
                        }
                        return result;
                    } else {
                        System.out.println("End of elements in the stream.");
                        String result = "End of elements in the input data";
                        return result;
                    }
                }
            }).cache();

        long processedRecords = classificationResults.count();
        ssc.stop();
        System.out.println("sssssssssss" + processedRecords);
    }
}
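For clarity on the configuration above: spark.closure.serializer controls how the task closure itself (including the captured notSerializableTestObject) is serialized, while spark.serializer controls the serialization of RDD data; my code only sets the former. Here is a minimal sketch showing the two settings side by side, assuming the property names used in this Spark version:

import org.apache.spark.SparkConf;

// Sketch only: Spark's two separate serializer settings.
SparkConf conf = new SparkConf()
        // Serializes RDD/shuffle data (left at the default in my code).
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        // Serializes the task closures themselves (the setting my code uses).
        .set("spark.closure.serializer", "org.apache.spark.serializer.KryoSerializer")
        .set("spark.kryo.registrator", "org.apache.spark.examples.MyRegistrator");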
Here is the KryoRegistrator class:
package org.apache.spark.examples;

import org.apache.spark.serializer.KryoRegistrator;

import com.esotericsoftware.kryo.Kryo;

public class MyRegistrator implements KryoRegistrator {
    public void registerClasses(Kryo kryo) {
        kryo.register(NotSerializableJavaClass.class);
    }
}
Here is the class I'm serializing:
package org.apache.spark.examples;

public class NotSerializableJavaClass {
    public String testVariable;

    public NotSerializableJavaClass(String testVariable) {
        super();
        this.testVariable = testVariable;
    }

    public String testMethod(String vartoAppend) {
        return this.testVariable + vartoAppend;
    }
}
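Since I don't know where the null comes from, one way to narrow it down would be to round-trip NotSerializableJavaClass through Kryo directly, outside Spark. The following is only a sketch, not part of my application: it assumes the Kryo 2.x Input/Output API bundled with Spark, and the StdInstantiatorStrategy call is an assumption needed because the class has no no-arg constructor for Kryo to invoke during readObject.

package org.apache.spark.examples;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.objenesis.strategy.StdInstantiatorStrategy;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

public class KryoRoundTripTest {
    public static void main(String[] args) {
        Kryo kryo = new Kryo();
        // Assumption: without a no-arg constructor, Kryo needs an
        // instantiator strategy to create the object on deserialization.
        kryo.setInstantiatorStrategy(new StdInstantiatorStrategy());
        new MyRegistrator().registerClasses(kryo);

        // Serialize the same kind of object the Spark closure captures.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        Output output = new Output(bos);
        kryo.writeObject(output, new NotSerializableJavaClass("Hi "));
        output.close();

        // Deserialize and check that the field survived the round trip.
        Input input = new Input(new ByteArrayInputStream(bos.toByteArray()));
        NotSerializableJavaClass copy = kryo.readObject(input, NotSerializableJavaClass.class);
        input.close();

        System.out.println(copy.testMethod("round-trip")); // expected: "Hi round-trip"
    }
}

If this test prints null or fails, the problem would be in the Kryo setup itself; if it round-trips correctly, the issue is more likely in how Spark's closure serializer uses Kryo.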