. ArrayStoreException, Long String.
ArrayStoreException ,
ArrayStoreException?
vertices.json.gz:
{"toid": "osgb4000000031043205", "index": 1, "point": [508180.748, 195333.973]}
{"toid": "osgb4000000031043206", "index": 2, "point": [508163.122, 195316.627]}
{"toid": "osgb4000000031043207", "index": 3, "point": [508172.075, 195325.719]}
{"toid": "osgb4000000031043208", "index": 4, "point": [508513, 196023]}
The "index" field is inferred as LongType in the vertices_raw DataFrame, as its schema shows:
scala> vertices_raw.schema
res4: org.apache.spark.sql.types.StructType = StructType(StructField(index,LongType,true), StructField(point,ArrayType(DoubleType,true),true), StructField(toid,StringType,true))
So the index is a Long, not a String, which conflicts with the String vertex attribute declared here:
// Build the graph with a String vertex attribute and a Double edge attribute.
val graph: Graph[String, Double] = Graph(verticesRDD, edgesRDD)
Option 1:
Keep index as a Long. The original code reads it as a String:
// Pair each row's numeric vertex id (the "toid" value with its "osgb" prefix removed, parsed as a Long)
// with the row's "index" value.
// NOTE(review): "index" is read with getAs[String] although the schema printed earlier reports it as
// LongType; presumably this mismatch is what leads to the ArrayStoreException — confirm.
val vertices = vertices_raw.rdd.map(row=> ((row.getAs[String]("toid").stripPrefix("osgb").toLong),row.getAs[String]("index")))
// Expose the pairs under the (VertexId, String) element type used by the graph declaration below.
val verticesRDD: RDD[(VertexId, String)] = vertices
// Build the graph from the vertex pairs and edgesRDD (edgesRDD is not defined in this snippet).
val graph: Graph[(String),Double] = Graph(verticesRDD, edgesRDD)
Corrected version, reading index as a Long:
// Map every row to (vertex id, index): the id is "toid" without its "osgb" prefix, parsed as a Long;
// the index is read as a Long.
val vertices = vertices_raw.rdd.map { row =>
  val vertexId = row.getAs[String]("toid").stripPrefix("osgb").toLong
  val index = row.getAs[Long]("index")
  (vertexId, index)
}
// Expose the pairs under the (VertexId, Long) element type used by the graph declaration below.
val verticesRDD: RDD[(VertexId, Long)] = vertices
// Build the graph with a Long vertex attribute and a Double edge attribute.
val graph: Graph[Long, Double] = Graph(verticesRDD, edgesRDD)
Option 2:
Create a DataFrame vertices_raw2 from vertices_raw, converting index from LongType to StringType:
import org.apache.spark.sql.functions._
// Convert the "index" column to a string with Spark's built-in Column.cast instead of a
// hand-written UDF: a cast is a native expression the query optimizer understands, and it
// passes null values through without custom code.
val indexAsString = vertices_raw("index").cast("string")
// Replace "index" with its string form and keep only the "index" and "toid" columns.
val vertices_raw2 = vertices_raw.withColumn("index", indexAsString).select("index", "toid")
Then build the vertices RDD from the vertices_raw2 DataFrame:
// Map every row to (vertex id, index): the id is "toid" without its "osgb" prefix, parsed as a Long;
// the index is read as a String.
val vertices = vertices_raw2.rdd.map { row =>
  val vertexId = row.getAs[String]("toid").stripPrefix("osgb").toLong
  val indexLabel = row.getAs[String]("index")
  (vertexId, indexLabel)
}
Resulting graph:
scala> graph.edges.foreach(println)
Edge(5000005125740769,4000000029965899,51.55460482650549)
Edge(5000005125740770,5000005125740759,26.108461618676447)
Edge(5000005125740771,5000005125740763,30.841246458481766) ...
scala> graph.vertices.foreach(println)
(4000000029867298,58335)
(4000000029892180,10846)
(4000000027730512,338018)
(4000000023185673,43945) ...