-, , , - Spark. , , , , .
, Spark . , , , , , , - , .
:
/**
 * Mixin for components that need a `SparkContext` supplied by the implementing class.
 */
trait UsingSparkContextTrait {
  /** Spark context provided by the class that mixes this trait in. */
  def sc: SparkContext

  /**
   * Loads the CSV file from HDFS, collects it on the driver as a key/value map
   * and broadcasts that map to the executors.
   *
   * An RDD must not be handed to `sc.broadcast` directly: an RDD is only a handle
   * that is meaningful on the driver, so Spark warns about (1.x) or rejects (2.x)
   * such a broadcast. The data is therefore materialised first.
   *
   * NOTE(review): assumes the elided `map(...)` step yields `(String, String)` pairs
   * and that the file is small enough to fit in driver memory -- confirm. With
   * duplicate keys only one value per key survives `collectAsMap()`.
   *
   * @return a broadcast handle wrapping the immutable lookup map
   */
  def loadHdfsFileAndBroadcast = {
    // `.toMap` converts the collected scala.collection.Map into an immutable Map so the
    // broadcast's element type lines up with consumers declared as Broadcast[Map[K, V]].
    val lookup = sc.textFile("hdfs://.../myfile.csv").map(...).collectAsMap().toMap
    sc.broadcast(lookup)
  }
}
/**
 * Mixin for components that read from Hive and clean a column using a broadcast
 * lookup supplied by the implementing class.
 */
trait UsingHiveContextTrait {
  /** Hive context provided by the class that mixes this trait in. */
  def hiveContext: HiveContext

  /** Broadcast string-to-string lookup provided by the implementing class. */
  def df1B: Broadcast[Map[String, String]]

  /**
   * Selects every row of `myHiveTable` and replaces its `myCol` column with the
   * output of a UDF built from `cleanMyCol(df1B)`.
   *
   * NOTE(review): `cleanMyCol` is defined outside this snippet; it is presumably a
   * function factory taking the broadcast lookup -- confirm.
   */
  def loadHiveTable = {
    val rawTable = hiveContext.sql("select * from myHiveTable")
    val cleaner = udf(cleanMyCol(df1B))
    rawTable.withColumn("myCol", cleaner(col("myCol")))
  }
}
:
import org.apache.spark._
import org.apache.spark.sql._
import org.apache.spark.sql.hive.HiveContext
/**
 * Example driver class that mixes in both traits and supplies the Spark and Hive
 * contexts they require.
 *
 * NOTE(review): UsingHiveContextTrait declares an abstract `df1B`, but this class
 * only defines `dfb`; as written the abstract member is left unimplemented.
 * `dfb` looks like the intended provider -- confirm and align the names/types.
 */
class ClassDoingWork extends UsingSparkContextTrait with UsingHiveContextTrait {
// Spark configuration carrying only the application name.
val conf = new SparkConf().setAppName("example")
// Implements the abstract `sc` required by UsingSparkContextTrait.
val sc = new SparkContext(conf)
// NOTE(review): not referenced anywhere in the visible code -- confirm it is needed.
val sqlContext = new SQLContext(sc)
// Implements the abstract `hiveContext` required by UsingHiveContextTrait.
val hiveContext = new HiveContext(sc)
// Evaluated during construction, after `sc` has been initialised above.
val dfb = loadHdfsFileAndBroadcast
import hiveContext.implicits._
import hiveContext.sql
// Entry point for the analysis; starts from the cleaned Hive table.
def doAnalytics = {
val dfCleaned = loadHiveTable
...
}
}
- , , - , .
, SparkContext RDD rdd.context. .