, ...
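Note: `!==` is deprecated since Spark 2.0; use `=!=` instead, because `!==` does not have the same precedence as `===`. See: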
https://github.com/apache/spark/blob/branch-2.2/sql/core/src/main/scala/org/apache/spark/sql/Column.scala#L319-L320
:
, .
For a DataFrame (DF) or a DataSet (DS), there are several options:
1) Using a SQL expression (passed as a string) - works for both DF and DS:
case class NullStrings (n: Int, s: String)
// Sample rows covering every case of interest: two regular strings,
// a null and an empty string.
val sampleRows = Seq((1, "abc"), (2, "ABC"), (3, null), (4, ""))
val df = spark.sparkContext.parallelize(sampleRows).toDF("n", "s")

// The predicate is a SQL expression string; `where` is an alias for `filter`.
df.where("s is not null and s != ''").show()
+---+---+
| n| s|
+---+---+
| 1|abc|
| 2|ABC|
+---+---+
2) Using the DataFrame Column syntax (the `$` interpolator requires `import spark.implicits._`):
// Explicit form: keep rows whose `s` is neither null nor empty.
// Do not write `$"s" =!= null`: a comparison with null always evaluates to
// NULL (never true), so the null check must use isNotNull, and the two
// conditions must be combined with && rather than ||.
df.filter($"s".isNotNull && $"s" =!= "").show()
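However, `=!=` is not null-safe (cf. `<=>`, the null-safe equality operator): comparing a null value yields null, which `filter` treats as false, so the following is sufficient: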
// For a null `s` the comparison evaluates to NULL, which filter treats as
// false, so null rows are dropped together with the empty strings.
val isNonEmpty = $"s" =!= ""
df.where(isNonEmpty).show()
+---+---+
| n| s|
+---+---+
| 1|abc|
| 2|ABC|
+---+---+
3) Using the typed Dataset API with a Scala lambda:
// View the DataFrame as a typed Dataset; `s` may still hold a JVM null here.
val ds = df.as[NullStrings]
// Option(...) turns null into None, so `exists` is false for both null and "".
ds.filter(row => Option(row.s).exists(_.nonEmpty)).show()
+---+---+
| n| s|
+---+---+
| 1|abc|
| 2|ABC|
+---+---+
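Alternatively, declare the field as an Option in the case class, so that null is decoded as None and no explicit null check is needed.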
// Same row shape as the DataFrame, but the nullable column is modelled as
// Option[String].
case class NullStringsOption(n: Int, s: Option[String])
val ds1 = df.as[NullStringsOption]
// Keep only rows whose `s` is defined and not the empty string.
ds1.filter { row => row.s.exists(str => !str.isEmpty) }.show()