author     Wenchen Fan <wenchen@databricks.com>        2015-11-23 10:13:59 -0800
committer  Michael Armbrust <michael@databricks.com>   2015-11-23 10:13:59 -0800
commit     1a5baaa6517872b9a4fd6cd41c4b2cf1e390f6d1 (patch)
tree       5d399059cdd9c4b14c02ce551618e9a13f179cf0
parent     94ce65dfcbba1fe3a1fc9d8002c37d9cd1a11336 (diff)
[SPARK-11894][SQL] fix isNull for GetInternalRowField
We should use `InternalRow.isNullAt` to check whether the field is null before calling `InternalRow.getXXX`. Thanks to gatorsmile, who discovered this bug.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #9904 from cloud-fan/null.
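For intuition, the access pattern the fix enforces looks roughly like the hand-written Scala below. This is a sketch only: `readIntField` is not a Spark helper, it just mirrors what the generated code now does for an Int-typed field.

    import org.apache.spark.sql.catalyst.InternalRow

    // Null-safe read of an Int field. The isNullAt check must come first:
    // calling InternalRow.getInt on a NULL slot does not signal the null,
    // it may simply hand back a type default such as 0.
    def readIntField(row: InternalRow, ordinal: Int): Option[Int] =
      if (row.isNullAt(ordinal)) None else Some(row.getInt(ordinal))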
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala  | 23
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala                      | 15
2 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 82317d3385..4a1f419f0a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -236,11 +236,6 @@ case class NewInstance(
     }
     if (propagateNull) {
-      val objNullCheck = if (ctx.defaultValue(dataType) == "null") {
-        s"${ev.isNull} = ${ev.value} == null;"
-      } else {
-        ""
-      }
       val argsNonNull = s"!(${argGen.map(_.isNull).mkString(" || ")})"
       s"""
@@ -531,15 +526,15 @@ case class GetInternalRowField(child: Expression, ordinal: Int, dataType: DataType)
     throw new UnsupportedOperationException("Only code-generated evaluation is supported")
   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
-    val row = child.gen(ctx)
-    s"""
-      ${row.code}
-      final boolean ${ev.isNull} = ${row.isNull};
-      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
-      if (!${ev.isNull}) {
-        ${ev.value} = ${ctx.getValue(row.value, dataType, ordinal.toString)};
-      }
-    """
+    nullSafeCodeGen(ctx, ev, eval => {
+      s"""
+        if ($eval.isNullAt($ordinal)) {
+          ${ev.isNull} = true;
+        } else {
+          ${ev.value} = ${ctx.getValue(eval, dataType, ordinal.toString)};
+        }
+      """
+    })
   }
 }
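The second file turns the previously ignored self-join test back on and adds a regression test for the Dataset-level symptom. A rough way to observe the behavior interactively looks like the sketch below (assuming a Spark 1.6-era `sqlContext` with its implicits in scope; this snippet is not part of the patch). Before the fix, the null `java.lang.Integer` fields could come back decoded incorrectly rather than as null.

    import org.apache.spark.sql.functions.lit
    import sqlContext.implicits._

    val nullInt = null.asInstanceOf[java.lang.Integer]
    val ds = Seq((nullInt, "1"), (new java.lang.Integer(22), "2")).toDS()

    // Cross-join the Dataset with itself. With the fix, the null Integer fields
    // survive the round trip through InternalRow and are returned as null.
    ds.joinWith(ds, lit(true)).collect()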
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 9da02550b3..cc8e4325fd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -386,7 +386,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
       Seq((JavaData(1), 1L), (JavaData(2), 1L)))
   }
-  ignore("Java encoder self join") {
+  test("Java encoder self join") {
     implicit val kryoEncoder = Encoders.javaSerialization[JavaData]
     val ds = Seq(JavaData(1), JavaData(2)).toDS()
     assert(ds.joinWith(ds, lit(true)).collect().toSet ==
@@ -396,6 +396,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
       (JavaData(2), JavaData(1)),
       (JavaData(2), JavaData(2))))
   }
+
+  test("SPARK-11894: Incorrect results are returned when using null") {
+    val nullInt = null.asInstanceOf[java.lang.Integer]
+    val ds1 = Seq((nullInt, "1"), (new java.lang.Integer(22), "2")).toDS()
+    val ds2 = Seq((nullInt, "1"), (new java.lang.Integer(22), "2")).toDS()
+
+    checkAnswer(
+      ds1.joinWith(ds2, lit(true)),
+      ((nullInt, "1"), (nullInt, "1")),
+      ((new java.lang.Integer(22), "2"), (nullInt, "1")),
+      ((nullInt, "1"), (new java.lang.Integer(22), "2")),
+      ((new java.lang.Integer(22), "2"), (new java.lang.Integer(22), "2")))
+  }
 }