From 2837bf8548db7e9d43f6eefedf5a73feb22daedb Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Thu, 9 Oct 2014 17:54:02 -0700 Subject: [SPARK-3798][SQL] Store the output of a generator in a val This prevents it from changing during serialization, leading to corrupted results. Author: Michael Armbrust Closes #2656 from marmbrus/generateBug and squashes the following commits: efa32eb [Michael Armbrust] Store the output of a generator in a val. This prevents it from changing during serialization. --- .../src/main/scala/org/apache/spark/sql/execution/Generate.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'sql/core/src') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index c386fd121c..38877c28de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -39,7 +39,8 @@ case class Generate( child: SparkPlan) extends UnaryNode { - protected def generatorOutput: Seq[Attribute] = { + // This must be a val since the generator output expr ids are not preserved by serialization. + protected val generatorOutput: Seq[Attribute] = { if (join && outer) { generator.output.map(_.withNullability(true)) } else { @@ -62,7 +63,7 @@ case class Generate( newProjection(child.output ++ nullValues, child.output) val joinProjection = - newProjection(child.output ++ generator.output, child.output ++ generator.output) + newProjection(child.output ++ generatorOutput, child.output ++ generatorOutput) val joinedRow = new JoinedRow iter.flatMap {row => -- cgit v1.2.3