aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortedyu <yuzhihong@gmail.com>2015-06-15 17:00:38 -0700
committerReynold Xin <rxin@databricks.com>2015-06-15 17:00:38 -0700
commit1a62d61696a0481508d83a07d19ab3701245ac20 (patch)
treee31e1af717b7f14e5b5d002ee5485d96ce2a5c87
parent6ae21a944a0f4580b55749776223c827450b00da (diff)
downloadspark-1a62d61696a0481508d83a07d19ab3701245ac20.tar.gz
spark-1a62d61696a0481508d83a07d19ab3701245ac20.tar.bz2
spark-1a62d61696a0481508d83a07d19ab3701245ac20.zip
SPARK-8336 Fix NullPointerException with functions.rand()
This PR fixes the problem reported by Justin Yip in the thread 'NullPointerException with functions.rand()' Tested using spark-shell and verified that the following works: sqlContext.createDataFrame(Seq((1,2), (3, 100))).withColumn("index", rand(30)).show() Author: tedyu <yuzhihong@gmail.com> Closes #6793 from tedyu/master and squashes the following commits: 62fd97b [tedyu] Create RandomSuite 750f92c [tedyu] Add test for Rand() with seed a1d66c5 [tedyu] Fix NullPointerException with functions.rand()
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala6
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala33
2 files changed, 38 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala
index cc34467391..45588bacd2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala
@@ -37,7 +37,11 @@ abstract class RDG(seed: Long) extends LeafExpression with Serializable {
* Record ID within each partition. By being transient, the Random Number Generator is
* reset every time we serialize and deserialize it.
*/
- @transient protected lazy val rng = new XORShiftRandom(seed + TaskContext.get().partitionId())
+ @transient protected lazy val partitionId = TaskContext.get() match {
+ case null => 0
+ case _ => TaskContext.get().partitionId()
+ }
+ @transient protected lazy val rng = new XORShiftRandom(seed + partitionId)
override def deterministic: Boolean = false
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
new file mode 100644
index 0000000000..9be2b23a53
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.scalatest.Matchers._
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.types.{DoubleType, IntegerType}
+
+
+class RandomSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+ test("random") {
+ val row = create_row(1.1, 2.0, 3.1, null)
+ checkDoubleEvaluation(Rand(30), (0.7363714192755834 +- 0.001), row)
+ }
+}