aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorImran Rashid <irashid@cloudera.com>2015-11-06 20:06:24 +0000
committerSean Owen <sowen@cloudera.com>2015-11-06 20:06:24 +0000
commit49f1a820372d1cba41f3f00d07eb5728f2ed6705 (patch)
tree535797cc3662bfd7d8247b2d01f6fd00b2e1b2a9 /sql
parent62bb290773c9f9fa53cbe6d4eedc6e153761a763 (diff)
downloadspark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.tar.gz
spark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.tar.bz2
spark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.zip
[SPARK-10116][CORE] XORShiftRandom.hashSeed is random in high bits
https://issues.apache.org/jira/browse/SPARK-10116. This is really trivial, just happened to notice it -- if `XORShiftRandom.hashSeed` is really supposed to have random bits throughout (as the comment implies), it needs to do something for the conversion to `long`. mengxr mkolod Author: Imran Rashid <irashid@cloudera.com> Closes #8314 from squito/SPARK-10116.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala8
-rw-r--r--sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java6
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala4
3 files changed, 10 insertions, 8 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
index 4a644d136f..b7a0d44fa7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
@@ -24,12 +24,12 @@ import org.apache.spark.SparkFunSuite
class RandomSuite extends SparkFunSuite with ExpressionEvalHelper {
test("random") {
- checkDoubleEvaluation(Rand(30), 0.7363714192755834 +- 0.001)
- checkDoubleEvaluation(Randn(30), 0.5181478766595276 +- 0.001)
+ checkDoubleEvaluation(Rand(30), 0.31429268272540556 +- 0.001)
+ checkDoubleEvaluation(Randn(30), -0.4798519469521663 +- 0.001)
}
test("SPARK-9127 codegen with long seed") {
- checkDoubleEvaluation(Rand(5419823303878592871L), 0.4061913198963727 +- 0.001)
- checkDoubleEvaluation(Randn(5419823303878592871L), -0.24417152005343168 +- 0.001)
+ checkDoubleEvaluation(Rand(5419823303878592871L), 0.2304755080444375 +- 0.001)
+ checkDoubleEvaluation(Randn(5419823303878592871L), -1.2824262718225607 +- 0.001)
}
}
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index 49f516e86d..40bff57a17 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -257,7 +257,9 @@ public class JavaDataFrameSuite {
DataFrame df = context.range(0, 100, 1, 2).select(col("id").mod(3).as("key"));
DataFrame sampled = df.stat().<Integer>sampleBy("key", ImmutableMap.of(0, 0.1, 1, 0.2), 0L);
Row[] actual = sampled.groupBy("key").count().orderBy("key").collect();
- Row[] expected = {RowFactory.create(0, 5), RowFactory.create(1, 8)};
- Assert.assertArrayEquals(expected, actual);
+ Assert.assertEquals(0, actual[0].getLong(0));
+ Assert.assertTrue(0 <= actual[0].getLong(1) && actual[0].getLong(1) <= 8);
+ Assert.assertEquals(1, actual[1].getLong(0));
+ Assert.assertTrue(2 <= actual[1].getLong(1) && actual[1].getLong(1) <= 13);
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index 6524abcf5e..b15af42caa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -41,7 +41,7 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
val data = sparkContext.parallelize(1 to n, 2).toDF("id")
checkAnswer(
data.sample(withReplacement = false, 0.05, seed = 13),
- Seq(16, 23, 88, 100).map(Row(_))
+ Seq(3, 17, 27, 58, 62).map(Row(_))
)
}
@@ -186,6 +186,6 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
val sampled = df.stat.sampleBy("key", Map(0 -> 0.1, 1 -> 0.2), 0L)
checkAnswer(
sampled.groupBy("key").count().orderBy("key"),
- Seq(Row(0, 5), Row(1, 8)))
+ Seq(Row(0, 6), Row(1, 11)))
}
}