aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala18
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala11
2 files changed, 25 insertions, 4 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 55efea80d1..7c173cbcee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -47,9 +47,9 @@ object RandomDataGenerator {
*/
private val PROBABILITY_OF_NULL: Float = 0.1f
- private val MAX_STR_LEN: Int = 1024
- private val MAX_ARR_SIZE: Int = 128
- private val MAX_MAP_SIZE: Int = 128
+ final val MAX_STR_LEN: Int = 1024
+ final val MAX_ARR_SIZE: Int = 128
+ final val MAX_MAP_SIZE: Int = 128
/**
* Helper function for constructing a biased random number generator which returns "interesting"
@@ -208,7 +208,17 @@ object RandomDataGenerator {
forType(valueType, nullable = valueContainsNull, rand)
) yield {
() => {
- Seq.fill(rand.nextInt(MAX_MAP_SIZE))((keyGenerator(), valueGenerator())).toMap
+ val length = rand.nextInt(MAX_MAP_SIZE)
+ val keys = scala.collection.mutable.HashSet(Seq.fill(length)(keyGenerator()): _*)
+ // If the key generator cannot produce enough distinct keys, cap the number of
+ // retries so we do not loop forever.
+ var count = 0
+ while (keys.size < length && count < MAX_MAP_SIZE) {
+ keys += keyGenerator()
+ count += 1
+ }
+ val values = Seq.fill(keys.size)(valueGenerator())
+ keys.zip(values).toMap
}
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index b8ccdf7516..9fba7924e9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -95,4 +95,15 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
}
}
+ test("check size of generated map") {
+ val mapType = MapType(IntegerType, IntegerType)
+ for (seed <- 1 to 1000) {
+ val generator = RandomDataGenerator.forType(
+ mapType, nullable = false, rand = new Random(seed)).get
+ val maps = Seq.fill(100)(generator().asInstanceOf[Map[Int, Int]])
+ val expectedTotalElements = 100 / 2 * RandomDataGenerator.MAX_MAP_SIZE
+ val deviation = math.abs(maps.map(_.size).sum - expectedTotalElements)
+ assert(deviation.toDouble / expectedTotalElements < 2e-1)
+ }
+ }
}