aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2016-05-04 11:00:01 -0700
committerReynold Xin <rxin@databricks.com>2016-05-04 11:00:01 -0700
commit6274a520fa743b7d079fde4a3033da5c3a2532a1 (patch)
tree15d44bfff9ab4e96d734bf89c19e380067b3e803 /core
parent4530250f5a51a77f9d0b91f036f8e44e0f943a32 (diff)
downloadspark-6274a520fa743b7d079fde4a3033da5c3a2532a1.tar.gz
spark-6274a520fa743b7d079fde4a3033da5c3a2532a1.tar.bz2
spark-6274a520fa743b7d079fde4a3033da5c3a2532a1.zip
[SPARK-15115][SQL] Reorganize whole stage codegen benchmark suites
## What changes were proposed in this pull request? We currently have a single suite that is very large, making it difficult to maintain and play with specific primitives. This patch reorganizes the file by creating multiple benchmark suites in a single package. Most of the changes are straightforward moves of code. On top of the code moving, I did: 1. Use SparkSession instead of SQLContext. 2. Turned most benchmark scenarios into their own test cases, rather than having multiple scenarios in a single test case, which takes forever to run. ## How was this patch tested? This is a test only change. Author: Reynold Xin <rxin@databricks.com> Closes #12891 from rxin/SPARK-15115.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java2
-rw-r--r--core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java3
-rw-r--r--core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala78
3 files changed, 3 insertions, 80 deletions
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java
index de92b8db47..e9571aa8bb 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java
@@ -17,7 +17,7 @@
package org.apache.spark.util.collection.unsafe.sort;
-final class RecordPointerAndKeyPrefix {
+public final class RecordPointerAndKeyPrefix {
/**
* A pointer to a record; see {@link org.apache.spark.memory.TaskMemoryManager} for a
* description of how these addresses are encoded.
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
index 12fb62fb77..d19b71fbc1 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java
@@ -29,7 +29,8 @@ import org.apache.spark.util.collection.SortDataFormat;
* Within each long[] buffer, position {@code 2 * i} holds a pointer to the record at
* index {@code i}, while position {@code 2 * i + 1} in the array holds an 8-byte key prefix.
*/
-final class UnsafeSortDataFormat extends SortDataFormat<RecordPointerAndKeyPrefix, LongArray> {
+public final class UnsafeSortDataFormat
+ extends SortDataFormat<RecordPointerAndKeyPrefix, LongArray> {
public static final UnsafeSortDataFormat INSTANCE = new UnsafeSortDataFormat();
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index b03df1a94d..def0752b46 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -26,7 +26,6 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.unsafe.array.LongArray
import org.apache.spark.unsafe.memory.MemoryBlock
-import org.apache.spark.util.Benchmark
import org.apache.spark.util.collection.Sorter
import org.apache.spark.util.random.XORShiftRandom
@@ -184,81 +183,4 @@ class RadixSortSuite extends SparkFunSuite with Logging {
assert(res1.view == res2.view)
}
}
-
- ignore("microbenchmarks") {
- val size = 25000000
- val rand = new XORShiftRandom(123)
- val benchmark = new Benchmark("radix sort " + size, size)
- benchmark.addTimerCase("reference TimSort key prefix array") { timer =>
- val array = Array.tabulate[Long](size * 2) { i => rand.nextLong }
- val buf = new LongArray(MemoryBlock.fromLongArray(array))
- timer.startTiming()
- referenceKeyPrefixSort(buf, 0, size, PrefixComparators.BINARY)
- timer.stopTiming()
- }
- benchmark.addTimerCase("reference Arrays.sort") { timer =>
- val ref = Array.tabulate[Long](size) { i => rand.nextLong }
- timer.startTiming()
- Arrays.sort(ref)
- timer.stopTiming()
- }
- benchmark.addTimerCase("radix sort one byte") { timer =>
- val array = new Array[Long](size * 2)
- var i = 0
- while (i < size) {
- array(i) = rand.nextLong & 0xff
- i += 1
- }
- val buf = new LongArray(MemoryBlock.fromLongArray(array))
- timer.startTiming()
- RadixSort.sort(buf, size, 0, 7, false, false)
- timer.stopTiming()
- }
- benchmark.addTimerCase("radix sort two bytes") { timer =>
- val array = new Array[Long](size * 2)
- var i = 0
- while (i < size) {
- array(i) = rand.nextLong & 0xffff
- i += 1
- }
- val buf = new LongArray(MemoryBlock.fromLongArray(array))
- timer.startTiming()
- RadixSort.sort(buf, size, 0, 7, false, false)
- timer.stopTiming()
- }
- benchmark.addTimerCase("radix sort eight bytes") { timer =>
- val array = new Array[Long](size * 2)
- var i = 0
- while (i < size) {
- array(i) = rand.nextLong
- i += 1
- }
- val buf = new LongArray(MemoryBlock.fromLongArray(array))
- timer.startTiming()
- RadixSort.sort(buf, size, 0, 7, false, false)
- timer.stopTiming()
- }
- benchmark.addTimerCase("radix sort key prefix array") { timer =>
- val (_, buf2) = generateKeyPrefixTestData(size, rand.nextLong)
- timer.startTiming()
- RadixSort.sortKeyPrefixArray(buf2, size, 0, 7, false, false)
- timer.stopTiming()
- }
- benchmark.run()
-
- /**
- Running benchmark: radix sort 25000000
- Java HotSpot(TM) 64-Bit Server VM 1.8.0_66-b17 on Linux 3.13.0-44-generic
- Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz
-
- radix sort 25000000: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- reference TimSort key prefix array 15546 / 15859 1.6 621.9 1.0X
- reference Arrays.sort 2416 / 2446 10.3 96.6 6.4X
- radix sort one byte 133 / 137 188.4 5.3 117.2X
- radix sort two bytes 255 / 258 98.2 10.2 61.1X
- radix sort eight bytes 991 / 997 25.2 39.6 15.7X
- radix sort key prefix array 1540 / 1563 16.2 61.6 10.1X
- */
- }
}