about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Liang <ekl@databricks.com> 2016-06-11 15:26:08 -0700
committer Reynold Xin <rxin@databricks.com> 2016-06-11 15:26:08 -0700
commit 5bb4564cd47c8bf06409287e0de4ec45609970b2 (patch)
tree 6568b8d8d1995b57675c283eafc5cca4fd3c8536
parent cb5d933d86ac4afd947874f1f1c31c7154cb8249 (diff)
download spark-5bb4564cd47c8bf06409287e0de4ec45609970b2.tar.gz
download spark-5bb4564cd47c8bf06409287e0de4ec45609970b2.tar.bz2
download spark-5bb4564cd47c8bf06409287e0de4ec45609970b2.zip
[SPARK-15881] Update microbenchmark results for WideSchemaBenchmark
## What changes were proposed in this pull request?

The WideSchemaBenchmark microbenchmark results were not updated after performance improvements. To make updating them easier, I also moved the results from inline comments out into a file, which is auto-generated when the benchmark is re-run.

Author: Eric Liang <ekl@databricks.com>

Closes #13607 from ericl/sc-3538.
-rw-r--r-- project/SparkBuild.scala 2
-rw-r--r-- sql/core/benchmarks/WideSchemaBenchmark-results.txt 93
-rw-r--r-- sql/core/src/test/resources/log4j.properties 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala 260
4 files changed, 123 insertions, 234 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 304288a32c..2f7da31e55 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -833,7 +833,7 @@ object TestSettings {
javaOptions in Test += "-Dspark.ui.enabled=false",
javaOptions in Test += "-Dspark.ui.showConsoleProgress=false",
javaOptions in Test += "-Dspark.unsafe.exceptionOnMemoryLeak=true",
- javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true",
+ javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=false",
javaOptions in Test += "-Dderby.system.durability=test",
javaOptions in Test ++= System.getProperties.asScala.filter(_._1.startsWith("spark"))
.map { case (k,v) => s"-D$k=$v" }.toSeq,
diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt
new file mode 100644
index 0000000000..ea6a6616c2
--- /dev/null
+++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt
@@ -0,0 +1,93 @@
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 select expressions 3 / 5 0.0 2967064.0 1.0X
+100 select expressions 11 / 12 0.0 11369518.0 0.3X
+2500 select expressions 243 / 250 0.0 242561004.0 0.0X
+
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 cols x 100000 rows (read in-mem) 28 / 40 3.6 278.8 1.0X
+1 cols x 100000 rows (exec in-mem) 28 / 42 3.5 284.0 1.0X
+1 cols x 100000 rows (read parquet) 23 / 35 4.4 228.8 1.2X
+1 cols x 100000 rows (write parquet) 163 / 182 0.6 1633.0 0.2X
+100 cols x 1000 rows (read in-mem) 27 / 39 3.7 266.9 1.0X
+100 cols x 1000 rows (exec in-mem) 48 / 79 2.1 481.7 0.6X
+100 cols x 1000 rows (read parquet) 25 / 36 3.9 254.3 1.1X
+100 cols x 1000 rows (write parquet) 182 / 196 0.5 1819.5 0.2X
+2500 cols x 40 rows (read in-mem) 280 / 315 0.4 2797.1 0.1X
+2500 cols x 40 rows (exec in-mem) 606 / 638 0.2 6064.3 0.0X
+2500 cols x 40 rows (read parquet) 836 / 843 0.1 8356.4 0.0X
+2500 cols x 40 rows (write parquet) 490 / 522 0.2 4900.6 0.1X
+
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 wide x 100000 rows (read in-mem) 22 / 35 4.6 216.0 1.0X
+1 wide x 100000 rows (exec in-mem) 40 / 63 2.5 400.6 0.5X
+1 wide x 100000 rows (read parquet) 93 / 134 1.1 933.9 0.2X
+1 wide x 100000 rows (write parquet) 133 / 174 0.7 1334.3 0.2X
+100 wide x 1000 rows (read in-mem) 22 / 44 4.5 223.3 1.0X
+100 wide x 1000 rows (exec in-mem) 88 / 138 1.1 878.6 0.2X
+100 wide x 1000 rows (read parquet) 117 / 186 0.9 1172.0 0.2X
+100 wide x 1000 rows (write parquet) 144 / 174 0.7 1441.6 0.1X
+2500 wide x 40 rows (read in-mem) 36 / 57 2.8 358.9 0.6X
+2500 wide x 40 rows (exec in-mem) 1466 / 1507 0.1 14656.6 0.0X
+2500 wide x 40 rows (read parquet) 690 / 802 0.1 6898.2 0.0X
+2500 wide x 40 rows (write parquet) 197 / 207 0.5 1970.9 0.1X
+
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 deep x 100000 rows (read in-mem) 22 / 35 4.5 223.9 1.0X
+1 deep x 100000 rows (exec in-mem) 28 / 52 3.6 280.6 0.8X
+1 deep x 100000 rows (read parquet) 41 / 65 2.4 410.5 0.5X
+1 deep x 100000 rows (write parquet) 163 / 173 0.6 1634.5 0.1X
+100 deep x 1000 rows (read in-mem) 43 / 63 2.3 425.9 0.5X
+100 deep x 1000 rows (exec in-mem) 232 / 280 0.4 2321.7 0.1X
+100 deep x 1000 rows (read parquet) 1989 / 2281 0.1 19886.6 0.0X
+100 deep x 1000 rows (write parquet) 144 / 184 0.7 1442.6 0.2X
+250 deep x 400 rows (read in-mem) 68 / 95 1.5 680.9 0.3X
+250 deep x 400 rows (exec in-mem) 1310 / 1403 0.1 13096.4 0.0X
+250 deep x 400 rows (read parquet) 41477 / 41847 0.0 414766.8 0.0X
+250 deep x 400 rows (write parquet) 243 / 272 0.4 2433.1 0.1X
+
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 x 1 deep x 100000 rows (read in-mem) 23 / 36 4.4 229.8 1.0X
+1 x 1 deep x 100000 rows (exec in-mem) 27 / 48 3.7 269.6 0.9X
+1 x 1 deep x 100000 rows (read parquet) 25 / 33 4.0 247.5 0.9X
+1 x 1 deep x 100000 rows (write parquet) 82 / 134 1.2 821.1 0.3X
+128 x 8 deep x 1000 rows (read in-mem) 19 / 29 5.3 189.5 1.2X
+128 x 8 deep x 1000 rows (exec in-mem) 144 / 165 0.7 1440.4 0.2X
+128 x 8 deep x 1000 rows (read parquet) 117 / 159 0.9 1174.4 0.2X
+128 x 8 deep x 1000 rows (write parquet) 135 / 162 0.7 1349.0 0.2X
+1024 x 11 deep x 100 rows (read in-mem) 30 / 49 3.3 304.4 0.8X
+1024 x 11 deep x 100 rows (exec in-mem) 1146 / 1183 0.1 11457.6 0.0X
+1024 x 11 deep x 100 rows (read parquet) 712 / 758 0.1 7119.5 0.0X
+1024 x 11 deep x 100 rows (write parquet) 104 / 143 1.0 1037.3 0.2X
+
+OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
+Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
+wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+1 wide x 100000 rows (read in-mem) 18 / 31 5.6 179.3 1.0X
+1 wide x 100000 rows (exec in-mem) 31 / 47 3.2 310.2 0.6X
+1 wide x 100000 rows (read parquet) 45 / 73 2.2 445.1 0.4X
+1 wide x 100000 rows (write parquet) 109 / 140 0.9 1085.9 0.2X
+100 wide x 1000 rows (read in-mem) 17 / 25 5.8 172.7 1.0X
+100 wide x 1000 rows (exec in-mem) 18 / 22 5.4 184.6 1.0X
+100 wide x 1000 rows (read parquet) 26 / 42 3.8 261.8 0.7X
+100 wide x 1000 rows (write parquet) 150 / 164 0.7 1499.4 0.1X
+2500 wide x 40 rows (read in-mem) 19 / 31 5.1 194.7 0.9X
+2500 wide x 40 rows (exec in-mem) 19 / 24 5.3 188.5 1.0X
+2500 wide x 40 rows (read parquet) 33 / 47 3.0 334.4 0.5X
+2500 wide x 40 rows (write parquet) 153 / 164 0.7 1528.2 0.1X
+
diff --git a/sql/core/src/test/resources/log4j.properties b/sql/core/src/test/resources/log4j.properties
index e53cb1f4e6..33b9ecf1e2 100644
--- a/sql/core/src/test/resources/log4j.properties
+++ b/sql/core/src/test/resources/log4j.properties
@@ -16,7 +16,7 @@
#
# Set everything to be logged to the file core/target/unit-tests.log
-log4j.rootLogger=DEBUG, CA, FA
+log4j.rootLogger=INFO, CA, FA
#Console Appender
log4j.appender.CA=org.apache.log4j.ConsoleAppender
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala
index 06466e629b..d2704b3d3f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql
-import java.io.File
+import java.io.{File, FileOutputStream, OutputStream}
import org.scalatest.BeforeAndAfterEach
@@ -29,11 +29,13 @@ import org.apache.spark.util.{Benchmark, Utils}
* Benchmark for performance with very wide and nested DataFrames.
* To run this:
* build/sbt "sql/test-only *WideSchemaBenchmark"
+ *
+ * Results will be written to "sql/core/benchmarks/WideSchemaBenchmark-results.txt".
*/
class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach {
private val scaleFactor = 100000
- private val widthsToTest = Seq(1, 10, 100, 1000, 2500)
- private val depthsToTest = Seq(1, 10, 100, 250)
+ private val widthsToTest = Seq(1, 100, 2500)
+ private val depthsToTest = Seq(1, 100, 250)
assert(scaleFactor > widthsToTest.max)
private lazy val sparkSession = SparkSession.builder
@@ -44,15 +46,22 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach {
import sparkSession.implicits._
private var tmpFiles: List[File] = Nil
+ private var out: OutputStream = null
+
+ override def beforeAll() {
+ super.beforeAll()
+ out = new FileOutputStream(new File("benchmarks/WideSchemaBenchmark-results.txt"))
+ }
+
+ override def afterAll() {
+ super.afterAll()
+ out.close()
+ }
override def afterEach() {
- try {
- for (tmpFile <- tmpFiles) {
- Utils.deleteRecursively(tmpFile)
- }
- } finally {
- tmpFiles = Nil
- super.afterEach()
+ super.afterEach()
+ for (tmpFile <- tmpFiles) {
+ Utils.deleteRecursively(tmpFile)
}
}
@@ -80,7 +89,7 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach {
benchmark.addCase(desc + " (read in-mem)") { iter =>
df.selectExpr(s"sum($selector)").collect()
}
- benchmark.addCase(desc + " (write in-mem)") { iter =>
+ benchmark.addCase(desc + " (exec in-mem)") { iter =>
df.selectExpr("*", s"hash($selector) as f").selectExpr(s"sum($selector)", "sum(f)").collect()
}
val parquet = saveAsParquet(df)
@@ -93,7 +102,7 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach {
}
ignore("parsing large select expressions") {
- val benchmark = new Benchmark("parsing large select", 1)
+ val benchmark = new Benchmark("parsing large select", 1, output = Some(out))
for (width <- widthsToTest) {
val selectExpr = (1 to width).map(i => s"id as a_$i")
benchmark.addCase(s"$width select expressions") { iter =>
@@ -101,22 +110,10 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach {
}
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 select expressions 22 / 25 0.0 22053737.0 1.0X
-10 select expressions 8 / 13 0.0 8288520.0 2.7X
-100 select expressions 29 / 32 0.0 29481040.0 0.7X
-1000 select expressions 268 / 276 0.0 268183159.0 0.1X
-2500 select expressions 683 / 691 0.0 683422241.0 0.0X
-*/
}
ignore("many column field read and write") {
- val benchmark = new Benchmark("many column field r/w", scaleFactor)
+ val benchmark = new Benchmark("many column field r/w", scaleFactor, output = Some(out))
for (width <- widthsToTest) {
// normalize by width to keep constant data size
val numRows = scaleFactor / width
@@ -126,38 +123,11 @@ parsing large select: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$width cols x $numRows rows", "a_1")
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 cols x 100000 rows (read in-mem) 26 / 33 3.8 262.9 1.0X
-1 cols x 100000 rows (write in-mem) 40 / 51 2.5 401.6 0.7X
-1 cols x 100000 rows (read parquet) 37 / 57 2.7 374.3 0.7X
-1 cols x 100000 rows (write parquet) 105 / 157 0.9 1054.9 0.2X
-10 cols x 10000 rows (read in-mem) 26 / 39 3.8 260.5 1.0X
-10 cols x 10000 rows (write in-mem) 37 / 44 2.7 367.4 0.7X
-10 cols x 10000 rows (read parquet) 31 / 39 3.3 305.1 0.9X
-10 cols x 10000 rows (write parquet) 86 / 137 1.2 860.2 0.3X
-100 cols x 1000 rows (read in-mem) 40 / 64 2.5 401.2 0.7X
-100 cols x 1000 rows (write in-mem) 112 / 139 0.9 1118.3 0.2X
-100 cols x 1000 rows (read parquet) 35 / 52 2.9 349.8 0.8X
-100 cols x 1000 rows (write parquet) 150 / 182 0.7 1497.1 0.2X
-1000 cols x 100 rows (read in-mem) 304 / 362 0.3 3043.6 0.1X
-1000 cols x 100 rows (write in-mem) 647 / 729 0.2 6467.8 0.0X
-1000 cols x 100 rows (read parquet) 194 / 235 0.5 1937.7 0.1X
-1000 cols x 100 rows (write parquet) 511 / 521 0.2 5105.0 0.1X
-2500 cols x 40 rows (read in-mem) 915 / 924 0.1 9148.2 0.0X
-2500 cols x 40 rows (write in-mem) 1856 / 1933 0.1 18558.1 0.0X
-2500 cols x 40 rows (read parquet) 802 / 881 0.1 8019.3 0.0X
-2500 cols x 40 rows (write parquet) 1268 / 1291 0.1 12681.6 0.0X
-*/
}
ignore("wide shallowly nested struct field read and write") {
val benchmark = new Benchmark(
- "wide shallowly nested struct field r/w", scaleFactor)
+ "wide shallowly nested struct field r/w", scaleFactor, output = Some(out))
for (width <- widthsToTest) {
val numRows = scaleFactor / width
var datum: String = "{"
@@ -175,84 +145,10 @@ many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$width wide x $numRows rows", "a.b.c.value_1")
}
benchmark.run()
-
-/*
-Java HotSpot(TM) 64-Bit Server VM 1.7.0_80-b15 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 wide x 100000 rows (read in-mem) 100 / 125 1.0 997.7 1.0X
-1 wide x 100000 rows (write in-mem) 130 / 147 0.8 1302.9 0.8X
-1 wide x 100000 rows (read parquet) 195 / 228 0.5 1951.4 0.5X
-1 wide x 100000 rows (write parquet) 248 / 259 0.4 2479.7 0.4X
-10 wide x 10000 rows (read in-mem) 76 / 89 1.3 757.2 1.3X
-10 wide x 10000 rows (write in-mem) 90 / 116 1.1 900.0 1.1X
-10 wide x 10000 rows (read parquet) 90 / 135 1.1 903.9 1.1X
-10 wide x 10000 rows (write parquet) 222 / 240 0.4 2222.8 0.4X
-100 wide x 1000 rows (read in-mem) 71 / 91 1.4 710.8 1.4X
-100 wide x 1000 rows (write in-mem) 252 / 324 0.4 2522.4 0.4X
-100 wide x 1000 rows (read parquet) 310 / 329 0.3 3095.9 0.3X
-100 wide x 1000 rows (write parquet) 253 / 267 0.4 2525.7 0.4X
-1000 wide x 100 rows (read in-mem) 144 / 160 0.7 1439.5 0.7X
-1000 wide x 100 rows (write in-mem) 2055 / 2326 0.0 20553.9 0.0X
-1000 wide x 100 rows (read parquet) 750 / 925 0.1 7496.8 0.1X
-1000 wide x 100 rows (write parquet) 235 / 317 0.4 2347.5 0.4X
-2500 wide x 40 rows (read in-mem) 219 / 227 0.5 2190.9 0.5X
-2500 wide x 40 rows (write in-mem) 5177 / 5423 0.0 51773.2 0.0X
-2500 wide x 40 rows (read parquet) 1642 / 1714 0.1 16417.7 0.1X
-2500 wide x 40 rows (write parquet) 357 / 381 0.3 3568.2 0.3X
-*/
- }
-
- ignore("wide struct field read and write") {
- val benchmark = new Benchmark("wide struct field r/w", scaleFactor)
- for (width <- widthsToTest) {
- val numRows = scaleFactor / width
- var datum: String = "{"
- for (i <- 1 to width) {
- if (i == 1) {
- datum += s""""value_$i": 1"""
- } else {
- datum += s""", "value_$i": 1"""
- }
- }
- datum += "}"
- val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum).rdd).cache()
- df.count() // force caching
- addCases(benchmark, df, s"$width wide x $numRows rows", "value_1")
- }
- benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-wide struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 wide x 100000 rows (read in-mem) 22 / 37 4.6 216.8 1.0X
-1 wide x 100000 rows (write in-mem) 37 / 54 2.7 365.6 0.6X
-1 wide x 100000 rows (read parquet) 27 / 44 3.6 274.7 0.8X
-1 wide x 100000 rows (write parquet) 155 / 183 0.6 1546.3 0.1X
-10 wide x 10000 rows (read in-mem) 27 / 40 3.7 272.1 0.8X
-10 wide x 10000 rows (write in-mem) 32 / 44 3.2 315.7 0.7X
-10 wide x 10000 rows (read parquet) 31 / 44 3.2 309.8 0.7X
-10 wide x 10000 rows (write parquet) 151 / 169 0.7 1509.3 0.1X
-100 wide x 1000 rows (read in-mem) 37 / 62 2.7 374.4 0.6X
-100 wide x 1000 rows (write in-mem) 81 / 96 1.2 805.6 0.3X
-100 wide x 1000 rows (read parquet) 31 / 44 3.3 307.3 0.7X
-100 wide x 1000 rows (write parquet) 174 / 209 0.6 1745.0 0.1X
-1000 wide x 100 rows (read in-mem) 308 / 339 0.3 3082.4 0.1X
-1000 wide x 100 rows (write in-mem) 672 / 696 0.1 6717.7 0.0X
-1000 wide x 100 rows (read parquet) 182 / 228 0.5 1821.2 0.1X
-1000 wide x 100 rows (write parquet) 484 / 497 0.2 4841.2 0.0X
-2500 wide x 40 rows (read in-mem) 727 / 786 0.1 7268.4 0.0X
-2500 wide x 40 rows (write in-mem) 1734 / 1782 0.1 17341.5 0.0X
-2500 wide x 40 rows (read parquet) 882 / 935 0.1 8816.8 0.0X
-2500 wide x 40 rows (write parquet) 935 / 982 0.1 9351.9 0.0X
-*/
}
ignore("deeply nested struct field read and write") {
- val benchmark = new Benchmark("deeply nested struct field r/w", scaleFactor)
+ val benchmark = new Benchmark("deeply nested struct field r/w", scaleFactor, output = Some(out))
for (depth <- depthsToTest) {
val numRows = scaleFactor / depth
var datum: String = "{\"value\": 1}"
@@ -266,34 +162,11 @@ wide struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$depth deep x $numRows rows", selector)
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 deep x 100000 rows (read in-mem) 24 / 39 4.2 239.0 1.0X
-1 deep x 100000 rows (write in-mem) 34 / 47 3.0 335.1 0.7X
-1 deep x 100000 rows (read parquet) 45 / 51 2.2 446.1 0.5X
-1 deep x 100000 rows (write parquet) 86 / 108 1.2 859.4 0.3X
-10 deep x 10000 rows (read in-mem) 28 / 38 3.6 275.1 0.9X
-10 deep x 10000 rows (write in-mem) 43 / 64 2.3 427.1 0.6X
-10 deep x 10000 rows (read parquet) 44 / 59 2.3 438.1 0.5X
-10 deep x 10000 rows (write parquet) 85 / 110 1.2 853.6 0.3X
-100 deep x 1000 rows (read in-mem) 79 / 100 1.3 785.5 0.3X
-100 deep x 1000 rows (write in-mem) 776 / 800 0.1 7760.3 0.0X
-100 deep x 1000 rows (read parquet) 3302 / 3394 0.0 33021.2 0.0X
-100 deep x 1000 rows (write parquet) 226 / 243 0.4 2259.0 0.1X
-250 deep x 400 rows (read in-mem) 610 / 639 0.2 6104.0 0.0X
-250 deep x 400 rows (write in-mem) 8526 / 8531 0.0 85256.9 0.0X
-250 deep x 400 rows (read parquet) 54968 / 55069 0.0 549681.4 0.0X
-250 deep x 400 rows (write parquet) 714 / 718 0.1 7143.0 0.0X
-*/
}
ignore("bushy struct field read and write") {
- val benchmark = new Benchmark("bushy struct field r/w", scaleFactor)
- for (width <- Seq(1, 10, 100, 500)) {
+ val benchmark = new Benchmark("bushy struct field r/w", scaleFactor, output = Some(out))
+ for (width <- Seq(1, 100, 1000)) {
val numRows = scaleFactor / width
var numNodes = 1
var datum: String = "{\"value\": 1}"
@@ -312,33 +185,10 @@ deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$numNodes x $depth deep x $numRows rows", selector)
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 x 1 deep x 100000 rows (read in-mem) 21 / 27 4.7 212.6 1.0X
-1 x 1 deep x 100000 rows (write in-mem) 27 / 38 3.8 265.8 0.8X
-1 x 1 deep x 100000 rows (read parquet) 26 / 32 3.9 259.1 0.8X
-1 x 1 deep x 100000 rows (write parquet) 150 / 169 0.7 1499.5 0.1X
-16 x 5 deep x 10000 rows (read in-mem) 26 / 45 3.9 258.7 0.8X
-16 x 5 deep x 10000 rows (write in-mem) 54 / 58 1.9 535.1 0.4X
-16 x 5 deep x 10000 rows (read parquet) 60 / 84 1.7 595.8 0.4X
-16 x 5 deep x 10000 rows (write parquet) 179 / 184 0.6 1787.5 0.1X
-128 x 8 deep x 1000 rows (read in-mem) 26 / 40 3.8 261.4 0.8X
-128 x 8 deep x 1000 rows (write in-mem) 592 / 592 0.2 5915.3 0.0X
-128 x 8 deep x 1000 rows (read parquet) 203 / 251 0.5 2031.8 0.1X
-128 x 8 deep x 1000 rows (write parquet) 105 / 131 1.0 1045.2 0.2X
-512 x 10 deep x 200 rows (read in-mem) 101 / 125 1.0 1007.4 0.2X
-512 x 10 deep x 200 rows (write in-mem) 6778 / 6943 0.0 67781.1 0.0X
-512 x 10 deep x 200 rows (read parquet) 958 / 1071 0.1 9584.9 0.0X
-512 x 10 deep x 200 rows (write parquet) 173 / 207 0.6 1726.1 0.1X
-*/
}
ignore("wide array field read and write") {
- val benchmark = new Benchmark("wide array field r/w", scaleFactor)
+ val benchmark = new Benchmark("wide array field r/w", scaleFactor, output = Some(out))
for (width <- widthsToTest) {
val numRows = scaleFactor / width
var datum: String = "{\"value\": ["
@@ -355,37 +205,10 @@ bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$width wide x $numRows rows", "value[0]")
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 wide x 100000 rows (read in-mem) 27 / 45 3.7 268.0 1.0X
-1 wide x 100000 rows (write in-mem) 37 / 52 2.7 368.3 0.7X
-1 wide x 100000 rows (read parquet) 52 / 65 1.9 524.9 0.5X
-1 wide x 100000 rows (write parquet) 102 / 139 1.0 1016.7 0.3X
-10 wide x 10000 rows (read in-mem) 20 / 26 5.0 201.7 1.3X
-10 wide x 10000 rows (write in-mem) 26 / 35 3.8 259.8 1.0X
-10 wide x 10000 rows (read parquet) 39 / 59 2.5 393.8 0.7X
-10 wide x 10000 rows (write parquet) 120 / 143 0.8 1201.4 0.2X
-100 wide x 1000 rows (read in-mem) 24 / 31 4.2 240.1 1.1X
-100 wide x 1000 rows (write in-mem) 26 / 35 3.8 264.1 1.0X
-100 wide x 1000 rows (read parquet) 30 / 47 3.4 296.8 0.9X
-100 wide x 1000 rows (write parquet) 109 / 147 0.9 1094.8 0.2X
-1000 wide x 100 rows (read in-mem) 20 / 38 5.0 200.6 1.3X
-1000 wide x 100 rows (write in-mem) 24 / 32 4.1 242.3 1.1X
-1000 wide x 100 rows (read parquet) 47 / 55 2.1 470.1 0.6X
-1000 wide x 100 rows (write parquet) 146 / 164 0.7 1465.0 0.2X
-2500 wide x 40 rows (read in-mem) 20 / 28 5.1 196.1 1.4X
-2500 wide x 40 rows (write in-mem) 25 / 27 4.0 249.3 1.1X
-2500 wide x 40 rows (read parquet) 33 / 48 3.0 332.0 0.8X
-2500 wide x 40 rows (write parquet) 149 / 176 0.7 1489.3 0.2X
-*/
}
ignore("wide map field read and write") {
- val benchmark = new Benchmark("wide map field r/w", scaleFactor)
+ val benchmark = new Benchmark("wide map field r/w", scaleFactor, output = Some(out))
for (width <- widthsToTest) {
val numRows = scaleFactor / width
val datum = Tuple1((1 to width).map(i => ("value_" + i -> 1)).toMap)
@@ -394,32 +217,5 @@ wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Ro
addCases(benchmark, df, s"$width wide x $numRows rows", "_1[\"value_1\"]")
}
benchmark.run()
-
-/*
-OpenJDK 64-Bit Server VM 1.8.0_66-internal-b17 on Linux 4.2.0-36-generic
-Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
-wide map field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------
-1 wide x 100000 rows (read in-mem) 27 / 42 3.7 270.9 1.0X
-1 wide x 100000 rows (write in-mem) 40 / 63 2.5 403.4 0.7X
-1 wide x 100000 rows (read parquet) 71 / 114 1.4 705.8 0.4X
-1 wide x 100000 rows (write parquet) 169 / 184 0.6 1689.7 0.2X
-10 wide x 10000 rows (read in-mem) 22 / 35 4.6 216.6 1.3X
-10 wide x 10000 rows (write in-mem) 29 / 34 3.5 285.6 0.9X
-10 wide x 10000 rows (read parquet) 61 / 81 1.6 610.3 0.4X
-10 wide x 10000 rows (write parquet) 150 / 172 0.7 1504.7 0.2X
-100 wide x 1000 rows (read in-mem) 21 / 29 4.8 207.9 1.3X
-100 wide x 1000 rows (write in-mem) 30 / 57 3.3 304.9 0.9X
-100 wide x 1000 rows (read parquet) 36 / 61 2.8 356.7 0.8X
-100 wide x 1000 rows (write parquet) 108 / 136 0.9 1075.7 0.3X
-1000 wide x 100 rows (read in-mem) 22 / 31 4.5 223.0 1.2X
-1000 wide x 100 rows (write in-mem) 33 / 41 3.0 332.0 0.8X
-1000 wide x 100 rows (read parquet) 49 / 66 2.0 493.6 0.5X
-1000 wide x 100 rows (write parquet) 127 / 139 0.8 1265.9 0.2X
-2500 wide x 40 rows (read in-mem) 23 / 34 4.4 226.0 1.2X
-2500 wide x 40 rows (write in-mem) 33 / 42 3.1 326.6 0.8X
-2500 wide x 40 rows (read parquet) 36 / 48 2.8 359.2 0.8X
-2500 wide x 40 rows (write parquet) 155 / 168 0.6 1549.2 0.2X
-*/
}
}