aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala104
1 files changed, 104 insertions, 0 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
new file mode 100644
index 0000000000..184f845b4d
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.catalyst.expressions.{Murmur3Hash, UnsafeProjection}
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateSafeProjection
+import org.apache.spark.sql.types._
+import org.apache.spark.util.Benchmark
+
+/**
+ * Benchmark for the previous interpreted hash function(InternalRow.hashCode) vs the new codegen
+ * hash expression(Murmur3Hash).
+ */
+object HashBenchmark {
+
+ def test(name: String, schema: StructType, iters: Int): Unit = {
+ val numRows = 1024 * 8
+
+ val generator = RandomDataGenerator.forType(schema, nullable = false).get
+ val encoder = RowEncoder(schema)
+ val attrs = schema.toAttributes
+ val safeProjection = GenerateSafeProjection.generate(attrs, attrs)
+
+ val rows = (1 to numRows).map(_ =>
+ // The output of encoder is UnsafeRow, use safeProjection to turn in into safe format.
+ safeProjection(encoder.toRow(generator().asInstanceOf[Row])).copy()
+ ).toArray
+
+ val benchmark = new Benchmark("Hash For " + name, iters * numRows)
+ benchmark.addCase("interpreted version") { _: Int =>
+ for (_ <- 0L until iters) {
+ var sum = 0
+ var i = 0
+ while (i < numRows) {
+ sum += rows(i).hashCode()
+ i += 1
+ }
+ }
+ }
+
+ val getHashCode = UnsafeProjection.create(new Murmur3Hash(attrs) :: Nil, attrs)
+ benchmark.addCase("codegen version") { _: Int =>
+ for (_ <- 0L until iters) {
+ var sum = 0
+ var i = 0
+ while (i < numRows) {
+ sum += getHashCode(rows(i)).getInt(0)
+ i += 1
+ }
+ }
+ }
+ benchmark.run()
+ }
+
+ def main(args: Array[String]): Unit = {
+ val simple = new StructType().add("i", IntegerType)
+ test("simple", simple, 1024)
+
+ val normal = new StructType()
+ .add("null", NullType)
+ .add("boolean", BooleanType)
+ .add("byte", ByteType)
+ .add("short", ShortType)
+ .add("int", IntegerType)
+ .add("long", LongType)
+ .add("float", FloatType)
+ .add("double", DoubleType)
+ .add("bigDecimal", DecimalType.SYSTEM_DEFAULT)
+ .add("smallDecimal", DecimalType.USER_DEFAULT)
+ .add("string", StringType)
+ .add("binary", BinaryType)
+ .add("date", DateType)
+ .add("timestamp", TimestampType)
+ test("normal", normal, 128)
+
+ val arrayOfInt = ArrayType(IntegerType)
+ val array = new StructType()
+ .add("array", arrayOfInt)
+ .add("arrayOfArray", ArrayType(arrayOfInt))
+ test("array", array, 64)
+
+ val mapOfInt = MapType(IntegerType, IntegerType)
+ val map = new StructType()
+ .add("map", mapOfInt)
+ .add("mapOfMap", MapType(IntegerType, mapOfInt))
+ test("map", map, 64)
+ }
+}