aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/test
diff options
context:
space:
mode:
author Zhenhua Wang <wzh_zju@163.com> 2017-01-08 21:15:52 -0800
committer Reynold Xin <rxin@databricks.com> 2017-01-08 21:15:52 -0800
commit 3ccabdfb4d760d684b1e0c0ed448a57331f209f2 (patch)
tree 7d884209ea08ba5350b161e6bd5806b6083680db /sql/catalyst/src/test
parent 19d9d4c855eab8f647a5ec66b079172de81221d0 (diff)
download spark-3ccabdfb4d760d684b1e0c0ed448a57331f209f2.tar.gz
spark-3ccabdfb4d760d684b1e0c0ed448a57331f209f2.tar.bz2
spark-3ccabdfb4d760d684b1e0c0ed448a57331f209f2.zip
[SPARK-17077][SQL] Cardinality estimation for project operator
## What changes were proposed in this pull request?
Support cardinality estimation for the project operator.
## How was this patch tested?
Added a test suite and a test base class in the catalyst package.
Author: Zhenhua Wang <wzh_zju@163.com>
Closes #16430 from wzhfy/projectEstimation.
Diffstat (limited to 'sql/catalyst/src/test')
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/ProjectEstimationSuite.scala 51
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala 41
2 files changed, 92 insertions, 0 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/ProjectEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/ProjectEstimationSuite.scala
new file mode 100644
index 0000000000..4a1bed84f8
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/ProjectEstimationSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.statsEstimation
+
+import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeMap, AttributeReference}
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._
+import org.apache.spark.sql.types.IntegerType
+
+
+class ProjectEstimationSuite extends StatsEstimationTestBase {
+
+  // A project keeps the child's row count; the aliased column is expected to keep key2's stats.
+  test("estimate project with alias") {
+    val key1 = AttributeReference("key1", IntegerType)()
+    val key2 = AttributeReference("key2", IntegerType)()
+    val key1Stat = ColumnStat(2, Some(1), Some(2), 0, 4, 4)
+    val key2Stat = ColumnStat(1, Some(10), Some(10), 0, 4, 4)
+    val childColStats = AttributeMap(Seq(key1 -> key1Stat, key2 -> key2Stat))
+    // Child size: 2 rows * (4 + 4) bytes for the two integer columns.
+    val childStats =
+      Statistics(sizeInBytes = 2 * (4 + 4), rowCount = Some(2), attributeStats = childColStats)
+    val child = StatsTestPlan(outputList = Seq(key1, key2), stats = childStats)
+
+    val project = Project(Seq(key1, Alias(key2, "abc")()), child)
+    // Expected stats are keyed by output name, so the alias "abc" is paired with key2's stat.
+    val expectedAttrStats = toAttributeMap(Seq("key1" -> key1Stat, "abc" -> key2Stat), project)
+    val expectedSize = 2 * getRowSize(project.output, expectedAttrStats)
+    val expectedStats = Statistics(
+      sizeInBytes = expectedSize,
+      rowCount = Some(2),
+      attributeStats = expectedAttrStats)
+
+    assert(project.statistics == expectedStats)
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala
new file mode 100644
index 0000000000..fa5b290ecb
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.statsEstimation
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap}
+import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LeafNode, LogicalPlan, Statistics}
+
+
+class StatsEstimationTestBase extends SparkFunSuite {
+
+  /** Maps each (column name, stat) pair onto the plan output attribute with that name. */
+  def toAttributeMap(colStats: Seq[(String, ColumnStat)], plan: LogicalPlan)
+    : AttributeMap[ColumnStat] = {
+    val attrByName: Map[String, Attribute] = plan.output.map(attr => attr.name -> attr).toMap
+    AttributeMap(colStats.map { case (name, stat) => attrByName(name) -> stat })
+  }
+}
+
+/**
+ * Test-only leaf plan: `output` returns the given `outputList`, `statistics` the given `stats`.
+ */
+protected case class StatsTestPlan(outputList: Seq[Attribute], stats: Statistics) extends LeafNode {
+ override def output: Seq[Attribute] = outputList
+ override lazy val statistics = stats
+}