diff options
author | Sean Zhong <seanzhong@databricks.com> | 2016-08-23 14:57:00 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-08-23 14:57:00 +0800 |
commit | cc33460a51d2890fe8f50f5b6b87003d6d210f04 (patch) | |
tree | 0a45fb85c9b3848112fc5b5e1ed63eda73669fa6 /sql/core/src/test/scala | |
parent | d2b3d3e63e1a9217de6ef507c350308017664a62 (diff) | |
download | spark-cc33460a51d2890fe8f50f5b6b87003d6d210f04.tar.gz spark-cc33460a51d2890fe8f50f5b6b87003d6d210f04.tar.bz2 spark-cc33460a51d2890fe8f50f5b6b87003d6d210f04.zip |
[SPARK-17188][SQL] Moves class QuantileSummaries to project catalyst for implementing percentile_approx
## What changes were proposed in this pull request?
This is a sub-task of [SPARK-16283](https://issues.apache.org/jira/browse/SPARK-16283) (Implement percentile_approx SQL function), which moves class QuantileSummaries to project catalyst so that it can be reused when implementing aggregation function `percentile_approx`.
## How was this patch tested?
This PR only does class relocation, class implementation is not changed.
Author: Sean Zhong <seanzhong@databricks.com>
Closes #14754 from clockfly/move_QuantileSummaries_to_catalyst.
Diffstat (limited to 'sql/core/src/test/scala')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/stat/ApproxQuantileSuite.scala | 129 |
1 file changed, 0 insertions, 129 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/stat/ApproxQuantileSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/stat/ApproxQuantileSuite.scala deleted file mode 100644 index 0a989d026c..0000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/stat/ApproxQuantileSuite.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
 */

package org.apache.spark.sql.execution.stat

import scala.util.Random

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.stat.StatFunctions.QuantileSummaries

/**
 * Tests for [[StatFunctions.QuantileSummaries]]: exact extrema, interior
 * quantiles within the promised rank error, and the merge operation, across
 * increasing, decreasing, and random inputs at several error/compression
 * settings.
 */
class ApproxQuantileSuite extends SparkFunSuite {

  // Fixed seed so the "random" dataset (and therefore every generated test
  // name and value) is deterministic across runs.
  private val r = new Random(1)
  // Size of each test dataset.
  private val n = 100
  // Named datasets: (label, values). Labels appear in generated test names.
  private val increasing = "increasing" -> (0 until n).map(_.toDouble)
  private val decreasing = "decreasing" -> (n until 0 by -1).map(_.toDouble)
  private val random = "random" -> Seq.fill(n)(math.ceil(r.nextDouble() * 1000))

  /**
   * Builds a compressed summary by inserting each element of `data` in order.
   *
   * @param epsi      target relative (rank) error of the summary
   * @param threshold compression threshold passed to [[QuantileSummaries]]
   */
  private def buildSummary(
      data: Seq[Double],
      epsi: Double,
      threshold: Int): QuantileSummaries = {
    var summary = new QuantileSummaries(threshold, epsi)
    data.foreach { x =>
      summary = summary.insert(x)
    }
    summary.compress()
  }

  /**
   * Asserts that the summary's answer for quantile `quant` lands within the
   * guaranteed rank window [(quant - eps) * size, (quant + eps) * size].
   */
  private def checkQuantile(quant: Double, data: Seq[Double], summary: QuantileSummaries): Unit = {
    val approx = summary.query(quant)
    // The rank of the approximation.
    val rank = data.count(_ < approx) // has to be <, not <= to be exact
    val lower = math.floor((quant - summary.relativeError) * data.size)
    val upper = math.ceil((quant + summary.relativeError) * data.size)
    val msg =
      s"$rank not in [$lower $upper], requested quantile: $quant, approx returned: $approx"
    assert(rank >= lower, msg)
    assert(rank <= upper, msg)
  }

  // Cross product of dataset shape, relative error, and compression
  // threshold; registers one pair of tests per combination.
  for {
    (seq_name, data) <- Seq(increasing, decreasing, random)
    epsi <- Seq(0.1, 0.0001)
    compression <- Seq(1000, 10)
  } {

    test(s"Extremas with epsi=$epsi and seq=$seq_name, compression=$compression") {
      val s = buildSummary(data, epsi, compression)
      // Quantiles 0.0 and 1.0 are expected to be exact (min/max), regardless
      // of the configured relative error.
      val min_approx = s.query(0.0)
      assert(min_approx == data.min, s"Did not return the min: min=${data.min}, got $min_approx")
      val max_approx = s.query(1.0)
      assert(max_approx == data.max, s"Did not return the max: max=${data.max}, got $max_approx")
    }

    test(s"Some quantile values with epsi=$epsi and seq=$seq_name, compression=$compression") {
      val s = buildSummary(data, epsi, compression)
      // The summary must have seen every inserted element.
      assert(s.count == data.size, s"Found count=${s.count} but data size=${data.size}")
      checkQuantile(0.9999, data, s)
      checkQuantile(0.9, data, s)
      checkQuantile(0.5, data, s)
      checkQuantile(0.1, data, s)
      checkQuantile(0.001, data, s)
    }
  }

  // Tests for merging procedure
  for {
    (seq_name, data) <- Seq(increasing, decreasing, random)
    epsi <- Seq(0.1, 0.0001)
    compression <- Seq(1000, 10)
  } {

    // Contiguous split: first half vs. second half of the data.
    val (data1, data2) = {
      val l = data.size
      data.take(l / 2) -> data.drop(l / 2)
    }

    test(s"Merging ordered lists with epsi=$epsi and seq=$seq_name, compression=$compression") {
      val s1 = buildSummary(data1, epsi, compression)
      val s2 = buildSummary(data2, epsi, compression)
      val s = s1.merge(s2)
      val min_approx = s.query(0.0)
      assert(min_approx == data.min, s"Did not return the min: min=${data.min}, got $min_approx")
      val max_approx = s.query(1.0)
      assert(max_approx == data.max, s"Did not return the max: max=${data.max}, got $max_approx")
      checkQuantile(0.9999, data, s)
      checkQuantile(0.9, data, s)
      checkQuantile(0.5, data, s)
      checkQuantile(0.1, data, s)
      checkQuantile(0.001, data, s)
    }

    // Interleaved split via sliding windows of 2.
    // NOTE(review): with sliding(2) every interior element of `data` appears
    // in BOTH halves (heads = elements 0..size-2, lasts = elements 1..size-1),
    // so the merged summary sees duplicated values while the quantile checks
    // below compare against the original `data` — confirm the overlap is
    // intentional.
    val (data11, data12) = {
      data.sliding(2).map(_.head).toSeq -> data.sliding(2).map(_.last).toSeq
    }

    test(s"Merging interleaved lists with epsi=$epsi and seq=$seq_name, compression=$compression") {
      val s1 = buildSummary(data11, epsi, compression)
      val s2 = buildSummary(data12, epsi, compression)
      val s = s1.merge(s2)
      val min_approx = s.query(0.0)
      assert(min_approx == data.min, s"Did not return the min: min=${data.min}, got $min_approx")
      val max_approx = s.query(1.0)
      assert(max_approx == data.max, s"Did not return the max: max=${data.max}, got $max_approx")
      checkQuantile(0.9999, data, s)
      checkQuantile(0.9, data, s)
      checkQuantile(0.5, data, s)
      checkQuantile(0.1, data, s)
      checkQuantile(0.001, data, s)
    }
  }

}