aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: Joseph K. Bradley <joseph@databricks.com> 2016-02-24 23:15:36 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-24 23:15:36 -0800
commit: 13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f (patch)
tree: c24ffc89625ab7b5168edef1e4c0e087bcd49836 /sql
parent: 2b042577fb077865c3fce69c9d4eda22fde92673 (diff)
downloadspark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.gz
spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.tar.bz2
spark-13ce10e95401b21fa40ca0bb27ebf9a0bfffe70f.zip
[SPARK-13479][SQL][PYTHON] Added Python API for approxQuantile
## What changes were proposed in this pull request? * Scala DataFrameStatFunctions: Added version of approxQuantile taking a List instead of an Array, for Python compatibility * Python DataFrame and DataFrameStatFunctions: Added approxQuantile ## How was this patch tested? * unit test in sql/tests.py Documentation was copied from the existing approxQuantile exactly. Author: Joseph K. Bradley <joseph@databricks.com> Closes #11356 from jkbradley/approx-quantile-python.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala10
1 files changed, 10 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 39a31ab028..3eb1f0f0d5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -71,6 +71,16 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
}
/**
+ * Python-friendly overload of [[approxQuantile()]]: takes a List, returns a java.util.List.
+ */
+ private[spark] def approxQuantile(
+ col: String,
+ probabilities: List[Double],
+ relativeError: Double): java.util.List[Double] = {
+ approxQuantile(col, probabilities.toArray, relativeError).toList.asJava
+ }
+
+ /**
* Calculate the sample covariance of two numerical columns of a DataFrame.
* @param col1 the name of the first column
* @param col2 the name of the second column