diff options
Diffstat (limited to 'sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 1855eab96e..d69be36917 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -52,6 +52,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
    * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
    * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
    *
+   * Note that NaN values will be removed from the numerical column before calculation
    * @param col the name of the numerical column
    * @param probabilities a list of quantile probabilities
    *   Each number must belong to [0, 1].
@@ -67,7 +68,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
       col: String,
       probabilities: Array[Double],
       relativeError: Double): Array[Double] = {
-    StatFunctions.multipleApproxQuantiles(df, Seq(col), probabilities, relativeError).head.toArray
+    StatFunctions.multipleApproxQuantiles(df.select(col).na.drop(),
+      Seq(col), probabilities, relativeError).head.toArray
   }
 
   /**