aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-07-07 17:47:29 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2016-07-07 17:47:29 -0700
commit6aa7d09f4e126f42e41085dec169c813379ed354 (patch)
treecbbf96af27d5047087e6ce01b2b1b5fb0956a88c /R
parentf4767bcc7a9d1bdd301f054776aa45e7c9f344a7 (diff)
downloadspark-6aa7d09f4e126f42e41085dec169c813379ed354.tar.gz
spark-6aa7d09f4e126f42e41085dec169c813379ed354.tar.bz2
spark-6aa7d09f4e126f42e41085dec169c813379ed354.zip
[SPARK-16425][R] `describe()` should not fail with non-numeric columns
## What changes were proposed in this pull request?

This PR prevents ERRORs when `describe(df)` or `summary(df)` is called for a `SparkDataFrame` with non-numeric columns. This failure happens only in `SparkR`.

**Before**
```r
> df <- createDataFrame(faithful)
> df <- withColumn(df, "boolean", df$waiting==79)
> summary(df)
16/07/07 14:15:16 ERROR RBackendHandler: describe on 34 failed
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
  org.apache.spark.sql.AnalysisException: cannot resolve 'avg(`boolean`)' due to data type mismatch: function average requires numeric types, not BooleanType;
```

**After**
```r
> df <- createDataFrame(faithful)
> df <- withColumn(df, "boolean", df$waiting==79)
> summary(df)
SparkDataFrame[summary:string, eruptions:string, waiting:string]
```

## How was this patch tested?

Passes the Jenkins tests with an updated test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #14096 from dongjoon-hyun/SPARK-16425.
Diffstat (limited to 'R')
-rw-r--r--R/pkg/R/DataFrame.R3
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R8
2 files changed, 7 insertions, 4 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 5944bbc765..a18eee3a0f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2622,8 +2622,7 @@ setMethod("describe",
setMethod("describe",
signature(x = "SparkDataFrame"),
function(x) {
- colList <- as.list(c(columns(x)))
- sdf <- callJMethod(x@sdf, "describe", colList)
+ sdf <- callJMethod(x@sdf, "describe", list())
dataFrame(sdf)
})
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index a0ab719202..e2a1da0f1e 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1824,13 +1824,17 @@ test_that("describe() and summarize() on a DataFrame", {
expect_equal(collect(stats)[2, "age"], "24.5")
expect_equal(collect(stats)[3, "age"], "7.7781745930520225")
stats <- describe(df)
- expect_equal(collect(stats)[4, "name"], "Andy")
+ expect_equal(collect(stats)[4, "name"], NULL)
expect_equal(collect(stats)[5, "age"], "30")
stats2 <- summary(df)
- expect_equal(collect(stats2)[4, "name"], "Andy")
+ expect_equal(collect(stats2)[4, "name"], NULL)
expect_equal(collect(stats2)[5, "age"], "30")
+ # SPARK-16425: SparkR summary() fails on column of type logical
+ df <- withColumn(df, "boolean", df$age == 30)
+ summary(df)
+
# Test base::summary is working
expect_equal(length(summary(attenu, digits = 4)), 35)
})