aboutsummaryrefslogtreecommitdiff
path: root/R/pkg
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@databricks.com> 2015-05-18 21:53:44 -0700
committer Reynold Xin <rxin@databricks.com> 2015-05-18 21:53:52 -0700
commit 99436bd040cf477f475fa14fcf3a730350085c51 (patch)
tree beab5dffc884b35249b7b55734eb620a9984d0ce /R/pkg
parent 914ecd0504b68f7b01e825d8661fd208d3e40f1a (diff)
download spark-99436bd040cf477f475fa14fcf3a730350085c51.tar.gz
spark-99436bd040cf477f475fa14fcf3a730350085c51.tar.bz2
spark-99436bd040cf477f475fa14fcf3a730350085c51.zip
[SPARK-7687] [SQL] DataFrame.describe() should cast all aggregates to String
In `DataFrame.describe()`, the `count` aggregate produces an integer, the `avg` and `stdev` aggregates produce doubles, and `min` and `max` aggregates can produce varying types depending on what type of column they're applied to. As a result, we should cast all aggregate results to String so that `describe()`'s output types match its declared output schema.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #6218 from JoshRosen/SPARK-7687 and squashes the following commits:

146b615 [Josh Rosen] Fix R test.
2974bd5 [Josh Rosen] Cast to string type instead
f206580 [Josh Rosen] Cast to double to fix SPARK-7687
307ecbf [Josh Rosen] Add failing regression test for SPARK-7687

(cherry picked from commit c9fa870a6de3f7d0903fa7a75ea5ffb6a2fcd174)
Signed-off-by: Reynold Xin <rxin@databricks.com>
Diffstat (limited to 'R/pkg')
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 10
1 files changed, 5 insertions, 5 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 3e5658eb5b..1768c57fd0 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -757,12 +757,12 @@ test_that("parquetFile works with multiple input paths", {
test_that("describe() on a DataFrame", {
df <- jsonFile(sqlCtx, jsonPath)
stats <- describe(df, "age")
- expect_true(collect(stats)[1, "summary"] == "count")
- expect_true(collect(stats)[2, "age"] == 24.5)
- expect_true(collect(stats)[3, "age"] == 5.5)
+ expect_equal(collect(stats)[1, "summary"], "count")
+ expect_equal(collect(stats)[2, "age"], "24.5")
+ expect_equal(collect(stats)[3, "age"], "5.5")
stats <- describe(df)
- expect_true(collect(stats)[4, "name"] == "Andy")
- expect_true(collect(stats)[5, "age"] == 30.0)
+ expect_equal(collect(stats)[4, "name"], "Andy")
+ expect_equal(collect(stats)[5, "age"], "30")
})
unlink(parquetPath)