diff options
author | qhuang <qian.huang@intel.com> | 2015-05-05 20:39:56 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-05-05 20:39:56 -0700 |
commit | a4669443999dc13a1bb34509c827d8b9096ea84f (patch) | |
tree | 1d1b27d2c50cdee371ef530f77deaedc0d39549b /R/pkg/inst/tests/test_sparkSQL.R | |
parent | 51b3d41e160a1326a04536241b427e65b39ed8df (diff) | |
download | spark-a4669443999dc13a1bb34509c827d8b9096ea84f.tar.gz spark-a4669443999dc13a1bb34509c827d8b9096ea84f.tar.bz2 spark-a4669443999dc13a1bb34509c827d8b9096ea84f.zip |
[SPARK-6841] [SPARKR] add support for mean, median, stdev etc.
Moving here from https://github.com/amplab-extras/SparkR-pkg/pull/241
sum() has been implemented. (https://github.com/amplab-extras/SparkR-pkg/pull/242)
Phase 1 is now complete: mean, sd, and var have been implemented, but some parts still need to be improved following the suggestions in https://issues.apache.org/jira/browse/SPARK-6841
Author: qhuang <qian.huang@intel.com>
Closes #5446 from hqzizania/R and squashes the following commits:
f283572 [qhuang] add unit test for describe()
2e74d5a [qhuang] add describe() DataFrame API
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index f82e56fdd8..7a42e289fc 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -705,5 +705,16 @@ test_that("parquetFile works with multiple input paths", {
   expect_true(count(parquetDF) == count(df)*2)
 })
 
+test_that("describe() on a DataFrame", {
+  df <- jsonFile(sqlCtx, jsonPath)
+  stats <- describe(df, "age")
+  expect_true(collect(stats)[1, "summary"] == "count")
+  expect_true(collect(stats)[2, "age"] == 24.5)
+  expect_true(collect(stats)[3, "age"] == 5.5)
+  stats <- describe(df)
+  expect_true(collect(stats)[4, "name"] == "Andy")
+  expect_true(collect(stats)[5, "age"] == 30.0)
+})
+
 unlink(parquetPath)
 unlink(jsonPath)