author     Felix Cheung <felixcheung_m@hotmail.com>   2017-01-26 21:06:39 -0800
committer  Felix Cheung <felixcheung@apache.org>      2017-01-26 21:06:39 -0800
commit     90817a6cd06068fa9f9ff77384a1fcba73b43006 (patch)
tree       57f16720535f76af3c4bb414dc3054bc3b1c184b /R/pkg/inst
parent     c0ba284300e494354f5bb205a10a12ac7daa2b5e (diff)
download   spark-90817a6cd06068fa9f9ff77384a1fcba73b43006.tar.gz
           spark-90817a6cd06068fa9f9ff77384a1fcba73b43006.tar.bz2
           spark-90817a6cd06068fa9f9ff77384a1fcba73b43006.zip
[SPARK-18788][SPARKR] Add API for getNumPartitions
## What changes were proposed in this pull request?
Adds a `getNumPartitions` API for SparkDataFrame, with documentation noting that the call converts the DataFrame into an RDD to get the partition count.
## How was this patch tested?
unit tests, manual tests
Author: Felix Cheung <felixcheung_m@hotmail.com>
Closes #16668 from felixcheung/rgetnumpartitions.
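For context, here is a minimal sketch of the new public API (assuming a local SparkR session; `cars` is the base R dataset also exercised in the tests below):

```r
library(SparkR)
sparkR.session(master = "local[2]")

# Create a SparkDataFrame with an explicit partition count.
df <- createDataFrame(cars, numPartitions = 3)

# New in this commit: getNumPartitions() on a SparkDataFrame.
# Per the commit message, the DataFrame is converted into an RDD
# internally to obtain the partition count.
getNumPartitions(df)  # expected: 3

sparkR.session.stop()
```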
Diffstat (limited to 'R/pkg/inst')
-rw-r--r--  R/pkg/inst/tests/testthat/test_rdd.R       10
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R  14
2 files changed, 12 insertions, 12 deletions
diff --git a/R/pkg/inst/tests/testthat/test_rdd.R b/R/pkg/inst/tests/testthat/test_rdd.R
index 2c41a6b075..ceb31bd896 100644
--- a/R/pkg/inst/tests/testthat/test_rdd.R
+++ b/R/pkg/inst/tests/testthat/test_rdd.R
@@ -29,8 +29,8 @@ intPairs <- list(list(1L, -1), list(2L, 100), list(2L, 1), list(1L, 200))
 intRdd <- parallelize(sc, intPairs, 2L)
 
 test_that("get number of partitions in RDD", {
-  expect_equal(getNumPartitions(rdd), 2)
-  expect_equal(getNumPartitions(intRdd), 2)
+  expect_equal(getNumPartitionsRDD(rdd), 2)
+  expect_equal(getNumPartitionsRDD(intRdd), 2)
 })
 
 test_that("first on RDD", {
@@ -305,18 +305,18 @@ test_that("repartition/coalesce on RDDs", {
 
   # repartition
   r1 <- repartitionRDD(rdd, 2)
-  expect_equal(getNumPartitions(r1), 2L)
+  expect_equal(getNumPartitionsRDD(r1), 2L)
   count <- length(collectPartition(r1, 0L))
   expect_true(count >= 8 && count <= 12)
 
   r2 <- repartitionRDD(rdd, 6)
-  expect_equal(getNumPartitions(r2), 6L)
+  expect_equal(getNumPartitionsRDD(r2), 6L)
   count <- length(collectPartition(r2, 0L))
   expect_true(count >= 0 && count <= 4)
 
   # coalesce
   r3 <- coalesce(rdd, 1)
-  expect_equal(getNumPartitions(r3), 1L)
+  expect_equal(getNumPartitionsRDD(r3), 1L)
   count <- length(collectPartition(r3, 0L))
   expect_equal(count, 20)
 })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index aaa8fb498c..417a03ff61 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -196,18 +196,18 @@ test_that("create DataFrame from RDD", {
   expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
   expect_equal(as.list(collect(where(df, df$name == "John"))),
                list(name = "John", age = 19L, height = 176.5))
-  expect_equal(getNumPartitions(toRDD(df)), 1)
+  expect_equal(getNumPartitions(df), 1)
 
   df <- as.DataFrame(cars, numPartitions = 2)
-  expect_equal(getNumPartitions(toRDD(df)), 2)
+  expect_equal(getNumPartitions(df), 2)
   df <- createDataFrame(cars, numPartitions = 3)
-  expect_equal(getNumPartitions(toRDD(df)), 3)
+  expect_equal(getNumPartitions(df), 3)
   # validate limit by num of rows
   df <- createDataFrame(cars, numPartitions = 60)
-  expect_equal(getNumPartitions(toRDD(df)), 50)
+  expect_equal(getNumPartitions(df), 50)
   # validate when 1 < (length(coll) / numSlices) << length(coll)
   df <- createDataFrame(cars, numPartitions = 20)
-  expect_equal(getNumPartitions(toRDD(df)), 20)
+  expect_equal(getNumPartitions(df), 20)
 
   df <- as.DataFrame(data.frame(0))
   expect_is(df, "SparkDataFrame")
@@ -215,7 +215,7 @@ test_that("create DataFrame from RDD", {
   expect_is(df, "SparkDataFrame")
   df <- as.DataFrame(data.frame(0), numPartitions = 2)
   # no data to partition, goes to 1
-  expect_equal(getNumPartitions(toRDD(df)), 1)
+  expect_equal(getNumPartitions(df), 1)
 
   setHiveContext(sc)
   sql("CREATE TABLE people (name string, age double, height float)")
@@ -234,7 +234,7 @@ test_that("createDataFrame uses files for large objects", {
   conf <- callJMethod(sparkSession, "conf")
   callJMethod(conf, "set", "spark.r.maxAllocationLimit", "100")
   df <- suppressWarnings(createDataFrame(iris, numPartitions = 3))
-  expect_equal(getNumPartitions(toRDD(df)), 3)
+  expect_equal(getNumPartitions(df), 3)
 
   # Resetting the conf back to default value
   callJMethod(conf, "set", "spark.r.maxAllocationLimit", toString(.Machine$integer.max / 10))
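The test changes above also reflect a rename on the internal RDD side: the RDD method formerly registered as `getNumPartitions` becomes `getNumPartitionsRDD`, freeing the `getNumPartitions` name for SparkDataFrame. A sketch of the distinction follows; note that the RDD helpers such as `toRDD` and `getNumPartitionsRDD` are unexported, so `:::` access is assumed here, much as the test suite relies on internal functions:

```r
library(SparkR)
sparkR.session(master = "local[2]")

df <- createDataFrame(cars, numPartitions = 2)

# Public API (added by this commit): partition count of a SparkDataFrame.
getNumPartitions(df)               # expected: 2

# Internal RDD API (renamed by this commit): getNumPartitionsRDD.
rdd <- SparkR:::toRDD(df)
SparkR:::getNumPartitionsRDD(rdd)  # expected: 2

sparkR.session.stop()
```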