diff options
author | Monica Liu <liu.monica.f@gmail.com> | 2015-10-13 22:24:52 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-10-13 22:24:52 -0700 |
commit | 8b32885704502ab2a715cf5142d7517181074428 (patch) | |
tree | 8e6547c43f98736fa3a22d8da9d52433d80a33e3 /R/pkg/inst/tests/test_sparkSQL.R | |
parent | ce3f9a80657751ee0bc0ed6a9b6558acbb40af4f (diff) | |
download | spark-8b32885704502ab2a715cf5142d7517181074428.tar.gz spark-8b32885704502ab2a715cf5142d7517181074428.tar.bz2 spark-8b32885704502ab2a715cf5142d7517181074428.zip |
[SPARK-10981] [SPARKR] SparkR Join improvements
I was having issues with collect() and orderBy() in Spark 1.5.0 so I used the DataFrame.R file and test_sparkSQL.R file from the Spark 1.5.1 download. I only modified the join() function in DataFrame.R to include "full", "fullouter", "left", "right", and "leftsemi" and added corresponding test cases in the test for join() and merge() in test_sparkSQL.R file.
Pull request because I filed this JIRA bug report:
https://issues.apache.org/jira/browse/SPARK-10981
Author: Monica Liu <liu.monica.f@gmail.com>
Closes #9029 from mfliu/master.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 27 |
1 files changed, 25 insertions, 2 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d5509e475d..46cab7646d 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -1071,7 +1071,7 @@ test_that("join() and merge() on a DataFrame", { expect_equal(names(joined2), c("age", "name", "name", "test")) expect_equal(count(joined2), 3) - joined3 <- join(df, df2, df$name == df2$name, "right_outer") + joined3 <- join(df, df2, df$name == df2$name, "rightouter") expect_equal(names(joined3), c("age", "name", "name", "test")) expect_equal(count(joined3), 4) expect_true(is.na(collect(orderBy(joined3, joined3$age))$age[2])) @@ -1082,11 +1082,34 @@ test_that("join() and merge() on a DataFrame", { expect_equal(count(joined4), 4) expect_equal(collect(orderBy(joined4, joined4$name))$newAge[3], 24) + joined5 <- join(df, df2, df$name == df2$name, "leftouter") + expect_equal(names(joined5), c("age", "name", "name", "test")) + expect_equal(count(joined5), 3) + expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[1])) + + joined6 <- join(df, df2, df$name == df2$name, "inner") + expect_equal(names(joined6), c("age", "name", "name", "test")) + expect_equal(count(joined6), 3) + + joined7 <- join(df, df2, df$name == df2$name, "leftsemi") + expect_equal(names(joined7), c("age", "name")) + expect_equal(count(joined7), 3) + + joined8 <- join(df, df2, df$name == df2$name, "left_outer") + expect_equal(names(joined8), c("age", "name", "name", "test")) + expect_equal(count(joined8), 3) + expect_true(is.na(collect(orderBy(joined8, joined8$age))$age[1])) + + joined9 <- join(df, df2, df$name == df2$name, "right_outer") + expect_equal(names(joined9), c("age", "name", "name", "test")) + expect_equal(count(joined9), 4) + expect_true(is.na(collect(orderBy(joined9, joined9$age))$age[2])) + merged <- select(merge(df, df2, df$name == df2$name, "outer"), alias(df$age + 5, "newAge"), df$name, df2$test) expect_equal(names(merged), c("newAge", "name", "test")) expect_equal(count(merged), 4) - expect_equal(collect(orderBy(merged, joined4$name))$newAge[3], 24) + expect_equal(collect(orderBy(merged, merged$name))$newAge[3], 24) }) test_that("toJSON() returns an RDD of the correct values", { |