From e21e1c946c4b7448fb150cfa2d9419864ae6f9b5 Mon Sep 17 00:00:00 2001
From: Felix Cheung
Date: Fri, 21 Oct 2016 12:35:37 -0700
Subject: [SPARK-18013][SPARKR] add crossJoin API

## What changes were proposed in this pull request?

Add crossJoin and do not default to cross join if joinExpr is left out

## How was this patch tested?

unit test

Author: Felix Cheung

Closes #15559 from felixcheung/rcrossjoin.
---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'R/pkg/inst')

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 1c806869e9..3a987cd862 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1572,7 +1572,7 @@ test_that("filter() on a DataFrame", {
   #expect_true(is.ts(filter(1:100, rep(1, 3)))) # nolint
 })
 
-test_that("join() and merge() on a DataFrame", {
+test_that("join(), crossJoin() and merge() on a DataFrame", {
   df <- read.json(jsonPath)
 
   mockLines2 <- c("{\"name\":\"Michael\", \"test\": \"yes\"}",
@@ -1583,7 +1583,14 @@ test_that("join() and merge() on a DataFrame", {
   writeLines(mockLines2, jsonPath2)
   df2 <- read.json(jsonPath2)
 
-  joined <- join(df, df2)
+  # inner join, not cartesian join
+  expect_equal(count(where(join(df, df2), df$name == df2$name)), 3)
+  # cartesian join
+  expect_error(tryCatch(count(join(df, df2)), error = function(e) { stop(e) }),
+               paste0(".*(org.apache.spark.sql.AnalysisException: Detected cartesian product for",
+                      " INNER join between logical plans).*"))
+
+  joined <- crossJoin(df, df2)
   expect_equal(names(joined), c("age", "name", "name", "test"))
   expect_equal(count(joined), 12)
   expect_equal(names(collect(joined)), c("age", "name", "name", "test"))
-- 
cgit v1.2.3