author     Sun Rui <rui.sun@intel.com>                        2015-12-07 10:38:17 -0800
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>   2015-12-07 10:38:17 -0800
commit     39d677c8f1ee7ebd7e142bec0415cf8f90ac84b6 (patch)
tree       a4e8d1cb04e4956d0157819402d88977ab248d89 /R
parent     9cde7d5fa87e7ddfff0b9c1212920a1d9000539b (diff)
[SPARK-12034][SPARKR] Eliminate warnings in SparkR test cases.
This PR:
1. Suppresses all known warnings.
2. Cleans up test cases and fixes some errors in them.
3. Fixes errors in the HiveContext-related test cases, which were not actually run previously due to a bug in creating TestHiveContext.
4. Supports 'testthat' package version 0.11.0, which prefers that test cases live under 'tests/testthat'.
5. Makes sure the default Hadoop file system is local when running the test cases.
6. Turns warnings into errors.

Author: Sun Rui <rui.sun@intel.com>

Closes #10030 from sun-rui/SPARK-12034.
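The interplay between items 1 and 6 is worth spelling out: once run-all.R sets options(warn = 2), any warning that reaches the top level is promoted to an error and fails the suite, but a warning muffled by suppressWarnings() never reaches that promotion step. A minimal, self-contained sketch of the pattern (the warning text here is illustrative only):

# With options(warn = 2) an unhandled warning aborts execution, but
# suppressWarnings() muffles the condition before promotion applies.
options(warn = 2)
noisy <- function() {
  warning("known, acceptable warning")  # illustrative message
  42
}
result <- suppressWarnings(noisy())  # no error: the warning was muffled
stopifnot(result == 42)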
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/inst/tests/testthat/jarTest.R (renamed from R/pkg/inst/tests/jarTest.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/packageInAJarTest.R (renamed from R/pkg/inst/tests/packageInAJarTest.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_Serde.R (renamed from R/pkg/inst/tests/test_Serde.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_binaryFile.R (renamed from R/pkg/inst/tests/test_binaryFile.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_binary_function.R (renamed from R/pkg/inst/tests/test_binary_function.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_broadcast.R (renamed from R/pkg/inst/tests/test_broadcast.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_client.R (renamed from R/pkg/inst/tests/test_client.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_context.R (renamed from R/pkg/inst/tests/test_context.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_includeJAR.R (renamed from R/pkg/inst/tests/test_includeJAR.R) |  2
-rw-r--r--  R/pkg/inst/tests/testthat/test_includePackage.R (renamed from R/pkg/inst/tests/test_includePackage.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_mllib.R (renamed from R/pkg/inst/tests/test_mllib.R) | 14
-rw-r--r--  R/pkg/inst/tests/testthat/test_parallelize_collect.R (renamed from R/pkg/inst/tests/test_parallelize_collect.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_rdd.R (renamed from R/pkg/inst/tests/test_rdd.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_shuffle.R (renamed from R/pkg/inst/tests/test_shuffle.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R (renamed from R/pkg/inst/tests/test_sparkSQL.R) | 68
-rw-r--r--  R/pkg/inst/tests/testthat/test_take.R (renamed from R/pkg/inst/tests/test_take.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_textFile.R (renamed from R/pkg/inst/tests/test_textFile.R) |  0
-rw-r--r--  R/pkg/inst/tests/testthat/test_utils.R (renamed from R/pkg/inst/tests/test_utils.R) |  0
-rw-r--r--  R/pkg/tests/run-all.R |  3
-rwxr-xr-x  R/run-tests.sh |  2
20 files changed, 50 insertions(+), 39 deletions(-)
diff --git a/R/pkg/inst/tests/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
index d68bb20950..d68bb20950 100644
--- a/R/pkg/inst/tests/jarTest.R
+++ b/R/pkg/inst/tests/testthat/jarTest.R
diff --git a/R/pkg/inst/tests/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
index 207a37a0cb..207a37a0cb 100644
--- a/R/pkg/inst/tests/packageInAJarTest.R
+++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R
diff --git a/R/pkg/inst/tests/test_Serde.R b/R/pkg/inst/tests/testthat/test_Serde.R
index dddce54d70..dddce54d70 100644
--- a/R/pkg/inst/tests/test_Serde.R
+++ b/R/pkg/inst/tests/testthat/test_Serde.R
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/testthat/test_binaryFile.R
index f2452ed97d..f2452ed97d 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/testthat/test_binaryFile.R
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests/testthat/test_binary_function.R
index f054ac9a87..f054ac9a87 100644
--- a/R/pkg/inst/tests/test_binary_function.R
+++ b/R/pkg/inst/tests/testthat/test_binary_function.R
diff --git a/R/pkg/inst/tests/test_broadcast.R b/R/pkg/inst/tests/testthat/test_broadcast.R
index bb86a5c922..bb86a5c922 100644
--- a/R/pkg/inst/tests/test_broadcast.R
+++ b/R/pkg/inst/tests/testthat/test_broadcast.R
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/testthat/test_client.R
index a0664f32f3..a0664f32f3 100644
--- a/R/pkg/inst/tests/test_client.R
+++ b/R/pkg/inst/tests/testthat/test_client.R
diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/testthat/test_context.R
index 1707e314be..1707e314be 100644
--- a/R/pkg/inst/tests/test_context.R
+++ b/R/pkg/inst/tests/testthat/test_context.R
diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/testthat/test_includeJAR.R
index cc1faeabff..f89aa8e507 100644
--- a/R/pkg/inst/tests/test_includeJAR.R
+++ b/R/pkg/inst/tests/testthat/test_includeJAR.R
@@ -20,7 +20,7 @@ runScript <- function() {
sparkHome <- Sys.getenv("SPARK_HOME")
sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
- scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+ scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/testthat/jarTest.R")
submitPath <- file.path(sparkHome, "bin/spark-submit")
res <- system2(command = submitPath,
args = c(jarPath, scriptPath),
diff --git a/R/pkg/inst/tests/test_includePackage.R b/R/pkg/inst/tests/testthat/test_includePackage.R
index 8152b448d0..8152b448d0 100644
--- a/R/pkg/inst/tests/test_includePackage.R
+++ b/R/pkg/inst/tests/testthat/test_includePackage.R
diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index e0667e5e22..08099dd96a 100644
--- a/R/pkg/inst/tests/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -26,7 +26,7 @@ sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)
test_that("glm and predict", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
test <- select(training, "Sepal_Length")
model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
prediction <- predict(model, test)
@@ -39,7 +39,7 @@ test_that("glm and predict", {
})
test_that("glm should work with long formula", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
training$LongLongLongLongLongName <- training$Sepal_Width
training$VeryLongLongLongLonLongName <- training$Sepal_Length
training$AnotherLongLongLongLongName <- training$Species
@@ -51,7 +51,7 @@ test_that("glm should work with long formula", {
})
test_that("predictions match with native glm", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
@@ -59,7 +59,7 @@ test_that("predictions match with native glm", {
})
test_that("dot minus and intercept vs native glm", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ . - Species + 0, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
@@ -67,7 +67,7 @@ test_that("dot minus and intercept vs native glm", {
})
test_that("feature interaction vs native glm", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Species:Sepal_Length, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
@@ -75,7 +75,7 @@ test_that("feature interaction vs native glm", {
})
test_that("summary coefficients match with native glm", {
- training <- createDataFrame(sqlContext, iris)
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training, solver = "normal"))
coefs <- unlist(stats$coefficients)
devianceResiduals <- unlist(stats$devianceResiduals)
@@ -92,7 +92,7 @@ test_that("summary coefficients match with native glm", {
})
test_that("summary coefficients match with native glm of family 'binomial'", {
- df <- createDataFrame(sqlContext, iris)
+ df <- suppressWarnings(createDataFrame(sqlContext, iris))
training <- filter(df, df$Species != "setosa")
stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
family = "binomial"))
diff --git a/R/pkg/inst/tests/test_parallelize_collect.R b/R/pkg/inst/tests/testthat/test_parallelize_collect.R
index 2552127cc5..2552127cc5 100644
--- a/R/pkg/inst/tests/test_parallelize_collect.R
+++ b/R/pkg/inst/tests/testthat/test_parallelize_collect.R
diff --git a/R/pkg/inst/tests/test_rdd.R b/R/pkg/inst/tests/testthat/test_rdd.R
index 7423b4f2be..7423b4f2be 100644
--- a/R/pkg/inst/tests/test_rdd.R
+++ b/R/pkg/inst/tests/testthat/test_rdd.R
diff --git a/R/pkg/inst/tests/test_shuffle.R b/R/pkg/inst/tests/testthat/test_shuffle.R
index adf0b91d25..adf0b91d25 100644
--- a/R/pkg/inst/tests/test_shuffle.R
+++ b/R/pkg/inst/tests/testthat/test_shuffle.R
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 6ef03ae976..39fc94aea5 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -133,38 +133,45 @@ test_that("create DataFrame from RDD", {
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
- df <- jsonFile(sqlContext, jsonPathNa)
- hiveCtx <- tryCatch({
- newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
- },
- error = function(err) {
- skip("Hive is not build with SparkSQL, skipped")
- })
- sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
- insertInto(df, "people")
- expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
- expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))
-
schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
- df2 <- createDataFrame(sqlContext, df.toRDD, schema)
- df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
+ df <- read.df(sqlContext, jsonPathNa, "json", schema)
+ df2 <- createDataFrame(sqlContext, toRDD(df), schema)
+ df2AsDF <- as.DataFrame(sqlContext, toRDD(df), schema)
expect_equal(columns(df2), c("name", "age", "height"))
expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
- expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
- expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
+ expect_equal(as.list(collect(where(df2, df2$name == "Bob"))),
+ list(name = "Bob", age = 16, height = 176.5))
+ expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))),
+ list(name = "Bob", age = 16, height = 176.5))
localDF <- data.frame(name=c("John", "Smith", "Sarah"),
- age=c(19, 23, 18),
- height=c(164.10, 181.4, 173.7))
+ age=c(19L, 23L, 18L),
+ height=c(176.5, 181.4, 173.7))
df <- createDataFrame(sqlContext, localDF, schema)
expect_is(df, "DataFrame")
expect_equal(count(df), 3)
expect_equal(columns(df), c("name", "age", "height"))
expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
- expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
+ expect_equal(as.list(collect(where(df, df$name == "John"))),
+ list(name = "John", age = 19L, height = 176.5))
+
+ ssc <- callJMethod(sc, "sc")
+ hiveCtx <- tryCatch({
+ newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+ },
+ error = function(err) {
+ skip("Hive is not build with SparkSQL, skipped")
+ })
+ sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+ df <- read.df(hiveCtx, jsonPathNa, "json", schema)
+ invisible(insertInto(df, "people"))
+ expect_equal(collect(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"))$age,
+ c(16))
+ expect_equal(collect(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"))$height,
+ c(176.5))
})
test_that("convert NAs to null type in DataFrames", {
@@ -250,7 +257,7 @@ test_that("create DataFrame from list or data.frame", {
ldf2 <- collect(df)
expect_equal(ldf$a, ldf2$a)
- irisdf <- createDataFrame(sqlContext, iris)
+ irisdf <- suppressWarnings(createDataFrame(sqlContext, iris))
iris_collected <- collect(irisdf)
expect_equivalent(iris_collected[,-5], iris[,-5])
expect_equal(iris_collected$Species, as.character(iris$Species))
@@ -463,7 +470,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", {
RDD2 <- toRDD(df)
unioned <- unionRDD(RDD1, RDD2)
expect_is(unioned, "RDD")
- expect_equal(SparkR:::getSerializedMode(unioned), "byte")
+ expect_equal(getSerializedMode(unioned), "byte")
expect_equal(collect(unioned)[[2]]$name, "Andy")
})
@@ -485,13 +492,13 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", {
unionByte <- unionRDD(rdd, dfRDD)
expect_is(unionByte, "RDD")
- expect_equal(SparkR:::getSerializedMode(unionByte), "byte")
+ expect_equal(getSerializedMode(unionByte), "byte")
expect_equal(collect(unionByte)[[1]], 1)
expect_equal(collect(unionByte)[[12]]$name, "Andy")
unionString <- unionRDD(textRDD, dfRDD)
expect_is(unionString, "RDD")
- expect_equal(SparkR:::getSerializedMode(unionString), "byte")
+ expect_equal(getSerializedMode(unionString), "byte")
expect_equal(collect(unionString)[[1]], "Michael")
expect_equal(collect(unionString)[[5]]$name, "Andy")
})
@@ -504,7 +511,7 @@ test_that("objectFile() works with row serialization", {
objectIn <- objectFile(sc, objectPath)
expect_is(objectIn, "RDD")
- expect_equal(SparkR:::getSerializedMode(objectIn), "byte")
+ expect_equal(getSerializedMode(objectIn), "byte")
expect_equal(collect(objectIn)[[2]]$age, 30)
})
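Dropping the SparkR::: qualifier on getSerializedMode (here and in the hunks below) presumably works because testthat's test_package() evaluates test files in an environment where the package namespace is visible, so unexported helpers can be called without the triple colon. A toy illustration of that visibility rule, using a hypothetical environment standing in for a namespace:

# Hypothetical sketch: code evaluated inside an environment sees its
# bindings directly; the same name is not visible from the outside.
ns <- new.env()
ns$helper <- function() "internal"
evalq(helper(), ns)  # works: evaluated inside 'ns'
# helper()           # would fail when called from the global environment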
@@ -849,6 +856,7 @@ test_that("write.df() as parquet file", {
})
test_that("test HiveContext", {
+ ssc <- callJMethod(sc, "sc")
hiveCtx <- tryCatch({
newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
},
@@ -863,10 +871,10 @@ test_that("test HiveContext", {
expect_equal(count(df2), 3)
jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp")
- saveAsTable(df, "json", "json", "append", path = jsonPath2)
- df3 <- sql(hiveCtx, "select * from json")
+ invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
+ df3 <- sql(hiveCtx, "select * from json2")
expect_is(df3, "DataFrame")
- expect_equal(count(df3), 6)
+ expect_equal(count(df3), 3)
})
test_that("column operators", {
@@ -1311,7 +1319,7 @@ test_that("toJSON() returns an RDD of the correct values", {
df <- jsonFile(sqlContext, jsonPath)
testRDD <- toJSON(df)
expect_is(testRDD, "RDD")
- expect_equal(SparkR:::getSerializedMode(testRDD), "string")
+ expect_equal(getSerializedMode(testRDD), "string")
expect_equal(collect(testRDD)[[1]], mockLines[1])
})
@@ -1641,7 +1649,7 @@ test_that("SQL error message is returned from JVM", {
expect_equal(grepl("Table not found: blah", retError), TRUE)
})
-irisDF <- createDataFrame(sqlContext, iris)
+irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
test_that("Method as.data.frame as a synonym for collect()", {
expect_equal(as.data.frame(irisDF), collect(irisDF))
@@ -1670,7 +1678,7 @@ test_that("attach() on a DataFrame", {
})
test_that("with() on a DataFrame", {
- df <- createDataFrame(sqlContext, iris)
+ df <- suppressWarnings(createDataFrame(sqlContext, iris))
expect_error(Sepal_Length)
sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")
diff --git a/R/pkg/inst/tests/test_take.R b/R/pkg/inst/tests/testthat/test_take.R
index c2c724cdc7..c2c724cdc7 100644
--- a/R/pkg/inst/tests/test_take.R
+++ b/R/pkg/inst/tests/testthat/test_take.R
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/testthat/test_textFile.R
index a9cf83dbdb..a9cf83dbdb 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/testthat/test_textFile.R
diff --git a/R/pkg/inst/tests/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 12df4cf4f6..12df4cf4f6 100644
--- a/R/pkg/inst/tests/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index 4f8a1ed2d8..1d04656ac2 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -18,4 +18,7 @@
library(testthat)
library(SparkR)
+# Turn all warnings into errors
+options("warn" = 2)
+
test_package("SparkR")
diff --git a/R/run-tests.sh b/R/run-tests.sh
index e82ad0ba2c..e64a4ea94c 100755
--- a/R/run-tests.sh
+++ b/R/run-tests.sh
@@ -23,7 +23,7 @@ FAILED=0
LOGFILE=$FWDIR/unit-tests.out
rm -f $LOGFILE
-SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
FAILED=$((PIPESTATUS[0]||$FAILED))
if [[ $FAILED != 0 ]]; then
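Item 5 of the commit message lands in this last hunk: Spark copies any property prefixed spark.hadoop. into the job's Hadoop Configuration, so spark.hadoop.fs.default.name="file:///" pins the default filesystem to local disk even if the machine's HADOOP_CONF_DIR points at HDFS. A hedged way to verify the effective setting from an R session (assumes a running SparkR context handle in sc; the call chain mirrors sc.hadoopConfiguration().get(...) on the JVM side):

# Read back the effective Hadoop default filesystem through the JVM.
ssc <- callJMethod(sc, "sc")                      # Java SparkContext
hconf <- callJMethod(ssc, "hadoopConfiguration")  # org.apache.hadoop.conf.Configuration
callJMethod(hconf, "get", "fs.default.name")      # expect "file:///"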