aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests/testthat
diff options
context:
space:
mode:
authorwm624@hotmail.com <wm624@hotmail.com>2016-11-30 20:32:17 -0800
committerYanbo Liang <ybliang8@gmail.com>2016-11-30 20:32:17 -0800
commit2eb6764fbb23553fc17772d8a4a1cad55ff7ba6e (patch)
tree8d33015f0e154aec683c7fa7bc0770526c37c64e /R/pkg/inst/tests/testthat
parent0a811210f809eb5b80eae14694d484d45b48b3f6 (diff)
downloadspark-2eb6764fbb23553fc17772d8a4a1cad55ff7ba6e.tar.gz
spark-2eb6764fbb23553fc17772d8a4a1cad55ff7ba6e.tar.bz2
spark-2eb6764fbb23553fc17772d8a4a1cad55ff7ba6e.zip
[SPARK-18476][SPARKR][ML] SparkR Logistic Regression should support outputting the original label.
## What changes were proposed in this pull request? Similar to SPARK-18401, as a classification algorithm, logistic regression should support outputting the original label instead of the indexed label. In this PR, original label output is supported, and test cases are modified and added. The documentation is also modified. ## How was this patch tested? Unit tests. Author: wm624@hotmail.com <wm624@hotmail.com> Closes #15910 from wangmiao1981/audit.
Diffstat (limited to 'R/pkg/inst/tests/testthat')
-rw-r--r--R/pkg/inst/tests/testthat/test_mllib.R26
1 files changed, 18 insertions, 8 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 467e00cf79..0553e704bd 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -646,30 +646,30 @@ test_that("spark.isotonicRegression", {
test_that("spark.logit", {
# test binary logistic regression
- label <- c(1.0, 1.0, 1.0, 0.0, 0.0)
+ label <- c(0.0, 0.0, 0.0, 1.0, 1.0)
feature <- c(1.1419053, 0.9194079, -0.9498666, -1.1069903, 0.2809776)
binary_data <- as.data.frame(cbind(label, feature))
binary_df <- createDataFrame(binary_data)
blr_model <- spark.logit(binary_df, label ~ feature, thresholds = 1.0)
blr_predict <- collect(select(predict(blr_model, binary_df), "prediction"))
- expect_equal(blr_predict$prediction, c(0, 0, 0, 0, 0))
+ expect_equal(blr_predict$prediction, c("0.0", "0.0", "0.0", "0.0", "0.0"))
blr_model1 <- spark.logit(binary_df, label ~ feature, thresholds = 0.0)
blr_predict1 <- collect(select(predict(blr_model1, binary_df), "prediction"))
- expect_equal(blr_predict1$prediction, c(1, 1, 1, 1, 1))
+ expect_equal(blr_predict1$prediction, c("1.0", "1.0", "1.0", "1.0", "1.0"))
# test summary of binary logistic regression
blr_summary <- summary(blr_model)
blr_fmeasure <- collect(select(blr_summary$fMeasureByThreshold, "threshold", "F-Measure"))
- expect_equal(blr_fmeasure$threshold, c(0.8221347, 0.7884005, 0.6674709, 0.3785437, 0.3434487),
+ expect_equal(blr_fmeasure$threshold, c(0.6565513, 0.6214563, 0.3325291, 0.2115995, 0.1778653),
tolerance = 1e-4)
- expect_equal(blr_fmeasure$"F-Measure", c(0.5000000, 0.8000000, 0.6666667, 0.8571429, 0.7500000),
+ expect_equal(blr_fmeasure$"F-Measure", c(0.6666667, 0.5000000, 0.8000000, 0.6666667, 0.5714286),
tolerance = 1e-4)
blr_precision <- collect(select(blr_summary$precisionByThreshold, "threshold", "precision"))
- expect_equal(blr_precision$precision, c(1.0000000, 1.0000000, 0.6666667, 0.7500000, 0.6000000),
+ expect_equal(blr_precision$precision, c(1.0000000, 0.5000000, 0.6666667, 0.5000000, 0.4000000),
tolerance = 1e-4)
blr_recall <- collect(select(blr_summary$recallByThreshold, "threshold", "recall"))
- expect_equal(blr_recall$recall, c(0.3333333, 0.6666667, 0.6666667, 1.0000000, 1.0000000),
+ expect_equal(blr_recall$recall, c(0.5000000, 0.5000000, 1.0000000, 1.0000000, 1.0000000),
tolerance = 1e-4)
# test model save and read
@@ -683,6 +683,16 @@ test_that("spark.logit", {
expect_error(summary(blr_model2))
unlink(modelPath)
+ # test prediction label as text
+ training <- suppressWarnings(createDataFrame(iris))
+ binomial_training <- training[training$Species %in% c("versicolor", "virginica"), ]
+ binomial_model <- spark.logit(binomial_training, Species ~ Sepal_Length + Sepal_Width)
+ prediction <- predict(binomial_model, binomial_training)
+ expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "character")
+ expected <- c("virginica", "virginica", "virginica", "versicolor", "virginica",
+ "versicolor", "virginica", "versicolor", "virginica", "versicolor")
+ expect_equal(as.list(take(select(prediction, "prediction"), 10))[[1]], expected)
+
# test multinomial logistic regression
label <- c(0.0, 1.0, 2.0, 0.0, 0.0)
feature1 <- c(4.845940, 5.64480, 7.430381, 6.464263, 5.555667)
@@ -694,7 +704,7 @@ test_that("spark.logit", {
model <- spark.logit(df, label ~., family = "multinomial", thresholds = c(0, 1, 1))
predict1 <- collect(select(predict(model, df), "prediction"))
- expect_equal(predict1$prediction, c(0, 0, 0, 0, 0))
+ expect_equal(predict1$prediction, c("0.0", "0.0", "0.0", "0.0", "0.0"))
# Summary of multinomial logistic regression is not implemented yet
expect_error(summary(model))
})