about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/r/ml/glm.R
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/r/ml/glm.R')
-rw-r--r-- examples/src/main/r/ml/glm.R 20
1 files changed, 11 insertions, 9 deletions
diff --git a/examples/src/main/r/ml/glm.R b/examples/src/main/r/ml/glm.R
index e41af97751..ee13910382 100644
--- a/examples/src/main/r/ml/glm.R
+++ b/examples/src/main/r/ml/glm.R
@@ -25,11 +25,12 @@ library(SparkR)
sparkR.session(appName = "SparkR-ML-glm-example")
# $example on$
-irisDF <- suppressWarnings(createDataFrame(iris))
+training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
# Fit a generalized linear model of family "gaussian" with spark.glm
-gaussianDF <- irisDF
-gaussianTestDF <- irisDF
-gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
+df_list <- randomSplit(training, c(7,3), 2)
+gaussianDF <- df_list[[1]]
+gaussianTestDF <- df_list[[2]]
+gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
# Model summary
summary(gaussianGLM)
@@ -39,14 +40,15 @@ gaussianPredictions <- predict(gaussianGLM, gaussianTestDF)
head(gaussianPredictions)
# Fit a generalized linear model with glm (R-compliant)
-gaussianGLM2 <- glm(Sepal_Length ~ Sepal_Width + Species, gaussianDF, family = "gaussian")
+gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian")
summary(gaussianGLM2)
# Fit a generalized linear model of family "binomial" with spark.glm
-# Note: Filter out "setosa" from label column (two labels left) to match "binomial" family.
-binomialDF <- filter(irisDF, irisDF$Species != "setosa")
-binomialTestDF <- binomialDF
-binomialGLM <- spark.glm(binomialDF, Species ~ Sepal_Length + Sepal_Width, family = "binomial")
+training2 <- read.df("data/mllib/sample_binary_classification_data.txt", source = "libsvm")
+df_list2 <- randomSplit(training2, c(7,3), 2)
+binomialDF <- df_list2[[1]]
+binomialTestDF <- df_list2[[2]]
+binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")
# Model summary
summary(binomialGLM)