diff options
Diffstat (limited to 'examples/src/main/r/ml/glm.R')
-rw-r--r-- | examples/src/main/r/ml/glm.R | 20 |
1 file changed, 11 insertions, 9 deletions
diff --git a/examples/src/main/r/ml/glm.R b/examples/src/main/r/ml/glm.R
index e41af97751..ee13910382 100644
--- a/examples/src/main/r/ml/glm.R
+++ b/examples/src/main/r/ml/glm.R
@@ -25,11 +25,12 @@ library(SparkR)
 sparkR.session(appName = "SparkR-ML-glm-example")
 
 # $example on$
-irisDF <- suppressWarnings(createDataFrame(iris))
+training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
 # Fit a generalized linear model of family "gaussian" with spark.glm
-gaussianDF <- irisDF
-gaussianTestDF <- irisDF
-gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
+df_list <- randomSplit(training, c(7,3), 2)
+gaussianDF <- df_list[[1]]
+gaussianTestDF <- df_list[[2]]
+gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
 
 # Model summary
 summary(gaussianGLM)
@@ -39,14 +40,15 @@ gaussianPredictions <- predict(gaussianGLM, gaussianTestDF)
 head(gaussianPredictions)
 
 # Fit a generalized linear model with glm (R-compliant)
-gaussianGLM2 <- glm(Sepal_Length ~ Sepal_Width + Species, gaussianDF, family = "gaussian")
+gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian")
 summary(gaussianGLM2)
 
 # Fit a generalized linear model of family "binomial" with spark.glm
-# Note: Filter out "setosa" from label column (two labels left) to match "binomial" family.
-binomialDF <- filter(irisDF, irisDF$Species != "setosa")
-binomialTestDF <- binomialDF
-binomialGLM <- spark.glm(binomialDF, Species ~ Sepal_Length + Sepal_Width, family = "binomial")
+training2 <- read.df("data/mllib/sample_binary_classification_data.txt", source = "libsvm")
+df_list2 <- randomSplit(training2, c(7,3), 2)
+binomialDF <- df_list2[[1]]
+binomialTestDF <- df_list2[[2]]
+binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")
 
 # Model summary
 summary(binomialGLM)