aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/r/ml.R
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/r/ml.R')
-rw-r--r--examples/src/main/r/ml.R148
1 files changed, 0 insertions, 148 deletions
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
deleted file mode 100644
index a8a1274ac9..0000000000
--- a/examples/src/main/r/ml.R
+++ /dev/null
@@ -1,148 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# To run this example use
-# ./bin/spark-submit examples/src/main/r/ml.R
-
-# Load SparkR library into your R session
-library(SparkR)
-
-# Initialize SparkSession
-sparkR.session(appName = "SparkR-ML-example")
-
-############################ spark.glm and glm ##############################################
-# $example on:glm$
-irisDF <- suppressWarnings(createDataFrame(iris))
-# Fit a generalized linear model of family "gaussian" with spark.glm
-gaussianDF <- irisDF
-gaussianTestDF <- irisDF
-gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
-
-# Model summary
-summary(gaussianGLM)
-
-# Prediction
-gaussianPredictions <- predict(gaussianGLM, gaussianTestDF)
-showDF(gaussianPredictions)
-
-# Fit a generalized linear model with glm (R-compliant)
-gaussianGLM2 <- glm(Sepal_Length ~ Sepal_Width + Species, gaussianDF, family = "gaussian")
-summary(gaussianGLM2)
-
-# Fit a generalized linear model of family "binomial" with spark.glm
-binomialDF <- filter(irisDF, irisDF$Species != "setosa")
-binomialTestDF <- binomialDF
-binomialGLM <- spark.glm(binomialDF, Species ~ Sepal_Length + Sepal_Width, family = "binomial")
-
-# Model summary
-summary(binomialGLM)
-
-# Prediction
-binomialPredictions <- predict(binomialGLM, binomialTestDF)
-showDF(binomialPredictions)
-# $example off:glm$
-############################ spark.survreg ##############################################
-# $example on:survreg$
-# Use the ovarian dataset available in R survival package
-library(survival)
-
-# Fit an accelerated failure time (AFT) survival regression model with spark.survreg
-ovarianDF <- suppressWarnings(createDataFrame(ovarian))
-aftDF <- ovarianDF
-aftTestDF <- ovarianDF
-aftModel <- spark.survreg(aftDF, Surv(futime, fustat) ~ ecog_ps + rx)
-
-# Model summary
-summary(aftModel)
-
-# Prediction
-aftPredictions <- predict(aftModel, aftTestDF)
-showDF(aftPredictions)
-# $example off:survreg$
-############################ spark.naiveBayes ##############################################
-# $example on:naiveBayes$
-# Fit a Bernoulli naive Bayes model with spark.naiveBayes
-titanic <- as.data.frame(Titanic)
-titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
-nbDF <- titanicDF
-nbTestDF <- titanicDF
-nbModel <- spark.naiveBayes(nbDF, Survived ~ Class + Sex + Age)
-
-# Model summary
-summary(nbModel)
-
-# Prediction
-nbPredictions <- predict(nbModel, nbTestDF)
-showDF(nbPredictions)
-# $example off:naiveBayes$
-############################ spark.kmeans ##############################################
-# $example on:kmeans$
-# Fit a k-means model with spark.kmeans
-irisDF <- suppressWarnings(createDataFrame(iris))
-kmeansDF <- irisDF
-kmeansTestDF <- irisDF
-kmeansModel <- spark.kmeans(kmeansDF, ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
- k = 3)
-
-# Model summary
-summary(kmeansModel)
-
-# Get fitted result from the k-means model
-showDF(fitted(kmeansModel))
-
-# Prediction
-kmeansPredictions <- predict(kmeansModel, kmeansTestDF)
-showDF(kmeansPredictions)
-# $example off:kmeans$
-############################ model read/write ##############################################
-# $example on:read_write$
-irisDF <- suppressWarnings(createDataFrame(iris))
-# Fit a generalized linear model of family "gaussian" with spark.glm
-gaussianDF <- irisDF
-gaussianTestDF <- irisDF
-gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
-
-# Save and then load a fitted MLlib model
-modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
-write.ml(gaussianGLM, modelPath)
-gaussianGLM2 <- read.ml(modelPath)
-
-# Check model summary
-summary(gaussianGLM2)
-
-# Check model prediction
-gaussianPredictions <- predict(gaussianGLM2, gaussianTestDF)
-showDF(gaussianPredictions)
-
-unlink(modelPath)
-# $example off:read_write$
-############################ fit models with spark.lapply #####################################
-
-# Perform distributed training of multiple models with spark.lapply
-families <- c("gaussian", "poisson")
-train <- function(family) {
- model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
- summary(model)
-}
-model.summaries <- spark.lapply(families, train)
-
-# Print the summary of each model
-print(model.summaries)
-
-
-# Stop the SparkSession now
-sparkR.session.stop()