aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/r/ml/ml.R
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2016-12-08 06:19:38 -0800
committerYanbo Liang <ybliang8@gmail.com>2016-12-08 06:19:38 -0800
commit9bf8f3cd4f62f921c32fb50b8abf49576a80874f (patch)
tree37a11c334001a981d5a87c5d0fefec67d2ef4889 /examples/src/main/r/ml/ml.R
parentb47b892e4579b7b06b4b2837ee4b614e517789f9 (diff)
downloadspark-9bf8f3cd4f62f921c32fb50b8abf49576a80874f.tar.gz
spark-9bf8f3cd4f62f921c32fb50b8abf49576a80874f.tar.bz2
spark-9bf8f3cd4f62f921c32fb50b8abf49576a80874f.zip
[SPARK-18325][SPARKR][ML] SparkR ML wrappers example code and user guide
## What changes were proposed in this pull request? * Add all R examples for ML wrappers which were added during the 2.1 release cycle. * Split the whole ```ml.R``` example file into individual examples for each algorithm, which makes it convenient for users to rerun them. * Add corresponding examples to the ML user guide. * Update the ML section of the SparkR user guide. Note: MLlib Scala/Java/Python examples will be consistent; however, SparkR examples may differ from them, since R users may use the algorithms in a different way, for example, using R ```formula``` to specify ```featuresCol``` and ```labelCol```. ## How was this patch tested? Run all examples manually. Author: Yanbo Liang <ybliang8@gmail.com> Closes #16148 from yanboliang/spark-18325.
Diffstat (limited to 'examples/src/main/r/ml/ml.R')
-rw-r--r--examples/src/main/r/ml/ml.R65
1 files changed, 65 insertions, 0 deletions
diff --git a/examples/src/main/r/ml/ml.R b/examples/src/main/r/ml/ml.R
new file mode 100644
index 0000000000..d601590c22
--- /dev/null
+++ b/examples/src/main/r/ml/ml.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/ml.R
+
+# Load the SparkR library into your R session
+library(SparkR)
+
+# Initialize the SparkSession, naming the application "SparkR-ML-example"
+sparkR.session(appName = "SparkR-ML-example")
+
+############################ model read/write ##############################################
+# $example on:read_write$
+irisDF <- suppressWarnings(createDataFrame(iris))
+# Fit a gaussian GLM with spark.glm; the same DataFrame serves as training and test data
+gaussianDF <- irisDF
+gaussianTestDF <- irisDF
+gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
+
+# Save the fitted MLlib model under a temporary path, then load it back
+modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
+write.ml(gaussianGLM, modelPath)
+gaussianGLM2 <- read.ml(modelPath)
+
+# Check the summary of the reloaded model
+summary(gaussianGLM2)
+
+# Check predictions made by the reloaded model
+gaussianPredictions <- predict(gaussianGLM2, gaussianTestDF)
+showDF(gaussianPredictions)
+# write.ml saves the model as a directory, which unlink only removes when recursive = TRUE
+unlink(modelPath, recursive = TRUE)
+# $example off:read_write$
+
+############################ fit models with spark.lapply #####################################
+# Fit one e1071 SVM per cost (5 log-spaced values from 1 to 1000), distributed via spark.lapply
+costs <- exp(seq(log(1), log(1000), length.out = 5))
+train <- function(cost) {
+  # e1071 is used through its namespace, so stop early wherever it is not installed
+  stopifnot(requireNamespace("e1071", quietly = TRUE))
+  summary(e1071::svm(Species ~ ., data = iris, cost = cost))
+}
+
+model.summaries <- spark.lapply(costs, train)
+
+# Print the list of per-cost model summaries
+print(model.summaries)
+
+# Stop the SparkSession now that every example above has run
+sparkR.session.stop()