From 8c9cd0a7a719ce4286f77f35bb787e2b626a472e Mon Sep 17 00:00:00 2001 From: Xin Ren Date: Wed, 29 Jun 2016 11:25:00 -0700 Subject: [SPARK-16140][MLLIB][SPARKR][DOCS] Group k-means method in generated R doc https://issues.apache.org/jira/browse/SPARK-16140 ## What changes were proposed in this pull request? Group the R doc of spark.kmeans, predict(KM), summary(KM), read/write.ml(KM) under Rd spark.kmeans. The example code was updated. ## How was this patch tested? Tested on my local machine. On my laptop, `jekyll build` is failing to build the API docs, so here I can only show you the HTML I manually generated from the Rd files, with no CSS applied, but the doc content should be there. ![screenshotkmeans](https://cloud.githubusercontent.com/assets/3925641/16403203/c2c9ca1e-3ca7-11e6-9e29-f2164aee75fc.png) Author: Xin Ren Closes #13921 from keypointt/SPARK-16140. --- R/pkg/R/generics.R | 2 ++ R/pkg/R/mllib.R | 72 +++++++++++++++++++++++++----------------------------- 2 files changed, 35 insertions(+), 39 deletions(-) (limited to 'R/pkg') diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 27dfd67ffc..0e4350f861 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1247,6 +1247,7 @@ setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.gl #' @export setGeneric("glm") +#' predict #' @rdname predict #' @export setGeneric("predict", function(object, ...) { standardGeneric("predict") }) @@ -1271,6 +1272,7 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s #' @export setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") }) +#' write.ml #' @rdname write.ml #' @export setGeneric("write.ml", function(object, path, ...) 
{ standardGeneric("write.ml") }) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 897a376a94..4fe73671f8 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -267,9 +267,10 @@ setMethod("summary", signature(object = "NaiveBayesModel"), return(list(apriori = apriori, tables = tables)) }) -#' Fit a k-means model +#' K-Means Clustering Model #' -#' Fit a k-means model, similarly to R's kmeans(). +#' Fits a k-means clustering model against a Spark DataFrame, similarly to R's kmeans(). +#' Users can print, make predictions on the produced model and save the model to the input path. #' #' @param data SparkDataFrame for training #' @param formula A symbolic description of the model to be fitted. Currently only a few formula @@ -278,14 +279,32 @@ setMethod("summary", signature(object = "NaiveBayesModel"), #' @param k Number of centers #' @param maxIter Maximum iteration number #' @param initMode The initialization algorithm choosen to fit the model -#' @return A fitted k-means model +#' @return \code{spark.kmeans} returns a fitted k-means model #' @rdname spark.kmeans +#' @name spark.kmeans #' @export #' @examples #' \dontrun{ -#' model <- spark.kmeans(data, ~ ., k = 4, initMode = "random") +#' sparkR.session() +#' data(iris) +#' df <- createDataFrame(iris) +#' model <- spark.kmeans(df, Sepal_Length ~ Sepal_Width, k = 4, initMode = "random") +#' summary(model) +#' +#' # fitted values on training data +#' fitted <- predict(model, df) +#' head(select(fitted, "Sepal_Length", "prediction")) +#' +#' # save fitted model to input path +#' path <- "path/to/model" +#' write.ml(model, path) +#' +#' # can also read back the saved model and print +#' savedModel <- read.ml(path) +#' summary(savedModel) #' } #' @note spark.kmeans since 2.0.0 +#' @seealso \link{predict}, \link{read.ml}, \link{write.ml} setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"), function(data, formula, k = 2, maxIter = 20, initMode = c("k-means||", "random")) { formula <- 
paste(deparse(formula), collapse = "") @@ -301,7 +320,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" #' Note: A saved-loaded model does not support this method. #' #' @param object A fitted k-means model -#' @return SparkDataFrame containing fitted values +#' @return \code{fitted} returns a SparkDataFrame containing fitted values #' @rdname fitted #' @export #' @examples @@ -323,20 +342,12 @@ setMethod("fitted", signature(object = "KMeansModel"), } }) -#' Get the summary of a k-means model -#' -#' Returns the summary of a k-means model produced by spark.kmeans(), -#' similarly to R's summary(). +# Get the summary of a k-means model #' -#' @param object a fitted k-means model -#' @return the model's coefficients, size and cluster -#' @rdname summary +#' @param object A fitted k-means model +#' @return \code{summary} returns the model's coefficients, size and cluster +#' @rdname spark.kmeans #' @export -#' @examples -#' \dontrun{ -#' model <- spark.kmeans(trainingData, ~ ., 2) -#' summary(model) -#' } #' @note summary(KMeansModel) since 2.0.0 setMethod("summary", signature(object = "KMeansModel"), function(object, ...) { @@ -358,19 +369,11 @@ setMethod("summary", signature(object = "KMeansModel"), cluster = cluster, is.loaded = is.loaded)) }) -#' Predicted values based on model -#' -#' Makes predictions from a k-means model or a model produced by spark.kmeans(). 
+# Predicted values based on a k-means model #' -#' @param object A fitted k-means model -#' @rdname predict +#' @return \code{predict} returns the predicted values based on a k-means model +#' @rdname spark.kmeans #' @export -#' @examples -#' \dontrun{ -#' model <- spark.kmeans(trainingData, ~ ., 2) -#' predicted <- predict(model, testData) -#' showDF(predicted) -#' } #' @note predict(KMeansModel) since 2.0.0 setMethod("predict", signature(object = "KMeansModel"), function(object, newData) { @@ -477,24 +480,15 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat invisible(callJMethod(writer, "save", path)) }) -#' Save fitted MLlib model to the input path -#' -#' Save the k-means model to the input path. +# Save fitted MLlib model to the input path #' -#' @param object A fitted k-means model #' @param path The directory where the model is saved #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE #' which means throw exception if the output path exists. #' -#' @rdname write.ml +#' @rdname spark.kmeans #' @name write.ml #' @export -#' @examples -#' \dontrun{ -#' model <- spark.kmeans(trainingData, ~ ., k = 2) -#' path <- "path/to/model" -#' write.ml(model, path) -#' } #' @note write.ml(KMeansModel, character) since 2.0.0 setMethod("write.ml", signature(object = "KMeansModel", path = "character"), function(object, path, overwrite = FALSE) { -- cgit v1.2.3