about | summary | refs | log | tree | commit | diff
path: root/examples/src
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2014-05-19 21:29:33 -0700
committer: Tathagata Das <tathagata.das1565@gmail.com> 2014-05-19 21:29:33 -0700
commit: bcb9dce6f444a977c714117811bce0c54b417650 (patch)
tree: d12d9ba87dcf5d85edc38f082adbb6fae2a19052 /examples/src
parent: b0ce22e071da4cc62ec5e29abf7b1299b8e4a6b0 (diff)
download: spark-bcb9dce6f444a977c714117811bce0c54b417650.tar.gz
spark-bcb9dce6f444a977c714117811bce0c54b417650.tar.bz2
spark-bcb9dce6f444a977c714117811bce0c54b417650.zip
[SPARK-1874][MLLIB] Clean up MLlib sample data
1. Added synthetic datasets for `MovieLensALS`, `LinearRegression`, `BinaryClassification`.
2. Embedded instructions in the help message of those example apps.

Per discussion with Matei on the JIRA page, new example data is under `data/mllib`.

Author: Xiangrui Meng <meng@databricks.com>

Closes #833 from mengxr/mllib-sample-data and squashes the following commits:

59f0a18 [Xiangrui Meng] add sample binary classification data
3c2f92f [Xiangrui Meng] add linear regression data
050f1ca [Xiangrui Meng] add a sample dataset for MovieLensALS example
Diffstat (limited to 'examples/src')
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala 12
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala 11
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala 15
3 files changed, 36 insertions, 2 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
index 4001908c98..56b02b65d8 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
@@ -29,8 +29,9 @@ import org.apache.spark.mllib.optimization.{SquaredL2Updater, L1Updater}
/**
* An example app for binary classification. Run with
* {{{
- * ./bin/run-example org.apache.spark.examples.mllib.BinaryClassification
+ * bin/run-example org.apache.spark.examples.mllib.BinaryClassification
* }}}
+ * A synthetic dataset is located at `data/mllib/sample_binary_classification_data.txt`.
* If you use it as a template to create your own app, please use `spark-submit` to submit your app.
*/
object BinaryClassification {
@@ -81,6 +82,15 @@ object BinaryClassification {
.required()
.text("input paths to labeled examples in LIBSVM format")
.action((x, c) => c.copy(input = x))
+ note(
+ """
+ |For example, the following command runs this app on a synthetic dataset:
+ |
+ | bin/spark-submit --class org.apache.spark.examples.mllib.BinaryClassification \
+ | examples/target/scala-*/spark-examples-*.jar \
+ | --algorithm LR --regType L2 --regParam 1.0 \
+ | data/mllib/sample_binary_classification_data.txt
+ """.stripMargin)
}
parser.parse(args, defaultParams).map { params =>
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala
index 658d370f86..4811bb70e4 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala
@@ -28,8 +28,9 @@ import org.apache.spark.mllib.optimization.{SimpleUpdater, SquaredL2Updater, L1U
/**
* An example app for linear regression. Run with
* {{{
- * ./bin/run-example org.apache.spark.examples.mllib.LinearRegression
+ * bin/run-example org.apache.spark.examples.mllib.LinearRegression
* }}}
+ * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`.
* If you use it as a template to create your own app, please use `spark-submit` to submit your app.
*/
object LinearRegression extends App {
@@ -68,6 +69,14 @@ object LinearRegression extends App {
.required()
.text("input paths to labeled examples in LIBSVM format")
.action((x, c) => c.copy(input = x))
+ note(
+ """
+ |For example, the following command runs this app on a synthetic dataset:
+ |
+ | bin/spark-submit --class org.apache.spark.examples.mllib.LinearRegression \
+ | examples/target/scala-*/spark-examples-*.jar \
+ | data/mllib/sample_linear_regression_data.txt
+ """.stripMargin)
}
parser.parse(args, defaultParams).map { params =>
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
index 0e4447e0de..6eb41e7ba3 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
@@ -29,6 +29,12 @@ import org.apache.spark.serializer.{KryoSerializer, KryoRegistrator}
/**
* An example app for ALS on MovieLens data (http://grouplens.org/datasets/movielens/).
+ * Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.MovieLensALS
+ * }}}
+ * A synthetic dataset in MovieLens format can be found at `data/mllib/sample_movielens_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
*/
object MovieLensALS {
@@ -70,6 +76,15 @@ object MovieLensALS {
.required()
.text("input paths to a MovieLens dataset of ratings")
.action((x, c) => c.copy(input = x))
+ note(
+ """
+ |For example, the following command runs this app on a synthetic dataset:
+ |
+ | bin/spark-submit --class org.apache.spark.examples.mllib.MovieLensALS \
+ | examples/target/scala-*/spark-examples-*.jar \
+ | --rank 5 --numIterations 20 --lambda 1.0 --kryo \
+ | data/mllib/sample_movielens_data.txt
+ """.stripMargin)
}
parser.parse(args, defaultParams).map { params =>