aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com> 2015-11-23 11:51:29 -0800
committer: Joseph K. Bradley <joseph@databricks.com> 2015-11-23 11:51:29 -0800
commit: 98d7ec7df4bb115dbd84cb9acd744b6c8abfebd5 (patch)
tree: 1c9f2845324f06519e5f42a0559873d2041fae0e /examples
parent: 5231cd5acaae69d735ba3209531705cc222f3cfb (diff)
download: spark-98d7ec7df4bb115dbd84cb9acd744b6c8abfebd5.tar.gz
spark-98d7ec7df4bb115dbd84cb9acd744b6c8abfebd5.tar.bz2
spark-98d7ec7df4bb115dbd84cb9acd744b6c8abfebd5.zip
[SPARK-11920][ML][DOC] ML LinearRegression should use correct dataset in examples and user guide doc
ML ```LinearRegression``` uses ```data/mllib/sample_libsvm_data.txt``` as the dataset in the examples and user guide doc, but it is actually a classification dataset rather than a regression dataset. We should use ```data/mllib/sample_linear_regression_data.txt``` instead. The deeper cause is that ```LinearRegression``` with the "normal" solver cannot solve this dataset correctly, possibly due to ill-conditioning and unreasonable labels. This issue has been reported at [SPARK-11918](https://issues.apache.org/jira/browse/SPARK-11918). It will confuse users if they run the example code but get an exception, so we should make this change, which clearly illustrates the usage of the ```LinearRegression``` algorithm. Author: Yanbo Liang <ybliang8@gmail.com> Closes #9905 from yanboliang/spark-11920.
Diffstat (limited to 'examples')
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java | 2
-rw-r--r-- examples/src/main/python/ml/linear_regression_with_elastic_net.py | 3
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala | 3
3 files changed, 5 insertions, 3 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
index 593f8fb3e9..4ad7676c8d 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
@@ -37,7 +37,7 @@ public class JavaLinearRegressionWithElasticNetExample {
// $example on$
// Load training data
DataFrame training = sqlContext.read().format("libsvm")
- .load("data/mllib/sample_libsvm_data.txt");
+ .load("data/mllib/sample_linear_regression_data.txt");
LinearRegression lr = new LinearRegression()
.setMaxIter(10)
diff --git a/examples/src/main/python/ml/linear_regression_with_elastic_net.py b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
index b027827633..a4cd40cf26 100644
--- a/examples/src/main/python/ml/linear_regression_with_elastic_net.py
+++ b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
@@ -29,7 +29,8 @@ if __name__ == "__main__":
# $example on$
# Load training data
- training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+ training = sqlContext.read.format("libsvm")\
+ .load("data/mllib/sample_linear_regression_data.txt")
lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
index 5a51ece6f9..22c824cea8 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
@@ -33,7 +33,8 @@ object LinearRegressionWithElasticNetExample {
// $example on$
// Load training data
- val training = sqlCtx.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+ val training = sqlCtx.read.format("libsvm")
+ .load("data/mllib/sample_linear_regression_data.txt")
val lr = new LinearRegression()
.setMaxIter(10)