aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Rosstin <asterazul@gmail.com> 2015-06-29 16:09:29 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-06-29 16:09:29 -0700
commit: 4e880cf5967c0933e1d098a1d1f7db34b23ca8f8 (patch)
tree: e72382d51ef619bdcfe7887f7971f7e5eb62f84e /mllib
parent: ed359de595d5dd67b666660eddf092eaf89041c8 (diff)
download: spark-4e880cf5967c0933e1d098a1d1f7db34b23ca8f8.tar.gz
spark-4e880cf5967c0933e1d098a1d1f7db34b23ca8f8.tar.bz2
spark-4e880cf5967c0933e1d098a1d1f7db34b23ca8f8.zip
[SPARK-8661][ML] for LinearRegressionSuite.scala, changed javadoc-style comments to regular multiline comments, to make copy-pasting R code more simple
for mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala, changed javadoc-style comments to regular multiline comments, to make copy-pasting R code more simple

Author: Rosstin <asterazul@gmail.com>

Closes #7098 from Rosstin/SPARK-8661 and squashes the following commits:

5a05dee [Rosstin] SPARK-8661 for LinearRegressionSuite.scala, changed javadoc-style comments to regular multiline comments to make it easier to copy-paste the R code.
bb9a4b1 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8660
242aedd [Rosstin] SPARK-8660, changed comment style from JavaDoc style to normal multiline comment in order to make copypaste into R easier, in file classification/LogisticRegressionSuite.scala
2cd2985 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8639
21ac1e5 [Rosstin] Merge branch 'master' of github.com:apache/spark into SPARK-8639
6c18058 [Rosstin] fixed minor typos in docs/README.md and docs/api.md
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala | 192
1 files changed, 96 insertions, 96 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index ad1e9da692..5f39d44f37 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -28,26 +28,26 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
@transient var dataset: DataFrame = _
@transient var datasetWithoutIntercept: DataFrame = _
- /**
- * In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
- * is the same as the one trained by R's glmnet package. The following instruction
- * describes how to reproduce the data in R.
- *
- * import org.apache.spark.mllib.util.LinearDataGenerator
- * val data =
- * sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, Array(4.7, 7.2),
- * Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)
- * data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)).coalesce(1)
- * .saveAsTextFile("path")
+ /*
+ In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
+ is the same as the one trained by R's glmnet package. The following instruction
+ describes how to reproduce the data in R.
+
+ import org.apache.spark.mllib.util.LinearDataGenerator
+ val data =
+ sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, Array(4.7, 7.2),
+ Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)
+ data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)).coalesce(1)
+ .saveAsTextFile("path")
*/
override def beforeAll(): Unit = {
super.beforeAll()
dataset = sqlContext.createDataFrame(
sc.parallelize(LinearDataGenerator.generateLinearInput(
6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2))
- /**
- * datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
- * training model without intercept
+ /*
+ datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
+ training model without intercept
*/
datasetWithoutIntercept = sqlContext.createDataFrame(
sc.parallelize(LinearDataGenerator.generateLinearInput(
@@ -59,20 +59,20 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = new LinearRegression
val model = trainer.fit(dataset)
- /**
- * Using the following R code to load the data and train the model using glmnet package.
- *
- * library("glmnet")
- * data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
- * features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
- * label <- as.numeric(data$V1)
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) 6.300528
- * as.numeric.data.V2. 4.701024
- * as.numeric.data.V3. 7.198257
+ /*
+ Using the following R code to load the data and train the model using glmnet package.
+
+ library("glmnet")
+ data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
+ features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
+ label <- as.numeric(data$V1)
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) 6.300528
+ as.numeric.data.V2. 4.701024
+ as.numeric.data.V3. 7.198257
*/
val interceptR = 6.298698
val weightsR = Array(4.700706, 7.199082)
@@ -94,29 +94,29 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model = trainer.fit(dataset)
val modelWithoutIntercept = trainer.fit(datasetWithoutIntercept)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
- * intercept = FALSE))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) .
- * as.numeric.data.V2. 6.995908
- * as.numeric.data.V3. 5.275131
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
+ intercept = FALSE))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) .
+ as.numeric.data.V2. 6.995908
+ as.numeric.data.V3. 5.275131
*/
val weightsR = Array(6.995908, 5.275131)
assert(model.intercept ~== 0 relTol 1E-3)
assert(model.weights(0) ~== weightsR(0) relTol 1E-3)
assert(model.weights(1) ~== weightsR(1) relTol 1E-3)
- /**
- * Then again with the data with no intercept:
- * > weightsWithoutIntercept
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) .
- * as.numeric.data3.V2. 4.70011
- * as.numeric.data3.V3. 7.19943
+ /*
+ Then again with the data with no intercept:
+ > weightsWithoutIntercept
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) .
+ as.numeric.data3.V2. 4.70011
+ as.numeric.data3.V3. 7.19943
*/
val weightsWithoutInterceptR = Array(4.70011, 7.19943)
@@ -129,14 +129,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) 6.24300
- * as.numeric.data.V2. 4.024821
- * as.numeric.data.V3. 6.679841
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) 6.24300
+ as.numeric.data.V2. 4.024821
+ as.numeric.data.V3. 6.679841
*/
val interceptR = 6.24300
val weightsR = Array(4.024821, 6.679841)
@@ -158,15 +158,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- * intercept=FALSE))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) .
- * as.numeric.data.V2. 6.299752
- * as.numeric.data.V3. 4.772913
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
+ intercept=FALSE))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) .
+ as.numeric.data.V2. 6.299752
+ as.numeric.data.V3. 4.772913
*/
val interceptR = 0.0
val weightsR = Array(6.299752, 4.772913)
@@ -187,14 +187,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) 6.328062
- * as.numeric.data.V2. 3.222034
- * as.numeric.data.V3. 4.926260
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) 6.328062
+ as.numeric.data.V2. 3.222034
+ as.numeric.data.V3. 4.926260
*/
val interceptR = 5.269376
val weightsR = Array(3.736216, 5.712356)
@@ -216,15 +216,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
- * intercept = FALSE))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) .
- * as.numeric.data.V2. 5.522875
- * as.numeric.data.V3. 4.214502
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ intercept = FALSE))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) .
+ as.numeric.data.V2. 5.522875
+ as.numeric.data.V3. 4.214502
*/
val interceptR = 0.0
val weightsR = Array(5.522875, 4.214502)
@@ -245,14 +245,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val trainer = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) 6.324108
- * as.numeric.data.V2. 3.168435
- * as.numeric.data.V3. 5.200403
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) 6.324108
+ as.numeric.data.V2. 3.168435
+ as.numeric.data.V3. 5.200403
*/
val interceptR = 5.696056
val weightsR = Array(3.670489, 6.001122)
@@ -274,15 +274,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
.setFitIntercept(false)
val model = trainer.fit(dataset)
- /**
- * weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- * intercept=FALSE))
- * > weights
- * 3 x 1 sparse Matrix of class "dgCMatrix"
- * s0
- * (Intercept) .
- * as.numeric.dataM.V2. 5.673348
- * as.numeric.dataM.V3. 4.322251
+ /*
+ weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
+ intercept=FALSE))
+ > weights
+ 3 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) .
+ as.numeric.dataM.V2. 5.673348
+ as.numeric.dataM.V3. 4.322251
*/
val interceptR = 0.0
val weightsR = Array(5.673348, 4.322251)