author     Xusen Yin <yinxusen@gmail.com>        2015-11-17 13:59:59 -0800
committer  Xiangrui Meng <meng@databricks.com>   2015-11-17 13:59:59 -0800
commit     328eb49e6222271337e09188853b29c8f32fb157 (patch)
tree       ddebcfb1f3430cfd084e901af71b71ba7259d1d9 /examples
parent     fa603e08de641df16d066302be5d5f92a60a923e (diff)
[SPARK-11729] Replace example code in ml-linear-methods.md using include_example
JIRA link: https://issues.apache.org/jira/browse/SPARK-11729

Author: Xusen Yin <yinxusen@gmail.com>

Closes #9713 from yinxusen/SPARK-11729.
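Each new example file brackets the code to be shown in the documentation between $example on$ and $example off$ markers. The docs-side change is not part of this diff (which is limited to examples/): docs/ml-linear-methods.md is updated to pull in the delimited region with Jekyll include_example tags, e.g. a line of the form {% include_example scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala %} (illustrative only; the exact doc edits are outside this diffstat).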
Diffstat (limited to 'examples')
-rw-r--r--  examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java    65
-rw-r--r--  examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java          84
-rw-r--r--  examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java   55
-rw-r--r--  examples/src/main/python/ml/linear_regression_with_elastic_net.py                                      44
-rw-r--r--  examples/src/main/python/ml/logistic_regression_with_elastic_net.py                                    44
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala       61
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala            77
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala     53
8 files changed, 483 insertions, 0 deletions
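For context (not part of this commit): once Spark is built, the new examples can be run from the Spark root directory with the usual launcher scripts, e.g. ./bin/run-example ml.LinearRegressionWithElasticNetExample for the Scala and Java classes, or ./bin/spark-submit examples/src/main/python/ml/logistic_regression_with_elastic_net.py for the Python scripts.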
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
new file mode 100644
index 0000000000..593f8fb3e9
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.ml.regression.LinearRegression;
+import org.apache.spark.ml.regression.LinearRegressionModel;
+import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.SQLContext;
+// $example off$
+
+public class JavaLinearRegressionWithElasticNetExample {
+ public static void main(String[] args) {
+ SparkConf conf = new SparkConf().setAppName("JavaLinearRegressionWithElasticNetExample");
+ JavaSparkContext jsc = new JavaSparkContext(conf);
+ SQLContext sqlContext = new SQLContext(jsc);
+
+ // $example on$
+ // Load training data
+ DataFrame training = sqlContext.read().format("libsvm")
+ .load("data/mllib/sample_libsvm_data.txt");
+
+ LinearRegression lr = new LinearRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8);
+
+ // Fit the model
+ LinearRegressionModel lrModel = lr.fit(training);
+
+ // Print the coefficients and intercept for linear regression
+ System.out.println("Coefficients: "
+ + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
+
+ // Summarize the model over the training set and print out some metrics
+ LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
+ System.out.println("numIterations: " + trainingSummary.totalIterations());
+ System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
+ trainingSummary.residuals().show();
+ System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
+ System.out.println("r2: " + trainingSummary.r2());
+ // $example off$
+
+ jsc.stop();
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
new file mode 100644
index 0000000000..986f3b3b28
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary;
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.ml.classification.LogisticRegressionTrainingSummary;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.functions;
+// $example off$
+
+public class JavaLogisticRegressionSummaryExample {
+ public static void main(String[] args) {
+ SparkConf conf = new SparkConf().setAppName("JavaLogisticRegressionSummaryExample");
+ JavaSparkContext jsc = new JavaSparkContext(conf);
+ SQLContext sqlContext = new SQLContext(jsc);
+
+ // Load training data
+ DataFrame training = sqlContext.read().format("libsvm")
+ .load("data/mllib/sample_libsvm_data.txt");
+
+ LogisticRegression lr = new LogisticRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8);
+
+ // Fit the model
+ LogisticRegressionModel lrModel = lr.fit(training);
+
+ // $example on$
+ // Extract the summary from the returned LogisticRegressionModel instance trained in the earlier
+ // example
+ LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
+
+ // Obtain the loss per iteration.
+ double[] objectiveHistory = trainingSummary.objectiveHistory();
+ for (double lossPerIteration : objectiveHistory) {
+ System.out.println(lossPerIteration);
+ }
+
+ // Obtain the metrics useful to judge performance on test data.
+ // We cast the summary to a BinaryLogisticRegressionSummary since the problem is a binary
+ // classification problem.
+ BinaryLogisticRegressionSummary binarySummary =
+ (BinaryLogisticRegressionSummary) trainingSummary;
+
+ // Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
+ DataFrame roc = binarySummary.roc();
+ roc.show();
+ roc.select("FPR").show();
+ System.out.println(binarySummary.areaUnderROC());
+
+ // Get the threshold corresponding to the maximum F-Measure and rerun LogisticRegression with
+ // this selected threshold.
+ DataFrame fMeasure = binarySummary.fMeasureByThreshold();
+ double maxFMeasure = fMeasure.select(functions.max("F-Measure")).head().getDouble(0);
+ double bestThreshold = fMeasure.where(fMeasure.col("F-Measure").equalTo(maxFMeasure))
+ .select("threshold").head().getDouble(0);
+ lrModel.setThreshold(bestThreshold);
+ // $example off$
+
+ jsc.stop();
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
new file mode 100644
index 0000000000..1d28279d72
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.SQLContext;
+// $example off$
+
+public class JavaLogisticRegressionWithElasticNetExample {
+ public static void main(String[] args) {
+ SparkConf conf = new SparkConf().setAppName("JavaLogisticRegressionWithElasticNetExample");
+ JavaSparkContext jsc = new JavaSparkContext(conf);
+ SQLContext sqlContext = new SQLContext(jsc);
+
+ // $example on$
+ // Load training data
+ DataFrame training = sqlContext.read().format("libsvm")
+ .load("data/mllib/sample_libsvm_data.txt");
+
+ LogisticRegression lr = new LogisticRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8);
+
+ // Fit the model
+ LogisticRegressionModel lrModel = lr.fit(training);
+
+ // Print the coefficients and intercept for logistic regression
+ System.out.println("Coefficients: "
+ + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
+ // $example off$
+
+ jsc.stop();
+ }
+}
diff --git a/examples/src/main/python/ml/linear_regression_with_elastic_net.py b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
new file mode 100644
index 0000000000..b027827633
--- /dev/null
+++ b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.regression import LinearRegression
+# $example off$
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="LinearRegressionWithElasticNet")
+ sqlContext = SQLContext(sc)
+
+ # $example on$
+ # Load training data
+ training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+ lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+ # Fit the model
+ lrModel = lr.fit(training)
+
+ # Print the coefficients and intercept for linear regression
+ print("Coefficients: " + str(lrModel.coefficients))
+ print("Intercept: " + str(lrModel.intercept))
+ # $example off$
+
+ sc.stop()
diff --git a/examples/src/main/python/ml/logistic_regression_with_elastic_net.py b/examples/src/main/python/ml/logistic_regression_with_elastic_net.py
new file mode 100644
index 0000000000..b0b1d27e13
--- /dev/null
+++ b/examples/src/main/python/ml/logistic_regression_with_elastic_net.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+# $example on$
+from pyspark.ml.classification import LogisticRegression
+# $example off$
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="LogisticRegressionWithElasticNet")
+ sqlContext = SQLContext(sc)
+
+ # $example on$
+ # Load training data
+ training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+ lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+ # Fit the model
+ lrModel = lr.fit(training)
+
+ # Print the coefficients and intercept for logistic regression
+ print("Coefficients: " + str(lrModel.coefficients))
+ print("Intercept: " + str(lrModel.intercept))
+ # $example off$
+
+ sc.stop()
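Note that the two Python examples above stop at printing the coefficients and intercept; unlike the Scala and Java files in this commit, they do not query a training summary. As an illustrative sketch only (not part of this commit, and assuming the BinaryClassificationEvaluator API in pyspark.ml.evaluation), area under the ROC curve could be obtained from Python roughly as follows:

from __future__ import print_function

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator

if __name__ == "__main__":
    sc = SparkContext(appName="LogisticRegressionEvaluationSketch")
    sqlContext = SQLContext(sc)

    # Fit the same elastic-net logistic regression as in the example above
    training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
    lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
    lrModel = lr.fit(training)

    # Score the training data and evaluate area under the ROC curve
    predictions = lrModel.transform(training)
    evaluator = BinaryClassificationEvaluator(rawPredictionCol="rawPrediction",
                                              labelCol="label",
                                              metricName="areaUnderROC")
    print("areaUnderROC: " + str(evaluator.evaluate(predictions)))

    sc.stop()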
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
new file mode 100644
index 0000000000..5a51ece6f9
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.regression.LinearRegression
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object LinearRegressionWithElasticNetExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("LinearRegressionWithElasticNetExample")
+ val sc = new SparkContext(conf)
+ val sqlCtx = new SQLContext(sc)
+
+ // $example on$
+ // Load training data
+ val training = sqlCtx.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+ val lr = new LinearRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8)
+
+ // Fit the model
+ val lrModel = lr.fit(training)
+
+ // Print the coefficients and intercept for linear regression
+ println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
+
+ // Summarize the model over the training set and print out some metrics
+ val trainingSummary = lrModel.summary
+ println(s"numIterations: ${trainingSummary.totalIterations}")
+ println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
+ trainingSummary.residuals.show()
+ println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
+ println(s"r2: ${trainingSummary.r2}")
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala
new file mode 100644
index 0000000000..4c420421b6
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, LogisticRegression}
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.functions.max
+import org.apache.spark.{SparkConf, SparkContext}
+
+object LogisticRegressionSummaryExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("LogisticRegressionSummaryExample")
+ val sc = new SparkContext(conf)
+ val sqlCtx = new SQLContext(sc)
+ import sqlCtx.implicits._
+
+ // Load training data
+ val training = sqlCtx.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+ val lr = new LogisticRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8)
+
+ // Fit the model
+ val lrModel = lr.fit(training)
+
+ // $example on$
+ // Extract the summary from the returned LogisticRegressionModel instance trained in the earlier
+ // example
+ val trainingSummary = lrModel.summary
+
+ // Obtain the objective per iteration.
+ val objectiveHistory = trainingSummary.objectiveHistory
+ objectiveHistory.foreach(loss => println(loss))
+
+ // Obtain the metrics useful to judge performance on test data.
+ // We cast the summary to a BinaryLogisticRegressionSummary since the problem is a
+ // binary classification problem.
+ val binarySummary = trainingSummary.asInstanceOf[BinaryLogisticRegressionSummary]
+
+ // Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
+ val roc = binarySummary.roc
+ roc.show()
+ println(binarySummary.areaUnderROC)
+
+ // Set the model threshold to maximize F-Measure
+ val fMeasure = binarySummary.fMeasureByThreshold
+ val maxFMeasure = fMeasure.select(max("F-Measure")).head().getDouble(0)
+ val bestThreshold = fMeasure.where($"F-Measure" === maxFMeasure)
+ .select("threshold").head().getDouble(0)
+ lrModel.setThreshold(bestThreshold)
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
new file mode 100644
index 0000000000..9ee995b52c
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object LogisticRegressionWithElasticNetExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("LogisticRegressionWithElasticNetExample")
+ val sc = new SparkContext(conf)
+ val sqlCtx = new SQLContext(sc)
+
+ // $example on$
+ // Load training data
+ val training = sqlCtx.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+ val lr = new LogisticRegression()
+ .setMaxIter(10)
+ .setRegParam(0.3)
+ .setElasticNetParam(0.8)
+
+ // Fit the model
+ val lrModel = lr.fit(training)
+
+ // Print the coefficients and intercept for logistic regression
+ println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println