aboutsummaryrefslogtreecommitdiff
path: root/examples/src
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2016-05-16 09:55:35 +0200
committerNick Pentreath <nickp@za.ibm.com>2016-05-16 09:55:35 +0200
commitf116a84ef8bf8a201c1a67154fda6990e4222074 (patch)
tree4e17d026c1a1b6b5ae22a613a87d009237e39ccd /examples/src
parentc1836d66bdc93f80ff9e8852efe8f2d2bc1ca941 (diff)
downloadspark-f116a84ef8bf8a201c1a67154fda6990e4222074.tar.gz
spark-f116a84ef8bf8a201c1a67154fda6990e4222074.tar.bz2
spark-f116a84ef8bf8a201c1a67154fda6990e4222074.zip
[SPARK-14979][ML][PYSPARK] Add examples for GeneralizedLinearRegression
## What changes were proposed in this pull request? Add Scala/Java/Python examples for ```GeneralizedLinearRegression```. ## How was this patch tested? They are examples and have been tested offline. Author: Yanbo Liang <ybliang8@gmail.com> Closes #12754 from yanboliang/spark-14979.
Diffstat (limited to 'examples/src')
-rw-r--r--examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java83
-rw-r--r--examples/src/main/python/ml/generalized_linear_regression_example.py66
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala78
3 files changed, 227 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java
new file mode 100644
index 0000000000..3f072d1e50
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.regression.GeneralizedLinearRegression;
+import org.apache.spark.ml.regression.GeneralizedLinearRegressionModel;
+import org.apache.spark.ml.regression.GeneralizedLinearRegressionTrainingSummary;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Fits a generalized linear regression (gaussian family, identity link) to
+ * data/mllib/sample_linear_regression_data.txt and prints the coefficients,
+ * the intercept and the metrics of the training summary. Run with
+ * <pre>
+ * bin/run-example ml.JavaGeneralizedLinearRegressionExample
+ * </pre>
+ */
+public class JavaGeneralizedLinearRegressionExample {
+
+  public static void main(String[] args) {
+    SparkSession session =
+      SparkSession.builder().appName("JavaGeneralizedLinearRegressionExample").getOrCreate();
+
+    // $example on$
+    // Read the training set through the "libsvm" data source
+    String trainingPath = "data/mllib/sample_linear_regression_data.txt";
+    Dataset<Row> training = session.read().format("libsvm").load(trainingPath);
+
+    // Estimator settings: gaussian family, identity link, maxIter 10, regParam 0.3
+    GeneralizedLinearRegression estimator = new GeneralizedLinearRegression()
+      .setFamily("gaussian").setLink("identity")
+      .setMaxIter(10).setRegParam(0.3);
+
+    // Train on the loaded dataset
+    GeneralizedLinearRegressionModel fitted = estimator.fit(training);
+
+    // Report the learned coefficients and intercept
+    System.out.println("Coefficients: " + fitted.coefficients());
+    System.out.println("Intercept: " + fitted.intercept());
+
+    // Report the metrics exposed by the training summary, one per line
+    GeneralizedLinearRegressionTrainingSummary stats = fitted.summary();
+    String stdErrors = Arrays.toString(stats.coefficientStandardErrors());
+    System.out.println("Coefficient Standard Errors: " + stdErrors);
+    System.out.println("T Values: " + Arrays.toString(stats.tValues()));
+    System.out.println("P Values: " + Arrays.toString(stats.pValues()));
+    System.out.println("Dispersion: " + stats.dispersion());
+    System.out.println("Null Deviance: " + stats.nullDeviance());
+    System.out.println(
+      "Residual Degree Of Freedom Null: " + stats.residualDegreeOfFreedomNull());
+    System.out.println("Deviance: " + stats.deviance());
+    System.out.println("Residual Degree Of Freedom: " + stats.residualDegreeOfFreedom());
+    System.out.println("AIC: " + stats.aic());
+
+    // The residuals are displayed with show() rather than println
+    System.out.println("Deviance Residuals: ");
+    stats.residuals().show();
+    // $example off$
+
+    session.stop();
+  }
+}
diff --git a/examples/src/main/python/ml/generalized_linear_regression_example.py b/examples/src/main/python/ml/generalized_linear_regression_example.py
new file mode 100644
index 0000000000..796752a60f
--- /dev/null
+++ b/examples/src/main/python/ml/generalized_linear_regression_example.py
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.regression import GeneralizedLinearRegression
+# $example off$
+
+"""
+An example demonstrating generalized linear regression.
+Run with:
+ bin/spark-submit examples/src/main/python/ml/generalized_linear_regression_example.py
+"""
+
+if __name__ == "__main__":
+ spark = SparkSession\
+ .builder\
+ .appName("GeneralizedLinearRegressionExample")\
+ .getOrCreate()
+
+ # $example on$
+ # Load training data
+ dataset = spark.read.format("libsvm")\
+ .load("data/mllib/sample_linear_regression_data.txt")
+
+ glr = GeneralizedLinearRegression(family="gaussian", link="identity", maxIter=10, regParam=0.3)
+
+ # Fit the model
+ model = glr.fit(dataset)
+
+ # Print the coefficients and intercept for generalized linear regression model
+ print("Coefficients: " + str(model.coefficients))
+ print("Intercept: " + str(model.intercept))
+
+ # Summarize the model over the training set and print out some metrics
+ summary = model.summary
+ print("Coefficient Standard Errors: " + str(summary.coefficientStandardErrors))
+ print("T Values: " + str(summary.tValues))
+ print("P Values: " + str(summary.pValues))
+ print("Dispersion: " + str(summary.dispersion))
+ print("Null Deviance: " + str(summary.nullDeviance))
+ print("Residual Degree Of Freedom Null: " + str(summary.residualDegreeOfFreedomNull))
+ print("Deviance: " + str(summary.deviance))
+ print("Residual Degree Of Freedom: " + str(summary.residualDegreeOfFreedom))
+ print("AIC: " + str(summary.aic))
+ print("Deviance Residuals: ")
+ summary.residuals().show()
+ # $example off$
+
+ spark.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala
new file mode 100644
index 0000000000..1b86d7cad0
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.regression.GeneralizedLinearRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Fits a generalized linear regression (gaussian family, identity link) to
+ * data/mllib/sample_linear_regression_data.txt and prints the coefficients,
+ * the intercept and the metrics of the training summary. Run with
+ * {{{
+ * bin/run-example ml.GeneralizedLinearRegressionExample
+ * }}}
+ */
+object GeneralizedLinearRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    val session =
+      SparkSession.builder().appName("GeneralizedLinearRegressionExample").getOrCreate()
+
+    // $example on$
+    // Read the training set through the "libsvm" data source
+    val trainingPath = "data/mllib/sample_linear_regression_data.txt"
+    val training = session.read.format("libsvm").load(trainingPath)
+
+    // Estimator settings: gaussian family, identity link, maxIter 10, regParam 0.3
+    val estimator = new GeneralizedLinearRegression()
+      .setFamily("gaussian").setLink("identity")
+      .setMaxIter(10).setRegParam(0.3)
+
+    // Train on the loaded dataset
+    val fitted = estimator.fit(training)
+
+    // Report the learned coefficients and intercept
+    println(s"Coefficients: ${fitted.coefficients}")
+    println(s"Intercept: ${fitted.intercept}")
+
+    // Report the metrics exposed by the training summary, one per line
+    val stats = fitted.summary
+    val stdErrors = stats.coefficientStandardErrors.mkString(",")
+    println(s"Coefficient Standard Errors: $stdErrors")
+    println(s"T Values: ${stats.tValues.mkString(",")}")
+    println(s"P Values: ${stats.pValues.mkString(",")}")
+    println(s"Dispersion: ${stats.dispersion}")
+    println(s"Null Deviance: ${stats.nullDeviance}")
+    println(s"Residual Degree Of Freedom Null: ${stats.residualDegreeOfFreedomNull}")
+    println(s"Deviance: ${stats.deviance}")
+    println(s"Residual Degree Of Freedom: ${stats.residualDegreeOfFreedom}")
+    println(s"AIC: ${stats.aic}")
+
+    // The residuals are displayed with show() rather than println
+    println("Deviance Residuals: ")
+    stats.residuals().show()
+    // $example off$
+
+    session.stop()
+  }
+}
+// scalastyle:on println