From 2804674a7af8f11eeb1280459bc9145815398eed Mon Sep 17 00:00:00 2001 From: Rishabh Bhardwaj Date: Mon, 2 Nov 2015 14:03:50 -0800 Subject: [SPARK-11383][DOCS] Replaced example code in mllib-naive-bayes.md/mllib-isotonic-regression.md using include_example I have made the required changes in mllib-naive-bayes.md/mllib-isotonic-regression.md and also verified them. Kindle Review it. Author: Rishabh Bhardwaj Closes #9353 from rishabhbhardwaj/SPARK-11383. --- .../python/mllib/isotonic_regression_example.py | 56 ++++++++++++++++++++++ .../src/main/python/mllib/naive_bayes_example.py | 56 ++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 examples/src/main/python/mllib/isotonic_regression_example.py create mode 100644 examples/src/main/python/mllib/naive_bayes_example.py (limited to 'examples/src/main/python') diff --git a/examples/src/main/python/mllib/isotonic_regression_example.py b/examples/src/main/python/mllib/isotonic_regression_example.py new file mode 100644 index 0000000000..89dc9f4b66 --- /dev/null +++ b/examples/src/main/python/mllib/isotonic_regression_example.py @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Isotonic Regression Example. +""" +from __future__ import print_function + +from pyspark import SparkContext +# $example on$ +import math +from pyspark.mllib.regression import IsotonicRegression, IsotonicRegressionModel +# $example off$ + +if __name__ == "__main__": + + sc = SparkContext(appName="PythonIsotonicRegressionExample") + + # $example on$ + data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt") + + # Create label, feature, weight tuples from input data with weight set to default value 1.0. + parsedData = data.map(lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,)) + + # Split data into training (60%) and test (40%) sets. + training, test = parsedData.randomSplit([0.6, 0.4], 11) + + # Create isotonic regression model from training data. + # Isotonic parameter defaults to true so it is only shown for demonstration + model = IsotonicRegression.train(training) + + # Create tuples of predicted and real labels. + predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0])) + + # Calculate mean squared error between predicted and real labels. + meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean() + print("Mean Squared Error = " + str(meanSquaredError)) + + # Save and load model + model.save(sc, "target/tmp/myIsotonicRegressionModel") + sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel") + # $example off$ diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py new file mode 100644 index 0000000000..a2e7dacf25 --- /dev/null +++ b/examples/src/main/python/mllib/naive_bayes_example.py @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +NaiveBayes Example. +""" +from __future__ import print_function + +# $example on$ +from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel +from pyspark.mllib.linalg import Vectors +from pyspark.mllib.regression import LabeledPoint + + +def parseLine(line): + parts = line.split(',') + label = float(parts[0]) + features = Vectors.dense([float(x) for x in parts[1].split(' ')]) + return LabeledPoint(label, features) +# $example off$ + +if __name__ == "__main__": + + sc = SparkContext(appName="PythonNaiveBayesExample") + + # $example on$ + data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine) + + # Split data aproximately into training (60%) and test (40%) + training, test = data.randomSplit([0.6, 0.4], seed=0) + + # Train a naive Bayes model. + model = NaiveBayes.train(training, 1.0) + + # Make prediction and test accuracy. + predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label)) + accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count() + + # Save and load model + model.save(sc, "target/tmp/myNaiveBayesModel") + sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel") + # $example off$ -- cgit v1.2.3