diff options
author | JeremyNixon <jnixon2@gmail.com> | 2016-02-23 15:57:29 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-02-23 15:57:29 -0800 |
commit | 230bbeaa614ed0ee87ecceece42355dd9a4bacb3 (patch) | |
tree | 79804a397f59d67c84b9405e190d37eec3f7bbfa /examples | |
parent | 8d29001dec5c3695721a76df3f70da50512ef28f (diff) | |
download | spark-230bbeaa614ed0ee87ecceece42355dd9a4bacb3.tar.gz spark-230bbeaa614ed0ee87ecceece42355dd9a4bacb3.tar.bz2 spark-230bbeaa614ed0ee87ecceece42355dd9a4bacb3.zip |
[SPARK-10759][ML] update cross validator with include_example
This pull request uses {% include_example %} to add an example for the Python cross validator to the ml-guide.
Author: JeremyNixon <jnixon2@gmail.com>
Closes #11240 from JeremyNixon/pipeline_include_example.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/src/main/python/ml/cross_validator.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py
index f0ca97c724..5f0ef20218 100644
--- a/examples/src/main/python/ml/cross_validator.py
+++ b/examples/src/main/python/ml/cross_validator.py
@@ -18,12 +18,14 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
+# $example on$
 from pyspark.ml import Pipeline
 from pyspark.ml.classification import LogisticRegression
 from pyspark.ml.evaluation import BinaryClassificationEvaluator
 from pyspark.ml.feature import HashingTF, Tokenizer
 from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
 from pyspark.sql import Row, SQLContext
+# $example off$
 
 """
 A simple example demonstrating model selection using CrossValidator.
@@ -36,7 +38,7 @@ Run with:
 if __name__ == "__main__":
     sc = SparkContext(appName="CrossValidatorExample")
     sqlContext = SQLContext(sc)
-
+    # $example on$
     # Prepare training documents, which are labeled.
     LabeledDocument = Row("id", "text", "label")
     training = sc.parallelize([(0, "a b c d e spark", 1.0),
@@ -92,5 +94,6 @@ if __name__ == "__main__":
     selected = prediction.select("id", "text", "probability", "prediction")
     for row in selected.collect():
         print(row)
+    # $example off$
 
     sc.stop()