diff options
author | Peter Rudenko <petro.rudenko@gmail.com> | 2015-03-24 16:33:38 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-03-24 16:33:38 +0000 |
commit | 08d452801195cc6cf0697a594e98cd4778f358ee (patch) | |
tree | 95fd13f26ab08c98fcf0a7d76bcd6ca72f1faced /docs | |
parent | 85cf0636825d1997d64d0bdc04618f29b7222da1 (diff) | |
download | spark-08d452801195cc6cf0697a594e98cd4778f358ee.tar.gz spark-08d452801195cc6cf0697a594e98cd4778f358ee.tar.bz2 spark-08d452801195cc6cf0697a594e98cd4778f358ee.zip |
[ML][docs][minor] Define LabeledDocument/Document classes in CV example
To make the Cross-Validation example code snippet easier to copy/paste, LabeledDocument/Document need to be defined in it, since they are currently defined only in a previous example.
Author: Peter Rudenko <petro.rudenko@gmail.com>
Closes #5135 from petro-rudenko/patch-3 and squashes the following commits:
5190c75 [Peter Rudenko] Fix primitive types for java examples.
1d35383 [Peter Rudenko] [SQL][docs][minor] Define LabeledDocument/Document classes in CV example
Diffstat (limited to 'docs')
-rw-r--r-- | docs/ml-guide.md | 51 |
1 files changed, 43 insertions, 8 deletions
diff --git a/docs/ml-guide.md b/docs/ml-guide.md index da6aef7f14..c08c76d226 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -408,31 +408,31 @@ import org.apache.spark.sql.SQLContext; // Labeled and unlabeled instance types. // Spark SQL can infer schema from Java Beans. public class Document implements Serializable { - private Long id; + private long id; private String text; - public Document(Long id, String text) { + public Document(long id, String text) { this.id = id; this.text = text; } - public Long getId() { return this.id; } - public void setId(Long id) { this.id = id; } + public long getId() { return this.id; } + public void setId(long id) { this.id = id; } public String getText() { return this.text; } public void setText(String text) { this.text = text; } } public class LabeledDocument extends Document implements Serializable { - private Double label; + private double label; - public LabeledDocument(Long id, String text, Double label) { + public LabeledDocument(long id, String text, double label) { super(id, text); this.label = label; } - public Double getLabel() { return this.label; } - public void setLabel(Double label) { this.label = label; } + public double getLabel() { return this.label; } + public void setLabel(double label) { this.label = label; } } // Set up contexts. @@ -565,6 +565,11 @@ import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.sql.{Row, SQLContext} +// Labeled and unlabeled instance types. +// Spark SQL can infer schema from case classes. 
+case class LabeledDocument(id: Long, text: String, label: Double) +case class Document(id: Long, text: String) + val conf = new SparkConf().setAppName("CrossValidatorExample") val sc = new SparkContext(conf) val sqlContext = new SQLContext(sc) @@ -655,6 +660,36 @@ import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; +// Labeled and unlabeled instance types. +// Spark SQL can infer schema from Java Beans. +public class Document implements Serializable { + private long id; + private String text; + + public Document(long id, String text) { + this.id = id; + this.text = text; + } + + public long getId() { return this.id; } + public void setId(long id) { this.id = id; } + + public String getText() { return this.text; } + public void setText(String text) { this.text = text; } +} + +public class LabeledDocument extends Document implements Serializable { + private double label; + + public LabeledDocument(long id, String text, double label) { + super(id, text); + this.label = label; + } + + public double getLabel() { return this.label; } + public void setLabel(double label) { this.label = label; } +} + SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample"); JavaSparkContext jsc = new JavaSparkContext(conf); SQLContext jsql = new SQLContext(jsc); |