path: root/docs/ml-guide.md
author     Peter Rudenko <petro.rudenko@gmail.com>    2015-03-24 16:33:38 +0000
committer  Sean Owen <sowen@cloudera.com>             2015-03-24 16:33:38 +0000
commit     08d452801195cc6cf0697a594e98cd4778f358ee (patch)
tree       95fd13f26ab08c98fcf0a7d76bcd6ca72f1faced /docs/ml-guide.md
parent     85cf0636825d1997d64d0bdc04618f29b7222da1 (diff)
[ML][docs][minor] Define LabeledDocument/Document classes in CV example
To make the Cross-Validation example code snippet easier to copy/paste, LabeledDocument/Document need to be defined in it, since they are defined in a previous example.

Author: Peter Rudenko <petro.rudenko@gmail.com>

Closes #5135 from petro-rudenko/patch-3 and squashes the following commits:

5190c75 [Peter Rudenko] Fix primitive types for java examples.
1d35383 [Peter Rudenko] [SQL][docs][minor] Define LabeledDocument/Document classes in CV example
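For context, the fragment below is a minimal usage sketch in the style of the guide's Java snippets (it is not part of the patch, and the example documents are illustrative): with Document/LabeledDocument defined inside the Cross-Validation snippet, the training DataFrame can be built directly from them, so the snippet stands on its own.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

// Set up contexts, as in the guide's JavaCrossValidatorExample.
SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample");
JavaSparkContext jsc = new JavaSparkContext(conf);
SQLContext jsql = new SQLContext(jsc);

// Illustrative labeled training documents built from the LabeledDocument bean.
List<LabeledDocument> localTraining = Arrays.asList(
  new LabeledDocument(0L, "a b c d e spark", 1.0),
  new LabeledDocument(1L, "b d", 0.0),
  new LabeledDocument(2L, "spark f g h", 1.0),
  new LabeledDocument(3L, "hadoop mapreduce", 0.0));

// Spark SQL infers the DataFrame schema from the LabeledDocument bean.
DataFrame training =
  jsql.createDataFrame(jsc.parallelize(localTraining), LabeledDocument.class);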
Diffstat (limited to 'docs/ml-guide.md')
-rw-r--r--  docs/ml-guide.md  51
1 file changed, 43 insertions(+), 8 deletions(-)
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index da6aef7f14..c08c76d226 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -408,31 +408,31 @@ import org.apache.spark.sql.SQLContext;
// Labeled and unlabeled instance types.
// Spark SQL can infer schema from Java Beans.
public class Document implements Serializable {
- private Long id;
+ private long id;
private String text;
- public Document(Long id, String text) {
+ public Document(long id, String text) {
this.id = id;
this.text = text;
}
- public Long getId() { return this.id; }
- public void setId(Long id) { this.id = id; }
+ public long getId() { return this.id; }
+ public void setId(long id) { this.id = id; }
public String getText() { return this.text; }
public void setText(String text) { this.text = text; }
}
public class LabeledDocument extends Document implements Serializable {
- private Double label;
+ private double label;
- public LabeledDocument(Long id, String text, Double label) {
+ public LabeledDocument(long id, String text, double label) {
super(id, text);
this.label = label;
}
- public Double getLabel() { return this.label; }
- public void setLabel(Double label) { this.label = label; }
+ public double getLabel() { return this.label; }
+ public void setLabel(double label) { this.label = label; }
}
// Set up contexts.
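The hunk above switches the bean fields and accessors from boxed Long/Double to primitive long/double. A quick way to sanity-check the change (a sketch, not part of the patch; it assumes the jsc/jsql contexts and the Document bean shown here) is to inspect the schema Spark SQL infers from the bean:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.DataFrame;

// Sketch: inspect the schema inferred from the updated Document bean.
// Assumes jsc (JavaSparkContext) and jsql (SQLContext) from the surrounding example.
List<Document> docs = Arrays.asList(
  new Document(4L, "spark i j k"),
  new Document(5L, "l m n"));
DataFrame docsDF = jsql.createDataFrame(jsc.parallelize(docs), Document.class);
docsDF.printSchema();  // prints the id and text columns derived from the bean's getters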
@@ -565,6 +565,11 @@ import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.sql.{Row, SQLContext}
+// Labeled and unlabeled instance types.
+// Spark SQL can infer schema from case classes.
+case class LabeledDocument(id: Long, text: String, label: Double)
+case class Document(id: Long, text: String)
+
val conf = new SparkConf().setAppName("CrossValidatorExample")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
@@ -655,6 +660,36 @@ import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
+// Labeled and unlabeled instance types.
+// Spark SQL can infer schema from Java Beans.
+public class Document implements Serializable {
+ private long id;
+ private String text;
+
+ public Document(long id, String text) {
+ this.id = id;
+ this.text = text;
+ }
+
+ public long getId() { return this.id; }
+ public void setId(long id) { this.id = id; }
+
+ public String getText() { return this.text; }
+ public void setText(String text) { this.text = text; }
+}
+
+public class LabeledDocument extends Document implements Serializable {
+ private double label;
+
+ public LabeledDocument(long id, String text, double label) {
+ super(id, text);
+ this.label = label;
+ }
+
+ public double getLabel() { return this.label; }
+ public void setLabel(double label) { this.label = label; }
+}
+
SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample");
JavaSparkContext jsc = new JavaSparkContext(conf);
SQLContext jsql = new SQLContext(jsc);
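With the classes above defined in the snippet itself, the copied example can run end to end. The fragment below is a sketch of the prediction step only (not part of the patch): it assumes a CrossValidatorModel named cvModel obtained from crossval.fit(training) as in the guide, and the probability/prediction column names produced by the guide's logistic regression stage.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;

// Illustrative unlabeled test documents built from the Document bean.
List<Document> localTest = Arrays.asList(
  new Document(4L, "spark i j k"),
  new Document(5L, "mapreduce spark"));
DataFrame test = jsql.createDataFrame(jsc.parallelize(localTest), Document.class);

// Make predictions on the test documents with the fitted model.
DataFrame predictions = cvModel.transform(test);
for (Row r : predictions.select("id", "text", "probability", "prediction").collect()) {
  System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2)
    + ", prediction=" + r.get(3));
}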