author     Dongjoon Hyun <dongjoon@apache.org>    2016-03-13 12:11:18 +0800
committer  Cheng Lian <lian@databricks.com>       2016-03-13 12:11:18 +0800
commit     db88d0204e3a9a05dbe6e67e1abb942639c50a06 (patch)
tree       4c500955b24a44395ad7ffa5c1b46323ebb71916
parent     c079420d7c55d8972db716a2695a5ddd606d11cd (diff)
[MINOR][DOCS] Replace `DataFrame` with `Dataset` in Javadoc.
## What changes were proposed in this pull request?

SPARK-13817 (PR #11656) replaced `DataFrame` with `Dataset` in the Java API. This PR fixes the remaining broken links and sample Java code in `package-info.java`. As a result, it updates the following Javadoc pages:

* http://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/attribute/package-summary.html
* http://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/feature/package-summary.html

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #11675 from dongjoon-hyun/replace_dataframe_with_dataset_in_javadoc.
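For context, a minimal sketch of what the rename means for Java callers (illustrative only, not part of the patch; the class name and local setup here are made up, in the style of the example in `feature/package-info.java` below):

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructType;
import static org.apache.spark.sql.types.DataTypes.*;

public class DatasetReturnType {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local", "example");
    SQLContext jsql = new SQLContext(jsc);
    StructType schema = createStructType(Arrays.asList(
        createStructField("id", IntegerType, false),
        createStructField("text", StringType, false)));
    JavaRDD<Row> rows = jsc.parallelize(Arrays.asList(
        RowFactory.create(0, "Hi I heard about Spark")));
    // After SPARK-13817, createDataFrame returns Dataset<Row> to Java callers
    // instead of the removed DataFrame class; this PR updates the docs to match.
    Dataset<Row> df = jsql.createDataFrame(rows, schema);
    df.show();
    jsc.stop();
  }
}
```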
 mllib/src/main/scala/org/apache/spark/ml/attribute/package-info.java |  2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java   | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/attribute/package-info.java
index e3474f3c1d..464ed12569 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package-info.java
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package-info.java
@@ -20,7 +20,7 @@
/**
* <h2>ML attributes</h2>
*
- * The ML pipeline API uses {@link org.apache.spark.sql.DataFrame}s as ML datasets.
+ * The ML pipeline API uses {@link org.apache.spark.sql.Dataset}s as ML datasets.
* Each dataset consists of typed columns, e.g., string, double, vector, etc.
* However, knowing only the column type may not be sufficient to handle the data properly.
* For instance, a double column with values 0.0, 1.0, 2.0, ... may represent some label indices,
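The doc above explains why column type alone is not enough. As a hedged illustration of how attribute metadata fills that gap (a sketch, not part of the patch; the `dataset` variable, column names, and values are hypothetical, and it assumes the companion-object factory is reachable from Java as `NominalAttribute.defaultAttr()`):

```java
import org.apache.spark.ml.attribute.NominalAttribute;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.Metadata;

// Attach nominal (categorical) attribute metadata to a double "label" column
// so downstream stages can distinguish label indices from continuous values.
Metadata labelMeta = NominalAttribute.defaultAttr()
    .withName("label")
    .withValues(new String[] {"negative", "positive"})
    .toMetadata();
Dataset<Row> tagged = dataset.select(
    dataset.col("text"),
    dataset.col("label").as("label", labelMeta));
```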
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java
index 7a35f2d448..dcff4245d1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java
@@ -22,7 +22,7 @@
* The `ml.feature` package provides common feature transformers that help convert raw data or
* features into more suitable forms for model fitting.
* Most feature transformers are implemented as {@link org.apache.spark.ml.Transformer}s, which
- * transforms one {@link org.apache.spark.sql.DataFrame} into another, e.g.,
+ * transforms one {@link org.apache.spark.sql.Dataset} into another, e.g.,
* {@link org.apache.spark.ml.feature.HashingTF}.
* Some feature transformers are implemented as {@link org.apache.spark.ml.Estimator}s, because the
* transformation requires some aggregated information of the dataset, e.g., document
@@ -31,7 +31,7 @@
* obtain the model first, e.g., {@link org.apache.spark.ml.feature.IDFModel}, in order to apply
* transformation.
* The transformation is usually done by appending new columns to the input
- * {@link org.apache.spark.sql.DataFrame}, so all input columns are carried over.
+ * {@link org.apache.spark.sql.Dataset}, so all input columns are carried over.
*
* We try to make each transformer minimal, so it becomes flexible to assemble feature
* transformation pipelines.
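To make the Transformer/Estimator distinction described above concrete, a hedged sketch (not taken from the patch; it assumes a `Dataset<Row>` named `docs` that already has a tokenized `words` column):

```java
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.IDF;
import org.apache.spark.ml.feature.IDFModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// HashingTF is a Transformer: it maps each row independently, so it can
// transform the input directly with no fitting step.
HashingTF tf = new HashingTF().setInputCol("words").setOutputCol("tf");
Dataset<Row> tfData = tf.transform(docs);

// IDF is an Estimator: document frequency is an aggregate over the whole
// dataset, so fit() runs first and yields an IDFModel, itself a Transformer.
IDFModel idfModel = new IDF().setInputCol("tf").setOutputCol("features").fit(tfData);
Dataset<Row> featurized = idfModel.transform(tfData);
```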
@@ -46,7 +46,7 @@
* import org.apache.spark.api.java.JavaRDD;
* import static org.apache.spark.sql.types.DataTypes.*;
* import org.apache.spark.sql.types.StructType;
- * import org.apache.spark.sql.DataFrame;
+ * import org.apache.spark.sql.Dataset;
* import org.apache.spark.sql.RowFactory;
* import org.apache.spark.sql.Row;
*
@@ -66,7 +66,7 @@
* RowFactory.create(0, "Hi I heard about Spark", 3.0),
* RowFactory.create(1, "I wish Java could use case classes", 4.0),
* RowFactory.create(2, "Logistic regression models are neat", 4.0)));
- * DataFrame df = jsql.createDataFrame(rowRDD, schema);
+ * Dataset<Row> dataset = jsql.createDataFrame(rowRDD, schema);
* // define feature transformers
* RegexTokenizer tok = new RegexTokenizer()
* .setInputCol("text")
@@ -88,10 +88,10 @@
* // assemble and fit the feature transformation pipeline
* Pipeline pipeline = new Pipeline()
* .setStages(new PipelineStage[] {tok, sw, tf, idf, assembler});
- * PipelineModel model = pipeline.fit(df);
+ * PipelineModel model = pipeline.fit(dataset);
*
* // save transformed features with raw data
- * model.transform(df)
+ * model.transform(dataset)
* .select("id", "text", "rating", "features")
* .write().format("parquet").save("/output/path");
* </code>
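As a hedged usage note (not part of the patch): after the example pipeline writes its output, the transformed features can be loaded back as a `Dataset<Row>`, e.g.:

```java
// Read the transformed features back; "/output/path" is the path used in
// the example above, and `jsql` is the same SQLContext.
Dataset<Row> saved = jsql.read().parquet("/output/path");
saved.select("id", "features").show();
```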