author    Bryan Cutler <cutlerb@gmail.com>    2016-07-14 09:12:46 +0100
committer Sean Owen <sowen@cloudera.com>      2016-07-14 09:12:46 +0100
commit    e3f8a033679261aaee15bda0f970a1890411e743 (patch)
tree      fecc6121b1d5357c2214f710018de2a9ddea2786 /examples/src/main/scala
parent    252d4f27f23b547777892bcea25a2cea62d8cbab (diff)
[SPARK-16403][EXAMPLES] Cleanup to remove unused imports, consistent style, minor fixes
## What changes were proposed in this pull request?

Cleanup of examples, mostly from PySpark-ML, to fix minor issues: unused imports, style consistency, a duplicate pipeline example, use of the future print function, and a spelling error.

* The "Pipeline Example" is duplicated by "Simple Text Classification Pipeline" in Scala, Python, and Java.
* "Estimator Transformer Param Example" is duplicated by "Simple Params Example" in Scala, Python, and Java.
* Synced random_forest_classifier_example.py with Scala by adding an IndexToString label converter.
* Synced train_validation_split.py (ModelSelectionViaTrainValidationSplitExample in Scala) by adjusting the data split and adding a grid for the intercept.
* RegexTokenizer was doing nothing in tokenizer_example.py and JavaTokenizerExample.java; synced with the Scala version.

## How was this patch tested?

Local tests and running the modified examples.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #14081 from BryanCutler/examples-cleanup-SPARK-16403.
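For context on the IndexToString change mentioned above, here is a minimal sketch of the StringIndexer/IndexToString round trip the Python random forest example was synced with. It is not part of this diff; the object name and the column names ("category", "categoryIndex", "originalCategory") are illustrative assumptions, not code from this tree.

```scala
// Sketch only: StringIndexer encodes string labels, IndexToString decodes them back.
import org.apache.spark.ml.feature.{IndexToString, StringIndexer}
import org.apache.spark.sql.SparkSession

object IndexToStringSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("IndexToStringSketch")
      .getOrCreate()

    val df = spark.createDataFrame(Seq(
      (0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")
    )).toDF("id", "category")

    // StringIndexer maps string labels to numeric indices and records the label
    // values in the output column's metadata.
    val indexer = new StringIndexer()
      .setInputCol("category")
      .setOutputCol("categoryIndex")
    val indexed = indexer.fit(df).transform(df)

    // IndexToString reads that metadata to map indices back to the original
    // string labels, which is what the Python example was missing relative to Scala.
    val converter = new IndexToString()
      .setInputCol("categoryIndex")
      .setOutputCol("originalCategory")

    converter.transform(indexed)
      .select("id", "categoryIndex", "originalCategory")
      .show()

    spark.stop()
  }
}
```

In the classification examples themselves, the converter is typically applied to the classifier's prediction column so that predicted indices are reported as the original string labels.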
Diffstat (limited to 'examples/src/main/scala')
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala | 1
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala | 6
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala | 2
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala | 1
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala | 1
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala | 6
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala | 1
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala | 4
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala | 4
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala | 2
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala | 104
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala | 93
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala | 2
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala | 3
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala | 1
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala | 1
16 files changed, 24 insertions(+), 208 deletions(-)
diff --git a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
index a68fd0285f..86eed3867c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
@@ -18,7 +18,6 @@
// scalastyle:off println
package org.apache.spark.examples
-import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
/**
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala
index 2c2bf421bc..26095b46f5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala
@@ -33,8 +33,10 @@ import org.apache.spark.sql.SparkSession
*/
object GaussianMixtureExample {
def main(args: Array[String]): Unit = {
- // Creates a SparkSession
- val spark = SparkSession.builder.appName(s"${this.getClass.getSimpleName}").getOrCreate()
+ val spark = SparkSession
+ .builder
+ .appName(s"${this.getClass.getSimpleName}")
+ .getOrCreate()
// $example on$
// Loads data
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala
index 7c5d3f2341..a840559d24 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala
@@ -33,8 +33,6 @@ import org.apache.spark.sql.SparkSession
object IsotonicRegressionExample {
def main(args: Array[String]): Unit = {
-
- // Creates a SparkSession.
val spark = SparkSession
.builder
.appName(s"${this.getClass.getSimpleName}")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala
index 2341b36db2..a1d19e138d 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala
@@ -34,7 +34,6 @@ import org.apache.spark.sql.SparkSession
object KMeansExample {
def main(args: Array[String]): Unit = {
- // Creates a SparkSession.
val spark = SparkSession
.builder
.appName(s"${this.getClass.getSimpleName}")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
index 75fef2922a..1cd2641f9a 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
@@ -46,6 +46,7 @@ object ModelSelectionViaTrainValidationSplitExample {
val Array(training, test) = data.randomSplit(Array(0.9, 0.1), seed = 12345)
val lr = new LinearRegression()
+ .setMaxIter(10)
// We use a ParamGridBuilder to construct a grid of parameters to search over.
// TrainValidationSplit will try all combinations of values and determine best model using
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala
index e8a9b32da9..a39e3202ba 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala
@@ -39,27 +39,33 @@ object MultilayerPerceptronClassifierExample {
// Load the data stored in LIBSVM format as a DataFrame.
val data = spark.read.format("libsvm")
.load("data/mllib/sample_multiclass_classification_data.txt")
+
// Split the data into train and test
val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
val train = splits(0)
val test = splits(1)
+
// specify layers for the neural network:
// input layer of size 4 (features), two intermediate of size 5 and 4
// and output of size 3 (classes)
val layers = Array[Int](4, 5, 4, 3)
+
// create the trainer and set its parameters
val trainer = new MultilayerPerceptronClassifier()
.setLayers(layers)
.setBlockSize(128)
.setSeed(1234L)
.setMaxIter(100)
+
// train the model
val model = trainer.fit(train)
+
// compute accuracy on the test set
val result = model.transform(test)
val predictionAndLabels = result.select("prediction", "label")
val evaluator = new MulticlassClassificationEvaluator()
.setMetricName("accuracy")
+
println("Accuracy: " + evaluator.evaluate(predictionAndLabels))
// $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala
index 7089a4bc87..3ae0623c4c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala
@@ -30,6 +30,7 @@ object NaiveBayesExample {
.builder
.appName("NaiveBayesExample")
.getOrCreate()
+
// $example on$
// Load the data stored in LIBSVM format as a DataFrame.
val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala
index b16692b1fa..12f8663b9c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala
@@ -54,7 +54,7 @@ object PipelineExample {
.setOutputCol("features")
val lr = new LogisticRegression()
.setMaxIter(10)
- .setRegParam(0.01)
+ .setRegParam(0.001)
val pipeline = new Pipeline()
.setStages(Array(tokenizer, hashingTF, lr))
@@ -74,7 +74,7 @@ object PipelineExample {
val test = spark.createDataFrame(Seq(
(4L, "spark i j k"),
(5L, "l m n"),
- (6L, "mapreduce spark"),
+ (6L, "spark hadoop spark"),
(7L, "apache hadoop")
)).toDF("id", "text")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
index 2f7e217b8f..aedb9e7d3b 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
@@ -28,16 +28,16 @@ object QuantileDiscretizerExample {
.builder
.appName("QuantileDiscretizerExample")
.getOrCreate()
- import spark.implicits._
// $example on$
val data = Array((0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2))
- var df = spark.createDataFrame(data).toDF("id", "hour")
+ val df = spark.createDataFrame(data).toDF("id", "hour")
// $example off$
// Output of QuantileDiscretizer for such small datasets can depend on the number of
// partitions. Here we force a single partition to ensure consistent results.
// Note this is not necessary for normal use cases
.repartition(1)
+
// $example on$
val discretizer = new QuantileDiscretizer()
.setInputCol("hour")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
index 9ea4920146..3498fa8a50 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
@@ -36,10 +36,12 @@ object RFormulaExample {
(8, "CA", 12, 0.0),
(9, "NZ", 15, 0.0)
)).toDF("id", "country", "hour", "clicked")
+
val formula = new RFormula()
.setFormula("clicked ~ country + hour")
.setFeaturesCol("features")
.setLabelCol("label")
+
val output = formula.fit(dataset).transform(dataset)
output.select("features", "label").show()
// $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
deleted file mode 100644
index 29f1f50960..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-import org.apache.spark.ml.classification.LogisticRegression
-import org.apache.spark.ml.feature.LabeledPoint
-import org.apache.spark.ml.linalg.{Vector, Vectors}
-import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.sql.{Row, SparkSession}
-
-/**
- * A simple example demonstrating ways to specify parameters for Estimators and Transformers.
- * Run with
- * {{{
- * bin/run-example ml.SimpleParamsExample
- * }}}
- */
-object SimpleParamsExample {
-
- def main(args: Array[String]) {
- val spark = SparkSession
- .builder
- .appName("SimpleParamsExample")
- .getOrCreate()
- import spark.implicits._
-
- // Prepare training data.
- // We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes
- // into DataFrames, where it uses the case class metadata to infer the schema.
- val training = spark.createDataFrame(Seq(
- LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
- LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
- LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
- LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))
-
- // Create a LogisticRegression instance. This instance is an Estimator.
- val lr = new LogisticRegression()
- // Print out the parameters, documentation, and any default values.
- println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
-
- // We may set parameters using setter methods.
- lr.setMaxIter(10)
- .setRegParam(0.01)
-
- // Learn a LogisticRegression model. This uses the parameters stored in lr.
- val model1 = lr.fit(training)
- // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
- // we can view the parameters it used during fit().
- // This prints the parameter (name: value) pairs, where names are unique IDs for this
- // LogisticRegression instance.
- println("Model 1 was fit using parameters: " + model1.parent.extractParamMap())
-
- // We may alternatively specify parameters using a ParamMap,
- // which supports several methods for specifying parameters.
- val paramMap = ParamMap(lr.maxIter -> 20)
- paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter.
- paramMap.put(lr.regParam -> 0.1, lr.thresholds -> Array(0.5, 0.5)) // Specify multiple Params.
-
- // One can also combine ParamMaps.
- val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability") // Change output column name
- val paramMapCombined = paramMap ++ paramMap2
-
- // Now learn a new model using the paramMapCombined parameters.
- // paramMapCombined overrides all parameters set earlier via lr.set* methods.
- val model2 = lr.fit(training.toDF(), paramMapCombined)
- println("Model 2 was fit using parameters: " + model2.parent.extractParamMap())
-
- // Prepare test data.
- val test = spark.createDataFrame(Seq(
- LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
- LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
- LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
-
- // Make predictions on test data using the Transformer.transform() method.
- // LogisticRegressionModel.transform will only use the 'features' column.
- // Note that model2.transform() outputs a 'myProbability' column instead of the usual
- // 'probability' column since we renamed the lr.probabilityCol parameter previously.
- model2.transform(test)
- .select("features", "label", "myProbability", "prediction")
- .collect()
- .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
- println(s"($features, $label) -> prob=$prob, prediction=$prediction")
- }
-
- spark.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala
deleted file mode 100644
index 0b2a058bb6..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-import scala.beans.BeanInfo
-
-import org.apache.spark.ml.Pipeline
-import org.apache.spark.ml.classification.LogisticRegression
-import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
-import org.apache.spark.ml.linalg.Vector
-import org.apache.spark.sql.{Row, SparkSession}
-
-@BeanInfo
-case class LabeledDocument(id: Long, text: String, label: Double)
-
-@BeanInfo
-case class Document(id: Long, text: String)
-
-/**
- * A simple text classification pipeline that recognizes "spark" from input text. This is to show
- * how to create and configure an ML pipeline. Run with
- * {{{
- * bin/run-example ml.SimpleTextClassificationPipeline
- * }}}
- */
-object SimpleTextClassificationPipeline {
-
- def main(args: Array[String]) {
- val spark = SparkSession
- .builder
- .appName("SimpleTextClassificationPipeline")
- .getOrCreate()
- import spark.implicits._
-
- // Prepare training documents, which are labeled.
- val training = spark.createDataFrame(Seq(
- LabeledDocument(0L, "a b c d e spark", 1.0),
- LabeledDocument(1L, "b d", 0.0),
- LabeledDocument(2L, "spark f g h", 1.0),
- LabeledDocument(3L, "hadoop mapreduce", 0.0)))
-
- // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
- val tokenizer = new Tokenizer()
- .setInputCol("text")
- .setOutputCol("words")
- val hashingTF = new HashingTF()
- .setNumFeatures(1000)
- .setInputCol(tokenizer.getOutputCol)
- .setOutputCol("features")
- val lr = new LogisticRegression()
- .setMaxIter(10)
- .setRegParam(0.001)
- val pipeline = new Pipeline()
- .setStages(Array(tokenizer, hashingTF, lr))
-
- // Fit the pipeline to training documents.
- val model = pipeline.fit(training.toDF())
-
- // Prepare test documents, which are unlabeled.
- val test = spark.createDataFrame(Seq(
- Document(4L, "spark i j k"),
- Document(5L, "l m n"),
- Document(6L, "spark hadoop spark"),
- Document(7L, "apache hadoop")))
-
- // Make predictions on test documents.
- model.transform(test.toDF())
- .select("id", "text", "probability", "prediction")
- .collect()
- .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
- println(s"($id, $text) --> prob=$prob, prediction=$prediction")
- }
-
- spark.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
index fb1a43e962..a56de0856d 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
@@ -36,7 +36,7 @@ object StopWordsRemoverExample {
.setOutputCol("filtered")
val dataSet = spark.createDataFrame(Seq(
- (0, Seq("I", "saw", "the", "red", "baloon")),
+ (0, Seq("I", "saw", "the", "red", "balloon")),
(1, Seq("Mary", "had", "a", "little", "lamb"))
)).toDF("id", "raw")
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
index 33b5daec59..97f6fcce15 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
@@ -40,13 +40,16 @@ object TfIdfExample {
val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
val wordsData = tokenizer.transform(sentenceData)
+
val hashingTF = new HashingTF()
.setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(20)
+
val featurizedData = hashingTF.transform(wordsData)
// alternatively, CountVectorizer can also be used to get term frequency vectors
val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
val idfModel = idf.fit(featurizedData)
+
val rescaledData = idfModel.transform(featurizedData)
rescaledData.select("features", "label").take(3).foreach(println)
// $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
index 1c70dc700b..90d0faaf47 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -45,6 +45,7 @@ object TokenizerExample {
val tokenized = tokenizer.transform(sentenceDataFrame)
tokenized.select("words", "label").take(3).foreach(println)
+
val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
regexTokenized.select("words", "label").take(3).foreach(println)
// $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
index 9ac5623607..5c8bd19f20 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
@@ -45,6 +45,7 @@ object Word2VecExample {
.setVectorSize(3)
.setMinCount(0)
val model = word2Vec.fit(documentDF)
+
val result = model.transform(documentDF)
result.select("result").take(3).foreach(println)
// $example off$