aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorXusen Yin <yinxusen@gmail.com>2016-08-02 07:28:46 -0700
committerYanbo Liang <ybliang8@gmail.com>2016-08-02 07:28:46 -0700
commitdd8514fa2059a695143073f852b1abee50e522bd (patch)
treee9d1aa02847ead31faa7a3a4a06d6aa08071056b /examples
parentd9e0919d30e9f79a0eb1ceb8d1b5e9fc58cf085e (diff)
downloadspark-dd8514fa2059a695143073f852b1abee50e522bd.tar.gz
spark-dd8514fa2059a695143073f852b1abee50e522bd.tar.bz2
spark-dd8514fa2059a695143073f852b1abee50e522bd.zip
[SPARK-16558][EXAMPLES][MLLIB] examples/mllib/LDAExample should use MLVector instead of MLlib Vector
## What changes were proposed in this pull request? mllib.LDAExample uses the ML pipeline together with the MLlib LDA algorithm. The former transforms the original data into the ML `Vector` format, while the latter expects the MLlib `Vector` format. ## How was this patch tested? Tested manually. Author: Xusen Yin <yinxusen@gmail.com> Closes #14212 from yinxusen/SPARK-16558.
Diffstat (limited to 'examples')
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala5
1 file changed, 3 insertions, 2 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
index 7e50b122e6..b923e627f2 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
@@ -24,8 +24,9 @@ import scopt.OptionParser
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover}
+import org.apache.spark.ml.linalg.{Vector => MLVector}
import org.apache.spark.mllib.clustering.{DistributedLDAModel, EMLDAOptimizer, LDA, OnlineLDAOptimizer}
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
@@ -223,7 +224,7 @@ object LDAExample {
val documents = model.transform(df)
.select("features")
.rdd
- .map { case Row(features: Vector) => features }
+ .map { case Row(features: MLVector) => Vectors.fromML(features) }
.zipWithIndex()
.map(_.swap)