about | summary | refs | log | tree | commit | diff
path: root/examples/src
diff options
context:
space:
mode:
author wm624@hotmail.com <wm624@hotmail.com> 2016-05-19 23:21:17 -0700
committer Xiangrui Meng <meng@databricks.com> 2016-05-19 23:21:17 -0700
commit 4c7a6b385c79f4de07a89495afce4f8e73b06086 (patch)
tree be203bbc460f68f03476f8bfd7aba57a0408d706 /examples/src
parent 09a00510c4759ff87abb0b2fdf1630ddf36ca12c (diff)
download spark-4c7a6b385c79f4de07a89495afce4f8e73b06086.tar.gz
spark-4c7a6b385c79f4de07a89495afce4f8e73b06086.tar.bz2
spark-4c7a6b385c79f4de07a89495afce4f8e73b06086.zip
[SPARK-15363][ML][EXAMPLE] Example code shouldn't use VectorImplicits._, asML/fromML
## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) In this DataFrame example, we use VectorImplicits._, which is a private API. Since the Vectors object has a public API, we use Vectors.fromML instead of the implicits. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) Manually ran the example. Author: wm624@hotmail.com <wm624@hotmail.com> Closes #13213 from wangmiao1981/ml.
Diffstat (limited to 'examples/src')
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
index 8ed6367787..c69027babb 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
@@ -25,7 +25,7 @@ import scopt.OptionParser
import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.linalg.Vector
-import org.apache.spark.mllib.linalg.VectorImplicits._
+import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
@@ -81,7 +81,7 @@ object DataFrameExample {
// Convert features column to an RDD of vectors.
val features = df.select("features").rdd.map { case Row(v: Vector) => v }
val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())(
- (summary, feat) => summary.add(feat),
+ (summary, feat) => summary.add(Vectors.fromML(feat)),
(sum1, sum2) => sum1.merge(sum2))
println(s"Selected features column with average values:\n ${featureSummary.mean.toString}")