path: root/docs/mllib-feature-extraction.md
author    Octavian Geagla <ogeagla@gmail.com>    2015-05-30 00:00:36 -0700
committer Joseph K. Bradley <joseph@databricks.com>    2015-05-30 00:00:36 -0700
commit    e3a43748338b02ef6864ca62de40e218e5677506 (patch)
tree      0a28addbcbecd61c69fd6d70e3541d20ed9370eb /docs/mllib-feature-extraction.md
parent    78657d53d71b9d3e86b675cc519868f99e2ffa01 (diff)
[SPARK-7459] [MLLIB] ElementwiseProduct Java example
Author: Octavian Geagla <ogeagla@gmail.com>

Closes #6008 from ogeagla/elementwise-prod-doc and squashes the following commits:

72e6dc0 [Octavian Geagla] [SPARK-7459] [MLLIB] Java example import.
cf2afbd [Octavian Geagla] [SPARK-7459] [MLLIB] Update description of example.
b66431b [Octavian Geagla] [SPARK-7459] [MLLIB] Add override annotation to java example, make scala example use same data as java.
6b26b03 [Octavian Geagla] [SPARK-7459] [MLLIB] Fix line which is too long.
79af020 [Octavian Geagla] [SPARK-7459] [MLLIB] Actually don't use Java 8.
9d5b31a [Octavian Geagla] [SPARK-7459] [MLLIB] Don't use Java 8
4f0c92f [Octavian Geagla] [SPARK-7459] [MLLIB] ElementwiseProduct Java example.
Diffstat (limited to 'docs/mllib-feature-extraction.md')
-rw-r--r--  docs/mllib-feature-extraction.md  40
1 file changed, 34 insertions(+), 6 deletions(-)
diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md
index f723cd6b9d..764985d436 100644
--- a/docs/mllib-feature-extraction.md
+++ b/docs/mllib-feature-extraction.md
@@ -505,7 +505,7 @@ v_N
### Example
-This example below demonstrates how to load a simple vectors file, extract a set of vectors, then transform those vectors using a transforming vector value.
+The example below demonstrates how to transform vectors using a transforming vector.
<div class="codetabs">
<div data-lang="scala">
@@ -514,16 +514,44 @@ import org.apache.spark.SparkContext._
import org.apache.spark.mllib.feature.ElementwiseProduct
import org.apache.spark.mllib.linalg.Vectors
-// Load and parse the data:
-val data = sc.textFile("data/mllib/kmeans_data.txt")
-val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))
+// Create some vector data; also works for sparse vectors
+val data = sc.parallelize(Array(Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)))
val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
val transformer = new ElementwiseProduct(transformingVector)
// Batch transform and per-row transform give the same results:
-val transformedData = transformer.transform(parsedData)
-val transformedData2 = parsedData.map(x => transformer.transform(x))
+val transformedData = transformer.transform(data)
+val transformedData2 = data.map(x => transformer.transform(x))
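+
+// Each result vector is the componentwise product with the transforming vector:
+// [1.0, 2.0, 3.0] becomes [0.0, 2.0, 6.0] and [4.0, 5.0, 6.0] becomes [0.0, 5.0, 12.0].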
+
+{% endhighlight %}
+</div>
+
+<div data-lang="java">
+{% highlight java %}
+import java.util.Arrays;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.mllib.feature.ElementwiseProduct;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+
+// Create some vector data; also works for sparse vectors
+JavaRDD<Vector> data = sc.parallelize(Arrays.asList(
+ Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)));
+Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+final ElementwiseProduct transformer = new ElementwiseProduct(transformingVector);
+
+// Batch transform and per-row transform give the same results:
+JavaRDD<Vector> transformedData = transformer.transform(data);
+JavaRDD<Vector> transformedData2 = data.map(
+ new Function<Vector, Vector>() {
+ @Override
+ public Vector call(Vector v) {
+ return transformer.transform(v);
+ }
+ }
+);
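+
+// transformedData and transformedData2 both hold [0.0, 2.0, 6.0] and [0.0, 5.0, 12.0],
+// the componentwise products of the input vectors with the transforming vector.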
{% endhighlight %}
</div>