diff options
Diffstat (limited to 'docs/mllib-feature-extraction.md')
-rw-r--r-- | docs/mllib-feature-extraction.md | 22 |
1 file changed, 22 insertions, 0 deletions
diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md
index 4fe470a8de..1197dbbb8d 100644
--- a/docs/mllib-feature-extraction.md
+++ b/docs/mllib-feature-extraction.md
@@ -560,6 +560,28 @@ JavaRDD<Vector> transformedData2 = data.map(
 {% endhighlight %}
 </div>
+
+<div data-lang="python">
+{% highlight python %}
+from pyspark import SparkContext
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.feature import ElementwiseProduct
+
+# Load and parse the data
+sc = SparkContext()
+data = sc.textFile("data/mllib/kmeans_data.txt")
+parsedData = data.map(lambda x: [float(t) for t in x.split(" ")])
+
+# Create weight vector.
+transformingVector = Vectors.dense([0.0, 1.0, 2.0])
+transformer = ElementwiseProduct(transformingVector)
+
+# Batch transform.
+transformedData = transformer.transform(parsedData)
+transformedData2 = transformer.transform(parsedData.first())
+
+{% endhighlight %}
+</div>
 </div>