From 98b212d36b34ab490c391ea2adf5b141e4fb9289 Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Sat, 12 Dec 2015 17:47:01 -0800 Subject: [SPARK-12199][DOC] Follow-up: Refine example code in ml-features.md https://issues.apache.org/jira/browse/SPARK-12199 Follow-up PR of SPARK-11551. Fix some errors in ml-features.md mengxr Author: Xusen Yin Closes #10193 from yinxusen/SPARK-12199. --- docs/ml-features.md | 22 ++++----- .../spark/examples/ml/JavaBinarizerExample.java | 2 +- .../main/python/ml/polynomial_expansion_example.py | 6 +-- .../examples/ml/ElementWiseProductExample.scala | 52 ---------------------- .../examples/ml/ElementwiseProductExample.scala | 52 ++++++++++++++++++++++ 5 files changed, 67 insertions(+), 67 deletions(-) delete mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala diff --git a/docs/ml-features.md b/docs/ml-features.md index 8b00cc652d..158f3f2018 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -63,7 +63,7 @@ the [IDF Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.IDF) for mor `Word2VecModel`. The model maps each word to a unique fixed-size vector. The `Word2VecModel` transforms each document into a vector using the average of all words in the document; this vector can then be used for as features for prediction, document similarity calculations, etc. -Please refer to the [MLlib user guide on Word2Vec](mllib-feature-extraction.html#Word2Vec) for more +Please refer to the [MLlib user guide on Word2Vec](mllib-feature-extraction.html#word2Vec) for more details. In the following code segment, we start with a set of documents, each of which is represented as a sequence of words. For each document, we transform it into a feature vector. This feature vector could then be passed to a learning algorithm. @@ -411,7 +411,7 @@ for more details on the API. Refer to the [DCT Java docs](api/java/org/apache/spark/ml/feature/DCT.html) for more details on the API. -{% include_example java/org/apache/spark/examples/ml/JavaDCTExample.java %}} +{% include_example java/org/apache/spark/examples/ml/JavaDCTExample.java %} @@ -669,7 +669,7 @@ for more details on the API. The following example demonstrates how to load a dataset in libsvm format and then normalize each row to have unit $L^2$ norm and unit $L^\infty$ norm.
-<div data-lang="scala">
+<div data-lang="scala" markdown="1">
Refer to the [Normalizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Normalizer) for more details on the API. @@ -677,7 +677,7 @@ for more details on the API. {% include_example scala/org/apache/spark/examples/ml/NormalizerExample.scala %}
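For orientation, here is a minimal Scala sketch in the spirit of the NormalizerExample.scala referenced above, assuming the 1.6-era spark.ml API. The object name, column names, and in-memory data are illustrative assumptions; the actual example reads a libsvm file instead.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext

object NormalizerSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("NormalizerSketch"))
    val sqlContext = new SQLContext(sc)

    // In-memory stand-in for the libsvm dataset used by the real example.
    val dataFrame = sqlContext.createDataFrame(Seq(
      (0, Vectors.dense(1.0, 0.5, -1.0)),
      (1, Vectors.dense(2.0, 1.0, 1.0)),
      (2, Vectors.dense(4.0, 10.0, 2.0))
    )).toDF("id", "features")

    // Normalize each row (Vector) to unit L^2 norm.
    val normalizer = new Normalizer()
      .setInputCol("features")
      .setOutputCol("normFeatures")
      .setP(2.0)
    normalizer.transform(dataFrame).show()

    // The p norm can also be overridden per call, e.g. unit L^inf (max) norm.
    normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity).show()

    sc.stop()
  }
}

Because Normalizer is a stateless Transformer, the p norm can be changed per transform call, as the last transform shows.
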
-<div data-lang="java">
+<div data-lang="java" markdown="1">
Refer to the [Normalizer Java docs](api/java/org/apache/spark/ml/feature/Normalizer.html) for more details on the API. @@ -685,7 +685,7 @@ for more details on the API. {% include_example java/org/apache/spark/examples/ml/JavaNormalizerExample.java %}
-<div data-lang="python">
+<div data-lang="python" markdown="1">
Refer to the [Normalizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Normalizer) for more details on the API. @@ -709,7 +709,7 @@ Note that if the standard deviation of a feature is zero, it will return default The following example demonstrates how to load a dataset in libsvm format and then normalize each feature to have unit standard deviation.
-<div data-lang="scala">
+<div data-lang="scala" markdown="1">
Refer to the [StandardScaler Scala docs](api/scala/index.html#org.apache.spark.ml.feature.StandardScaler) for more details on the API. @@ -717,7 +717,7 @@ for more details on the API. {% include_example scala/org/apache/spark/examples/ml/StandardScalerExample.scala %}
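Similarly, a minimal Scala sketch in the spirit of the StandardScalerExample.scala referenced above (object name, column names, and in-memory data are illustrative assumptions; the actual example reads a libsvm file):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext

object StandardScalerSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("StandardScalerSketch"))
    val sqlContext = new SQLContext(sc)

    // In-memory stand-in for the libsvm dataset used by the real example.
    val dataFrame = sqlContext.createDataFrame(Seq(
      (0, Vectors.dense(1.0, 0.1, -1.0)),
      (1, Vectors.dense(2.0, 1.1, 1.0)),
      (2, Vectors.dense(3.0, 10.1, 3.0))
    )).toDF("id", "features")

    val scaler = new StandardScaler()
      .setInputCol("features")
      .setOutputCol("scaledFeatures")
      .setWithStd(true)   // scale each feature to unit standard deviation
      .setWithMean(false) // leave features uncentered (the default), preserving sparsity

    // Unlike Normalizer, StandardScaler is an Estimator: fit computes the
    // per-feature statistics, and the fitted model applies them.
    val scalerModel = scaler.fit(dataFrame)
    scalerModel.transform(dataFrame).show()

    sc.stop()
  }
}
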
-<div data-lang="java">
+<div data-lang="java" markdown="1">
Refer to the [StandardScaler Java docs](api/java/org/apache/spark/ml/feature/StandardScaler.html) for more details on the API. @@ -725,7 +725,7 @@ for more details on the API. {% include_example java/org/apache/spark/examples/ml/JavaStandardScalerExample.java %}
-<div data-lang="python">
+<div data-lang="python" markdown="1">
Refer to the [StandardScaler Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.StandardScaler) for more details on the API. @@ -788,7 +788,7 @@ More details can be found in the API docs for [Bucketizer](api/scala/index.html# The following example demonstrates how to bucketize a column of `Double`s into another index-wised column.
-<div data-lang="scala">
+<div data-lang="scala" markdown="1">
Refer to the [Bucketizer Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer) for more details on the API. @@ -796,7 +796,7 @@ for more details on the API. {% include_example scala/org/apache/spark/examples/ml/BucketizerExample.scala %}
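And a minimal Scala sketch in the spirit of the BucketizerExample.scala referenced above (splits, data values, and column names are illustrative assumptions):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.feature.Bucketizer
import org.apache.spark.sql.SQLContext

object BucketizerSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("BucketizerSketch"))
    val sqlContext = new SQLContext(sc)

    // n+1 split points define n buckets; the +/- infinity endpoints catch
    // values outside the finite boundaries.
    val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)

    val data = Array(-999.9, -0.5, -0.3, 0.0, 0.2, 999.9)
    val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")

    val bucketizer = new Bucketizer()
      .setInputCol("features")
      .setOutputCol("bucketedFeatures")
      .setSplits(splits)

    // Each Double is replaced by the index of the bucket it falls into.
    bucketizer.transform(dataFrame).show()

    sc.stop()
  }
}

Each input Double is mapped to the index of the bucket containing it, which is what the doc text above calls an index-wised column.
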
-<div data-lang="java">
+<div data-lang="java" markdown="1">
Refer to the [Bucketizer Java docs](api/java/org/apache/spark/ml/feature/Bucketizer.html) for more details on the API. @@ -804,7 +804,7 @@ for more details on the API. {% include_example java/org/apache/spark/examples/ml/JavaBucketizerExample.java %}
-<div data-lang="python">
+<div data-lang="python" markdown="1">
Refer to the [Bucketizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Bucketizer) for more details on the API. diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java index 9698cac504..1eda1f694f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java @@ -59,7 +59,7 @@ public class JavaBinarizerExample { DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame); DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature"); for (Row r : binarizedFeatures.collect()) { - Double binarized_value = r.getDouble(0); + Double binarized_value = r.getDouble(0); System.out.println(binarized_value); } // $example off$ diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py index 3d4fafd1a4..89f5cbe8f2 100644 --- a/examples/src/main/python/ml/polynomial_expansion_example.py +++ b/examples/src/main/python/ml/polynomial_expansion_example.py @@ -30,9 +30,9 @@ if __name__ == "__main__": # $example on$ df = sqlContext\ - .createDataFrame([(Vectors.dense([-2.0, 2.3]), ), - (Vectors.dense([0.0, 0.0]), ), - (Vectors.dense([0.6, -1.1]), )], + .createDataFrame([(Vectors.dense([-2.0, 2.3]),), + (Vectors.dense([0.0, 0.0]),), + (Vectors.dense([0.6, -1.1]),)], ["features"]) px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures") polyDF = px.transform(df) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala deleted file mode 100644 index 872de51dc7..0000000000 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// scalastyle:off println -package org.apache.spark.examples.ml - -// $example on$ -import org.apache.spark.ml.feature.ElementwiseProduct -import org.apache.spark.mllib.linalg.Vectors -// $example off$ -import org.apache.spark.sql.SQLContext -import org.apache.spark.{SparkConf, SparkContext} - -object ElementwiseProductExample { - def main(args: Array[String]): Unit = { - val conf = new SparkConf().setAppName("ElementwiseProductExample") - val sc = new SparkContext(conf) - val sqlContext = new SQLContext(sc) - - // $example on$ - // Create some vector data; also works for sparse vectors - val dataFrame = sqlContext.createDataFrame(Seq( - ("a", Vectors.dense(1.0, 2.0, 3.0)), - ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector") - - val transformingVector = Vectors.dense(0.0, 1.0, 2.0) - val transformer = new ElementwiseProduct() - .setScalingVec(transformingVector) - .setInputCol("vector") - .setOutputCol("transformedVector") - - // Batch transform the vectors to create new column: - transformer.transform(dataFrame).show() - // $example off$ - sc.stop() - } -} -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala new file mode 100644 index 0000000000..872de51dc7 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +// $example on$ +import org.apache.spark.ml.feature.ElementwiseProduct +import org.apache.spark.mllib.linalg.Vectors +// $example off$ +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkConf, SparkContext} + +object ElementwiseProductExample { + def main(args: Array[String]): Unit = { + val conf = new SparkConf().setAppName("ElementwiseProductExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + // $example on$ + // Create some vector data; also works for sparse vectors + val dataFrame = sqlContext.createDataFrame(Seq( + ("a", Vectors.dense(1.0, 2.0, 3.0)), + ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector") + + val transformingVector = Vectors.dense(0.0, 1.0, 2.0) + val transformer = new ElementwiseProduct() + .setScalingVec(transformingVector) + .setInputCol("vector") + .setOutputCol("transformedVector") + + // Batch transform the vectors to create new column: + transformer.transform(dataFrame).show() + // $example off$ + sc.stop() + } +} +// scalastyle:on println -- cgit v1.2.3