aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author: BenFradet <benjamin.fradet@gmail.com> 2015-12-08 12:45:34 -0800
committer: Joseph K. Bradley <joseph@databricks.com> 2015-12-08 12:45:34 -0800
commit: 06746b3005e5e9892d0314bee3bfdfaebc36d3d4 (patch)
tree: d0bdd5af1a56b07fe00c6c5c44a0da3f276338e6 /examples
parent: 5cb4695051e3dac847b1ea14d62e54dcf672c31c (diff)
download: spark-06746b3005e5e9892d0314bee3bfdfaebc36d3d4.tar.gz
spark-06746b3005e5e9892d0314bee3bfdfaebc36d3d4.tar.bz2
spark-06746b3005e5e9892d0314bee3bfdfaebc36d3d4.zip
[SPARK-12159][ML] Add user guide section for IndexToString transformer
Documentation regarding the `IndexToString` label transformer with code snippets in Scala/Java/Python. Author: BenFradet <benjamin.fradet@gmail.com> Closes #10166 from BenFradet/SPARK-12159.
Diffstat (limited to 'examples')
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java 75
-rw-r--r-- examples/src/main/python/ml/index_to_string_example.py 45
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala 60
3 files changed, 180 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java
new file mode 100644
index 0000000000..3ccd699326
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.feature.IndexToString;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaIndexToStringExample {
+ public static void main(String[] args) {
+ SparkConf conf = new SparkConf().setAppName("JavaIndexToStringExample");
+ JavaSparkContext jsc = new JavaSparkContext(conf);
+ SQLContext sqlContext = new SQLContext(jsc);
+
+ // $example on$
+ JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(0, "a"),
+ RowFactory.create(1, "b"),
+ RowFactory.create(2, "c"),
+ RowFactory.create(3, "a"),
+ RowFactory.create(4, "a"),
+ RowFactory.create(5, "c")
+ ));
+ StructType schema = new StructType(new StructField[]{
+ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+ new StructField("category", DataTypes.StringType, false, Metadata.empty())
+ });
+ DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+
+ StringIndexerModel indexer = new StringIndexer()
+ .setInputCol("category")
+ .setOutputCol("categoryIndex")
+ .fit(df);
+ DataFrame indexed = indexer.transform(df);
+
+ IndexToString converter = new IndexToString()
+ .setInputCol("categoryIndex")
+ .setOutputCol("originalCategory");
+ DataFrame converted = converter.transform(indexed);
+ converted.select("id", "originalCategory").show();
+ // $example off$
+ jsc.stop();
+ }
+}
diff --git a/examples/src/main/python/ml/index_to_string_example.py b/examples/src/main/python/ml/index_to_string_example.py
new file mode 100644
index 0000000000..fb0ba2950b
--- /dev/null
+++ b/examples/src/main/python/ml/index_to_string_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.ml.feature import IndexToString, StringIndexer
+# $example off$
+from pyspark.sql import SQLContext
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="IndexToStringExample")
+ sqlContext = SQLContext(sc)
+
+ # $example on$
+ df = sqlContext.createDataFrame(
+ [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
+ ["id", "category"])
+
+ stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+ model = stringIndexer.fit(df)
+ indexed = model.transform(df)
+
+ converter = IndexToString(inputCol="categoryIndex", outputCol="originalCategory")
+ converted = converter.transform(indexed)
+
+ converted.select("id", "originalCategory").show()
+ # $example off$
+
+ sc.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala
new file mode 100644
index 0000000000..52537e5bb5
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.ml.feature.{StringIndexer, IndexToString}
+// $example off$
+
+/**
+ * Example program for the `IndexToString` transformer: a string column is indexed with
+ * `StringIndexer`, and `IndexToString` is then applied to the resulting index column.
+ * The "id" and "originalCategory" columns of the final DataFrame are printed.
+ */
+object IndexToStringExample {
+  /**
+   * Runs the example end to end.
+   *
+   * @param args command-line arguments; not read by this example
+   */
+  // Explicit `: Unit =` replaces the deprecated procedure syntax `def main(...) { ... }`.
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("IndexToStringExample")
+    val sc = new SparkContext(conf)
+
+    val sqlContext = SQLContext.getOrCreate(sc)
+
+    // $example on$
+    // Six (id, category) rows; toDF assigns the column names.
+    val df = sqlContext.createDataFrame(Seq(
+      (0, "a"),
+      (1, "b"),
+      (2, "c"),
+      (3, "a"),
+      (4, "a"),
+      (5, "c")
+    )).toDF("id", "category")
+
+    // Fit the indexer on df; the fitted model produces "categoryIndex".
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df)
+    val indexed = indexer.transform(df)
+
+    // Apply IndexToString to "categoryIndex", writing its output to "originalCategory".
+    val converter = new IndexToString()
+      .setInputCol("categoryIndex")
+      .setOutputCol("originalCategory")
+
+    val converted = converter.transform(indexed)
+    converted.select("id", "originalCategory").show()
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println