author     Cheng Lian <lian@databricks.com>    2015-12-08 19:18:59 +0800
committer  Cheng Lian <lian@databricks.com>    2015-12-08 19:18:59 +0800
commit     da2012a0e152aa078bdd19a5c7f91786a2dd7016 (patch)
tree       1f00975b821733925effbaf0090a40795c50d669 /examples
parent     037b7e76a7f8b59e031873a768d81417dd180472 (diff)
[SPARK-11551][DOC][EXAMPLE] Revert PR #10002
This reverts PR #10002, commit 78209b0ccaf3f22b5e2345dfb2b98edfdb746819. The original PR wasn't tested on Jenkins before being merged.

Author: Cheng Lian <lian@databricks.com>

Closes #10200 from liancheng/revert-pr-10002.
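For reference, a revert commit like this one is normally produced with git's built-in revert command. A minimal sketch, assuming a clean checkout of the branch that merged PR #10002:

    # revert the commit that originally added these example files
    git revert 78209b0ccaf3f22b5e2345dfb2b98edfdb746819

The resulting commit deletes everything the original change added, which is why the diffstat below shows 0 insertions and 2755 deletions.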
Diffstat (limited to 'examples')
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java | 68
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java | 70
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java | 65
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java | 75
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java | 50
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java | 71
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java | 52
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java | 77
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java | 71
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java | 71
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java | 69
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java | 53
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java | 65
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java | 66
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java | 75
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java | 67
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java | 60
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java | 73
-rw-r--r-- examples/src/main/python/ml/binarizer_example.py | 43
-rw-r--r-- examples/src/main/python/ml/bucketizer_example.py | 42
-rw-r--r-- examples/src/main/python/ml/elementwise_product_example.py | 39
-rw-r--r-- examples/src/main/python/ml/n_gram_example.py | 42
-rw-r--r-- examples/src/main/python/ml/normalizer_example.py | 41
-rw-r--r-- examples/src/main/python/ml/onehot_encoder_example.py | 47
-rw-r--r-- examples/src/main/python/ml/pca_example.py | 42
-rw-r--r-- examples/src/main/python/ml/polynomial_expansion_example.py | 43
-rw-r--r-- examples/src/main/python/ml/rformula_example.py | 44
-rw-r--r-- examples/src/main/python/ml/standard_scaler_example.py | 42
-rw-r--r-- examples/src/main/python/ml/stopwords_remover_example.py | 40
-rw-r--r-- examples/src/main/python/ml/string_indexer_example.py | 39
-rw-r--r-- examples/src/main/python/ml/tokenizer_example.py | 44
-rw-r--r-- examples/src/main/python/ml/vector_assembler_example.py | 42
-rw-r--r-- examples/src/main/python/ml/vector_indexer_example.py | 39
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala | 48
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala | 51
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala | 54
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala | 53
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala | 49
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala | 47
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala | 50
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala | 58
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala | 54
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala | 53
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala | 49
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala | 51
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala | 48
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala | 49
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala | 54
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala | 49
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala | 53
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala | 58
51 files changed, 0 insertions(+), 2755 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
deleted file mode 100644
index 9698cac504..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.Binarizer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaBinarizerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaBinarizerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(0, 0.1),
- RowFactory.create(1, 0.8),
- RowFactory.create(2, 0.2)
- ));
- StructType schema = new StructType(new StructField[]{
- new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
- new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
- });
- DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema);
- Binarizer binarizer = new Binarizer()
- .setInputCol("feature")
- .setOutputCol("binarized_feature")
- .setThreshold(0.5);
- DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame);
- DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature");
- for (Row r : binarizedFeatures.collect()) {
- Double binarized_value = r.getDouble(0);
- System.out.println(binarized_value);
- }
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
deleted file mode 100644
index b06a23e76d..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.Bucketizer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaBucketizerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaBucketizerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
-
- JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
- RowFactory.create(-0.5),
- RowFactory.create(-0.3),
- RowFactory.create(0.0),
- RowFactory.create(0.2)
- ));
- StructType schema = new StructType(new StructField[]{
- new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
- });
- DataFrame dataFrame = jsql.createDataFrame(data, schema);
-
- Bucketizer bucketizer = new Bucketizer()
- .setInputCol("features")
- .setOutputCol("bucketedFeatures")
- .setSplits(splits);
-
- // Transform original data into its bucket index.
- DataFrame bucketedData = bucketizer.transform(dataFrame);
- // $example off$
- jsc.stop();
- }
-}
-
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
deleted file mode 100644
index 35c0d534a4..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.DCT;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaDCTExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaDCTExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
- RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)),
- RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)),
- RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0))
- ));
- StructType schema = new StructType(new StructField[]{
- new StructField("features", new VectorUDT(), false, Metadata.empty()),
- });
- DataFrame df = jsql.createDataFrame(data, schema);
- DCT dct = new DCT()
- .setInputCol("features")
- .setOutputCol("featuresDCT")
- .setInverse(false);
- DataFrame dctDf = dct.transform(df);
- dctDf.select("featuresDCT").show(3);
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
deleted file mode 100644
index 2898accec6..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.ElementwiseProduct;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaElementwiseProductExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- // Create some vector data; also works for sparse vectors
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
- RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
- ));
-
- List<StructField> fields = new ArrayList<StructField>(2);
- fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
- fields.add(DataTypes.createStructField("vector", new VectorUDT(), false));
-
- StructType schema = DataTypes.createStructType(fields);
-
- DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema);
-
- Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
-
- ElementwiseProduct transformer = new ElementwiseProduct()
- .setScalingVec(transformingVector)
- .setInputCol("vector")
- .setOutputCol("transformedVector");
-
- // Batch transform the vectors to create new column:
- transformer.transform(dataFrame).show();
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
deleted file mode 100644
index 138b3ab6ab..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import org.apache.spark.ml.feature.MinMaxScaler;
-import org.apache.spark.ml.feature.MinMaxScalerModel;
-import org.apache.spark.sql.DataFrame;
-// $example off$
-
-public class JavaMinMaxScalerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JaveMinMaxScalerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
- MinMaxScaler scaler = new MinMaxScaler()
- .setInputCol("features")
- .setOutputCol("scaledFeatures");
-
- // Compute summary statistics and generate MinMaxScalerModel
- MinMaxScalerModel scalerModel = scaler.fit(dataFrame);
-
- // rescale each feature to range [min, max].
- DataFrame scaledData = scalerModel.transform(dataFrame);
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
deleted file mode 100644
index 8fd75ed8b5..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.NGram;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaNGramExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaNGramExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
- RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
- RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
- new StructField(
- "words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
- });
-
- DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema);
-
- NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");
-
- DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame);
-
- for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) {
- java.util.List<String> ngrams = r.getList(0);
- for (String ngram : ngrams) System.out.print(ngram + " --- ");
- System.out.println();
- }
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
deleted file mode 100644
index 6283a355e1..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import org.apache.spark.ml.feature.Normalizer;
-import org.apache.spark.sql.DataFrame;
-// $example off$
-
-public class JavaNormalizerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaNormalizerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
-
- // Normalize each Vector using $L^1$ norm.
- Normalizer normalizer = new Normalizer()
- .setInputCol("features")
- .setOutputCol("normFeatures")
- .setP(1.0);
-
- DataFrame l1NormData = normalizer.transform(dataFrame);
-
- // Normalize each Vector using $L^\infty$ norm.
- DataFrame lInfNormData =
- normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
deleted file mode 100644
index 172a9cc6fe..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.OneHotEncoder;
-import org.apache.spark.ml.feature.StringIndexer;
-import org.apache.spark.ml.feature.StringIndexerModel;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaOneHotEncoderExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoderExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(0, "a"),
- RowFactory.create(1, "b"),
- RowFactory.create(2, "c"),
- RowFactory.create(3, "a"),
- RowFactory.create(4, "a"),
- RowFactory.create(5, "c")
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
- new StructField("category", DataTypes.StringType, false, Metadata.empty())
- });
-
- DataFrame df = sqlContext.createDataFrame(jrdd, schema);
-
- StringIndexerModel indexer = new StringIndexer()
- .setInputCol("category")
- .setOutputCol("categoryIndex")
- .fit(df);
- DataFrame indexed = indexer.transform(df);
-
- OneHotEncoder encoder = new OneHotEncoder()
- .setInputCol("categoryIndex")
- .setOutputCol("categoryVec");
- DataFrame encoded = encoder.transform(indexed);
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
deleted file mode 100644
index 8282fab084..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.PCA;
-import org.apache.spark.ml.feature.PCAModel;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaPCAExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaPCAExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
- RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
- RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
- RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField("features", new VectorUDT(), false, Metadata.empty()),
- });
-
- DataFrame df = jsql.createDataFrame(data, schema);
-
- PCAModel pca = new PCA()
- .setInputCol("features")
- .setOutputCol("pcaFeatures")
- .setK(3)
- .fit(df);
-
- DataFrame result = pca.transform(df).select("pcaFeatures");
- result.show();
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
deleted file mode 100644
index 668f71e640..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.PolynomialExpansion;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaPolynomialExpansionExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaPolynomialExpansionExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- PolynomialExpansion polyExpansion = new PolynomialExpansion()
- .setInputCol("features")
- .setOutputCol("polyFeatures")
- .setDegree(3);
-
- JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
- RowFactory.create(Vectors.dense(-2.0, 2.3)),
- RowFactory.create(Vectors.dense(0.0, 0.0)),
- RowFactory.create(Vectors.dense(0.6, -1.1))
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField("features", new VectorUDT(), false, Metadata.empty()),
- });
-
- DataFrame df = jsql.createDataFrame(data, schema);
- DataFrame polyDF = polyExpansion.transform(df);
-
- Row[] row = polyDF.select("polyFeatures").take(3);
- for (Row r : row) {
- System.out.println(r.get(0));
- }
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
deleted file mode 100644
index 1e1062b541..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.RFormula;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-import static org.apache.spark.sql.types.DataTypes.*;
-// $example off$
-
-public class JavaRFormulaExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaRFormulaExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- StructType schema = createStructType(new StructField[]{
- createStructField("id", IntegerType, false),
- createStructField("country", StringType, false),
- createStructField("hour", IntegerType, false),
- createStructField("clicked", DoubleType, false)
- });
-
- JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(7, "US", 18, 1.0),
- RowFactory.create(8, "CA", 12, 0.0),
- RowFactory.create(9, "NZ", 15, 0.0)
- ));
-
- DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
- RFormula formula = new RFormula()
- .setFormula("clicked ~ country + hour")
- .setFeaturesCol("features")
- .setLabelCol("label");
- DataFrame output = formula.fit(dataset).transform(dataset);
- output.select("features", "label").show();
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
deleted file mode 100644
index 0cbdc97e8a..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import org.apache.spark.ml.feature.StandardScaler;
-import org.apache.spark.ml.feature.StandardScalerModel;
-import org.apache.spark.sql.DataFrame;
-// $example off$
-
-public class JavaStandardScalerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaStandardScalerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- DataFrame dataFrame = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
-
- StandardScaler scaler = new StandardScaler()
- .setInputCol("features")
- .setOutputCol("scaledFeatures")
- .setWithStd(true)
- .setWithMean(false);
-
- // Compute summary statistics by fitting the StandardScaler
- StandardScalerModel scalerModel = scaler.fit(dataFrame);
-
- // Normalize each feature to have unit standard deviation.
- DataFrame scaledData = scalerModel.transform(dataFrame);
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
deleted file mode 100644
index b6b201c6b6..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.StopWordsRemover;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaStopWordsRemoverExample {
-
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemoverExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- StopWordsRemover remover = new StopWordsRemover()
- .setInputCol("raw")
- .setOutputCol("filtered");
-
- JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
- RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField(
- "raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
- });
-
- DataFrame dataset = jsql.createDataFrame(rdd, schema);
- remover.transform(dataset).show();
- // $example off$
- jsc.stop();
- }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
deleted file mode 100644
index 05d12c1e70..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.StringIndexer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-import static org.apache.spark.sql.types.DataTypes.*;
-// $example off$
-
-public class JavaStringIndexerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaStringIndexerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(0, "a"),
- RowFactory.create(1, "b"),
- RowFactory.create(2, "c"),
- RowFactory.create(3, "a"),
- RowFactory.create(4, "a"),
- RowFactory.create(5, "c")
- ));
- StructType schema = new StructType(new StructField[]{
- createStructField("id", IntegerType, false),
- createStructField("category", StringType, false)
- });
- DataFrame df = sqlContext.createDataFrame(jrdd, schema);
- StringIndexer indexer = new StringIndexer()
- .setInputCol("category")
- .setOutputCol("categoryIndex");
- DataFrame indexed = indexer.fit(df).transform(df);
- indexed.show();
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
deleted file mode 100644
index 617dc3f66e..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.RegexTokenizer;
-import org.apache.spark.ml.feature.Tokenizer;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-// $example off$
-
-public class JavaTokenizerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaTokenizerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
- RowFactory.create(0, "Hi I heard about Spark"),
- RowFactory.create(1, "I wish Java could use case classes"),
- RowFactory.create(2, "Logistic,regression,models,are,neat")
- ));
-
- StructType schema = new StructType(new StructField[]{
- new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
- new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
- });
-
- DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema);
-
- Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
-
- DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
- for (Row r : wordsDataFrame.select("words", "label"). take(3)) {
- java.util.List<String> words = r.getList(0);
- for (String word : words) System.out.print(word + " ");
- System.out.println();
- }
-
- RegexTokenizer regexTokenizer = new RegexTokenizer()
- .setInputCol("sentence")
- .setOutputCol("words")
- .setPattern("\\W"); // alternatively .setPattern("\\w+").setGaps(false);
- // $example off$
- jsc.stop();
- }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
deleted file mode 100644
index 7e230b5897..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.feature.VectorAssembler;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.*;
-
-import static org.apache.spark.sql.types.DataTypes.*;
-// $example off$
-
-public class JavaVectorAssemblerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaVectorAssemblerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext sqlContext = new SQLContext(jsc);
-
- // $example on$
- StructType schema = createStructType(new StructField[]{
- createStructField("id", IntegerType, false),
- createStructField("hour", IntegerType, false),
- createStructField("mobile", DoubleType, false),
- createStructField("userFeatures", new VectorUDT(), false),
- createStructField("clicked", DoubleType, false)
- });
- Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
- JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(row));
- DataFrame dataset = sqlContext.createDataFrame(rdd, schema);
-
- VectorAssembler assembler = new VectorAssembler()
- .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
- .setOutputCol("features");
-
- DataFrame output = assembler.transform(dataset);
- System.out.println(output.select("features", "clicked").first());
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
deleted file mode 100644
index 06b4bf6bf8..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import java.util.Map;
-
-import org.apache.spark.ml.feature.VectorIndexer;
-import org.apache.spark.ml.feature.VectorIndexerModel;
-import org.apache.spark.sql.DataFrame;
-// $example off$
-
-public class JavaVectorIndexerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaVectorIndexerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- DataFrame data = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
-
- VectorIndexer indexer = new VectorIndexer()
- .setInputCol("features")
- .setOutputCol("indexed")
- .setMaxCategories(10);
- VectorIndexerModel indexerModel = indexer.fit(data);
-
- Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps();
- System.out.print("Chose " + categoryMaps.size() + " categorical features:");
-
- for (Integer feature : categoryMaps.keySet()) {
- System.out.print(" " + feature);
- }
- System.out.println();
-
- // Create new column "indexed" with categorical values transformed to indices
- DataFrame indexedData = indexerModel.transform(data);
- // $example off$
- jsc.stop();
- }
-}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
deleted file mode 100644
index 4d5cb04ff5..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.ml;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SQLContext;
-
-// $example on$
-import com.google.common.collect.Lists;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.ml.attribute.Attribute;
-import org.apache.spark.ml.attribute.AttributeGroup;
-import org.apache.spark.ml.attribute.NumericAttribute;
-import org.apache.spark.ml.feature.VectorSlicer;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.*;
-// $example off$
-
-public class JavaVectorSlicerExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("JavaVectorSlicerExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
- SQLContext jsql = new SQLContext(jsc);
-
- // $example on$
- Attribute[] attrs = new Attribute[]{
- NumericAttribute.defaultAttr().withName("f1"),
- NumericAttribute.defaultAttr().withName("f2"),
- NumericAttribute.defaultAttr().withName("f3")
- };
- AttributeGroup group = new AttributeGroup("userFeatures", attrs);
-
- JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
- RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
- ));
-
- DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField()));
-
- VectorSlicer vectorSlicer = new VectorSlicer()
- .setInputCol("userFeatures").setOutputCol("features");
-
- vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});
- // or slicer.setIndices(new int[]{1, 2}), or slicer.setNames(new String[]{"f2", "f3"})
-
- DataFrame output = vectorSlicer.transform(dataset);
-
- System.out.println(output.select("userFeatures", "features").first());
- // $example off$
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py
deleted file mode 100644
index 960ad208be..0000000000
--- a/examples/src/main/python/ml/binarizer_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Binarizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="BinarizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- continuousDataFrame = sqlContext.createDataFrame([
- (0, 0.1),
- (1, 0.8),
- (2, 0.2)
- ], ["label", "feature"])
- binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
- binarizedDataFrame = binarizer.transform(continuousDataFrame)
- binarizedFeatures = binarizedDataFrame.select("binarized_feature")
- for binarized_feature, in binarizedFeatures.collect():
- print(binarized_feature)
- # $example off$
-
- sc.stop()
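Binarizer thresholds a continuous column into 0.0/1.0 (values strictly above the threshold become 1.0). A pure-Python sketch of the same rule, assuming the 0.5 threshold used above:

    threshold = 0.5
    features = [0.1, 0.8, 0.2]
    binarized = [1.0 if x > threshold else 0.0 for x in features]
    print(binarized)  # [0.0, 1.0, 0.0]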
diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py
deleted file mode 100644
index a12750aa92..0000000000
--- a/examples/src/main/python/ml/bucketizer_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Bucketizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="BucketizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
-
- data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
- dataFrame = sqlContext.createDataFrame(data, ["features"])
-
- bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
-
- # Transform original data into its bucket index.
- bucketedData = bucketizer.transform(dataFrame)
- # $example off$
-
- sc.stop()
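Bucketizer maps each value to the index of the half-open split interval that contains it. A sketch of that mapping, assuming the splits from the example:

    import bisect
    splits = [float("-inf"), -0.5, 0.0, 0.5, float("inf")]
    data = [-0.5, -0.3, 0.0, 0.2]
    # bucket i covers [splits[i], splits[i+1])
    buckets = [bisect.bisect_right(splits, x) - 1 for x in data]
    print(buckets)  # [1, 1, 2, 2]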
diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py
deleted file mode 100644
index c85cb0d895..0000000000
--- a/examples/src/main/python/ml/elementwise_product_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="ElementwiseProductExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
- df = sqlContext.createDataFrame(data, ["vector"])
- transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
- inputCol="vector", outputCol="transformedVector")
- transformer.transform(df).show()
- # $example off$
-
- sc.stop()
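ElementwiseProduct multiplies every input vector component-wise by a fixed scaling vector (a Hadamard product). A sketch with the scaling vector from the example:

    scaling = [0.0, 1.0, 2.0]
    rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
    for row in rows:
        print([a * b for a, b in zip(row, scaling)])
    # [0.0, 2.0, 6.0]
    # [0.0, 5.0, 12.0]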
diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py
deleted file mode 100644
index f2d85f53e7..0000000000
--- a/examples/src/main/python/ml/n_gram_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import NGram
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="NGramExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- wordDataFrame = sqlContext.createDataFrame([
- (0, ["Hi", "I", "heard", "about", "Spark"]),
- (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
- (2, ["Logistic", "regression", "models", "are", "neat"])
- ], ["label", "words"])
- ngram = NGram(inputCol="words", outputCol="ngrams")
- ngramDataFrame = ngram.transform(wordDataFrame)
- for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
- print(ngrams_label)
- # $example off$
-
- sc.stop()
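NGram slides a window of n consecutive tokens over each input array (n defaults to 2, as in this example). A sketch of the bigram logic:

    words = ["Hi", "I", "heard", "about", "Spark"]
    n = 2  # NGram's default
    ngrams = [" ".join(words[i:i + n]) for i in range(len(words) - n + 1)]
    print(ngrams)  # ['Hi I', 'I heard', 'heard about', 'about Spark']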
diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py
deleted file mode 100644
index 833d93e976..0000000000
--- a/examples/src/main/python/ml/normalizer_example.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Normalizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="NormalizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- # Normalize each Vector using $L^1$ norm.
- normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
- l1NormData = normalizer.transform(dataFrame)
-
- # Normalize each Vector using $L^\infty$ norm.
- lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
- # $example off$
-
- sc.stop()
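Normalizer rescales each row vector to unit p-norm: p=1.0 divides by the sum of absolute values, p=inf by the maximum absolute value. A sketch on a small made-up vector (the libsvm data itself is not reproduced here):

    def normalize(v, p):
        if p == float("inf"):
            norm = max(abs(x) for x in v)
        else:
            norm = sum(abs(x) ** p for x in v) ** (1.0 / p)
        return [x / norm for x in v]

    print(normalize([1.0, -2.0, 1.0], 1.0))           # [0.25, -0.5, 0.25]
    print(normalize([1.0, -2.0, 1.0], float("inf")))  # [0.5, -1.0, 0.5]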
diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py
deleted file mode 100644
index 7529dfd092..0000000000
--- a/examples/src/main/python/ml/onehot_encoder_example.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import OneHotEncoder, StringIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="OneHotEncoderExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame([
- (0, "a"),
- (1, "b"),
- (2, "c"),
- (3, "a"),
- (4, "a"),
- (5, "c")
- ], ["id", "category"])
-
- stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
- model = stringIndexer.fit(df)
- indexed = model.transform(df)
- encoder = OneHotEncoder(dropLast=False, inputCol="categoryIndex", outputCol="categoryVec")
- encoded = encoder.transform(indexed)
- # $example off$
-
- sc.stop()
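StringIndexer assigns indices by descending label frequency (here a -> 0.0, c -> 1.0, b -> 2.0), and with dropLast=False the encoder then emits a full-length indicator vector. A pure-Python sketch of that pipeline, not the Spark API:

    from collections import Counter
    categories = ["a", "b", "c", "a", "a", "c"]
    by_freq = [cat for cat, _ in Counter(categories).most_common()]
    index = {cat: i for i, cat in enumerate(by_freq)}
    def one_hot(cat):  # dropLast=False keeps all positions
        vec = [0.0] * len(index)
        vec[index[cat]] = 1.0
        return vec
    print(index, one_hot("b"))  # {'a': 0, 'c': 1, 'b': 2} [0.0, 0.0, 1.0]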
diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py
deleted file mode 100644
index 8b66140a40..0000000000
--- a/examples/src/main/python/ml/pca_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="PCAExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
- (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
- (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
- df = sqlContext.createDataFrame(data,["features"])
- pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
- model = pca.fit(df)
- result = model.transform(df).select("pcaFeatures")
- result.show(truncate=False)
- # $example off$
-
- sc.stop()
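PCA fits the top-k principal directions from the data and projects each vector onto them; the projected values depend on the fitted data, so no fixed output is shown. The projection step itself is a matrix-vector product, sketched here with a hypothetical 2x2 components matrix for illustration only:

    # Hypothetical principal components (one per row) for a 2-D input.
    components = [[0.8, 0.6], [-0.6, 0.8]]
    x = [1.0, 2.0]
    projected = [sum(c * v for c, v in zip(row, x)) for row in components]
    print(projected)  # [2.0, 1.0]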
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
deleted file mode 100644
index 030a6132a4..0000000000
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="PolynomialExpansionExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame(
- [(Vectors.dense([-2.0, 2.3]), ),
- (Vectors.dense([0.0, 0.0]), ),
- (Vectors.dense([0.6, -1.1]), )],
- ["features"])
- px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures")
- polyDF = px.transform(df)
- for expanded in polyDF.select("polyFeatures").take(3):
- print(expanded)
- # $example off$
-
- sc.stop()
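PolynomialExpansion expands a vector into all monomials up to the given degree. For degree 2 on (x, y) that is x, x^2, y, xy, y^2 (the ordering shown here is assumed); a sketch for the first row above:

    x, y = -2.0, 2.3
    expanded = [x, x * x, y, x * y, y * y]
    print(expanded)  # [-2.0, 4.0, 2.3, -4.6, 5.289999999999999]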
diff --git a/examples/src/main/python/ml/rformula_example.py b/examples/src/main/python/ml/rformula_example.py
deleted file mode 100644
index b544a14700..0000000000
--- a/examples/src/main/python/ml/rformula_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import RFormula
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="RFormulaExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataset = sqlContext.createDataFrame(
- [(7, "US", 18, 1.0),
- (8, "CA", 12, 0.0),
- (9, "NZ", 15, 0.0)],
- ["id", "country", "hour", "clicked"])
- formula = RFormula(
- formula="clicked ~ country + hour",
- featuresCol="features",
- labelCol="label")
- output = formula.fit(dataset).transform(dataset)
- output.select("features", "label").show()
- # $example off$
-
- sc.stop()
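RFormula compiles an R-style formula into feature assembly: string terms such as country are one-hot encoded (with a reference level dropped) and numeric terms such as hour pass through, yielding a features vector plus a label column taken from clicked. A sketch of the assembly for one row, assuming "US" encodes to the indicator [1.0, 0.0]:

    country_vec = [1.0, 0.0]  # assumed encoding for "US" with one level dropped
    hour = 18.0
    features = country_vec + [hour]
    label = 1.0  # copied from the clicked column
    print(features, label)  # [1.0, 0.0, 18.0] 1.0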
diff --git a/examples/src/main/python/ml/standard_scaler_example.py b/examples/src/main/python/ml/standard_scaler_example.py
deleted file mode 100644
index 139acecbfb..0000000000
--- a/examples/src/main/python/ml/standard_scaler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StandardScaler
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StandardScalerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
- scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
- withStd=True, withMean=False)
-
- # Compute summary statistics by fitting the StandardScaler
- scalerModel = scaler.fit(dataFrame)
-
- # Normalize each feature to have unit standard deviation.
- scaledData = scalerModel.transform(dataFrame)
- # $example off$
-
- sc.stop()
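StandardScaler divides each feature by its column's sample standard deviation (withStd=True) and, when withMean is enabled, also centers it; here withMean=False. A sketch for one hypothetical column:

    col = [0.0, 10.0, 20.0]  # made-up feature column
    mean = sum(col) / len(col)
    std = (sum((x - mean) ** 2 for x in col) / (len(col) - 1)) ** 0.5
    print([x / std for x in col])  # unit-std column: [0.0, 1.0, 2.0]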
diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py
deleted file mode 100644
index 01f94af8ca..0000000000
--- a/examples/src/main/python/ml/stopwords_remover_example.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StopWordsRemover
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StopWordsRemoverExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- sentenceData = sqlContext.createDataFrame([
- (0, ["I", "saw", "the", "red", "baloon"]),
- (1, ["Mary", "had", "a", "little", "lamb"])
- ], ["label", "raw"])
-
- remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
- remover.transform(sentenceData).show(truncate=False)
- # $example off$
-
- sc.stop()
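StopWordsRemover drops tokens that appear in its stop-word list (English by default). A sketch of the filtering step, with a small assumed stop-word set rather than Spark's full default list:

    stop_words = {"i", "the", "a", "had"}  # tiny assumed subset of the defaults
    raw = ["I", "saw", "the", "red", "balloon"]
    print([w for w in raw if w.lower() not in stop_words])  # ['saw', 'red', 'balloon']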
diff --git a/examples/src/main/python/ml/string_indexer_example.py b/examples/src/main/python/ml/string_indexer_example.py
deleted file mode 100644
index 58a8cb5d56..0000000000
--- a/examples/src/main/python/ml/string_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StringIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StringIndexerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame(
- [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
- ["id", "category"])
- indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
- indexed = indexer.fit(df).transform(df)
- indexed.show()
- # $example off$
-
- sc.stop()
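StringIndexer maps string labels to doubles ordered by descending frequency, so the most common label gets 0.0. A sketch of the fit and transform on the column above:

    from collections import Counter
    labels = ["a", "b", "c", "a", "a", "c"]
    order = [lab for lab, _ in Counter(labels).most_common()]
    mapping = {lab: float(i) for i, lab in enumerate(order)}
    print([mapping[lab] for lab in labels])  # [0.0, 2.0, 1.0, 0.0, 0.0, 1.0]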
diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py
deleted file mode 100644
index ce9b225be5..0000000000
--- a/examples/src/main/python/ml/tokenizer_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Tokenizer, RegexTokenizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="TokenizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- sentenceDataFrame = sqlContext.createDataFrame([
- (0, "Hi I heard about Spark"),
- (1, "I wish Java could use case classes"),
- (2, "Logistic,regression,models,are,neat")
- ], ["label", "sentence"])
- tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
- wordsDataFrame = tokenizer.transform(sentenceDataFrame)
- for words_label in wordsDataFrame.select("words", "label").take(3):
- print(words_label)
- regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
- # alternatively, pattern="\\w+", gaps=False
- # $example off$
-
- sc.stop()
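Tokenizer lowercases and splits on whitespace, so the comma-separated sentence above stays a single token; RegexTokenizer with pattern "\\W" splits on non-word characters instead. A sketch of the difference:

    import re
    sentence = "Logistic,regression,models,are,neat"
    print(sentence.lower().split())            # ['logistic,regression,models,are,neat']
    print(re.split(r"\W+", sentence.lower()))  # ['logistic', 'regression', 'models', 'are', 'neat']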
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
deleted file mode 100644
index 04f64839f1..0000000000
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.mllib.linalg import Vectors
-from pyspark.ml.feature import VectorAssembler
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="VectorAssemblerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataset = sqlContext.createDataFrame(
- [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
- ["id", "hour", "mobile", "userFeatures", "clicked"])
- assembler = VectorAssembler(
- inputCols=["hour", "mobile", "userFeatures"],
- outputCol="features")
- output = assembler.transform(dataset)
- print(output.select("features", "clicked").first())
- # $example off$
-
- sc.stop()
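VectorAssembler concatenates the listed columns into one features vector, flattening any vector-valued column in place. A sketch with the row from the example:

    hour, mobile = 18.0, 1.0
    user_features = [0.0, 10.0, 0.5]
    features = [hour, mobile] + user_features
    print(features)  # [18.0, 1.0, 0.0, 10.0, 0.5]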
diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py
deleted file mode 100644
index cc00d1454f..0000000000
--- a/examples/src/main/python/ml/vector_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import VectorIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="VectorIndexerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
- indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
- indexerModel = indexer.fit(data)
-
- # Create new column "indexed" with categorical values transformed to indices
- indexedData = indexerModel.transform(data)
- # $example off$
-
- sc.stop()
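VectorIndexer scans each vector slot across the dataset: slots with at most maxCategories distinct values are treated as categorical and their values re-mapped to indices 0..k-1, while the rest stay continuous. A sketch of that per-slot decision, with a small made-up column:

    max_categories = 10
    column = [0.0, 3.0, 3.0, 7.0]  # hypothetical values of one vector slot
    distinct = sorted(set(column))
    if len(distinct) <= max_categories:  # categorical: re-index the values
        mapping = {v: float(i) for i, v in enumerate(distinct)}
        print([mapping[v] for v in column])  # [0.0, 1.0, 1.0, 2.0]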
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
deleted file mode 100644
index e724aa5872..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.Binarizer
-// $example off$
-import org.apache.spark.sql.{DataFrame, SQLContext}
-import org.apache.spark.{SparkConf, SparkContext}
-
-object BinarizerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("BinarizerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
- // $example on$
- val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
- val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature")
-
- val binarizer: Binarizer = new Binarizer()
- .setInputCol("feature")
- .setOutputCol("binarized_feature")
- .setThreshold(0.5)
-
- val binarizedDataFrame = binarizer.transform(dataFrame)
- val binarizedFeatures = binarizedDataFrame.select("binarized_feature")
- binarizedFeatures.collect().foreach(println)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
deleted file mode 100644
index 30c2776d39..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.Bucketizer
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object BucketizerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("BucketizerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
-
- val data = Array(-0.5, -0.3, 0.0, 0.2)
- val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-
- val bucketizer = new Bucketizer()
- .setInputCol("features")
- .setOutputCol("bucketedFeatures")
- .setSplits(splits)
-
- // Transform original data into its bucket index.
- val bucketedData = bucketizer.transform(dataFrame)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
deleted file mode 100644
index 314c2c28a2..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.DCT
-import org.apache.spark.mllib.linalg.Vectors
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object DCTExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("DCTExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val data = Seq(
- Vectors.dense(0.0, 1.0, -2.0, 3.0),
- Vectors.dense(-1.0, 2.0, 4.0, -7.0),
- Vectors.dense(14.0, -2.0, -5.0, 1.0))
-
- val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
-
- val dct = new DCT()
- .setInputCol("features")
- .setOutputCol("featuresDCT")
- .setInverse(false)
-
- val dctDf = dct.transform(df)
- dctDf.select("featuresDCT").show(3)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
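DCT applies the type-II discrete cosine transform to each vector (setInverse(false)), scaled so that the transform is orthonormal. A sketch of the unscaled DCT-II sum; Spark additionally multiplies by an orthonormalizing factor, which is omitted here:

    import math
    def dct2(v):  # unscaled DCT-II; Spark rescales for orthonormality
        n = len(v)
        return [sum(x * math.cos(math.pi / n * (i + 0.5) * k)
                    for i, x in enumerate(v))
                for k in range(n)]
    print(dct2([0.0, 1.0, -2.0, 3.0]))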
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala
deleted file mode 100644
index ac50bb7b2b..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.ElementwiseProduct
-import org.apache.spark.mllib.linalg.Vectors
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object ElementwiseProductExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("ElementwiseProductExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- // Create some vector data; also works for sparse vectors
- val dataFrame = sqlContext.createDataFrame(Seq(
- ("a", Vectors.dense(1.0, 2.0, 3.0)),
- ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector")
-
- val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
- val transformer = new ElementwiseProduct()
- .setScalingVec(transformingVector)
- .setInputCol("vector")
- .setOutputCol("transformedVector")
-
- // Batch transform the vectors to create new column:
- transformer.transform(dataFrame).show()
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
deleted file mode 100644
index dac3679a5b..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.MinMaxScaler
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object MinMaxScalerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("MinMaxScalerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- val scaler = new MinMaxScaler()
- .setInputCol("features")
- .setOutputCol("scaledFeatures")
-
- // Compute summary statistics and generate MinMaxScalerModel
- val scalerModel = scaler.fit(dataFrame)
-
- // Rescale each feature to range [min, max].
- val scaledData = scalerModel.transform(dataFrame)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
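MinMaxScaler rescales each column linearly to a target range (default [0, 1]) using the fitted column minimum and maximum. A sketch for one hypothetical column:

    col = [1.0, 3.0, 5.0]  # made-up feature column
    lo, hi = min(col), max(col)
    print([(x - lo) / (hi - lo) for x in col])  # [0.0, 0.5, 1.0]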
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
deleted file mode 100644
index 8a85f71b56..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.NGram
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object NGramExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("NGramExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val wordDataFrame = sqlContext.createDataFrame(Seq(
- (0, Array("Hi", "I", "heard", "about", "Spark")),
- (1, Array("I", "wish", "Java", "could", "use", "case", "classes")),
- (2, Array("Logistic", "regression", "models", "are", "neat"))
- )).toDF("label", "words")
-
- val ngram = new NGram().setInputCol("words").setOutputCol("ngrams")
- val ngramDataFrame = ngram.transform(wordDataFrame)
- ngramDataFrame.take(3).map(_.getAs[Seq[String]]("ngrams").toList).foreach(println)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
deleted file mode 100644
index 17571f0aad..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.Normalizer
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object NormalizerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("NormalizerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- // Normalize each Vector using $L^1$ norm.
- val normalizer = new Normalizer()
- .setInputCol("features")
- .setOutputCol("normFeatures")
- .setP(1.0)
-
- val l1NormData = normalizer.transform(dataFrame)
-
- // Normalize each Vector using $L^\infty$ norm.
- val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
deleted file mode 100644
index 4512736943..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object OneHotEncoderExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("OneHotEncoderExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val df = sqlContext.createDataFrame(Seq(
- (0, "a"),
- (1, "b"),
- (2, "c"),
- (3, "a"),
- (4, "a"),
- (5, "c")
- )).toDF("id", "category")
-
- val indexer = new StringIndexer()
- .setInputCol("category")
- .setOutputCol("categoryIndex")
- .fit(df)
- val indexed = indexer.transform(df)
-
- val encoder = new OneHotEncoder()
- .setInputCol("categoryIndex")
- .setOutputCol("categoryVec")
- val encoded = encoder.transform(indexed)
- encoded.select("id", "categoryVec").foreach(println)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
deleted file mode 100644
index a18d4f3397..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.PCA
-import org.apache.spark.mllib.linalg.Vectors
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object PCAExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("PCAExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val data = Array(
- Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
- Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
- Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
- )
- val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
- val pca = new PCA()
- .setInputCol("features")
- .setOutputCol("pcaFeatures")
- .setK(3)
- .fit(df)
- val pcaDF = pca.transform(df)
- val result = pcaDF.select("pcaFeatures")
- result.show()
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
deleted file mode 100644
index b8e9e6952a..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.PolynomialExpansion
-import org.apache.spark.mllib.linalg.Vectors
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object PolynomialExpansionExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("PolynomialExpansionExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val data = Array(
- Vectors.dense(-2.0, 2.3),
- Vectors.dense(0.0, 0.0),
- Vectors.dense(0.6, -1.1)
- )
- val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
- val polynomialExpansion = new PolynomialExpansion()
- .setInputCol("features")
- .setOutputCol("polyFeatures")
- .setDegree(3)
- val polyDF = polynomialExpansion.transform(df)
- polyDF.select("polyFeatures").take(3).foreach(println)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
deleted file mode 100644
index 286866edea..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.RFormula
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object RFormulaExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("RFormulaExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val dataset = sqlContext.createDataFrame(Seq(
- (7, "US", 18, 1.0),
- (8, "CA", 12, 0.0),
- (9, "NZ", 15, 0.0)
- )).toDF("id", "country", "hour", "clicked")
- val formula = new RFormula()
- .setFormula("clicked ~ country + hour")
- .setFeaturesCol("features")
- .setLabelCol("label")
- val output = formula.fit(dataset).transform(dataset)
- output.select("features", "label").show()
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
deleted file mode 100644
index 646ce0f13e..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.StandardScaler
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object StandardScalerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("StandardScalerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- val scaler = new StandardScaler()
- .setInputCol("features")
- .setOutputCol("scaledFeatures")
- .setWithStd(true)
- .setWithMean(false)
-
- // Compute summary statistics by fitting the StandardScaler.
- val scalerModel = scaler.fit(dataFrame)
-
- // Normalize each feature to have unit standard deviation.
- val scaledData = scalerModel.transform(dataFrame)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
deleted file mode 100644
index 655ffce08d..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.StopWordsRemover
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object StopWordsRemoverExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("StopWordsRemoverExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val remover = new StopWordsRemover()
- .setInputCol("raw")
- .setOutputCol("filtered")
-
- val dataSet = sqlContext.createDataFrame(Seq(
- (0, Seq("I", "saw", "the", "red", "baloon")),
- (1, Seq("Mary", "had", "a", "little", "lamb"))
- )).toDF("id", "raw")
-
- remover.transform(dataSet).show()
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
deleted file mode 100644
index 1be8a5f33f..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.StringIndexer
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object StringIndexerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("StringIndexerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val df = sqlContext.createDataFrame(
- Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
- ).toDF("id", "category")
-
- val indexer = new StringIndexer()
- .setInputCol("category")
- .setOutputCol("categoryIndex")
-
- val indexed = indexer.fit(df).transform(df)
- indexed.show()
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
deleted file mode 100644
index 01e0d1388a..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object TokenizerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("TokenizerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val sentenceDataFrame = sqlContext.createDataFrame(Seq(
- (0, "Hi I heard about Spark"),
- (1, "I wish Java could use case classes"),
- (2, "Logistic,regression,models,are,neat")
- )).toDF("label", "sentence")
-
- val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
- val regexTokenizer = new RegexTokenizer()
- .setInputCol("sentence")
- .setOutputCol("words")
- .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
-
- val tokenized = tokenizer.transform(sentenceDataFrame)
- tokenized.select("words", "label").take(3).foreach(println)
- val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
- regexTokenized.select("words", "label").take(3).foreach(println)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
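
The two tokenizers above differ only on the comma-separated third sentence: the plain Tokenizer lowercases and splits on whitespace, leaving it as a single token, while the RegexTokenizer with pattern "\\W" treats the commas as separators. A focused spark-shell sketch of just that contrast (1.6-era API assumed):

import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}

val sentences = sqlContext.createDataFrame(Seq(
  (0, "logistic,regression,models,are,neat")
)).toDF("label", "sentence")

// Whitespace splitting: the whole string survives as one token.
new Tokenizer().setInputCol("sentence").setOutputCol("words")
  .transform(sentences).select("words").first()

// Splitting on non-word characters: five separate tokens.
new RegexTokenizer().setInputCol("sentence").setOutputCol("words").setPattern("\\W")
  .transform(sentences).select("words").first()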
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
deleted file mode 100644
index d527924419..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.VectorAssembler
-import org.apache.spark.mllib.linalg.Vectors
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object VectorAssemblerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("VectorAssemblerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val dataset = sqlContext.createDataFrame(
- Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
- ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
-
- val assembler = new VectorAssembler()
- .setInputCols(Array("hour", "mobile", "userFeatures"))
- .setOutputCol("features")
-
- val output = assembler.transform(dataset)
- println(output.select("features", "clicked").first())
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
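
VectorAssembler concatenates its input columns in the order given to setInputCols, expanding any vector column in place, so the single row above assembles to [18.0, 1.0, 0.0, 10.0, 0.5]. A quick spark-shell check (assuming the shell's sqlContext):

import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.mllib.linalg.Vectors

val dataset = sqlContext.createDataFrame(
  Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
).toDF("id", "hour", "mobile", "userFeatures", "clicked")

// hour (18.0), then mobile (1.0), then the three userFeatures entries.
val assembled = new VectorAssembler()
  .setInputCols(Array("hour", "mobile", "userFeatures"))
  .setOutputCol("features")
  .transform(dataset)

assembled.select("features", "clicked").first()  // [[18.0,1.0,0.0,10.0,0.5],1.0]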
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
deleted file mode 100644
index 14279d610f..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.feature.VectorIndexer
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object VectorIndexerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("VectorIndexerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- val indexer = new VectorIndexer()
- .setInputCol("features")
- .setOutputCol("indexed")
- .setMaxCategories(10)
-
- val indexerModel = indexer.fit(data)
-
- val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
- println(s"Chose ${categoricalFeatures.size} categorical features: " +
- categoricalFeatures.mkString(", "))
-
- // Create new column "indexed" with categorical values transformed to indices
- val indexedData = indexerModel.transform(data)
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
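
VectorIndexer decides per dimension whether a feature is categorical (at most maxCategories distinct values across the dataset, 10 here) and rewrites those dimensions as zero-based category indices, leaving continuous dimensions untouched. The fitted model exposes that decision through categoryMaps, which the deleted example only counts; a sketch that also inspects the maps (spark-shell, assuming data/mllib/sample_libsvm_data.txt from a Spark checkout):

import org.apache.spark.ml.feature.VectorIndexer

val data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

val model = new VectorIndexer()
  .setInputCol("features")
  .setOutputCol("indexed")
  .setMaxCategories(10)
  .fit(data)

// categoryMaps: feature index -> (original value -> category index).
model.categoryMaps.foreach { case (feature, valueMap) =>
  println(s"feature $feature has ${valueMap.size} categories")
}

model.transform(data).select("indexed").show(3)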
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
deleted file mode 100644
index 04f19829ef..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.ml
-
-// $example on$
-import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
-import org.apache.spark.ml.feature.VectorSlicer
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.types.StructType
-// $example off$
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.{SparkConf, SparkContext}
-
-object VectorSlicerExample {
- def main(args: Array[String]): Unit = {
- val conf = new SparkConf().setAppName("VectorSlicerExample")
- val sc = new SparkContext(conf)
- val sqlContext = new SQLContext(sc)
-
- // $example on$
- val data = Array(Row(Vectors.dense(-2.0, 2.3, 0.0)))
-
- val defaultAttr = NumericAttribute.defaultAttr
- val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
- val attrGroup = new AttributeGroup("userFeatures", attrs.asInstanceOf[Array[Attribute]])
-
- val dataRDD = sc.parallelize(data)
- val dataset = sqlContext.createDataFrame(dataRDD, StructType(Array(attrGroup.toStructField())))
-
- val slicer = new VectorSlicer().setInputCol("userFeatures").setOutputCol("features")
-
- slicer.setIndices(Array(1)).setNames(Array("f3"))
- // or slicer.setIndices(Array(1, 2)), or slicer.setNames(Array("f2", "f3"))
-
- val output = slicer.transform(dataset)
- println(output.select("userFeatures", "features").first())
- // $example off$
- sc.stop()
- }
-}
-// scalastyle:on println
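
VectorSlicer accepts positional indices and attribute names together, and the output vector lists the index-selected features first, then the name-selected ones, so the example above turns [-2.0, 2.3, 0.0] into [2.3, 0.0] (f2 via index 1, then f3 via its name). A sketch of the expected result, hedged on the exact Row rendering:

// output.select("userFeatures", "features").first() should print roughly:
// [[-2.0,2.3,0.0],[2.3,0.0]]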