about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python/ml
diff options
context:
space:
mode:
author: Cheng Lian <lian@databricks.com> 2015-12-08 19:18:59 +0800
committer: Cheng Lian <lian@databricks.com> 2015-12-08 19:18:59 +0800
commit: da2012a0e152aa078bdd19a5c7f91786a2dd7016 (patch)
tree: 1f00975b821733925effbaf0090a40795c50d669 /examples/src/main/python/ml
parent: 037b7e76a7f8b59e031873a768d81417dd180472 (diff)
download: spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.gz
spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.bz2
spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.zip
[SPARK-11551][DOC][EXAMPLE] Revert PR #10002
This reverts PR #10002, commit 78209b0ccaf3f22b5e2345dfb2b98edfdb746819.

The original PR wasn't tested on Jenkins before being merged.

Author: Cheng Lian <lian@databricks.com>

Closes #10200 from liancheng/revert-pr-10002.
Diffstat (limited to 'examples/src/main/python/ml')
-rw-r--r-- examples/src/main/python/ml/binarizer_example.py 43
-rw-r--r-- examples/src/main/python/ml/bucketizer_example.py 42
-rw-r--r-- examples/src/main/python/ml/elementwise_product_example.py 39
-rw-r--r-- examples/src/main/python/ml/n_gram_example.py 42
-rw-r--r-- examples/src/main/python/ml/normalizer_example.py 41
-rw-r--r-- examples/src/main/python/ml/onehot_encoder_example.py 47
-rw-r--r-- examples/src/main/python/ml/pca_example.py 42
-rw-r--r-- examples/src/main/python/ml/polynomial_expansion_example.py 43
-rw-r--r-- examples/src/main/python/ml/rformula_example.py 44
-rw-r--r-- examples/src/main/python/ml/standard_scaler_example.py 42
-rw-r--r-- examples/src/main/python/ml/stopwords_remover_example.py 40
-rw-r--r-- examples/src/main/python/ml/string_indexer_example.py 39
-rw-r--r-- examples/src/main/python/ml/tokenizer_example.py 44
-rw-r--r-- examples/src/main/python/ml/vector_assembler_example.py 42
-rw-r--r-- examples/src/main/python/ml/vector_indexer_example.py 39
15 files changed, 0 insertions, 629 deletions
diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py
deleted file mode 100644
index 960ad208be..0000000000
--- a/examples/src/main/python/ml/binarizer_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Binarizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="BinarizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- continuousDataFrame = sqlContext.createDataFrame([
- (0, 0.1),
- (1, 0.8),
- (2, 0.2)
- ], ["label", "feature"])
- binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
- binarizedDataFrame = binarizer.transform(continuousDataFrame)
- binarizedFeatures = binarizedDataFrame.select("binarized_feature")
- for binarized_feature, in binarizedFeatures.collect():
- print(binarized_feature)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py
deleted file mode 100644
index a12750aa92..0000000000
--- a/examples/src/main/python/ml/bucketizer_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Bucketizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="BucketizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
-
- data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
- dataFrame = sqlContext.createDataFrame(data, ["features"])
-
- bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
-
- # Transform original data into its bucket index.
- bucketedData = bucketizer.transform(dataFrame)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py
deleted file mode 100644
index c85cb0d895..0000000000
--- a/examples/src/main/python/ml/elementwise_product_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="ElementwiseProductExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
- df = sqlContext.createDataFrame(data, ["vector"])
- transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
- inputCol="vector", outputCol="transformedVector")
- transformer.transform(df).show()
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py
deleted file mode 100644
index f2d85f53e7..0000000000
--- a/examples/src/main/python/ml/n_gram_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import NGram
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="NGramExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- wordDataFrame = sqlContext.createDataFrame([
- (0, ["Hi", "I", "heard", "about", "Spark"]),
- (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
- (2, ["Logistic", "regression", "models", "are", "neat"])
- ], ["label", "words"])
- ngram = NGram(inputCol="words", outputCol="ngrams")
- ngramDataFrame = ngram.transform(wordDataFrame)
- for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
- print(ngrams_label)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py
deleted file mode 100644
index 833d93e976..0000000000
--- a/examples/src/main/python/ml/normalizer_example.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Normalizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="NormalizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
- # Normalize each Vector using $L^1$ norm.
- normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
- l1NormData = normalizer.transform(dataFrame)
-
- # Normalize each Vector using $L^\infty$ norm.
- lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py
deleted file mode 100644
index 7529dfd092..0000000000
--- a/examples/src/main/python/ml/onehot_encoder_example.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import OneHotEncoder, StringIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="OneHotEncoderExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame([
- (0, "a"),
- (1, "b"),
- (2, "c"),
- (3, "a"),
- (4, "a"),
- (5, "c")
- ], ["id", "category"])
-
- stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
- model = stringIndexer.fit(df)
- indexed = model.transform(df)
- encoder = OneHotEncoder(dropLast=False, inputCol="categoryIndex", outputCol="categoryVec")
- encoded = encoder.transform(indexed)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py
deleted file mode 100644
index 8b66140a40..0000000000
--- a/examples/src/main/python/ml/pca_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="PCAExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
- (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
- (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
- df = sqlContext.createDataFrame(data,["features"])
- pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
- model = pca.fit(df)
- result = model.transform(df).select("pcaFeatures")
- result.show(truncate=False)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
deleted file mode 100644
index 030a6132a4..0000000000
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="PolynomialExpansionExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame(
- [(Vectors.dense([-2.0, 2.3]), ),
- (Vectors.dense([0.0, 0.0]), ),
- (Vectors.dense([0.6, -1.1]), )],
- ["features"])
- px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures")
- polyDF = px.transform(df)
- for expanded in polyDF.select("polyFeatures").take(3):
- print(expanded)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/rformula_example.py b/examples/src/main/python/ml/rformula_example.py
deleted file mode 100644
index b544a14700..0000000000
--- a/examples/src/main/python/ml/rformula_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import RFormula
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="RFormulaExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataset = sqlContext.createDataFrame(
- [(7, "US", 18, 1.0),
- (8, "CA", 12, 0.0),
- (9, "NZ", 15, 0.0)],
- ["id", "country", "hour", "clicked"])
- formula = RFormula(
- formula="clicked ~ country + hour",
- featuresCol="features",
- labelCol="label")
- output = formula.fit(dataset).transform(dataset)
- output.select("features", "label").show()
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/standard_scaler_example.py b/examples/src/main/python/ml/standard_scaler_example.py
deleted file mode 100644
index 139acecbfb..0000000000
--- a/examples/src/main/python/ml/standard_scaler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StandardScaler
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StandardScalerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
- scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
- withStd=True, withMean=False)
-
- # Compute summary statistics by fitting the StandardScaler
- scalerModel = scaler.fit(dataFrame)
-
- # Normalize each feature to have unit standard deviation.
- scaledData = scalerModel.transform(dataFrame)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py
deleted file mode 100644
index 01f94af8ca..0000000000
--- a/examples/src/main/python/ml/stopwords_remover_example.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StopWordsRemover
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StopWordsRemoverExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- sentenceData = sqlContext.createDataFrame([
- (0, ["I", "saw", "the", "red", "baloon"]),
- (1, ["Mary", "had", "a", "little", "lamb"])
- ], ["label", "raw"])
-
- remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
- remover.transform(sentenceData).show(truncate=False)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/string_indexer_example.py b/examples/src/main/python/ml/string_indexer_example.py
deleted file mode 100644
index 58a8cb5d56..0000000000
--- a/examples/src/main/python/ml/string_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StringIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StringIndexerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- df = sqlContext.createDataFrame(
- [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
- ["id", "category"])
- indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
- indexed = indexer.fit(df).transform(df)
- indexed.show()
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py
deleted file mode 100644
index ce9b225be5..0000000000
--- a/examples/src/main/python/ml/tokenizer_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Tokenizer, RegexTokenizer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="TokenizerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- sentenceDataFrame = sqlContext.createDataFrame([
- (0, "Hi I heard about Spark"),
- (1, "I wish Java could use case classes"),
- (2, "Logistic,regression,models,are,neat")
- ], ["label", "sentence"])
- tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
- wordsDataFrame = tokenizer.transform(sentenceDataFrame)
- for words_label in wordsDataFrame.select("words", "label").take(3):
- print(words_label)
- regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
- # alternatively, pattern="\\w+", gaps(False)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
deleted file mode 100644
index 04f64839f1..0000000000
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.mllib.linalg import Vectors
-from pyspark.ml.feature import VectorAssembler
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="VectorAssemblerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- dataset = sqlContext.createDataFrame(
- [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
- ["id", "hour", "mobile", "userFeatures", "clicked"])
- assembler = VectorAssembler(
- inputCols=["hour", "mobile", "userFeatures"],
- outputCol="features")
- output = assembler.transform(dataset)
- print(output.select("features", "clicked").first())
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py
deleted file mode 100644
index cc00d1454f..0000000000
--- a/examples/src/main/python/ml/vector_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import VectorIndexer
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="VectorIndexerExample")
- sqlContext = SQLContext(sc)
-
- # $example on$
- data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
- indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
- indexerModel = indexer.fit(data)
-
- # Create new column "indexed" with categorical values transformed to indices
- indexedData = indexerModel.transform(data)
- # $example off$
-
- sc.stop()