[SPARK-11551][DOC][EXAMPLE] Revert PR #10002

This reverts PR #10002, commit 78209b0ccaf3f22b5e2345dfb2b98edfdb746819. The original PR wasn't tested on Jenkins before being merged.

Author: Cheng Lian <lian@databricks.com>

Closes #10200 from liancheng/revert-pr-10002.
author: Cheng Lian <lian@databricks.com> 2015-12-08 19:18:59 +0800
committer: Cheng Lian <lian@databricks.com> 2015-12-08 19:18:59 +0800
commit: da2012a0e152aa078bdd19a5c7f91786a2dd7016 (patch)
tree: 1f00975b821733925effbaf0090a40795c50d669 /examples/src/main/python/ml
parent: 037b7e76a7f8b59e031873a768d81417dd180472 (diff)
download: spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.gz
spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.bz2
spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.zip
15 files changed, 0 insertions, 629 deletions
diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py
deleted file mode 100644
index 960ad208be..0000000000
--- a/examples/src/main/python/ml/binarizer_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Binarizer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="BinarizerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    continuousDataFrame = sqlContext.createDataFrame([
-        (0, 0.1),
-        (1, 0.8),
-        (2, 0.2)
-    ], ["label", "feature"])
-    binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
-    binarizedDataFrame = binarizer.transform(continuousDataFrame)
-    binarizedFeatures = binarizedDataFrame.select("binarized_feature")
-    for binarized_feature, in binarizedFeatures.collect():
-        print(binarized_feature)
-   # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py
deleted file mode 100644
index a12750aa92..0000000000
--- a/examples/src/main/python/ml/bucketizer_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Bucketizer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="BucketizerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
-
-    data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
-    dataFrame = sqlContext.createDataFrame(data, ["features"])
-
-    bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
-
-    # Transform original data into its bucket index.
-    bucketedData = bucketizer.transform(dataFrame)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py
deleted file mode 100644
index c85cb0d895..0000000000
--- a/examples/src/main/python/ml/elementwise_product_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="ElementwiseProductExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
-    df = sqlContext.createDataFrame(data, ["vector"])
-    transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
-                                     inputCol="vector", outputCol="transformedVector")
-    transformer.transform(df).show()
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py
deleted file mode 100644
index f2d85f53e7..0000000000
--- a/examples/src/main/python/ml/n_gram_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import NGram
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="NGramExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    wordDataFrame = sqlContext.createDataFrame([
-        (0, ["Hi", "I", "heard", "about", "Spark"]),
-        (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
-        (2, ["Logistic", "regression", "models", "are", "neat"])
-    ], ["label", "words"])
-    ngram = NGram(inputCol="words", outputCol="ngrams")
-    ngramDataFrame = ngram.transform(wordDataFrame)
-    for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
-        print(ngrams_label)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py
deleted file mode 100644
index 833d93e976..0000000000
--- a/examples/src/main/python/ml/normalizer_example.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Normalizer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="NormalizerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
-    # Normalize each Vector using $L^1$ norm.
-    normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
-    l1NormData = normalizer.transform(dataFrame)
-
-    # Normalize each Vector using $L^\infty$ norm.
-    lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py
deleted file mode 100644
index 7529dfd092..0000000000
--- a/examples/src/main/python/ml/onehot_encoder_example.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import OneHotEncoder, StringIndexer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="OneHotEncoderExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    df = sqlContext.createDataFrame([
-        (0, "a"),
-        (1, "b"),
-        (2, "c"),
-        (3, "a"),
-        (4, "a"),
-        (5, "c")
-    ], ["id", "category"])
-    
-    stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
-    model = stringIndexer.fit(df)
-    indexed = model.transform(df)
-    encoder = OneHotEncoder(dropLast=False, inputCol="categoryIndex", outputCol="categoryVec")
-    encoded = encoder.transform(indexed)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py
deleted file mode 100644
index 8b66140a40..0000000000
--- a/examples/src/main/python/ml/pca_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="PCAExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
-        (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
-        (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
-    df = sqlContext.createDataFrame(data,["features"])
-    pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
-    model = pca.fit(df)
-    result = model.transform(df).select("pcaFeatures")
-    result.show(truncate=False)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
deleted file mode 100644
index 030a6132a4..0000000000
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="PolynomialExpansionExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    df = sqlContext.createDataFrame(
-        [(Vectors.dense([-2.0, 2.3]), ),
-        (Vectors.dense([0.0, 0.0]), ),
-        (Vectors.dense([0.6, -1.1]), )],
-        ["features"])
-    px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures")
-    polyDF = px.transform(df)
-    for expanded in polyDF.select("polyFeatures").take(3):
-        print(expanded)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/rformula_example.py b/examples/src/main/python/ml/rformula_example.py
deleted file mode 100644
index b544a14700..0000000000
--- a/examples/src/main/python/ml/rformula_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import RFormula
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="RFormulaExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    dataset = sqlContext.createDataFrame(
-        [(7, "US", 18, 1.0),
-         (8, "CA", 12, 0.0),
-         (9, "NZ", 15, 0.0)],
-        ["id", "country", "hour", "clicked"])
-    formula = RFormula(
-        formula="clicked ~ country + hour",
-        featuresCol="features",
-        labelCol="label")
-    output = formula.fit(dataset).transform(dataset)
-    output.select("features", "label").show()
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/standard_scaler_example.py b/examples/src/main/python/ml/standard_scaler_example.py
deleted file mode 100644
index 139acecbfb..0000000000
--- a/examples/src/main/python/ml/standard_scaler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StandardScaler
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="StandardScalerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    dataFrame = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-    scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
-                            withStd=True, withMean=False)
-
-    # Compute summary statistics by fitting the StandardScaler
-    scalerModel = scaler.fit(dataFrame)
-
-    # Normalize each feature to have unit standard deviation.
-    scaledData = scalerModel.transform(dataFrame)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py
deleted file mode 100644
index 01f94af8ca..0000000000
--- a/examples/src/main/python/ml/stopwords_remover_example.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StopWordsRemover
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="StopWordsRemoverExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    sentenceData = sqlContext.createDataFrame([
-        (0, ["I", "saw", "the", "red", "baloon"]),
-        (1, ["Mary", "had", "a", "little", "lamb"])
-    ], ["label", "raw"])
-
-    remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
-    remover.transform(sentenceData).show(truncate=False)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/string_indexer_example.py b/examples/src/main/python/ml/string_indexer_example.py
deleted file mode 100644
index 58a8cb5d56..0000000000
--- a/examples/src/main/python/ml/string_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import StringIndexer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="StringIndexerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    df = sqlContext.createDataFrame(
-        [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
-        ["id", "category"])
-    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
-    indexed = indexer.fit(df).transform(df)
-    indexed.show()
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py
deleted file mode 100644
index ce9b225be5..0000000000
--- a/examples/src/main/python/ml/tokenizer_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import Tokenizer, RegexTokenizer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="TokenizerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    sentenceDataFrame = sqlContext.createDataFrame([
-        (0, "Hi I heard about Spark"),
-        (1, "I wish Java could use case classes"),
-        (2, "Logistic,regression,models,are,neat")
-    ], ["label", "sentence"])
-    tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
-    wordsDataFrame = tokenizer.transform(sentenceDataFrame)
-    for words_label in wordsDataFrame.select("words", "label").take(3):
-        print(words_label)
-    regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
-    # alternatively, pattern="\\w+", gaps(False)
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
deleted file mode 100644
index 04f64839f1..0000000000
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.mllib.linalg import Vectors
-from pyspark.ml.feature import VectorAssembler
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="VectorAssemblerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    dataset = sqlContext.createDataFrame(
-        [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
-        ["id", "hour", "mobile", "userFeatures", "clicked"])
-    assembler = VectorAssembler(
-        inputCols=["hour", "mobile", "userFeatures"],
-        outputCol="features")
-    output = assembler.transform(dataset)
-    print(output.select("features", "clicked").first())
-    # $example off$
-
-    sc.stop()
diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py
deleted file mode 100644
index cc00d1454f..0000000000
--- a/examples/src/main/python/ml/vector_indexer_example.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
-# $example on$
-from pyspark.ml.feature import VectorIndexer
-# $example off$
-
-if __name__ == "__main__":
-    sc = SparkContext(appName="VectorIndexerExample")
-    sqlContext = SQLContext(sc)
-
-    # $example on$
-    data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-    indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
-    indexerModel = indexer.fit(data)
-
-    # Create new column "indexed" with categorical values transformed to indices
-    indexedData = indexerModel.transform(data)
-    # $example off$
-
-    sc.stop()
author	Cheng Lian <lian@databricks.com>	2015-12-08 19:18:59 +0800
committer	Cheng Lian <lian@databricks.com>	2015-12-08 19:18:59 +0800
commit	da2012a0e152aa078bdd19a5c7f91786a2dd7016 (patch)
tree	1f00975b821733925effbaf0090a40795c50d669 /examples/src/main/python/ml
parent	037b7e76a7f8b59e031873a768d81417dd180472 (diff)
download	spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.gz spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.tar.bz2 spark-da2012a0e152aa078bdd19a5c7f91786a2dd7016.zip