Diffstat (limited to 'examples/src/main/python'): 26 files changed, 120 insertions, 62 deletions
diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py
index 4224a27dbe..669bb2aeab 100644
--- a/examples/src/main/python/ml/binarizer_example.py
+++ b/examples/src/main/python/ml/binarizer_example.py
@@ -33,12 +33,14 @@ if __name__ == "__main__":
         (0, 0.1),
         (1, 0.8),
         (2, 0.2)
-    ], ["label", "feature"])
+    ], ["id", "feature"])
+
     binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
+
     binarizedDataFrame = binarizer.transform(continuousDataFrame)
-    binarizedFeatures = binarizedDataFrame.select("binarized_feature")
-    for binarized_feature, in binarizedFeatures.collect():
-        print(binarized_feature)
+
+    print("Binarizer output with Threshold = %f" % binarizer.getThreshold())
+    binarizedDataFrame.show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py
index 8177e560dd..742f35093b 100644
--- a/examples/src/main/python/ml/bucketizer_example.py
+++ b/examples/src/main/python/ml/bucketizer_example.py
@@ -31,13 +31,15 @@ if __name__ == "__main__":
     # $example on$
     splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]

-    data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
+    data = [(-999.9,), (-0.5,), (-0.3,), (0.0,), (0.2,), (999.9,)]
     dataFrame = spark.createDataFrame(data, ["features"])

     bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")

     # Transform original data into its bucket index.
     bucketedData = bucketizer.transform(dataFrame)
+
+    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits())-1))
     bucketedData.show()
     # $example off$
diff --git a/examples/src/main/python/ml/chisq_selector_example.py b/examples/src/main/python/ml/chisq_selector_example.py
index 5e19ef1624..028a9ea9d6 100644
--- a/examples/src/main/python/ml/chisq_selector_example.py
+++ b/examples/src/main/python/ml/chisq_selector_example.py
@@ -39,6 +39,8 @@ if __name__ == "__main__":
                              outputCol="selectedFeatures", labelCol="clicked")

     result = selector.fit(df).transform(df)
+
+    print("ChiSqSelector output with top %d features selected" % selector.getNumTopFeatures())
     result.show()
     # $example off$
diff --git a/examples/src/main/python/ml/count_vectorizer_example.py b/examples/src/main/python/ml/count_vectorizer_example.py
index 38cfac82fb..f2e41db77d 100644
--- a/examples/src/main/python/ml/count_vectorizer_example.py
+++ b/examples/src/main/python/ml/count_vectorizer_example.py
@@ -37,9 +37,11 @@ if __name__ == "__main__":

     # fit a CountVectorizerModel from the corpus.
     cv = CountVectorizer(inputCol="words", outputCol="features", vocabSize=3, minDF=2.0)
+
     model = cv.fit(df)
+
     result = model.transform(df)
-    result.show()
+    result.show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/dct_example.py b/examples/src/main/python/ml/dct_example.py
index a4f25df784..c0457f8d0f 100644
--- a/examples/src/main/python/ml/dct_example.py
+++ b/examples/src/main/python/ml/dct_example.py
@@ -39,8 +39,7 @@ if __name__ == "__main__":

     dctDf = dct.transform(df)

-    for dcts in dctDf.select("featuresDCT").take(3):
-        print(dcts)
+    dctDf.select("featuresDCT").show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/gaussian_mixture_example.py b/examples/src/main/python/ml/gaussian_mixture_example.py
index edc258de05..8ad450b669 100644
--- a/examples/src/main/python/ml/gaussian_mixture_example.py
+++ b/examples/src/main/python/ml/gaussian_mixture_example.py
@@ -38,11 +38,11 @@ if __name__ == "__main__":
     # loads data
     dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")

-    gmm = GaussianMixture().setK(2)
+    gmm = GaussianMixture().setK(2).setSeed(538009335)
     model = gmm.fit(dataset)

-    print("Gaussians: ")
-    model.gaussiansDF.show()
+    print("Gaussians shown as a DataFrame: ")
+    model.gaussiansDF.show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/index_to_string_example.py b/examples/src/main/python/ml/index_to_string_example.py
index 523caac00c..33d104e8e3 100644
--- a/examples/src/main/python/ml/index_to_string_example.py
+++ b/examples/src/main/python/ml/index_to_string_example.py
@@ -33,14 +33,22 @@ if __name__ == "__main__":
         [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
         ["id", "category"])

-    stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
-    model = stringIndexer.fit(df)
+    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+    model = indexer.fit(df)
     indexed = model.transform(df)

+    print("Transformed string column '%s' to indexed column '%s'"
+          % (indexer.getInputCol(), indexer.getOutputCol()))
+    indexed.show()
+
+    print("StringIndexer will store labels in output column metadata\n")
+
     converter = IndexToString(inputCol="categoryIndex", outputCol="originalCategory")
     converted = converter.transform(indexed)

-    converted.select("id", "originalCategory").show()
+    print("Transformed indexed column '%s' back to original string column '%s' using "
+          "labels in metadata" % (converter.getInputCol(), converter.getOutputCol()))
+    converted.select("id", "categoryIndex", "originalCategory").show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/isotonic_regression_example.py b/examples/src/main/python/ml/isotonic_regression_example.py
index a41b8ffacb..6ae15f1b4b 100644
--- a/examples/src/main/python/ml/isotonic_regression_example.py
+++ b/examples/src/main/python/ml/isotonic_regression_example.py
@@ -44,8 +44,8 @@ if __name__ == "__main__":

     # Trains an isotonic regression model.
     model = IsotonicRegression().fit(dataset)
-    print("Boundaries in increasing order: " + str(model.boundaries))
-    print("Predictions associated with the boundaries: " + str(model.predictions))
+    print("Boundaries in increasing order: %s\n" % str(model.boundaries))
+    print("Predictions associated with the boundaries: %s\n" % str(model.predictions))

     # Makes predictions.
     model.transform(dataset).show()
diff --git a/examples/src/main/python/ml/linear_regression_with_elastic_net.py b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
index 620ab5b87e..6639e9160a 100644
--- a/examples/src/main/python/ml/linear_regression_with_elastic_net.py
+++ b/examples/src/main/python/ml/linear_regression_with_elastic_net.py
@@ -39,8 +39,16 @@ if __name__ == "__main__":
     lrModel = lr.fit(training)

     # Print the coefficients and intercept for linear regression
-    print("Coefficients: " + str(lrModel.coefficients))
-    print("Intercept: " + str(lrModel.intercept))
+    print("Coefficients: %s" % str(lrModel.coefficients))
+    print("Intercept: %s" % str(lrModel.intercept))
+
+    # Summarize the model over the training set and print out some metrics
+    trainingSummary = lrModel.summary
+    print("numIterations: %d" % trainingSummary.totalIterations)
+    print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
+    trainingSummary.residuals.show()
+    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
+    print("r2: %f" % trainingSummary.r2)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/max_abs_scaler_example.py b/examples/src/main/python/ml/max_abs_scaler_example.py
index ab91198b08..45eda3cdad 100644
--- a/examples/src/main/python/ml/max_abs_scaler_example.py
+++ b/examples/src/main/python/ml/max_abs_scaler_example.py
@@ -19,6 +19,7 @@ from __future__ import print_function

 # $example on$
 from pyspark.ml.feature import MaxAbsScaler
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession

@@ -29,7 +30,11 @@ if __name__ == "__main__":
         .getOrCreate()

     # $example on$
-    dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.1, -8.0]),),
+        (1, Vectors.dense([2.0, 1.0, -4.0]),),
+        (2, Vectors.dense([4.0, 10.0, 8.0]),)
+    ], ["id", "features"])

     scaler = MaxAbsScaler(inputCol="features", outputCol="scaledFeatures")

@@ -38,7 +43,8 @@ if __name__ == "__main__":

     # rescale each feature to range [-1, 1].
     scaledData = scalerModel.transform(dataFrame)
-    scaledData.show()
+
+    scaledData.select("features", "scaledFeatures").show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/min_max_scaler_example.py b/examples/src/main/python/ml/min_max_scaler_example.py
index e3e7bc205b..b5f272e59b 100644
--- a/examples/src/main/python/ml/min_max_scaler_example.py
+++ b/examples/src/main/python/ml/min_max_scaler_example.py
@@ -19,6 +19,7 @@ from __future__ import print_function

 # $example on$
 from pyspark.ml.feature import MinMaxScaler
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession

@@ -29,7 +30,11 @@ if __name__ == "__main__":
         .getOrCreate()

     # $example on$
-    dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.1, -1.0]),),
+        (1, Vectors.dense([2.0, 1.1, 1.0]),),
+        (2, Vectors.dense([3.0, 10.1, 3.0]),)
+    ], ["id", "features"])

     scaler = MinMaxScaler(inputCol="features", outputCol="scaledFeatures")

@@ -38,7 +43,8 @@ if __name__ == "__main__":

     # rescale each feature to range [min, max].
     scaledData = scalerModel.transform(dataFrame)
-    scaledData.show()
+    print("Features scaled to range: [%f, %f]" % (scaler.getMin(), scaler.getMax()))
+    scaledData.select("features", "scaledFeatures").show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/multilayer_perceptron_classification.py b/examples/src/main/python/ml/multilayer_perceptron_classification.py
index 2cc38c2855..88fc69f753 100644
--- a/examples/src/main/python/ml/multilayer_perceptron_classification.py
+++ b/examples/src/main/python/ml/multilayer_perceptron_classification.py
@@ -52,7 +52,7 @@ if __name__ == "__main__":
     result = model.transform(test)
     predictionAndLabels = result.select("prediction", "label")
     evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
-    print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels)))
+    print("Test set accuracy = " + str(evaluator.evaluate(predictionAndLabels)))
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py
index 55263adb46..31676e076a 100644
--- a/examples/src/main/python/ml/n_gram_example.py
+++ b/examples/src/main/python/ml/n_gram_example.py
@@ -33,13 +33,12 @@ if __name__ == "__main__":
         (0, ["Hi", "I", "heard", "about", "Spark"]),
         (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
         (2, ["Logistic", "regression", "models", "are", "neat"])
-    ], ["label", "words"])
+    ], ["id", "words"])

-    ngram = NGram(inputCol="words", outputCol="ngrams")
-    ngramDataFrame = ngram.transform(wordDataFrame)
+    ngram = NGram(n=2, inputCol="words", outputCol="ngrams")

-    for ngrams_label in ngramDataFrame.select("ngrams", "label").take(3):
-        print(ngrams_label)
+    ngramDataFrame = ngram.transform(wordDataFrame)
+    ngramDataFrame.select("ngrams").show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/naive_bayes_example.py b/examples/src/main/python/ml/naive_bayes_example.py
index aa23f298c8..7290ab81cd 100644
--- a/examples/src/main/python/ml/naive_bayes_example.py
+++ b/examples/src/main/python/ml/naive_bayes_example.py
@@ -45,11 +45,15 @@ if __name__ == "__main__":
     # train the model
     model = nb.fit(train)

+    # select example rows to display.
+    predictions = model.transform(test)
+    predictions.show()
+
     # compute accuracy on the test set
-    result = model.transform(test)
-    predictionAndLabels = result.select("prediction", "label")
-    evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
-    print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels)))
+    evaluator = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction",
+                                                  metricName="accuracy")
+    accuracy = evaluator.evaluate(predictions)
+    print("Test set accuracy = " + str(accuracy))
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py
index 19012f51f4..510bd825fd 100644
--- a/examples/src/main/python/ml/normalizer_example.py
+++ b/examples/src/main/python/ml/normalizer_example.py
@@ -19,6 +19,7 @@ from __future__ import print_function

 # $example on$
 from pyspark.ml.feature import Normalizer
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession

@@ -29,15 +30,21 @@ if __name__ == "__main__":
         .getOrCreate()

     # $example on$
-    dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.5, -1.0]),),
+        (1, Vectors.dense([2.0, 1.0, 1.0]),),
+        (2, Vectors.dense([4.0, 10.0, 2.0]),)
+    ], ["id", "features"])

     # Normalize each Vector using $L^1$ norm.
     normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
     l1NormData = normalizer.transform(dataFrame)
+    print("Normalized using L^1 norm")
     l1NormData.show()

     # Normalize each Vector using $L^\infty$ norm.
     lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
+    print("Normalized using L^inf norm")
     lInfNormData.show()
     # $example off$
diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py
index 47faf8d202..e1996c7f0a 100644
--- a/examples/src/main/python/ml/onehot_encoder_example.py
+++ b/examples/src/main/python/ml/onehot_encoder_example.py
@@ -42,9 +42,9 @@ if __name__ == "__main__":
     model = stringIndexer.fit(df)
     indexed = model.transform(df)

-    encoder = OneHotEncoder(dropLast=False, inputCol="categoryIndex", outputCol="categoryVec")
+    encoder = OneHotEncoder(inputCol="categoryIndex", outputCol="categoryVec")
     encoded = encoder.transform(indexed)
-    encoded.select("id", "categoryVec").show()
+    encoded.show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py
index 2d0865578a..f63e4db434 100644
--- a/examples/src/main/python/ml/pipeline_example.py
+++ b/examples/src/main/python/ml/pipeline_example.py
@@ -60,9 +60,10 @@ if __name__ == "__main__":

     # Make predictions on test documents and print columns of interest.
     prediction = model.transform(test)
-    selected = prediction.select("id", "text", "prediction")
+    selected = prediction.select("id", "text", "probability", "prediction")
     for row in selected.collect():
-        print(row)
+        rid, text, prob, prediction = row
+        print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), prediction))
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
index b464ee86b6..40bcb7b13a 100644
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ b/examples/src/main/python/ml/polynomial_expansion_example.py
@@ -31,16 +31,15 @@ if __name__ == "__main__":

     # $example on$
     df = spark.createDataFrame([
-        (Vectors.dense([-2.0, 2.3]),),
+        (Vectors.dense([2.0, 1.0]),),
         (Vectors.dense([0.0, 0.0]),),
-        (Vectors.dense([0.6, -1.1]),)
+        (Vectors.dense([3.0, -1.0]),)
     ], ["features"])

-    px = PolynomialExpansion(degree=3, inputCol="features", outputCol="polyFeatures")
-    polyDF = px.transform(df)
+    polyExpansion = PolynomialExpansion(degree=3, inputCol="features", outputCol="polyFeatures")
+    polyDF = polyExpansion.transform(df)

-    for expanded in polyDF.select("polyFeatures").take(3):
-        print(expanded)
+    polyDF.show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py
index 8a8392cc1f..3b8e7855e3 100644
--- a/examples/src/main/python/ml/stopwords_remover_example.py
+++ b/examples/src/main/python/ml/stopwords_remover_example.py
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     sentenceData = spark.createDataFrame([
         (0, ["I", "saw", "the", "red", "balloon"]),
         (1, ["Mary", "had", "a", "little", "lamb"])
-    ], ["label", "raw"])
+    ], ["id", "raw"])

     remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
     remover.transform(sentenceData).show(truncate=False)
diff --git a/examples/src/main/python/ml/tf_idf_example.py b/examples/src/main/python/ml/tf_idf_example.py
index 4ab7eb6964..d43244fa68 100644
--- a/examples/src/main/python/ml/tf_idf_example.py
+++ b/examples/src/main/python/ml/tf_idf_example.py
@@ -30,9 +30,9 @@ if __name__ == "__main__":

     # $example on$
     sentenceData = spark.createDataFrame([
-        (0, "Hi I heard about Spark"),
-        (0, "I wish Java could use case classes"),
-        (1, "Logistic regression models are neat")
+        (0.0, "Hi I heard about Spark"),
+        (0.0, "I wish Java could use case classes"),
+        (1.0, "Logistic regression models are neat")
     ], ["label", "sentence"])

     tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
@@ -46,8 +46,7 @@ if __name__ == "__main__":
     idfModel = idf.fit(featurizedData)
     rescaledData = idfModel.transform(featurizedData)

-    for features_label in rescaledData.select("features", "label").take(3):
-        print(features_label)
+    rescaledData.select("label", "features").show()
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py
index 89f5060705..5c65c5c9f8 100644
--- a/examples/src/main/python/ml/tokenizer_example.py
+++ b/examples/src/main/python/ml/tokenizer_example.py
@@ -19,6 +19,8 @@ from __future__ import print_function

 # $example on$
 from pyspark.ml.feature import Tokenizer, RegexTokenizer
+from pyspark.sql.functions import col, udf
+from pyspark.sql.types import IntegerType
 # $example off$
 from pyspark.sql import SparkSession

@@ -33,20 +35,22 @@ if __name__ == "__main__":
         (0, "Hi I heard about Spark"),
         (1, "I wish Java could use case classes"),
         (2, "Logistic,regression,models,are,neat")
-    ], ["label", "sentence"])
+    ], ["id", "sentence"])

     tokenizer = Tokenizer(inputCol="sentence", outputCol="words")

     regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
     # alternatively, pattern="\\w+", gaps(False)

+    countTokens = udf(lambda words: len(words), IntegerType())
+
     tokenized = tokenizer.transform(sentenceDataFrame)
-    for words_label in tokenized.select("words", "label").take(3):
-        print(words_label)
+    tokenized.select("sentence", "words")\
+        .withColumn("tokens", countTokens(col("words"))).show(truncate=False)

     regexTokenized = regexTokenizer.transform(sentenceDataFrame)
-    for words_label in regexTokenized.select("words", "label").take(3):
-        print(words_label)
+    regexTokenized.select("sentence", "words") \
+        .withColumn("tokens", countTokens(col("words"))).show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/train_validation_split.py b/examples/src/main/python/ml/train_validation_split.py
index a92b861f83..d104f7d30a 100644
--- a/examples/src/main/python/ml/train_validation_split.py
+++ b/examples/src/main/python/ml/train_validation_split.py
@@ -66,8 +66,9 @@ if __name__ == "__main__":
     # Make predictions on test data. model is the model with combination of parameters
     # that performed best.
-    prediction = model.transform(test)
-    for row in prediction.take(5):
-        print(row)
+    model.transform(test)\
+        .select("features", "label", "prediction")\
+        .show()
+
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
index eac33711ad..98de1d5ea7 100644
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ b/examples/src/main/python/ml/vector_assembler_example.py
@@ -39,7 +39,8 @@ if __name__ == "__main__":
         outputCol="features")

     output = assembler.transform(dataset)
-    print(output.select("features", "clicked").first())
+    print("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(truncate=False)
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py
index 3912c135be..5c2956077d 100644
--- a/examples/src/main/python/ml/vector_indexer_example.py
+++ b/examples/src/main/python/ml/vector_indexer_example.py
@@ -34,6 +34,10 @@ if __name__ == "__main__":
     indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
     indexerModel = indexer.fit(data)

+    categoricalFeatures = indexerModel.categoryMaps
+    print("Chose %d categorical features: %s" %
+          (len(categoricalFeatures), ", ".join(str(k) for k in categoricalFeatures.keys())))
+
     # Create new column "indexed" with categorical values transformed to indices
     indexedData = indexerModel.transform(data)
     indexedData.show()
diff --git a/examples/src/main/python/ml/word2vec_example.py b/examples/src/main/python/ml/word2vec_example.py
index 78a91c92fc..77f8951df0 100644
--- a/examples/src/main/python/ml/word2vec_example.py
+++ b/examples/src/main/python/ml/word2vec_example.py
@@ -41,8 +41,9 @@ if __name__ == "__main__":
     model = word2Vec.fit(documentDF)

     result = model.transform(documentDF)
-    for feature in result.select("result").take(3):
-        print(feature)
+    for row in result.collect():
+        text, vector = row
+        print("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector)))
     # $example off$

     spark.stop()
diff --git a/examples/src/main/python/pagerank.py b/examples/src/main/python/pagerank.py
index a399a9c37c..0d6c253d39 100755
--- a/examples/src/main/python/pagerank.py
+++ b/examples/src/main/python/pagerank.py
@@ -18,6 +18,9 @@
 """
 This is an example implementation of PageRank. For more conventional use,
 Please refer to PageRank implementation provided by graphx
+
+Example Usage:
+bin/spark-submit examples/src/main/python/pagerank.py data/mllib/pagerank_data.txt 10
 """
 from __future__ import print_function

@@ -46,8 +49,8 @@ if __name__ == "__main__":
         print("Usage: pagerank <file> <iterations>", file=sys.stderr)
         exit(-1)

-    print("""WARN: This is a naive implementation of PageRank and is
-          given as an example! Please refer to PageRank implementation provided by graphx""",
+    print("WARN: This is a naive implementation of PageRank and is given as an example!\n" +
+          "Please refer to PageRank implementation provided by graphx",
           file=sys.stderr)

     # Initialize the spark context.