From be88383e15a86d094963de5f7e8792510bc990de Mon Sep 17 00:00:00 2001 From: GayathriMurali Date: Fri, 24 Jun 2016 13:25:40 +0200 Subject: [SPARK-15997][DOC][ML] Update user guide for HashingTF, QuantileDiscretizer and CountVectorizer ## What changes were proposed in this pull request? Made changes to HashingTF, QuantileDiscretizer and CountVectorizer. Author: GayathriMurali Closes #13745 from GayathriMurali/SPARK-15997. --- .../apache/spark/examples/ml/JavaQuantileDiscretizerExample.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'examples/src/main/java') diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java index 16f58a852d..dd20cac621 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java @@ -54,7 +54,12 @@ public class JavaQuantileDiscretizerExample { }); Dataset df = spark.createDataFrame(data, schema); - + // $example off$ + // Output of QuantileDiscretizer for such small datasets can depend on the number of + // partitions. Here we force a single partition to ensure consistent results. + // Note this is not necessary for normal use cases + df = df.repartition(1); + // $example on$ QuantileDiscretizer discretizer = new QuantileDiscretizer() .setInputCol("hour") .setOutputCol("result") -- cgit v1.2.3