aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/scala/org
diff options
context:
space:
mode:
authorGayathriMurali <gayathri.m@intel.com>2016-06-24 13:25:40 +0200
committerNick Pentreath <nickp@za.ibm.com>2016-06-24 13:25:40 +0200
commitbe88383e15a86d094963de5f7e8792510bc990de (patch)
tree3e4c7b8101ea1ea6a919f283e6d31b47fbee96c4 /examples/src/main/scala/org
parent158af162eac7348464c6751c8acd48fc6c117688 (diff)
downloadspark-be88383e15a86d094963de5f7e8792510bc990de.tar.gz
spark-be88383e15a86d094963de5f7e8792510bc990de.tar.bz2
spark-be88383e15a86d094963de5f7e8792510bc990de.zip
[SPARK-15997][DOC][ML] Update user guide for HashingTF, QuantileDiscretizer and CountVectorizer
## What changes were proposed in this pull request? Made changes to HashingTF, QuantileDiscretizer and CountVectorizer. Author: GayathriMurali <gayathri.m@intel.com> Closes #13745 from GayathriMurali/SPARK-15997.
Diffstat (limited to 'examples/src/main/scala/org')
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala9
1 file changed, 7 insertions, 2 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
index 1a16515594..2f7e217b8f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
@@ -32,8 +32,13 @@ object QuantileDiscretizerExample {
// $example on$
val data = Array((0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2))
- val df = spark.createDataFrame(data).toDF("id", "hour")
-
+ var df = spark.createDataFrame(data).toDF("id", "hour")
+ // $example off$
+ // Output of QuantileDiscretizer for such small datasets can depend on the number of
+ // partitions. Here we force a single partition to ensure consistent results.
+ // Note this is not necessary for normal use cases
+ .repartition(1)
+ // $example on$
val discretizer = new QuantileDiscretizer()
.setInputCol("hour")
.setOutputCol("result")