about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python/ml/quantile_discretizer_example.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/ml/quantile_discretizer_example.py')
-rw-r--r-- examples/src/main/python/ml/quantile_discretizer_example.py | 8
1 file changed, 6 insertions, 2 deletions
diff --git a/examples/src/main/python/ml/quantile_discretizer_example.py b/examples/src/main/python/ml/quantile_discretizer_example.py
index 6f422f840a..788a0baffe 100644
--- a/examples/src/main/python/ml/quantile_discretizer_example.py
+++ b/examples/src/main/python/ml/quantile_discretizer_example.py
@@ -22,18 +22,22 @@ from pyspark.ml.feature import QuantileDiscretizer
# $example off$
from pyspark.sql import SparkSession
-
if __name__ == "__main__":
- spark = SparkSession.builder.appName("QuantileDiscretizerExample").getOrCreate()
+ spark = SparkSession\
+ .builder\
+ .appName("QuantileDiscretizerExample")\
+ .getOrCreate()
# $example on$
data = [(0, 18.0,), (1, 19.0,), (2, 8.0,), (3, 5.0,), (4, 2.2,)]
df = spark.createDataFrame(data, ["id", "hour"])
# $example off$
+
# Output of QuantileDiscretizer for such small datasets can depend on the number of
# partitions. Here we force a single partition to ensure consistent results.
# Note this is not necessary for normal use cases
df = df.repartition(1)
+
# $example on$
discretizer = QuantileDiscretizer(numBuckets=3, inputCol="hour", outputCol="result")