diff options
Diffstat (limited to 'examples/src/main/python/ml/quantile_discretizer_example.py')
-rw-r--r-- | examples/src/main/python/ml/quantile_discretizer_example.py | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/examples/src/main/python/ml/quantile_discretizer_example.py b/examples/src/main/python/ml/quantile_discretizer_example.py
index 6f422f840a..788a0baffe 100644
--- a/examples/src/main/python/ml/quantile_discretizer_example.py
+++ b/examples/src/main/python/ml/quantile_discretizer_example.py
@@ -22,18 +22,22 @@ from pyspark.ml.feature import QuantileDiscretizer
 # $example off$
 from pyspark.sql import SparkSession
 
-
 if __name__ == "__main__":
-    spark = SparkSession.builder.appName("QuantileDiscretizerExample").getOrCreate()
+    spark = SparkSession\
+        .builder\
+        .appName("QuantileDiscretizerExample")\
+        .getOrCreate()
 
     # $example on$
     data = [(0, 18.0,), (1, 19.0,), (2, 8.0,), (3, 5.0,), (4, 2.2,)]
     df = spark.createDataFrame(data, ["id", "hour"])
     # $example off$
+
     # Output of QuantileDiscretizer for such small datasets can depend on the number of
     # partitions. Here we force a single partition to ensure consistent results.
     # Note this is not necessary for normal use cases
     df = df.repartition(1)
+
     # $example on$
     discretizer = QuantileDiscretizer(numBuckets=3, inputCol="hour", outputCol="result")
 