diff options
author | Zheng RuiFeng <ruifengz@foxmail.com> | 2016-05-11 09:56:36 +0200 |
---|---|---|
committer | Nick Pentreath <nickp@za.ibm.com> | 2016-05-11 09:56:36 +0200 |
commit | cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d (patch) | |
tree | 425fd9da8e73e5a31fbb0e46be206692c23f64f0 /examples/src/main/scala | |
parent | ad1a8466e9c10fbe8b455dba17b16973f92ebc15 (diff) | |
download | spark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.tar.gz spark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.tar.bz2 spark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.zip |
[SPARK-14340][EXAMPLE][DOC] Update Examples and User Guide for ml.BisectingKMeans
## What changes were proposed in this pull request?
1. Add BisectingKMeans to ml-clustering.md
2. Add the missing Scala BisectingKMeansExample
3. Create a new data file `data/mllib/sample_kmeans_data.txt`
## How was this patch tested?
manual tests
Author: Zheng RuiFeng <ruifengz@foxmail.com>
Closes #11844 from zhengruifeng/doc_bkm.
Diffstat (limited to 'examples/src/main/scala')
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala | 65 |
1 files changed, 65 insertions, 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.examples.ml

// scalastyle:off println

// $example on$
import org.apache.spark.ml.clustering.BisectingKMeans
// $example off$
import org.apache.spark.sql.SparkSession

/**
 * Minimal walkthrough of bisecting k-means clustering with the `spark.ml` API:
 * load a LIBSVM-formatted dataset, fit a model, print its cost and its cluster centers.
 *
 * Launch it from the Spark home directory with
 * {{{
 * bin/run-example ml.BisectingKMeansExample
 * }}}
 */
object BisectingKMeansExample {

  /**
   * Entry point of the example.
   *
   * @param args command-line arguments; they are not read by this example
   */
  def main(args: Array[String]): Unit = {
    // Get (or create) the session this example runs against.
    val session = SparkSession.builder.appName("BisectingKMeansExample").getOrCreate()

    // $example on$
    // Read the sample points using the "libsvm" data source.
    val points = session.read
      .format("libsvm")
      .load("data/mllib/sample_kmeans_data.txt")

    // Configure the estimator (k = 2, seed = 1) and fit it to the points.
    val estimator = new BisectingKMeans()
      .setK(2)
      .setSeed(1)
    val fitted = estimator.fit(points)

    // Report the clustering cost computed on the same data the model was fitted on.
    val cost = fitted.computeCost(points)
    println(s"Within Set Sum of Squared Errors = $cost")

    // Print one line per cluster center.
    println("Cluster Centers: ")
    fitted.clusterCenters.foreach(center => println(center))
    // $example off$

    session.stop()
  }
}
// scalastyle:on println