aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/scala
diff options
context:
space:
mode:
authorZheng RuiFeng <ruifengz@foxmail.com>2016-05-11 09:56:36 +0200
committerNick Pentreath <nickp@za.ibm.com>2016-05-11 09:56:36 +0200
commitcef73b563864d5f8aa1b26e31e3b9af6f0a08a5d (patch)
tree425fd9da8e73e5a31fbb0e46be206692c23f64f0 /examples/src/main/scala
parentad1a8466e9c10fbe8b455dba17b16973f92ebc15 (diff)
downloadspark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.tar.gz
spark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.tar.bz2
spark-cef73b563864d5f8aa1b26e31e3b9af6f0a08a5d.zip
[SPARK-14340][EXAMPLE][DOC] Update Examples and User Guide for ml.BisectingKMeans
## What changes were proposed in this pull request? 1, add BisectingKMeans to ml-clustering.md 2, add the missing Scala BisectingKMeansExample 3, create a new datafile `data/mllib/sample_kmeans_data.txt` ## How was this patch tested? manual tests Author: Zheng RuiFeng <ruifengz@foxmail.com> Closes #11844 from zhengruifeng/doc_bkm.
Diffstat (limited to 'examples/src/main/scala')
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala65
1 files changed, 65 insertions, 0 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
new file mode 100644
index 0000000000..5f8f2c99cb
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.clustering.BisectingKMeans
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating bisecting k-means clustering.
+ * Run with
+ * {{{
+ * bin/run-example ml.BisectingKMeansExample
+ * }}}
+ */
+object BisectingKMeansExample {
+
+ def main(args: Array[String]): Unit = {
+ // Creates a SparkSession
+ val spark = SparkSession
+ .builder
+ .appName("BisectingKMeansExample")
+ .getOrCreate()
+
+ // $example on$
+ // Loads data.
+ val dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+ // Trains a bisecting k-means model.
+ val bkm = new BisectingKMeans().setK(2).setSeed(1)
+ val model = bkm.fit(dataset)
+
+ // Evaluate clustering.
+ val cost = model.computeCost(dataset)
+ println(s"Within Set Sum of Squared Errors = $cost")
+
+ // Shows the result.
+ println("Cluster Centers: ")
+ val centers = model.clusterCenters
+ centers.foreach(println)
+ // $example off$
+
+ spark.stop()
+ }
+}
+// scalastyle:on println
+