diff options
author | Xiangrui Meng <meng@databricks.com> | 2015-02-18 16:29:32 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-02-18 16:29:32 -0800 |
commit | d12d2ad76ee673b819c92dd8093ba0a560847761 (patch) | |
tree | 76275e2bad4ebaae604be4f7a885413c8d81b9ee /examples/src/main/java | |
parent | aa8f10e82a743d59ce87348af19c0177eb618a66 (diff) | |
download | spark-d12d2ad76ee673b819c92dd8093ba0a560847761.tar.gz spark-d12d2ad76ee673b819c92dd8093ba0a560847761.tar.bz2 spark-d12d2ad76ee673b819c92dd8093ba0a560847761.zip |
[SPARK-5879][MLLIB] update PIC user guide and add a Java example
Updated PIC user guide to reflect API changes and added a simple Java example. The API is still not very Java-friendly. I created SPARK-5990 for this issue.
Author: Xiangrui Meng <meng@databricks.com>
Closes #4680 from mengxr/SPARK-5897 and squashes the following commits:
847d216 [Xiangrui Meng] apache header
87719a2 [Xiangrui Meng] remove PIC image
2dd921f [Xiangrui Meng] update PIC user guide and add a Java example
Diffstat (limited to 'examples/src/main/java')
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java new file mode 100644 index 0000000000..e9371de39f --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.mllib; + +import scala.Tuple2; +import scala.Tuple3; + +import com.google.common.collect.Lists; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.clustering.PowerIterationClustering; +import org.apache.spark.mllib.clustering.PowerIterationClusteringModel; + +/** + * Java example for graph clustering using power iteration clustering (PIC). + */ +public class JavaPowerIterationClusteringExample { + public static void main(String[] args) { + SparkConf sparkConf = new SparkConf().setAppName("JavaPowerIterationClusteringExample"); + JavaSparkContext sc = new JavaSparkContext(sparkConf); + + @SuppressWarnings("unchecked") + JavaRDD<Tuple3<Long, Long, Double>> similarities = sc.parallelize(Lists.newArrayList( + new Tuple3<Long, Long, Double>(0L, 1L, 0.9), + new Tuple3<Long, Long, Double>(1L, 2L, 0.9), + new Tuple3<Long, Long, Double>(2L, 3L, 0.9), + new Tuple3<Long, Long, Double>(3L, 4L, 0.1), + new Tuple3<Long, Long, Double>(4L, 5L, 0.9))); + + PowerIterationClustering pic = new PowerIterationClustering() + .setK(2) + .setMaxIterations(10); + PowerIterationClusteringModel model = pic.run(similarities); + + for (Tuple2<Object, Object> assignment: model.assignments().toJavaRDD().collect()) { + System.out.println(assignment._1() + " -> " + assignment._2()); + } + + sc.stop(); + } +} |