aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorPrashant Sharma <prashant.s@imaginea.com>2013-09-06 17:53:01 +0530
committerPrashant Sharma <prashant.s@imaginea.com>2013-09-06 17:53:01 +0530
commit4106ae9fbf8a582697deba2198b3b966dec00bfe (patch)
tree7c3046faee5f62f9ec4c4176125988d7cb5d70e2 /examples
parente0dd24dc858777904335218f3001a24bffe73b27 (diff)
parent5c7494d7c1b7301138fb3dc155a1b0c961126ec6 (diff)
downloadspark-4106ae9fbf8a582697deba2198b3b966dec00bfe.tar.gz
spark-4106ae9fbf8a582697deba2198b3b966dec00bfe.tar.bz2
spark-4106ae9fbf8a582697deba2198b3b966dec00bfe.zip
Merged with master
Diffstat (limited to 'examples')
-rw-r--r--examples/pom.xml310
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java (renamed from examples/src/main/java/spark/examples/JavaHdfsLR.java)27
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaKMeans.java (renamed from examples/src/main/java/spark/examples/JavaKMeans.java)31
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java (renamed from examples/src/main/java/spark/examples/JavaLogQuery.java)29
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaPageRank.java115
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java (renamed from examples/src/main/java/spark/examples/JavaSparkPi.java)27
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaTC.java (renamed from examples/src/main/java/spark/examples/JavaTC.java)25
-rw-r--r--examples/src/main/java/org/apache/spark/examples/JavaWordCount.java (renamed from examples/src/main/java/spark/examples/JavaWordCount.java)31
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java87
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java81
-rw-r--r--examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java85
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java)27
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java (renamed from examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java)33
-rw-r--r--examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java (renamed from examples/src/main/java/spark/streaming/examples/JavaQueueStream.java)33
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala50
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala (renamed from examples/src/main/scala/spark/examples/CassandraTest.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala38
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala (renamed from examples/src/main/scala/spark/examples/GroupByTest.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala (renamed from examples/src/main/scala/spark/examples/HBaseTest.scala)27
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala37
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalALS.scala (renamed from examples/src/main/scala/spark/examples/LocalALS.scala)19
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala (renamed from examples/src/main/scala/spark/examples/LocalFileLR.scala)21
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala (renamed from examples/src/main/scala/spark/examples/LocalKMeans.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalLR.scala (renamed from examples/src/main/scala/spark/examples/LocalLR.scala)21
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LocalPi.scala34
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/LogQuery.scala (renamed from examples/src/main/scala/spark/examples/LogQuery.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala53
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala (renamed from examples/src/main/scala/spark/examples/SkewedGroupByTest.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkALS.scala (renamed from examples/src/main/scala/spark/examples/SparkALS.scala)21
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala (renamed from examples/src/main/scala/spark/examples/SparkHdfsLR.scala)28
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala (renamed from examples/src/main/scala/spark/examples/SparkKMeans.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkLR.scala (renamed from examples/src/main/scala/spark/examples/SparkLR.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala63
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkPi.scala43
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/SparkTC.scala (renamed from examples/src/main/scala/spark/examples/SparkTC.scala)21
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala124
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala101
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala220
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala)35
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala)31
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/QueueStream.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala (renamed from examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala)29
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala)27
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala)25
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala (renamed from examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala (renamed from examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala)27
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala)23
-rw-r--r--examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala (renamed from examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala)31
-rw-r--r--examples/src/main/scala/spark/examples/BroadcastTest.scala33
-rw-r--r--examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala21
-rw-r--r--examples/src/main/scala/spark/examples/HdfsTest.scala20
-rw-r--r--examples/src/main/scala/spark/examples/LocalPi.scala17
-rw-r--r--examples/src/main/scala/spark/examples/MultiBroadcastTest.scala36
-rw-r--r--examples/src/main/scala/spark/examples/SparkPi.scala27
59 files changed, 2086 insertions, 494 deletions
diff --git a/examples/pom.xml b/examples/pom.xml
index 64e68711bd..740d3be242 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -1,23 +1,75 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
- <groupId>org.spark-project</groupId>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-examples</artifactId>
<packaging>jar</packaging>
<name>Spark Project Examples</name>
- <url>http://spark-project.org/</url>
+ <url>http://spark.incubator.apache.org/</url>
<dependencies>
<dependency>
- <groupId>org.scala-lang</groupId>
- <artifactId>scala-library</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-streaming</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-bagel</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.94.6</version>
+ <exclusions>
+ <exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
@@ -38,182 +90,100 @@
<artifactId>scalacheck_${scala.version}</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.cassandra</groupId>
- <artifactId>cassandra-all</artifactId>
- <version>1.2.5</version>
- <exclusions>
- <exclusion>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
- <artifactId>concurrentlinkedhashmap-lru</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.ning</groupId>
- <artifactId>compress-lzf</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.cassandra.deps</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
+ <dependency>
+ <groupId>org.apache.cassandra</groupId>
+ <artifactId>cassandra-all</artifactId>
+ <version>1.2.5</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
+ <artifactId>concurrentlinkedhashmap-lru</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.ning</groupId>
+ <artifactId>compress-lzf</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.cassandra.deps</groupId>
+ <artifactId>avro</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
</dependencies>
- <build>
- <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
- <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
- <plugins>
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
<profiles>
<profile>
- <id>hadoop1</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop1</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop1</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>hadoop2</id>
- <dependencies>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
<id>hadoop2-yarn</id>
<dependencies>
<dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-core</artifactId>
- <version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.spark-project</groupId>
- <artifactId>spark-streaming</artifactId>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-yarn</artifactId>
<version>${project.version}</version>
- <classifier>hadoop2-yarn</classifier>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
<scope>provided</scope>
</dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.94.6</version>
- </dependency>
</dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <classifier>hadoop2-yarn</classifier>
- </configuration>
- </plugin>
- </plugins>
- </build>
</profile>
</profiles>
+
+ <build>
+ <outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
+ <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git a/examples/src/main/java/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
index 8b0a9b6808..be0d38589c 100644
--- a/examples/src/main/java/spark/examples/JavaHdfsLR.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
@@ -1,9 +1,26 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.Function2;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
import java.io.Serializable;
import java.util.Arrays;
diff --git a/examples/src/main/java/spark/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
index 626034eb0d..5a6afe7eae 100644
--- a/examples/src/main/java/spark/examples/JavaKMeans.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java
@@ -1,12 +1,29 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.PairFunction;
-import spark.util.Vector;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.util.Vector;
import java.util.List;
import java.util.Map;
diff --git a/examples/src/main/java/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
index 6b22e7120c..152f029213 100644
--- a/examples/src/main/java/spark/examples/JavaLogQuery.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -1,13 +1,30 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
import scala.Tuple3;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
import java.io.Serializable;
import java.util.Collections;
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
new file mode 100644
index 0000000000..c5603a639b
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+import org.apache.spark.api.java.function.PairFunction;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Computes the PageRank of URLs from an input file. Input file should
+ * be in format of:
+ * URL neighbor URL
+ * URL neighbor URL
+ * URL neighbor URL
+ * ...
+ * where URL and their neighbors are separated by space(s).
+ */
+public class JavaPageRank {
+ private static class Sum extends Function2<Double, Double, Double> {
+ @Override
+ public Double call(Double a, Double b) {
+ return a + b;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length < 3) {
+ System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>");
+ System.exit(1);
+ }
+
+ JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+
+ // Loads in input file. It should be in format of:
+ // URL neighbor URL
+ // URL neighbor URL
+ // URL neighbor URL
+ // ...
+ JavaRDD<String> lines = ctx.textFile(args[1], 1);
+
+ // Loads all URLs from input file and initialize their neighbors.
+ JavaPairRDD<String, List<String>> links = lines.map(new PairFunction<String, String, String>() {
+ @Override
+ public Tuple2<String, String> call(String s) {
+ String[] parts = s.split("\\s+");
+ return new Tuple2<String, String>(parts[0], parts[1]);
+ }
+ }).distinct().groupByKey().cache();
+
+ // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
+ JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() {
+ @Override
+ public Double call(List<String> rs) throws Exception {
+ return 1.0;
+ }
+ });
+
+ // Calculates and updates URL ranks continuously using PageRank algorithm.
+ for (int current = 0; current < Integer.parseInt(args[2]); current++) {
+ // Calculates URL contributions to the rank of other URLs.
+ JavaPairRDD<String, Double> contribs = links.join(ranks).values()
+ .flatMap(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() {
+ @Override
+ public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) {
+ List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
+ for (String n : s._1) {
+ results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
+ }
+ return results;
+ }
+ });
+
+ // Re-calculates URL ranks based on neighbor contributions.
+ ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
+ @Override
+ public Double call(Double sum) throws Exception {
+ return 0.15 + sum * 0.85;
+ }
+ });
+ }
+
+ // Collects all URL ranks and dump them to console.
+ List<Tuple2<String, Double>> output = ranks.collect();
+ for (Tuple2 tuple : output) {
+ System.out.println(tuple._1 + " has rank: " + tuple._2 + ".");
+ }
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/spark/examples/JavaSparkPi.java b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
index a15a967de8..4a2380caf5 100644
--- a/examples/src/main/java/spark/examples/JavaSparkPi.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
@@ -1,9 +1,26 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.Function;
-import spark.api.java.function.Function2;
+package org.apache.spark.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
import java.util.ArrayList;
import java.util.List;
diff --git a/examples/src/main/java/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
index b319bdab44..17f21f6b77 100644
--- a/examples/src/main/java/spark/examples/JavaTC.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java
@@ -1,9 +1,26 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
import java.util.ArrayList;
import java.util.HashSet;
diff --git a/examples/src/main/java/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
index 9d4c7a252d..07d32ad659 100644
--- a/examples/src/main/java/spark/examples/JavaWordCount.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
@@ -1,12 +1,29 @@
-package spark.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
import scala.Tuple2;
-import spark.api.java.JavaPairRDD;
-import spark.api.java.JavaRDD;
-import spark.api.java.JavaSparkContext;
-import spark.api.java.function.FlatMapFunction;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
import java.util.Arrays;
import java.util.List;
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
new file mode 100644
index 0000000000..628cb892b6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+import scala.Tuple2;
+
+/**
+ * Example using MLLib ALS from Java.
+ */
+public class JavaALS {
+
+ static class ParseRating extends Function<String, Rating> {
+ public Rating call(String line) {
+ StringTokenizer tok = new StringTokenizer(line, ",");
+ int x = Integer.parseInt(tok.nextToken());
+ int y = Integer.parseInt(tok.nextToken());
+ double rating = Double.parseDouble(tok.nextToken());
+ return new Rating(x, y, rating);
+ }
+ }
+
+ static class FeaturesToString extends Function<Tuple2<Object, double[]>, String> {
+ public String call(Tuple2<Object, double[]> element) {
+ return element._1().toString() + "," + Arrays.toString(element._2());
+ }
+ }
+
+ public static void main(String[] args) {
+
+ if (args.length != 5 && args.length != 6) {
+ System.err.println(
+ "Usage: JavaALS <master> <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
+ System.exit(1);
+ }
+
+ int rank = Integer.parseInt(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+ String outputDir = args[4];
+ int blocks = -1;
+ if (args.length == 6) {
+ blocks = Integer.parseInt(args[5]);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaALS",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+
+ JavaRDD<Rating> ratings = lines.map(new ParseRating());
+
+ MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
+
+ model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+ outputDir + "/userFeatures");
+ model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+ outputDir + "/productFeatures");
+ System.out.println("Final user/product features written to " + outputDir);
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
new file mode 100644
index 0000000000..cd59a139b9
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.clustering.KMeans;
+import org.apache.spark.mllib.clustering.KMeansModel;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Example using MLLib KMeans from Java.
+ */
+public class JavaKMeans {
+
+ static class ParsePoint extends Function<String, double[]> {
+ public double[] call(String line) {
+ StringTokenizer tok = new StringTokenizer(line, " ");
+ int numTokens = tok.countTokens();
+ double[] point = new double[numTokens];
+ for (int i = 0; i < numTokens; ++i) {
+ point[i] = Double.parseDouble(tok.nextToken());
+ }
+ return point;
+ }
+ }
+
+ public static void main(String[] args) {
+
+ if (args.length < 4) {
+ System.err.println(
+ "Usage: JavaKMeans <master> <input_file> <k> <max_iterations> [<runs>]");
+ System.exit(1);
+ }
+
+ String inputFile = args[1];
+ int k = Integer.parseInt(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+ int runs = 1;
+
+ if (args.length >= 5) {
+ runs = Integer.parseInt(args[4]);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+
+ JavaRDD<double[]> points = lines.map(new ParsePoint());
+
+ KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs);
+
+ System.out.println("Cluster centers:");
+ for (double[] center : model.clusterCenters()) {
+ System.out.println(" " + Arrays.toString(center));
+ }
+ double cost = model.computeCost(points.rdd());
+ System.out.println("Cost: " + cost);
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
new file mode 100644
index 0000000000..258061c8e6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.examples;
+
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.classification.LogisticRegressionWithSGD;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+/**
+ * Logistic regression based classification using ML Lib.
+ */
+public class JavaLR {
+
+ static class ParsePoint extends Function<String, LabeledPoint> {
+ public LabeledPoint call(String line) {
+ String[] parts = line.split(",");
+ double y = Double.parseDouble(parts[0]);
+ StringTokenizer tok = new StringTokenizer(parts[1], " ");
+ int numTokens = tok.countTokens();
+ double[] x = new double[numTokens];
+ for (int i = 0; i < numTokens; ++i) {
+ x[i] = Double.parseDouble(tok.nextToken());
+ }
+ return new LabeledPoint(y, x);
+ }
+ }
+
+ public static void printWeights(double[] a) {
+ System.out.println(Arrays.toString(a));
+ }
+
+ public static void main(String[] args) {
+ if (args.length != 4) {
+ System.err.println("Usage: JavaLR <master> <input_dir> <step_size> <niters>");
+ System.exit(1);
+ }
+
+ JavaSparkContext sc = new JavaSparkContext(args[0], "JavaLR",
+ System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));
+ JavaRDD<String> lines = sc.textFile(args[1]);
+ JavaRDD<LabeledPoint> points = lines.map(new ParsePoint()).cache();
+ double stepSize = Double.parseDouble(args[2]);
+ int iterations = Integer.parseInt(args[3]);
+
+ // Another way to configure LogisticRegression
+ //
+ // LogisticRegressionWithSGD lr = new LogisticRegressionWithSGD();
+ // lr.optimizer().setNumIterations(iterations)
+ // .setStepSize(stepSize)
+ // .setMiniBatchFraction(1.0);
+ // lr.setIntercept(true);
+ // LogisticRegressionModel model = lr.train(points.rdd());
+
+ LogisticRegressionModel model = LogisticRegressionWithSGD.train(points.rdd(),
+ iterations, stepSize);
+
+ System.out.print("Final w: ");
+ printWeights(model.weights());
+
+ System.exit(0);
+ }
+}
diff --git a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
index e24c6ddaa7..261813bf2f 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaFlumeEventCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
@@ -1,9 +1,26 @@
-package spark.streaming.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples;
-import spark.api.java.function.Function;
-import spark.streaming.*;
-import spark.streaming.api.java.*;
-import spark.streaming.dstream.SparkFlumeEvent;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.streaming.*;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.dstream.SparkFlumeEvent;
/**
* Produces a count of events received from Flume.
diff --git a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
index 3e57580fd4..def87c199b 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
@@ -1,14 +1,31 @@
-package spark.streaming.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
-import spark.api.java.function.FlatMapFunction;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
-import spark.streaming.Duration;
-import spark.streaming.api.java.JavaDStream;
-import spark.streaming.api.java.JavaPairDStream;
-import spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
/**
* Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
diff --git a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
index 15b82c8da1..c8c7389dd1 100644
--- a/examples/src/main/java/spark/streaming/examples/JavaQueueStream.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
@@ -1,14 +1,31 @@
-package spark.streaming.examples;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples;
import com.google.common.collect.Lists;
import scala.Tuple2;
-import spark.api.java.JavaRDD;
-import spark.api.java.function.Function2;
-import spark.api.java.function.PairFunction;
-import spark.streaming.Duration;
-import spark.streaming.api.java.JavaDStream;
-import spark.streaming.api.java.JavaPairDStream;
-import spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
import java.util.LinkedList;
import java.util.List;
diff --git a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
new file mode 100644
index 0000000000..868ff81f67
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+
+object BroadcastTest {
+ def main(args: Array[String]) {
+ if (args.length == 0) {
+ System.err.println("Usage: BroadcastTest <master> [<slices>] [numElem]")
+ System.exit(1)
+ }
+
+ val sc = new SparkContext(args(0), "Broadcast Test",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ val slices = if (args.length > 1) args(1).toInt else 2
+ val num = if (args.length > 2) args(2).toInt else 1000000
+
+ var arr1 = new Array[Int](num)
+ for (i <- 0 until arr1.length) {
+ arr1(i) = i
+ }
+
+ for (i <- 0 until 2) {
+ println("Iteration " + i)
+ println("===========")
+ val barr1 = sc.broadcast(arr1)
+ sc.parallelize(1 to 10, slices).foreach {
+ i => println(barr1.value.size)
+ }
+ }
+
+ System.exit(0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/CassandraTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala
index 0fe1833e83..33bf7151a7 100644
--- a/examples/src/main/scala/spark/examples/CassandraTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala
@@ -1,12 +1,29 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import org.apache.hadoop.mapreduce.Job
import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat
import org.apache.cassandra.hadoop.ConfigHelper
import org.apache.cassandra.hadoop.ColumnFamilyInputFormat
import org.apache.cassandra.thrift._
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.nio.ByteBuffer
import java.util.SortedMap
import org.apache.cassandra.db.IColumn
diff --git a/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
new file mode 100644
index 0000000000..92eb96bd8e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+
+object ExceptionHandlingTest {
+ def main(args: Array[String]) {
+ if (args.length == 0) {
+ System.err.println("Usage: ExceptionHandlingTest <master>")
+ System.exit(1)
+ }
+
+ val sc = new SparkContext(args(0), "ExceptionHandlingTest",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ sc.parallelize(0 until sc.defaultParallelism).foreach { i =>
+ if (math.random > 0.75)
+ throw new Exception("Testing exception handling")
+ }
+
+ System.exit(0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/GroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
index a6603653f1..42c2e0e8e1 100644
--- a/examples/src/main/scala/spark/examples/GroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
@@ -1,7 +1,24 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import spark.SparkContext
-import spark.SparkContext._
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object GroupByTest {
diff --git a/examples/src/main/scala/spark/examples/HBaseTest.scala b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala
index 6e910154d4..efe2e93b0d 100644
--- a/examples/src/main/scala/spark/examples/HBaseTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/HBaseTest.scala
@@ -1,7 +1,24 @@
-package spark.examples
-
-import spark._
-import spark.rdd.NewHadoopRDD
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark._
+import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
@@ -32,4 +49,4 @@ object HBaseTest {
System.exit(0)
}
-} \ No newline at end of file
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
new file mode 100644
index 0000000000..d6a88d3032
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark._
+
+object HdfsTest {
+ def main(args: Array[String]) {
+ val sc = new SparkContext(args(0), "HdfsTest",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ val file = sc.textFile(args(1))
+ val mapped = file.map(s => s.length).cache()
+ for (iter <- 1 to 10) {
+ val start = System.currentTimeMillis()
+ for (x <- mapped) { x + 2 }
+ // println("Processing: " + x)
+ val end = System.currentTimeMillis()
+ println("Iteration " + iter + " took " + (end-start) + " ms")
+ }
+ System.exit(0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
index 2de810e062..4af45b2b4a 100644
--- a/examples/src/main/scala/spark/examples/LocalALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -1,4 +1,21 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import scala.math.sqrt
import cern.jet.math._
diff --git a/examples/src/main/scala/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
index f958ef9f72..fb130ea198 100644
--- a/examples/src/main/scala/spark/examples/LocalFileLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -1,7 +1,24 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
+import org.apache.spark.util.Vector
object LocalFileLR {
val D = 10 // Numer of dimensions
diff --git a/examples/src/main/scala/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
index 4849f216fb..f90ea35cd4 100644
--- a/examples/src/main/scala/spark/examples/LocalKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -1,8 +1,25 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
-import spark.SparkContext._
+import org.apache.spark.util.Vector
+import org.apache.spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
diff --git a/examples/src/main/scala/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
index cd73f553d6..cd4e9f1af0 100644
--- a/examples/src/main/scala/spark/examples/LocalLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -1,7 +1,24 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
-import spark.util.Vector
+import org.apache.spark.util.Vector
/**
* Logistic regression based classification.
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
new file mode 100644
index 0000000000..bb7f22ec8d
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import scala.math.random
+import org.apache.spark._
+import SparkContext._
+
+object LocalPi {
+ def main(args: Array[String]) {
+ var count = 0
+ for (i <- 1 to 100000) {
+ val x = random * 2 - 1
+ val y = random * 2 - 1
+ if (x*x + y*y < 1) count += 1
+ }
+ println("Pi is roughly " + 4 * count / 100000.0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/LogQuery.scala b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
index 6497596d35..17ff3ce764 100644
--- a/examples/src/main/scala/spark/examples/LogQuery.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
@@ -1,7 +1,24 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
-import spark.SparkContext
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
/**
* Executes a roll up-style query against Apache logs.
*/
diff --git a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
new file mode 100644
index 0000000000..f79f0142b8
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+
+object MultiBroadcastTest {
+ def main(args: Array[String]) {
+ if (args.length == 0) {
+ System.err.println("Usage: BroadcastTest <master> [<slices>] [numElem]")
+ System.exit(1)
+ }
+
+ val sc = new SparkContext(args(0), "Broadcast Test",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+ val slices = if (args.length > 1) args(1).toInt else 2
+ val num = if (args.length > 2) args(2).toInt else 1000000
+
+ var arr1 = new Array[Int](num)
+ for (i <- 0 until arr1.length) {
+ arr1(i) = i
+ }
+
+ var arr2 = new Array[Int](num)
+ for (i <- 0 until arr2.length) {
+ arr2(i) = i
+ }
+
+ val barr1 = sc.broadcast(arr1)
+ val barr2 = sc.broadcast(arr2)
+ sc.parallelize(1 to 10, slices).foreach {
+ i => println(barr1.value.size + barr2.value.size)
+ }
+
+ System.exit(0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
index 461b84a2c6..37ddfb5db7 100644
--- a/examples/src/main/scala/spark/examples/SimpleSkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
@@ -1,7 +1,24 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import spark.SparkContext
-import spark.SparkContext._
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object SimpleSkewedGroupByTest {
diff --git a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
index 435675f9de..9c954b2b5b 100644
--- a/examples/src/main/scala/spark/examples/SkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
@@ -1,7 +1,24 @@
-package spark.examples
-
-import spark.SparkContext
-import spark.SparkContext._
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
import java.util.Random
object SkewedGroupByTest {
diff --git a/examples/src/main/scala/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index 8fb3b0fb2a..814944ba1c 100644
--- a/examples/src/main/scala/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -1,10 +1,27 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import scala.math.sqrt
import cern.jet.math._
import cern.colt.matrix._
import cern.colt.matrix.linalg._
-import spark._
+import org.apache.spark._
/**
* Alternating least squares matrix factorization.
diff --git a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
index 3d080a0257..646682878f 100644
--- a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -1,11 +1,27 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
import scala.math.exp
-import spark.util.Vector
-import spark._
-import spark.deploy.SparkHadoopUtil
-import spark.scheduler.InputFormatInfo
+import org.apache.spark.util.Vector
+import org.apache.spark._
+import org.apache.spark.scheduler.InputFormatInfo
/**
* Logistic regression based classification.
@@ -35,7 +51,7 @@ object SparkHdfsLR {
System.exit(1)
}
val inputPath = args(1)
- val conf = SparkHadoopUtil.newConfiguration()
+ val conf = SparkEnv.get.hadoop.newConfiguration()
val sc = new SparkContext(args(0), "SparkHdfsLR",
System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")), Map(),
InputFormatInfo.computePreferredLocations(
diff --git a/examples/src/main/scala/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
index 4161c59fea..f7bf75b4e5 100644
--- a/examples/src/main/scala/spark/examples/SparkKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -1,9 +1,26 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
-import spark.SparkContext
-import spark.util.Vector
-import spark.SparkContext._
+import org.apache.spark.SparkContext
+import org.apache.spark.util.Vector
+import org.apache.spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
diff --git a/examples/src/main/scala/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
index 2f41aeb376..9ed9fe4d76 100644
--- a/examples/src/main/scala/spark/examples/SparkLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -1,9 +1,26 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
import java.util.Random
import scala.math.exp
-import spark.util.Vector
-import spark._
+import org.apache.spark.util.Vector
+import org.apache.spark._
/**
* Logistic regression based classification.
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
new file mode 100644
index 0000000000..a508c0df57
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext._
+import org.apache.spark.SparkContext
+
+
+/**
+ * Computes the PageRank of URLs from an input file. Input file should
+ * be in format of:
+ * URL neighbor URL
+ * URL neighbor URL
+ * URL neighbor URL
+ * ...
+ * where URL and their neighbors are separated by space(s).
+ */
+object SparkPageRank {
+ def main(args: Array[String]) {
+ if (args.length < 3) {
+ System.err.println("Usage: PageRank <master> <file> <number_of_iterations>")
+ System.exit(1)
+ }
+ var iters = args(2).toInt
+ val ctx = new SparkContext(args(0), "PageRank",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ val lines = ctx.textFile(args(1), 1)
+ val links = lines.map{ s =>
+ val parts = s.split("\\s+")
+ (parts(0), parts(1))
+ }.distinct().groupByKey().cache()
+ var ranks = links.mapValues(v => 1.0)
+
+ for (i <- 1 to iters) {
+ val contribs = links.join(ranks).values.flatMap{ case (urls, rank) =>
+ val size = urls.size
+ urls.map(url => (url, rank / size))
+ }
+ ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
+ }
+
+ val output = ranks.collect()
+ output.foreach(tup => println(tup._1 + " has rank: " + tup._2 + "."))
+
+ System.exit(0)
+ }
+}
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
new file mode 100644
index 0000000000..5a2bc9b0d0
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import scala.math.random
+import org.apache.spark._
+import SparkContext._
+
+/** Computes an approximation to pi */
+object SparkPi {
+ def main(args: Array[String]) {
+ if (args.length == 0) {
+ System.err.println("Usage: SparkPi <master> [<slices>]")
+ System.exit(1)
+ }
+ val spark = new SparkContext(args(0), "SparkPi",
+ System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+ val slices = if (args.length > 1) args(1).toInt else 2
+ val n = 100000 * slices
+ val count = spark.parallelize(1 to n, slices).map { i =>
+ val x = random * 2 - 1
+ val y = random * 2 - 1
+ if (x*x + y*y < 1) 1 else 0
+ }.reduce(_ + _)
+ println("Pi is roughly " + 4.0 * count / n)
+ System.exit(0)
+ }
+}
diff --git a/examples/src/main/scala/spark/examples/SparkTC.scala b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
index 911ae8f168..5a7a9d1bd8 100644
--- a/examples/src/main/scala/spark/examples/SparkTC.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
@@ -1,6 +1,23 @@
-package spark.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
-import spark._
+import org.apache.spark._
import SparkContext._
import scala.util.Random
import scala.collection.mutable
diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala
new file mode 100644
index 0000000000..cfafbaf23e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.bagel
+
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+import org.apache.spark.serializer.KryoRegistrator
+
+import org.apache.spark.bagel._
+import org.apache.spark.bagel.Bagel._
+
+import scala.collection.mutable.ArrayBuffer
+
+import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream}
+
+import com.esotericsoftware.kryo._
+
+class PageRankUtils extends Serializable {
+ def computeWithCombiner(numVertices: Long, epsilon: Double)(
+ self: PRVertex, messageSum: Option[Double], superstep: Int
+ ): (PRVertex, Array[PRMessage]) = {
+ val newValue = messageSum match {
+ case Some(msgSum) if msgSum != 0 =>
+ 0.15 / numVertices + 0.85 * msgSum
+ case _ => self.value
+ }
+
+ val terminate = superstep >= 10
+
+ val outbox: Array[PRMessage] =
+ if (!terminate)
+ self.outEdges.map(targetId =>
+ new PRMessage(targetId, newValue / self.outEdges.size))
+ else
+ Array[PRMessage]()
+
+ (new PRVertex(newValue, self.outEdges, !terminate), outbox)
+ }
+
+ def computeNoCombiner(numVertices: Long, epsilon: Double)(self: PRVertex, messages: Option[Array[PRMessage]], superstep: Int): (PRVertex, Array[PRMessage]) =
+ computeWithCombiner(numVertices, epsilon)(self, messages match {
+ case Some(msgs) => Some(msgs.map(_.value).sum)
+ case None => None
+ }, superstep)
+}
+
+class PRCombiner extends Combiner[PRMessage, Double] with Serializable {
+ def createCombiner(msg: PRMessage): Double =
+ msg.value
+ def mergeMsg(combiner: Double, msg: PRMessage): Double =
+ combiner + msg.value
+ def mergeCombiners(a: Double, b: Double): Double =
+ a + b
+}
+
+class PRVertex() extends Vertex with Serializable {
+ var value: Double = _
+ var outEdges: Array[String] = _
+ var active: Boolean = _
+
+ def this(value: Double, outEdges: Array[String], active: Boolean = true) {
+ this()
+ this.value = value
+ this.outEdges = outEdges
+ this.active = active
+ }
+
+ override def toString(): String = {
+ "PRVertex(value=%f, outEdges.length=%d, active=%s)".format(value, outEdges.length, active.toString)
+ }
+}
+
+class PRMessage() extends Message[String] with Serializable {
+ var targetId: String = _
+ var value: Double = _
+
+ def this(targetId: String, value: Double) {
+ this()
+ this.targetId = targetId
+ this.value = value
+ }
+}
+
+class PRKryoRegistrator extends KryoRegistrator {
+ def registerClasses(kryo: Kryo) {
+ kryo.register(classOf[PRVertex])
+ kryo.register(classOf[PRMessage])
+ }
+}
+
+class CustomPartitioner(partitions: Int) extends Partitioner {
+ def numPartitions = partitions
+
+ def getPartition(key: Any): Int = {
+ val hash = key match {
+ case k: Long => (k & 0x00000000FFFFFFFFL).toInt
+ case _ => key.hashCode
+ }
+
+ val mod = key.hashCode % partitions
+ if (mod < 0) mod + partitions else mod
+ }
+
+ override def equals(other: Any): Boolean = other match {
+ case c: CustomPartitioner =>
+ c.numPartitions == numPartitions
+ case _ => false
+ }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala
new file mode 100644
index 0000000000..72b5c7b88e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.bagel
+
+import org.apache.spark._
+import org.apache.spark.SparkContext._
+
+import org.apache.spark.bagel._
+import org.apache.spark.bagel.Bagel._
+
+import scala.xml.{XML,NodeSeq}
+
+/**
+ * Run PageRank on XML Wikipedia dumps from http://wiki.freebase.com/wiki/WEX. Uses the "articles"
+ * files from there, which contains one line per wiki article in a tab-separated format
+ * (http://wiki.freebase.com/wiki/WEX/Documentation#articles).
+ */
+object WikipediaPageRank {
+ def main(args: Array[String]) {
+ if (args.length < 5) {
+ System.err.println("Usage: WikipediaPageRank <inputFile> <threshold> <numPartitions> <host> <usePartitioner>")
+ System.exit(-1)
+ }
+
+ System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+ System.setProperty("spark.kryo.registrator", classOf[PRKryoRegistrator].getName)
+
+ val inputFile = args(0)
+ val threshold = args(1).toDouble
+ val numPartitions = args(2).toInt
+ val host = args(3)
+ val usePartitioner = args(4).toBoolean
+ val sc = new SparkContext(host, "WikipediaPageRank")
+
+ // Parse the Wikipedia page data into a graph
+ val input = sc.textFile(inputFile)
+
+ println("Counting vertices...")
+ val numVertices = input.count()
+ println("Done counting vertices.")
+
+ println("Parsing input file...")
+ var vertices = input.map(line => {
+ val fields = line.split("\t")
+ val (title, body) = (fields(1), fields(3).replace("\\n", "\n"))
+ val links =
+ if (body == "\\N")
+ NodeSeq.Empty
+ else
+ try {
+ XML.loadString(body) \\ "link" \ "target"
+ } catch {
+ case e: org.xml.sax.SAXParseException =>
+ System.err.println("Article \""+title+"\" has malformed XML in body:\n"+body)
+ NodeSeq.Empty
+ }
+ val outEdges = links.map(link => new String(link.text)).toArray
+ val id = new String(title)
+ (id, new PRVertex(1.0 / numVertices, outEdges))
+ })
+ if (usePartitioner)
+ vertices = vertices.partitionBy(new HashPartitioner(sc.defaultParallelism)).cache
+ else
+ vertices = vertices.cache
+ println("Done parsing input file.")
+
+ // Do the computation
+ val epsilon = 0.01 / numVertices
+ val messages = sc.parallelize(Array[(String, PRMessage)]())
+ val utils = new PageRankUtils
+ val result =
+ Bagel.run(
+ sc, vertices, messages, combiner = new PRCombiner(),
+ numPartitions = numPartitions)(
+ utils.computeWithCombiner(numVertices, epsilon))
+
+ // Print the result
+ System.err.println("Articles with PageRank >= "+threshold+":")
+ val top =
+ (result
+ .filter { case (id, vertex) => vertex.value >= threshold }
+ .map { case (id, vertex) => "%s\t%s\n".format(id, vertex.value) }
+ .collect.mkString)
+ println(top)
+ }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
new file mode 100644
index 0000000000..ddf6855325
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.bagel
+
+import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream}
+import java.nio.ByteBuffer
+
+import scala.collection.mutable.ArrayBuffer
+import scala.xml.{XML, NodeSeq}
+
+import org.apache.spark._
+import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance}
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+
+object WikipediaPageRankStandalone {
+ def main(args: Array[String]) {
+ if (args.length < 5) {
+ System.err.println("Usage: WikipediaPageRankStandalone <inputFile> <threshold> <numIterations> <host> <usePartitioner>")
+ System.exit(-1)
+ }
+
+ System.setProperty("spark.serializer", "spark.bagel.examples.WPRSerializer")
+
+ val inputFile = args(0)
+ val threshold = args(1).toDouble
+ val numIterations = args(2).toInt
+ val host = args(3)
+ val usePartitioner = args(4).toBoolean
+ val sc = new SparkContext(host, "WikipediaPageRankStandalone")
+
+ val input = sc.textFile(inputFile)
+ val partitioner = new HashPartitioner(sc.defaultParallelism)
+ val links =
+ if (usePartitioner)
+ input.map(parseArticle _).partitionBy(partitioner).cache()
+ else
+ input.map(parseArticle _).cache()
+ val n = links.count()
+ val defaultRank = 1.0 / n
+ val a = 0.15
+
+ // Do the computation
+ val startTime = System.currentTimeMillis
+ val ranks =
+ pageRank(links, numIterations, defaultRank, a, n, partitioner, usePartitioner, sc.defaultParallelism)
+
+ // Print the result
+ System.err.println("Articles with PageRank >= "+threshold+":")
+ val top =
+ (ranks
+ .filter { case (id, rank) => rank >= threshold }
+ .map { case (id, rank) => "%s\t%s\n".format(id, rank) }
+ .collect().mkString)
+ println(top)
+
+ val time = (System.currentTimeMillis - startTime) / 1000.0
+ println("Completed %d iterations in %f seconds: %f seconds per iteration"
+ .format(numIterations, time, time / numIterations))
+ System.exit(0)
+ }
+
+ def parseArticle(line: String): (String, Array[String]) = {
+ val fields = line.split("\t")
+ val (title, body) = (fields(1), fields(3).replace("\\n", "\n"))
+ val id = new String(title)
+ val links =
+ if (body == "\\N")
+ NodeSeq.Empty
+ else
+ try {
+ XML.loadString(body) \\ "link" \ "target"
+ } catch {
+ case e: org.xml.sax.SAXParseException =>
+ System.err.println("Article \""+title+"\" has malformed XML in body:\n"+body)
+ NodeSeq.Empty
+ }
+ val outEdges = links.map(link => new String(link.text)).toArray
+ (id, outEdges)
+ }
+
+ def pageRank(
+ links: RDD[(String, Array[String])],
+ numIterations: Int,
+ defaultRank: Double,
+ a: Double,
+ n: Long,
+ partitioner: Partitioner,
+ usePartitioner: Boolean,
+ numPartitions: Int
+ ): RDD[(String, Double)] = {
+ var ranks = links.mapValues { edges => defaultRank }
+ for (i <- 1 to numIterations) {
+ val contribs = links.groupWith(ranks).flatMap {
+ case (id, (linksWrapper, rankWrapper)) =>
+ if (linksWrapper.length > 0) {
+ if (rankWrapper.length > 0) {
+ linksWrapper(0).map(dest => (dest, rankWrapper(0) / linksWrapper(0).size))
+ } else {
+ linksWrapper(0).map(dest => (dest, defaultRank / linksWrapper(0).size))
+ }
+ } else {
+ Array[(String, Double)]()
+ }
+ }
+ ranks = (contribs.combineByKey((x: Double) => x,
+ (x: Double, y: Double) => x + y,
+ (x: Double, y: Double) => x + y,
+ partitioner)
+ .mapValues(sum => a/n + (1-a)*sum))
+ }
+ ranks
+ }
+}
+
+class WPRSerializer extends org.apache.spark.serializer.Serializer {
+ def newInstance(): SerializerInstance = new WPRSerializerInstance()
+}
+
+class WPRSerializerInstance extends SerializerInstance {
+ def serialize[T](t: T): ByteBuffer = {
+ throw new UnsupportedOperationException()
+ }
+
+ def deserialize[T](bytes: ByteBuffer): T = {
+ throw new UnsupportedOperationException()
+ }
+
+ def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = {
+ throw new UnsupportedOperationException()
+ }
+
+ def serializeStream(s: OutputStream): SerializationStream = {
+ new WPRSerializationStream(s)
+ }
+
+ def deserializeStream(s: InputStream): DeserializationStream = {
+ new WPRDeserializationStream(s)
+ }
+}
+
+class WPRSerializationStream(os: OutputStream) extends SerializationStream {
+ val dos = new DataOutputStream(os)
+
+ def writeObject[T](t: T): SerializationStream = t match {
+ case (id: String, wrapper: ArrayBuffer[_]) => wrapper(0) match {
+ case links: Array[String] => {
+ dos.writeInt(0) // links
+ dos.writeUTF(id)
+ dos.writeInt(links.length)
+ for (link <- links) {
+ dos.writeUTF(link)
+ }
+ this
+ }
+ case rank: Double => {
+ dos.writeInt(1) // rank
+ dos.writeUTF(id)
+ dos.writeDouble(rank)
+ this
+ }
+ }
+ case (id: String, rank: Double) => {
+ dos.writeInt(2) // rank without wrapper
+ dos.writeUTF(id)
+ dos.writeDouble(rank)
+ this
+ }
+ }
+
+ def flush() { dos.flush() }
+ def close() { dos.close() }
+}
+
+class WPRDeserializationStream(is: InputStream) extends DeserializationStream {
+ val dis = new DataInputStream(is)
+
+ def readObject[T](): T = {
+ val typeId = dis.readInt()
+ typeId match {
+ case 0 => {
+ val id = dis.readUTF()
+ val numLinks = dis.readInt()
+ val links = new Array[String](numLinks)
+ for (i <- 0 until numLinks) {
+ val link = dis.readUTF()
+ links(i) = link
+ }
+ (id, ArrayBuffer(links)).asInstanceOf[T]
+ }
+ case 1 => {
+ val id = dis.readUTF()
+ val rank = dis.readDouble()
+ (id, ArrayBuffer(rank)).asInstanceOf[T]
+ }
+ case 2 => {
+ val id = dis.readUTF()
+ val rank = dis.readDouble()
+ (id, rank).asInstanceOf[T]
+ }
+ }
+ }
+
+ def close() { dis.close() }
+}
diff --git a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
index deb1c8511a..13aa24fa1a 100644
--- a/examples/src/main/scala/spark/streaming/examples/ActorWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
@@ -1,4 +1,21 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
import scala.collection.mutable.LinkedList
import scala.util.Random
@@ -9,11 +26,11 @@ import akka.actor.ActorRef
import akka.actor.Props
import akka.actor.actorRef2Scala
-import spark.streaming.Seconds
-import spark.streaming.StreamingContext
-import spark.streaming.StreamingContext.toPairDStreamFunctions
-import spark.streaming.receivers.Receiver
-import spark.util.AkkaUtils
+import org.apache.spark.streaming.Seconds
+import org.apache.spark.streaming.StreamingContext
+import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions
+import org.apache.spark.streaming.receivers.Receiver
+import org.apache.spark.util.AkkaUtils
case class SubscribeReceiver(receiverActor: ActorRef)
case class UnsubscribeReceiver(receiverActor: ActorRef)
@@ -64,7 +81,7 @@ class FeederActor extends Actor {
* goes and subscribe to a typical publisher/feeder actor and receives
* data.
*
- * @see [[spark.streaming.examples.FeederActor]]
+ * @see [[org.apache.spark.streaming.examples.FeederActor]]
*/
class SampleActorReceiver[T: ClassTag](urlOfPublisher: String)
extends Actor with Receiver {
@@ -116,9 +133,9 @@ object FeederActor {
* <hostname> and <port> describe the AkkaSystem that Spark Sample feeder is running on.
*
* To run this example locally, you may run Feeder Actor as
- * `$ ./run spark.streaming.examples.FeederActor 127.0.1.1 9999`
+ * `$ ./run-example spark.streaming.examples.FeederActor 127.0.1.1 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999`
+ * `$ ./run-example spark.streaming.examples.ActorWordCount local[2] 127.0.1.1 9999`
*/
object ActorWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
index 39c76fd98a..9f6e163454 100644
--- a/examples/src/main/scala/spark/streaming/examples/FlumeEventCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
@@ -1,8 +1,25 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.util.IntParam
-import spark.storage.StorageLevel
-import spark.streaming._
+import org.apache.spark.util.IntParam
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming._
/**
* Produces a count of events received from Flume.
diff --git a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
index 9389f8a38d..bc8564b3ba 100644
--- a/examples/src/main/scala/spark/streaming/examples/HdfsWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
@@ -1,7 +1,24 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
/**
@@ -11,7 +28,7 @@ import spark.streaming.StreamingContext._
* <directory> is the directory that Spark Streaming will use to find and read new text files.
*
* To run this on your local machine on directory `localdir`, run this example
- * `$ ./run spark.streaming.examples.HdfsWordCount local[2] localdir`
+ * `$ ./run-example spark.streaming.examples.HdfsWordCount local[2] localdir`
* Then create a text file in `localdir` and the words in the file will get counted.
*/
object HdfsWordCount {
diff --git a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
index 9202e65e09..12f939d5a7 100644
--- a/examples/src/main/scala/spark/streaming/examples/KafkaWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
@@ -1,14 +1,31 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
import java.util.Properties
import kafka.message.Message
import kafka.producer.SyncProducerConfig
import kafka.producer._
-import spark.SparkContext
-import spark.streaming._
-import spark.streaming.StreamingContext._
-import spark.storage.StorageLevel
-import spark.streaming.util.RawTextHelper._
+import org.apache.spark.SparkContext
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.util.RawTextHelper._
/**
* Consumes messages from one or more topics in Kafka and does wordcount.
@@ -20,7 +37,7 @@ import spark.streaming.util.RawTextHelper._
* <numThreads> is the number of threads the kafka consumer should use
*
* Example:
- * `./run spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1`
+ * `./run-example spark.streaming.examples.KafkaWordCount local[2] zoo01,zoo02,zoo03 my-consumer-group topic1,topic2 1`
*/
object KafkaWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
index 704540c2bf..e2487dca5f 100644
--- a/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
@@ -1,7 +1,24 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
/**
* Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
@@ -12,7 +29,7 @@ import spark.streaming.StreamingContext._
* To run this on your local machine, you need to first run a Netcat server
* `$ nc -lk 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.NetworkWordCount local[2] localhost 9999`
+ * `$ ./run-example spark.streaming.examples.NetworkWordCount local[2] localhost 9999`
*/
object NetworkWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala
index f450e21040..fad512eeba 100644
--- a/examples/src/main/scala/spark/streaming/examples/QueueStream.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/QueueStream.scala
@@ -1,8 +1,25 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import spark.RDD
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
+package org.apache.spark.streaming.examples
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
import scala.collection.mutable.SynchronizedQueue
diff --git a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
index 175281e095..0b45c30d20 100644
--- a/examples/src/main/scala/spark/streaming/examples/RawNetworkGrep.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
@@ -1,17 +1,34 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.util.IntParam
-import spark.storage.StorageLevel
+import org.apache.spark.util.IntParam
+import org.apache.spark.storage.StorageLevel
-import spark.streaming._
-import spark.streaming.util.RawTextHelper
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.util.RawTextHelper
/**
* Receives text from multiple rawNetworkStreams and counts how many '\n' delimited
* lines have the word 'the' in them. This is useful for benchmarking purposes. This
* will only work with spark.streaming.util.RawTextSender running on all worker nodes
* and with Spark using Kryo serialization (set Java property "spark.serializer" to
- * "spark.KryoSerializer").
+ * "org.apache.spark.serializer.KryoSerializer").
* Usage: RawNetworkGrep <master> <numStreams> <host> <port> <batchMillis>
* <master> is the Spark master URL
* <numStream> is the number rawNetworkStreams, which should be same as number
diff --git a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
index 51c3c9f9b4..cb30c4edb3 100644
--- a/examples/src/main/scala/spark/streaming/examples/StatefulNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
@@ -1,7 +1,24 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming._
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.StreamingContext._
/**
* Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every second.
@@ -12,7 +29,7 @@ import spark.streaming.StreamingContext._
* To run this on your local machine, you need to first run a Netcat server
* `$ nc -lk 9999`
* and then run the example
- * `$ ./run spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999`
+ * `$ ./run-example spark.streaming.examples.StatefulNetworkWordCount local[2] localhost 9999`
*/
object StatefulNetworkWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
index 528778ed72..35b6329ab3 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdCMS.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
@@ -1,10 +1,27 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.storage.StorageLevel
import com.twitter.algebird._
-import spark.streaming.StreamingContext._
-import spark.SparkContext._
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.SparkContext._
/**
* Illustrates the use of the Count-Min Sketch, from Twitter's Algebird library, to compute
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
index 896e9fd8af..8bfde2a829 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterAlgebirdHLL.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
@@ -1,10 +1,27 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
-import spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.storage.StorageLevel
import com.twitter.algebird.HyperLogLog._
import com.twitter.algebird.HyperLogLogMonoid
-import spark.streaming.dstream.TwitterInputDStream
+import org.apache.spark.streaming.dstream.TwitterInputDStream
/**
* Illustrates the use of the HyperLogLog algorithm, from Twitter's Algebird library, to compute
diff --git a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
index 65f0b6d352..27aa6b14bf 100644
--- a/examples/src/main/scala/spark/streaming/examples/TwitterPopularTags.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
@@ -1,8 +1,25 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
-import spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
-import spark.SparkContext._
+import org.apache.spark.SparkContext._
/**
* Calculates popular hashtags (topics) over sliding 10 and 60 second windows from a Twitter
diff --git a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
index 74d0d338b7..c8743b9e25 100644
--- a/examples/src/main/scala/spark/streaming/examples/ZeroMQWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
@@ -1,10 +1,27 @@
-package spark.streaming.examples
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples
import akka.actor.ActorSystem
import akka.actor.actorRef2Scala
import akka.zeromq._
-import spark.streaming.{ Seconds, StreamingContext }
-import spark.streaming.StreamingContext._
+import org.apache.spark.streaming.{ Seconds, StreamingContext }
+import org.apache.spark.streaming.StreamingContext._
import akka.zeromq.Subscribe
/**
@@ -43,9 +60,9 @@ object SimpleZeroMQPublisher {
* <zeroMQurl> and <topic> describe where zeroMq publisher is running.
*
* To run this example locally, you may run publisher as
- * `$ ./run spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar`
+ * `$ ./run-example spark.streaming.examples.SimpleZeroMQPublisher tcp://127.0.1.1:1234 foo.bar`
* and run the example as
- * `$ ./run spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo`
+ * `$ ./run-example spark.streaming.examples.ZeroMQWordCount local[2] tcp://127.0.1.1:1234 foo`
*/
object ZeroMQWordCount {
def main(args: Array[String]) {
diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala
index 4c6e08bc74..884d6d6f34 100644
--- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewGenerator.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewGenerator.scala
@@ -1,4 +1,21 @@
-package spark.streaming.examples.clickstream
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.examples.clickstream
import java.net.{InetAddress,ServerSocket,Socket,SocketException}
import java.io.{InputStreamReader, BufferedReader, PrintWriter}
@@ -20,8 +37,8 @@ object PageView {
/** Generates streaming events to simulate page views on a website.
*
* This should be used in tandem with PageViewStream.scala. Example:
- * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10
- * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
* */
object PageViewGenerator {
val pages = Map("http://foo.com/" -> .7,
diff --git a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala
index e226a4a73a..8282cc9269 100644
--- a/examples/src/main/scala/spark/streaming/examples/clickstream/PageViewStream.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/clickstream/PageViewStream.scala
@@ -1,16 +1,33 @@
-package spark.streaming.examples.clickstream
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import spark.streaming.{Seconds, StreamingContext}
-import spark.streaming.StreamingContext._
-import spark.SparkContext._
+package org.apache.spark.streaming.examples.clickstream
+
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.StreamingContext._
+import org.apache.spark.SparkContext._
/** Analyses a streaming dataset of web page views. This class demonstrates several types of
* operators available in Spark streaming.
*
* This should be used in tandem with PageViewStream.scala. Example:
- * $ ./run spark.streaming.examples.clickstream.PageViewGenerator 44444 10
- * $ ./run spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
- * */
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewGenerator 44444 10
+ * $ ./run-example spark.streaming.examples.clickstream.PageViewStream errorRatePerZipCode localhost 44444
+ */
object PageViewStream {
def main(args: Array[String]) {
if (args.length != 3) {
diff --git a/examples/src/main/scala/spark/examples/BroadcastTest.scala b/examples/src/main/scala/spark/examples/BroadcastTest.scala
deleted file mode 100644
index ba59be1687..0000000000
--- a/examples/src/main/scala/spark/examples/BroadcastTest.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-package spark.examples
-
-import spark.SparkContext
-
-object BroadcastTest {
- def main(args: Array[String]) {
- if (args.length == 0) {
- System.err.println("Usage: BroadcastTest <master> [<slices>] [numElem]")
- System.exit(1)
- }
-
- val sc = new SparkContext(args(0), "Broadcast Test",
- System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
- val slices = if (args.length > 1) args(1).toInt else 2
- val num = if (args.length > 2) args(2).toInt else 1000000
-
- var arr1 = new Array[Int](num)
- for (i <- 0 until arr1.length) {
- arr1(i) = i
- }
-
- for (i <- 0 until 2) {
- println("Iteration " + i)
- println("===========")
- val barr1 = sc.broadcast(arr1)
- sc.parallelize(1 to 10, slices).foreach {
- i => println(barr1.value.size)
- }
- }
-
- System.exit(0)
- }
-}
diff --git a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala b/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala
deleted file mode 100644
index 21a90f2e5a..0000000000
--- a/examples/src/main/scala/spark/examples/ExceptionHandlingTest.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-package spark.examples
-
-import spark.SparkContext
-
-object ExceptionHandlingTest {
- def main(args: Array[String]) {
- if (args.length == 0) {
- System.err.println("Usage: ExceptionHandlingTest <master>")
- System.exit(1)
- }
-
- val sc = new SparkContext(args(0), "ExceptionHandlingTest",
- System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
- sc.parallelize(0 until sc.defaultParallelism).foreach { i =>
- if (math.random > 0.75)
- throw new Exception("Testing exception handling")
- }
-
- System.exit(0)
- }
-}
diff --git a/examples/src/main/scala/spark/examples/HdfsTest.scala b/examples/src/main/scala/spark/examples/HdfsTest.scala
deleted file mode 100644
index dd61c467f7..0000000000
--- a/examples/src/main/scala/spark/examples/HdfsTest.scala
+++ /dev/null
@@ -1,20 +0,0 @@
-package spark.examples
-
-import spark._
-
-object HdfsTest {
- def main(args: Array[String]) {
- val sc = new SparkContext(args(0), "HdfsTest",
- System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
- val file = sc.textFile(args(1))
- val mapped = file.map(s => s.length).cache()
- for (iter <- 1 to 10) {
- val start = System.currentTimeMillis()
- for (x <- mapped) { x + 2 }
- // println("Processing: " + x)
- val end = System.currentTimeMillis()
- println("Iteration " + iter + " took " + (end-start) + " ms")
- }
- System.exit(0)
- }
-}
diff --git a/examples/src/main/scala/spark/examples/LocalPi.scala b/examples/src/main/scala/spark/examples/LocalPi.scala
deleted file mode 100644
index 9457472f2d..0000000000
--- a/examples/src/main/scala/spark/examples/LocalPi.scala
+++ /dev/null
@@ -1,17 +0,0 @@
-package spark.examples
-
-import scala.math.random
-import spark._
-import SparkContext._
-
-object LocalPi {
- def main(args: Array[String]) {
- var count = 0
- for (i <- 1 to 100000) {
- val x = random * 2 - 1
- val y = random * 2 - 1
- if (x*x + y*y < 1) count += 1
- }
- println("Pi is roughly " + 4 * count / 100000.0)
- }
-}
diff --git a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala
deleted file mode 100644
index a0aaf60918..0000000000
--- a/examples/src/main/scala/spark/examples/MultiBroadcastTest.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-package spark.examples
-
-import spark.SparkContext
-
-object MultiBroadcastTest {
- def main(args: Array[String]) {
- if (args.length == 0) {
- System.err.println("Usage: BroadcastTest <master> [<slices>] [numElem]")
- System.exit(1)
- }
-
- val sc = new SparkContext(args(0), "Broadcast Test",
- System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
-
- val slices = if (args.length > 1) args(1).toInt else 2
- val num = if (args.length > 2) args(2).toInt else 1000000
-
- var arr1 = new Array[Int](num)
- for (i <- 0 until arr1.length) {
- arr1(i) = i
- }
-
- var arr2 = new Array[Int](num)
- for (i <- 0 until arr2.length) {
- arr2(i) = i
- }
-
- val barr1 = sc.broadcast(arr1)
- val barr2 = sc.broadcast(arr2)
- sc.parallelize(1 to 10, slices).foreach {
- i => println(barr1.value.size + barr2.value.size)
- }
-
- System.exit(0)
- }
-}
diff --git a/examples/src/main/scala/spark/examples/SparkPi.scala b/examples/src/main/scala/spark/examples/SparkPi.scala
deleted file mode 100644
index b1c736a2cd..0000000000
--- a/examples/src/main/scala/spark/examples/SparkPi.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-package spark.examples
-
-import scala.math.random
-import spark._
-import SparkContext._
-
-/** Computes an approximation to pi */
-object SparkPi {
- def main(args: Array[String]) {
- if (args.length == 0) {
- System.err.println("Usage: SparkPi <master> [<slices>]")
- System.exit(1)
- }
- val spark = new SparkContext(args(0), "SparkPi",
- System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
- val slices = if (args.length > 1) args(1).toInt else 2
- val n = 100000 * slices
- val count = spark.parallelize(1 to n, slices).map { i =>
- val x = random * 2 - 1
- val y = random * 2 - 1
- if (x*x + y*y < 1) 1 else 0
- }.reduce(_ + _)
- println("Pi is roughly " + 4.0 * count / n)
- //System.exit(0)
- spark.stop()
- }
-}