aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala4
-rwxr-xr-xdev/audit-release/audit_release.py2
-rwxr-xr-xdev/run-tests.py2
-rw-r--r--dev/sparktestsupport/modules.py8
-rw-r--r--docs/streaming-kafka-integration.md14
-rw-r--r--docs/streaming-programming-guide.md4
-rw-r--r--examples/pom.xml2
-rw-r--r--external/kafka-0-8-assembly/pom.xml (renamed from external/kafka-assembly/pom.xml)6
-rw-r--r--external/kafka-0-8/pom.xml (renamed from external/kafka/pom.xml)6
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package-info.java (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package-info.java)0
-rw-r--r--external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala (renamed from external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package.scala)0
-rw-r--r--external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java (renamed from external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java)0
-rw-r--r--external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java (renamed from external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java)0
-rw-r--r--external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java (renamed from external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java)0
-rw-r--r--external/kafka-0-8/src/test/resources/log4j.properties (renamed from external/kafka/src/test/resources/log4j.properties)0
-rw-r--r--external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala (renamed from external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala)0
-rw-r--r--external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala (renamed from external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala)0
-rw-r--r--external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala (renamed from external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala)0
-rw-r--r--external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala (renamed from external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala)0
-rw-r--r--external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala (renamed from external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala)0
-rw-r--r--pom.xml4
-rw-r--r--project/MimaBuild.scala10
-rw-r--r--project/SparkBuild.scala8
-rw-r--r--python/pyspark/streaming/kafka.py6
-rw-r--r--python/pyspark/streaming/tests.py6
35 files changed, 45 insertions, 37 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 9075e3eb3f..78606e06fb 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -955,9 +955,9 @@ private[spark] object SparkSubmitUtils {
// Add scala exclusion rule
md.addExcludeRule(createExclusion("*:scala-library:*", ivySettings, ivyConfName))
- // We need to specify each component explicitly, otherwise we miss spark-streaming-kafka and
+ // We need to specify each component explicitly, otherwise we miss spark-streaming-kafka-0-8 and
// other spark-streaming utility components. Underscore is there to differentiate between
- // spark-streaming_2.1x and spark-streaming-kafka-assembly_2.1x
+ // spark-streaming_2.1x and spark-streaming-kafka-0-8-assembly_2.1x
val components = Seq("catalyst_", "core_", "graphx_", "hive_", "mllib_", "repl_",
"sql_", "streaming_", "yarn_", "network-common_", "network-shuffle_", "network-yarn_")
diff --git a/dev/audit-release/audit_release.py b/dev/audit-release/audit_release.py
index ee72da4df0..b28e7a427b 100755
--- a/dev/audit-release/audit_release.py
+++ b/dev/audit-release/audit_release.py
@@ -116,7 +116,7 @@ original_dir = os.getcwd()
# dependencies within those projects.
modules = [
"spark-core", "spark-mllib", "spark-streaming", "spark-repl",
- "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka",
+ "spark-graphx", "spark-streaming-flume", "spark-streaming-kafka-0-8",
"spark-catalyst", "spark-sql", "spark-hive", "spark-streaming-kinesis-asl"
]
modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules)
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 291f821c7f..7b3269752b 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -336,7 +336,7 @@ def build_spark_sbt(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
sbt_goals = ["package",
- "streaming-kafka-assembly/assembly",
+ "streaming-kafka-0-8-assembly/assembly",
"streaming-flume-assembly/assembly",
"streaming-kinesis-asl-assembly/assembly"]
profiles_and_goals = build_profiles + sbt_goals
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 5640928643..f81db8e44a 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -223,14 +223,14 @@ streaming_kinesis_asl = Module(
streaming_kafka = Module(
- name="streaming-kafka",
+ name="streaming-kafka-0-8",
dependencies=[streaming],
source_file_regexes=[
- "external/kafka",
- "external/kafka-assembly",
+ "external/kafka-0-8",
+ "external/kafka-0-8-assembly",
],
sbt_test_goals=[
- "streaming-kafka/test",
+ "streaming-kafka-0-8/test",
]
)
diff --git a/docs/streaming-kafka-integration.md b/docs/streaming-kafka-integration.md
index 015a2f1fa0..0f1e32212e 100644
--- a/docs/streaming-kafka-integration.md
+++ b/docs/streaming-kafka-integration.md
@@ -14,7 +14,7 @@ Next, we discuss how to use this approach in your streaming application.
1. **Linking:** For Scala/Java applications using SBT/Maven project definitions, link your streaming application with the following artifact (see [Linking section](streaming-programming-guide.html#linking) in the main programming guide for further information).
groupId = org.apache.spark
- artifactId = spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}
+ artifactId = spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}
version = {{site.SPARK_VERSION_SHORT}}
For Python applications, you will have to add this above library and its dependencies when deploying your application. See the *Deploying* subsection below.
@@ -64,14 +64,14 @@ Next, we discuss how to use this approach in your streaming application.
3. **Deploying:** As with any Spark applications, `spark-submit` is used to launch your application. However, the details are slightly different for Scala/Java applications and Python applications.
- For Scala and Java applications, if you are using SBT or Maven for project management, then package `spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}` and its dependencies into the application JAR. Make sure `spark-core_{{site.SCALA_BINARY_VERSION}}` and `spark-streaming_{{site.SCALA_BINARY_VERSION}}` are marked as `provided` dependencies as those are already present in a Spark installation. Then use `spark-submit` to launch your application (see [Deploying section](streaming-programming-guide.html#deploying-applications) in the main programming guide).
+ For Scala and Java applications, if you are using SBT or Maven for project management, then package `spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}` and its dependencies into the application JAR. Make sure `spark-core_{{site.SCALA_BINARY_VERSION}}` and `spark-streaming_{{site.SCALA_BINARY_VERSION}}` are marked as `provided` dependencies as those are already present in a Spark installation. Then use `spark-submit` to launch your application (see [Deploying section](streaming-programming-guide.html#deploying-applications) in the main programming guide).
- For Python applications which lack SBT/Maven project management, `spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}` and its dependencies can be directly added to `spark-submit` using `--packages` (see [Application Submission Guide](submitting-applications.html)). That is,
+ For Python applications which lack SBT/Maven project management, `spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}` and its dependencies can be directly added to `spark-submit` using `--packages` (see [Application Submission Guide](submitting-applications.html)). That is,
- ./bin/spark-submit --packages org.apache.spark:spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}:{{site.SPARK_VERSION_SHORT}} ...
+ ./bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}:{{site.SPARK_VERSION_SHORT}} ...
- Alternatively, you can also download the JAR of the Maven artifact `spark-streaming-kafka-assembly` from the
- [Maven repository](http://search.maven.org/#search|ga|1|a%3A%22spark-streaming-kafka-assembly_{{site.SCALA_BINARY_VERSION}}%22%20AND%20v%3A%22{{site.SPARK_VERSION_SHORT}}%22) and add it to `spark-submit` with `--jars`.
+ Alternatively, you can also download the JAR of the Maven artifact `spark-streaming-kafka-0-8-assembly` from the
+ [Maven repository](http://search.maven.org/#search|ga|1|a%3A%22spark-streaming-kafka-0-8-assembly_{{site.SCALA_BINARY_VERSION}}%22%20AND%20v%3A%22{{site.SPARK_VERSION_SHORT}}%22) and add it to `spark-submit` with `--jars`.
## Approach 2: Direct Approach (No Receivers)
This new receiver-less "direct" approach has been introduced in Spark 1.3 to ensure stronger end-to-end guarantees. Instead of using receivers to receive data, this approach periodically queries Kafka for the latest offsets in each topic+partition, and accordingly defines the offset ranges to process in each batch. When the jobs to process the data are launched, Kafka's simple consumer API is used to read the defined ranges of offsets from Kafka (similar to read files from a file system). Note that this is an experimental feature introduced in Spark 1.3 for the Scala and Java API, in Spark 1.4 for the Python API.
@@ -91,7 +91,7 @@ Next, we discuss how to use this approach in your streaming application.
1. **Linking:** This approach is supported only in Scala/Java application. Link your SBT/Maven project with the following artifact (see [Linking section](streaming-programming-guide.html#linking) in the main programming guide for further information).
groupId = org.apache.spark
- artifactId = spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}
+ artifactId = spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}
version = {{site.SPARK_VERSION_SHORT}}
2. **Programming:** In the streaming application code, import `KafkaUtils` and create an input DStream as follows.
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 7f6c0ed699..9ca9b1844c 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -416,7 +416,7 @@ some of the common ones are as follows.
<table class="table">
<tr><th>Source</th><th>Artifact</th></tr>
-<tr><td> Kafka </td><td> spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}} </td></tr>
+<tr><td> Kafka </td><td> spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}} </td></tr>
<tr><td> Flume </td><td> spark-streaming-flume_{{site.SCALA_BINARY_VERSION}} </td></tr>
<tr><td> Kinesis<br/></td><td>spark-streaming-kinesis-asl_{{site.SCALA_BINARY_VERSION}} [Amazon Software License] </td></tr>
<tr><td></td><td></td></tr>
@@ -1892,7 +1892,7 @@ To run a Spark Streaming applications, you need to have the following.
if your application uses [advanced sources](#advanced-sources) (e.g. Kafka, Flume),
then you will have to package the extra artifact they link to, along with their dependencies,
in the JAR that is used to deploy the application. For example, an application using `KafkaUtils`
- will have to include `spark-streaming-kafka_{{site.SCALA_BINARY_VERSION}}` and all its
+ will have to include `spark-streaming-kafka-0-8_{{site.SCALA_BINARY_VERSION}}` and all its
transitive dependencies in the application JAR.
- *Configuring sufficient memory for the executors* - Since the received data must be stored in
diff --git a/examples/pom.xml b/examples/pom.xml
index 4423d0fbe1..771da5b9a6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -87,7 +87,7 @@
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
+ <artifactId>spark-streaming-kafka-0-8_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
diff --git a/external/kafka-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml
index e1b5a7e713..3cc288abea 100644
--- a/external/kafka-assembly/pom.xml
+++ b/external/kafka-0-8-assembly/pom.xml
@@ -26,19 +26,19 @@
</parent>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming-kafka-assembly_2.11</artifactId>
+ <artifactId>spark-streaming-kafka-0-8-assembly_2.11</artifactId>
<packaging>jar</packaging>
<name>Spark Project External Kafka Assembly</name>
<url>http://spark.apache.org/</url>
<properties>
- <sbt.project.name>streaming-kafka-assembly</sbt.project.name>
+ <sbt.project.name>streaming-kafka-0-8-assembly</sbt.project.name>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
+ <artifactId>spark-streaming-kafka-0-8_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
diff --git a/external/kafka/pom.xml b/external/kafka-0-8/pom.xml
index 68d52e9339..cccfda3c61 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka-0-8/pom.xml
@@ -26,12 +26,12 @@
</parent>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming-kafka_2.11</artifactId>
+ <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<properties>
- <sbt.project.name>streaming-kafka</sbt.project.name>
+ <sbt.project.name>streaming-kafka-0-8</sbt.project.name>
</properties>
<packaging>jar</packaging>
- <name>Spark Project External Kafka</name>
+ <name>Spark Integration for Kafka 0.8</name>
<url>http://spark.apache.org/</url>
<dependencies>
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala
index 9159051ba0..9159051ba0 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/Broker.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala
index fb58ed7898..fb58ed7898 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 726b5d8ec3..726b5d8ec3 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala
index 3713bda41b..3713bda41b 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
index d4881b140d..d4881b140d 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala
index 02917becf0..02917becf0 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDDPartition.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
index d9d4240c05..d9d4240c05 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
index edaafb912c..edaafb912c 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala
index d9b856e469..d9b856e469 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/OffsetRange.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala
index 39abe3c3e2..39abe3c3e2 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package-info.java b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package-info.java
index 2e5ab0fb3b..2e5ab0fb3b 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package-info.java
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package-info.java
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala
index 47c5187f87..47c5187f87 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/package.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/package.scala
diff --git a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java
index fa6b0dbc8c..fa6b0dbc8c 100644
--- a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java
+++ b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaDirectKafkaStreamSuite.java
diff --git a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java
index c41b6297b0..c41b6297b0 100644
--- a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java
+++ b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaRDDSuite.java
diff --git a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java
index 868df64e8c..868df64e8c 100644
--- a/external/kafka/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java
+++ b/external/kafka-0-8/src/test/java/org/apache/spark/streaming/kafka/JavaKafkaStreamSuite.java
diff --git a/external/kafka/src/test/resources/log4j.properties b/external/kafka-0-8/src/test/resources/log4j.properties
index fd51f8faf5..fd51f8faf5 100644
--- a/external/kafka/src/test/resources/log4j.properties
+++ b/external/kafka-0-8/src/test/resources/log4j.properties
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala
index cb782d27fe..cb782d27fe 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala
index d66830cbac..d66830cbac 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaClusterSuite.scala
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala
index 5e539c1d79..5e539c1d79 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
index 6a35ac14a8..6a35ac14a8 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala
index 7b9aee39ff..7b9aee39ff 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala
+++ b/external/kafka-0-8/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala
diff --git a/pom.xml b/pom.xml
index 3e783fa56e..d71913c958 100644
--- a/pom.xml
+++ b/pom.xml
@@ -108,8 +108,8 @@
<module>examples</module>
<module>repl</module>
<module>launcher</module>
- <module>external/kafka</module>
- <module>external/kafka-assembly</module>
+ <module>external/kafka-0-8</module>
+ <module>external/kafka-0-8-assembly</module>
</modules>
<properties>
diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala
index 3dc1ceacde..2a989dd4f7 100644
--- a/project/MimaBuild.scala
+++ b/project/MimaBuild.scala
@@ -89,7 +89,15 @@ object MimaBuild {
def mimaSettings(sparkHome: File, projectRef: ProjectRef) = {
val organization = "org.apache.spark"
val previousSparkVersion = "1.6.0"
- val fullId = "spark-" + projectRef.project + "_2.11"
+ // This check can be removed post-2.0
+ val project = if (previousSparkVersion == "1.6.0" &&
+ projectRef.project == "streaming-kafka-0-8"
+ ) {
+ "streaming-kafka"
+ } else {
+ projectRef.project
+ }
+ val fullId = "spark-" + project + "_2.11"
mimaDefaultSettings ++
Seq(previousArtifact := Some(organization % fullId % previousSparkVersion),
binaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value))
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index f50f41a88d..d83afa03f5 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -46,7 +46,7 @@ object BuildCommons {
val streamingProjects@Seq(
streaming, streamingFlumeSink, streamingFlume, streamingKafka
) = Seq(
- "streaming", "streaming-flume-sink", "streaming-flume", "streaming-kafka"
+ "streaming", "streaming-flume-sink", "streaming-flume", "streaming-kafka-0-8"
).map(ProjectRef(buildLocation, _))
val allProjects@Seq(
@@ -62,7 +62,7 @@ object BuildCommons {
"docker-integration-tests").map(ProjectRef(buildLocation, _))
val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) =
- Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
+ Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-0-8-assembly", "streaming-kinesis-asl-assembly")
.map(ProjectRef(buildLocation, _))
val copyJarsProjects@Seq(assembly, examples) = Seq("assembly", "examples")
@@ -581,8 +581,8 @@ object Assembly {
.getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String])
},
jarName in assembly <<= (version, moduleName, hadoopVersion) map { (v, mName, hv) =>
- if (mName.contains("streaming-flume-assembly") || mName.contains("streaming-kafka-assembly") || mName.contains("streaming-kinesis-asl-assembly")) {
- // This must match the same name used in maven (see external/kafka-assembly/pom.xml)
+ if (mName.contains("streaming-flume-assembly") || mName.contains("streaming-kafka-0-8-assembly") || mName.contains("streaming-kinesis-asl-assembly")) {
+ // This must match the same name used in maven (see external/kafka-0-8-assembly/pom.xml)
s"${mName}-${v}.jar"
} else {
s"${mName}-${v}-hadoop${hv}.jar"
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index 02a88699a2..015ca77dbf 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -208,13 +208,13 @@ ________________________________________________________________________________
1. Include the Kafka library and its dependencies with in the
spark-submit command as
- $ bin/spark-submit --packages org.apache.spark:spark-streaming-kafka:%s ...
+ $ bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8:%s ...
2. Download the JAR of the artifact from Maven Central http://search.maven.org/,
- Group Id = org.apache.spark, Artifact Id = spark-streaming-kafka-assembly, Version = %s.
+ Group Id = org.apache.spark, Artifact Id = spark-streaming-kafka-0-8-assembly, Version = %s.
Then, include the jar in the spark-submit command as
- $ bin/spark-submit --jars <spark-streaming-kafka-assembly.jar> ...
+ $ bin/spark-submit --jars <spark-streaming-kafka-0-8-assembly.jar> ...
________________________________________________________________________________________________
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index f27628c895..360ba1e716 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -1476,13 +1476,13 @@ def search_jar(dir, name_prefix):
def search_kafka_assembly_jar():
SPARK_HOME = os.environ["SPARK_HOME"]
- kafka_assembly_dir = os.path.join(SPARK_HOME, "external/kafka-assembly")
- jars = search_jar(kafka_assembly_dir, "spark-streaming-kafka-assembly")
+ kafka_assembly_dir = os.path.join(SPARK_HOME, "external/kafka-0-8-assembly")
+ jars = search_jar(kafka_assembly_dir, "spark-streaming-kafka-0-8-assembly")
if not jars:
raise Exception(
("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) +
"You need to build Spark with "
- "'build/sbt assembly/package streaming-kafka-assembly/assembly' or "
+ "'build/sbt assembly/package streaming-kafka-0-8-assembly/assembly' or "
"'build/mvn package' before running this test.")
elif len(jars) > 1:
raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please "