aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Jia <jia.calvin@gmail.com>2015-04-24 17:57:41 -0400
committerSean Owen <sowen@cloudera.com>2015-04-24 17:57:41 -0400
commit438859eb7c4e605bb4041d9a547a16be9c827c75 (patch)
tree83b0589ea748fd090559532b3756c15ffbab6adb
parentcaf0136ec5838cf5bf61f39a5b3474a505a6ae11 (diff)
downloadspark-438859eb7c4e605bb4041d9a547a16be9c827c75.tar.gz
spark-438859eb7c4e605bb4041d9a547a16be9c827c75.tar.bz2
spark-438859eb7c4e605bb4041d9a547a16be9c827c75.zip
[SPARK-6122] [CORE] Upgrade tachyon-client version to 0.6.3
This is a reopening of #4867. A short summary of the issues resolved from the previous PR: 1. HTTPClient version mismatch: Selenium (used for UI tests) requires version 4.3.x, and Tachyon included 4.2.5 through a transitive dependency of its shaded thrift jar. To address this, Tachyon 0.6.3 will promote the transitive dependencies of the shaded jar so they can be excluded in spark. 2. Jackson-Mapper-ASL version mismatch: In lower versions of hadoop-client (i.e., 1.0.4), version 1.0.1 is included. The parquet library used in spark sql requires version 1.8+. It's unclear to me why upgrading tachyon-client would cause this dependency to break. The solution was to exclude jackson-mapper-asl from hadoop-client. It seems that the dependency management in spark-parent will not work on transitive dependencies; one way to make sure jackson-mapper-asl is included with the correct version is to add it as a top-level dependency. The best solution would be to exclude the dependency in the modules which require a higher version, but that did not fix the unit tests. Any suggestions on the best way to solve this would be appreciated! Author: Calvin Jia <jia.calvin@gmail.com> Closes #5354 from calvinjia/upgrade_tachyon_0.6.3 and squashes the following commits: 0eefe4d [Calvin Jia] Handle httpclient version in maven dependency management. Remove httpclient version setting from profiles. 7c00dfa [Calvin Jia] Set httpclient version to 4.3.2 for selenium. Specify version of httpclient for sql/hive (previously 4.2.5 transitive dependency of libthrift). 9263097 [Calvin Jia] Merge master to test latest changes dbfc1bd [Calvin Jia] Use Tachyon 0.6.4 for cleaner dependencies. e2ff80a [Calvin Jia] Exclude the jetty and curator promoted dependencies from tachyon-client. a3a29da [Calvin Jia] Update tachyon-client exclusions. 0ae6c97 [Calvin Jia] Change tachyon version to 0.6.3 a204df9 [Calvin Jia] Update make distribution tachyon version. 
a93c94f [Calvin Jia] Exclude jackson-mapper-asl from hadoop client since it has a lower version than spark's expected version. a8a923c [Calvin Jia] Exclude httpcomponents from Tachyon 910fabd [Calvin Jia] Update to master eed9230 [Calvin Jia] Update tachyon version to 0.6.1. 11907b3 [Calvin Jia] Use TachyonURI for tachyon paths instead of strings. 71bf441 [Calvin Jia] Upgrade Tachyon client version to 0.6.0.
-rw-r--r--assembly/pom.xml10
-rw-r--r--core/pom.xml6
-rw-r--r--core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala16
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala4
-rw-r--r--examples/pom.xml5
-rw-r--r--launcher/pom.xml6
-rwxr-xr-xmake-distribution.sh2
-rw-r--r--pom.xml12
-rw-r--r--sql/hive/pom.xml5
9 files changed, 39 insertions, 27 deletions
diff --git a/assembly/pom.xml b/assembly/pom.xml
index f1f8b0d368..20593e710d 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -213,16 +213,6 @@
</plugins>
</build>
</profile>
- <profile>
- <id>kinesis-asl</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient</artifactId>
- <version>${commons.httpclient.version}</version>
- </dependency>
- </dependencies>
- </profile>
<!-- Profiles that disable inclusion of certain dependencies. -->
<profile>
diff --git a/core/pom.xml b/core/pom.xml
index e80829b7a7..5e89d548cd 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -74,6 +74,10 @@
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
@@ -275,7 +279,7 @@
<dependency>
<groupId>org.tachyonproject</groupId>
<artifactId>tachyon-client</artifactId>
- <version>0.5.0</version>
+ <version>0.6.4</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
index 951897cead..583f1fdf04 100644
--- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
@@ -20,8 +20,8 @@ package org.apache.spark.storage
import java.text.SimpleDateFormat
import java.util.{Date, Random}
-import tachyon.client.TachyonFS
-import tachyon.client.TachyonFile
+import tachyon.TachyonURI
+import tachyon.client.{TachyonFile, TachyonFS}
import org.apache.spark.Logging
import org.apache.spark.executor.ExecutorExitCode
@@ -40,7 +40,7 @@ private[spark] class TachyonBlockManager(
val master: String)
extends Logging {
- val client = if (master != null && master != "") TachyonFS.get(master) else null
+ val client = if (master != null && master != "") TachyonFS.get(new TachyonURI(master)) else null
if (client == null) {
logError("Failed to connect to the Tachyon as the master address is not configured")
@@ -60,11 +60,11 @@ private[spark] class TachyonBlockManager(
addShutdownHook()
def removeFile(file: TachyonFile): Boolean = {
- client.delete(file.getPath(), false)
+ client.delete(new TachyonURI(file.getPath()), false)
}
def fileExists(file: TachyonFile): Boolean = {
- client.exist(file.getPath())
+ client.exist(new TachyonURI(file.getPath()))
}
def getFile(filename: String): TachyonFile = {
@@ -81,7 +81,7 @@ private[spark] class TachyonBlockManager(
if (old != null) {
old
} else {
- val path = tachyonDirs(dirId) + "/" + "%02x".format(subDirId)
+ val path = new TachyonURI(s"${tachyonDirs(dirId)}/${"%02x".format(subDirId)}")
client.mkdir(path)
val newDir = client.getFile(path)
subDirs(dirId)(subDirId) = newDir
@@ -89,7 +89,7 @@ private[spark] class TachyonBlockManager(
}
}
}
- val filePath = subDir + "/" + filename
+ val filePath = new TachyonURI(s"$subDir/$filename")
if(!client.exist(filePath)) {
client.createFile(filePath)
}
@@ -113,7 +113,7 @@ private[spark] class TachyonBlockManager(
tries += 1
try {
tachyonDirId = "%s-%04x".format(dateFormat.format(new Date), rand.nextInt(65536))
- val path = rootDir + "/" + "spark-tachyon-" + tachyonDirId
+ val path = new TachyonURI(s"$rootDir/spark-tachyon-$tachyonDirId")
if (!client.exist(path)) {
foundLocalDir = client.mkdir(path)
tachyonDir = client.getFile(path)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 2feb7341b1..667aa168e7 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -42,6 +42,8 @@ import org.apache.hadoop.security.UserGroupInformation
import org.apache.log4j.PropertyConfigurator
import org.eclipse.jetty.util.MultiException
import org.json4s._
+
+import tachyon.TachyonURI
import tachyon.client.{TachyonFS, TachyonFile}
import org.apache.spark._
@@ -955,7 +957,7 @@ private[spark] object Utils extends Logging {
* Delete a file or directory and its contents recursively.
*/
def deleteRecursively(dir: TachyonFile, client: TachyonFS) {
- if (!client.delete(dir.getPath(), true)) {
+ if (!client.delete(new TachyonURI(dir.getPath()), true)) {
throw new IOException("Failed to delete the tachyon dir: " + dir)
}
}
diff --git a/examples/pom.xml b/examples/pom.xml
index afd7c6d52f..df1717403b 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -390,11 +390,6 @@
<artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient</artifactId>
- <version>${commons.httpclient.version}</version>
- </dependency>
</dependencies>
</profile>
<profile>
diff --git a/launcher/pom.xml b/launcher/pom.xml
index 182e5f6021..ebfa7685ea 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -68,6 +68,12 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
diff --git a/make-distribution.sh b/make-distribution.sh
index 738a9c4d69..cb65932b4a 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -32,7 +32,7 @@ SPARK_HOME="$(cd "`dirname "$0"`"; pwd)"
DISTDIR="$SPARK_HOME/dist"
SPARK_TACHYON=false
-TACHYON_VERSION="0.5.0"
+TACHYON_VERSION="0.6.4"
TACHYON_TGZ="tachyon-${TACHYON_VERSION}-bin.tar.gz"
TACHYON_URL="https://github.com/amplab/tachyon/releases/download/v${TACHYON_VERSION}/${TACHYON_TGZ}"
diff --git a/pom.xml b/pom.xml
index bcc2f57f1a..4b0b0c85ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -146,7 +146,7 @@
<jets3t.version>0.7.1</jets3t.version>
<aws.java.sdk.version>1.8.3</aws.java.sdk.version>
<aws.kinesis.client.version>1.1.0</aws.kinesis.client.version>
- <commons.httpclient.version>4.2.6</commons.httpclient.version>
+ <commons.httpclient.version>4.3.2</commons.httpclient.version>
<commons.math3.version>3.4.1</commons.math3.version>
<test_classpath_file>${project.build.directory}/spark-test-classpath.txt</test_classpath_file>
<scala.version>2.10.4</scala.version>
@@ -421,6 +421,16 @@
<version>1.3.9</version>
</dependency>
<dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ <version>${commons.httpclient.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore</artifactId>
+ <version>${commons.httpclient.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>2.42.2</version>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 04440076a2..21dce8d8a5 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -60,6 +60,11 @@
<artifactId>hive-exec</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ <version>${commons.httpclient.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</dependency>