| author | Marcelo Vanzin <vanzin@cloudera.com> | 2015-01-08 17:15:13 -0800 |
|---|---|---|
| committer | Patrick Wendell <pwendell@gmail.com> | 2015-01-08 17:15:13 -0800 |
| commit | 48cecf673c38ead56afa2dea49d295165c67cdf4 (patch) | |
| tree | 67d1a63293ecbac130ea608d08a46e5489922b3b /sql | |
| parent | c9c8b219ad81c4c30bc1598ff35b01f964570c29 (diff) | |
[SPARK-4048] Enhance and extend hadoop-provided profile.
This change does a few things to make the hadoop-provided profile more useful:
- Create new profiles for other libraries / services that might be provided by the infrastructure.
- Simplify and fix the poms so that the profiles are only activated while building assemblies.
- Fix tests so that they're able to run when the profiles are activated.
- Add a new env variable to be used by distributions that use these profiles to provide the runtime classpath for Spark jobs and daemons (sketched below).
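
The env variable in the last item is `SPARK_DIST_CLASSPATH` (introduced in commit 417d90e below). As a minimal sketch of how such a variable could be consumed: `buildClassPath` is a hypothetical helper, not Spark's actual launcher code, which lives outside the `sql` tree shown on this page.

```scala
// Minimal sketch, not Spark's actual launcher code: `buildClassPath` is a
// hypothetical helper showing how SPARK_DIST_CLASSPATH (the env variable this
// commit introduces) could be appended to an application classpath.
object DistClasspathSketch {
  def buildClassPath(appClassPath: String): String = {
    val sep = java.io.File.pathSeparator
    // A distribution might set, for example, SPARK_DIST_CLASSPATH=$(hadoop classpath)
    val entries = Seq(appClassPath) ++ sys.env.get("SPARK_DIST_CLASSPATH")
    entries.filter(_.nonEmpty).mkString(sep)
  }
}
```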
Author: Marcelo Vanzin <vanzin@cloudera.com>
Closes #2982 from vanzin/SPARK-4048 and squashes the following commits:
82eb688 [Marcelo Vanzin] Add a comment.
eb228c0 [Marcelo Vanzin] Fix borked merge.
4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes.
371ebee [Marcelo Vanzin] Review feedback.
52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
322f882 [Marcelo Vanzin] Fix merge fail.
f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9640503 [Marcelo Vanzin] Cleanup child process log message.
115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom).
e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile.
7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles.
1be73d4 [Marcelo Vanzin] Restore flume-provided profile.
d1399ed [Marcelo Vanzin] Restore jetty dependency.
82a54b9 [Marcelo Vanzin] Remove unused profile.
5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles.
1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver.
f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list.
9e4e001 [Marcelo Vanzin] Remove duplicate hive profile.
d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log.
4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn.
417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH".
2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing.
1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects.
284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
Diffstat (limited to 'sql')
5 files changed, 14 insertions, 40 deletions
```diff
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 023ce2041b..3e9ef07df9 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -56,12 +56,10 @@
     <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>parquet-column</artifactId>
-      <version>${parquet.version}</version>
     </dependency>
     <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>parquet-hadoop</artifactId>
-      <version>${parquet.version}</version>
     </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index d3a517375c..259eef0b80 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -42,19 +42,16 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-cli</artifactId>
-      <version>${hive.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-jdbc</artifactId>
-      <version>${hive.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-beeline</artifactId>
-      <version>${hive.version}</version>
     </dependency>
   </dependencies>
   <build>
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index e8ffbc5b95..60953576d0 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -48,6 +48,7 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
        | --master local
        | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl
        | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath
+       | --driver-class-path ${sys.props("java.class.path")}
     """.stripMargin.split("\\s+").toSeq ++ extraArgs
   }

@@ -70,7 +71,7 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
     }

     // Searching expected output line from both stdout and stderr of the CLI process
-    val process = (Process(command) #< queryStream).run(
+    val process = (Process(command, None) #< queryStream).run(
       ProcessLogger(captureOutput("stdout"), captureOutput("stderr")))

     try {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
index 94d5ed4f1d..7814aa38f4 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
@@ -142,6 +142,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
          | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost
          | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=http
          | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT}=$port
+         | --driver-class-path ${sys.props("java.class.path")}
        """.stripMargin.split("\\s+").toSeq
     } else {
       s"""$startScript
@@ -151,6 +152,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
          | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath
          | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost
          | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_PORT}=$port
+         | --driver-class-path ${sys.props("java.class.path")}
        """.stripMargin.split("\\s+").toSeq
     }

@@ -179,8 +181,9 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
       }
     }

-    // Resets SPARK_TESTING to avoid loading Log4J configurations in testing class paths
-    val env = Seq("SPARK_TESTING" -> "0")
+    val env = Seq(
+      // Resets SPARK_TESTING to avoid loading Log4J configurations in testing class paths
+      "SPARK_TESTING" -> "0")

     Process(command, None, env: _*).run(ProcessLogger(
       captureThriftServerOutput("stdout"),
@@ -214,7 +217,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
     } finally {
       warehousePath.delete()
       metastorePath.delete()
-      Process(stopScript).run().exitValue()
+      Process(stopScript, None, env: _*).run().exitValue()
       // The `spark-daemon.sh' script uses kill, which is not synchronous, have to wait for a while.
       Thread.sleep(3.seconds.toMillis)
       Option(logTailingProcess).map(_.destroy())
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 46aacad011..58b0722464 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -47,9 +47,8 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-metastore</artifactId>
-      <version>${hive.version}</version>
     </dependency>
     <dependency>
       <groupId>commons-httpclient</groupId>
@@ -57,51 +56,27 @@
       <version>3.1</version>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-exec</artifactId>
-      <version>${hive.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.esotericsoftware.kryo</groupId>
-          <artifactId>kryo</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-mapper-asl</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.spark-project.hive</groupId>
+      <groupId>${hive.group}</groupId>
       <artifactId>hive-serde</artifactId>
-      <version>${hive.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging-api</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <!-- hive-serde already depends on avro, but this brings in customized config of avro deps from parent -->
     <dependency>
       <groupId>org.apache.avro</groupId>
       <artifactId>avro</artifactId>
-      <version>${avro.version}</version>
     </dependency>
     <!-- use the build matching the hadoop api of avro-mapred (i.e. no classifier for hadoop 1 API,
       hadoop2 classifier for hadoop 2 API. avro-mapred is a dependency of org.spark-project.hive:hive-serde -->
     <dependency>
       <groupId>org.apache.avro</groupId>
       <artifactId>avro-mapred</artifactId>
-      <version>${avro.version}</version>
       <classifier>${avro.mapred.classifier}</classifier>
     </dependency>
     <dependency>
```
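
The two test-suite changes above share one pattern: because `*-provided` dependencies are no longer on the assembly's classpath, the parent JVM's own classpath is handed to the spawned CLI / Thrift server process via `--driver-class-path`. A condensed sketch of that pattern follows; the script name `bin/spark-sql` is a placeholder standing in for whatever start script a suite actually launches.

```scala
import scala.sys.process._

object ChildClasspathSketch extends App {
  // Propagate the parent (test) JVM's classpath to the child process, as the
  // suites above do via --driver-class-path.
  val command = Seq(
    "bin/spark-sql",  // placeholder start script, not the suites' actual value
    "--driver-class-path", sys.props("java.class.path"))

  // Reset SPARK_TESTING so the child does not load Log4J configurations from
  // testing class paths (mirrors the env handling in HiveThriftServer2Suite).
  val env = Seq("SPARK_TESTING" -> "0")

  val process = Process(command, None, env: _*).run(
    ProcessLogger(line => println(s"stdout: $line"), line => println(s"stderr: $line")))
  println(s"child exited with status ${process.exitValue()}")
}
```

Passing the working directory (`None`) and the env pairs explicitly uses the same `Process(command, cwd, extraEnv: _*)` overload the diff switches to, which is what lets the suites control the child's environment at all.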