author     Cheng Lian <lian.cs.zju@gmail.com>          2014-07-25 12:20:49 -0700
committer  Michael Armbrust <michael@databricks.com>   2014-07-25 12:20:49 -0700
commit     06dc0d2c6b69c5d59b4d194ced2ac85bfe2e05e2 (patch)
tree       5f3189e690ac7f2fe68426c6763f7900e9aba5b6 /sql/hive
parent     32bcf9af94b39f2c509eb54f8565fb659c70ca97 (diff)
[SPARK-2410][SQL] Merging Hive Thrift/JDBC server
JIRA issue:

- Main: [SPARK-2410](https://issues.apache.org/jira/browse/SPARK-2410)
- Related: [SPARK-2678](https://issues.apache.org/jira/browse/SPARK-2678)

Cherry picked the Hive Thrift/JDBC server from [branch-1.0-jdbc](https://github.com/apache/spark/tree/branch-1.0-jdbc). (Thanks chenghao-intel for his initial contribution of the Spark SQL CLI.)

TODO

- [x] Use `spark-submit` to launch the server, the CLI and beeline
- [x] Migration guide draft for Shark users

----

Hit a bug in `SparkSubmitArguments` while working on this PR: all application options that are recognized by `SparkSubmitArguments` are stolen as `SparkSubmit` options. For example:

```bash
$ spark-submit --class org.apache.hive.beeline.BeeLine spark-internal --help
```

This actually shows the usage information of `SparkSubmit` rather than that of `BeeLine`; a sketch of the failure mode follows below.

~~Fixed this bug here since the `spark-internal` related stuff also touches `SparkSubmitArguments` and I'd like to avoid conflicts.~~

**UPDATE** The bug mentioned above is now tracked by [SPARK-2678](https://issues.apache.org/jira/browse/SPARK-2678). Decided to revert the changes for this bug since it involves more subtle considerations and is worth a separate PR.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #1399 from liancheng/thriftserver and squashes the following commits:

090beea [Cheng Lian] Revert changes related to SPARK-2678, decided to move them to another PR
21c6cf4 [Cheng Lian] Updated Spark SQL programming guide docs
fe0af31 [Cheng Lian] Reordered spark-submit options in spark-shell[.cmd]
199e3fb [Cheng Lian] Disabled MIMA for hive-thriftserver
1083e9d [Cheng Lian] Fixed failed test suites
7db82a1 [Cheng Lian] Fixed spark-submit application options handling logic
9cc0f06 [Cheng Lian] Starts beeline with spark-submit
cfcf461 [Cheng Lian] Updated documents and build scripts for the newly added hive-thriftserver profile
061880f [Cheng Lian] Addressed all comments by @pwendell
7755062 [Cheng Lian] Adapts test suites to spark-submit settings
40bafef [Cheng Lian] Fixed more license header issues
e214aab [Cheng Lian] Added missing license headers
b8905ba [Cheng Lian] Fixed minor issues in spark-sql and start-thriftserver.sh
f975d22 [Cheng Lian] Updated docs for Hive compatibility and Shark migration guide draft
3ad4e75 [Cheng Lian] Starts spark-sql shell with spark-submit
a5310d1 [Cheng Lian] Make HiveThriftServer2 play well with spark-submit
61f39f4 [Cheng Lian] Starts Hive Thrift server via spark-submit
2c4c539 [Cheng Lian] Cherry picked the Hive Thrift server
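Below is a minimal Scala sketch of the option-stealing behavior described in the message above. It is not the actual `SparkSubmitArguments` implementation; `GreedyParser` and its handful of cases are hypothetical stand-ins that assume a simple recursive parser.

```scala
// Hypothetical sketch of the failure mode -- NOT SparkSubmitArguments itself.
object GreedyParser {
  // Returns (options claimed by the launcher, leftover positional args).
  def parse(args: List[String]): (Map[String, String], List[String]) = args match {
    case "--class" :: value :: tail =>
      val (opts, rest) = parse(tail)
      (opts + ("class" -> value), rest)
    // BUG: this case also matches a `--help` that appears *after* the
    // application resource, so the child application never receives it.
    case "--help" :: tail =>
      val (opts, rest) = parse(tail)
      (opts + ("help" -> "true"), rest)
    case positional :: tail =>
      val (opts, rest) = parse(tail)
      (opts, positional :: rest)
    case Nil =>
      (Map.empty, Nil)
  }

  def main(args: Array[String]): Unit = {
    // Mirrors: spark-submit --class org.apache.hive.beeline.BeeLine spark-internal --help
    val (opts, appArgs) =
      parse(List("--class", "org.apache.hive.beeline.BeeLine", "spark-internal", "--help"))
    println(opts)    // contains "help" -> "true": the launcher stole --help
    println(appArgs) // List(spark-internal): BeeLine never sees --help
  }
}
```

A parser that stops claiming options at the first positional argument (the primary application resource) would leave `--help` for `BeeLine`; that is roughly the fix tracked by SPARK-2678.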
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/pom.xml                                                                   2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala               2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala  50
3 files changed, 26 insertions(+), 28 deletions(-)
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 1699ffe06c..93d00f7c37 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -32,7 +32,7 @@
<name>Spark Project Hive</name>
<url>http://spark.apache.org/</url>
<properties>
- <sbt.project.name>hive</sbt.project.name>
+ <sbt.project.name>hive</sbt.project.name>
</properties>
<dependencies>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 201c85f3d5..84d43eaeea 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -255,7 +255,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
ShortType, DecimalType, TimestampType, BinaryType)
- protected def toHiveString(a: (Any, DataType)): String = a match {
+ protected[sql] def toHiveString(a: (Any, DataType)): String = a match {
case (struct: Row, StructType(fields)) =>
struct.zip(fields).map {
case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
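The visibility change above widens `toHiveString` from subclass-only to the whole `org.apache.spark.sql` package, which is what lets the newly merged Thrift server reuse Hive's result formatting. Below is a hedged sketch of the kind of access this enables; `ResultFormatter` is a hypothetical caller, and the imports assume the Spark 1.0-era package layout (`org.apache.spark.sql.catalyst.types.DataType`).

```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.hive.HiveContext

// Hypothetical caller: this compiles only because toHiveString is now
// protected[sql] -- any code under the org.apache.spark.sql package may
// call it, not just HiveContext subclasses.
object ResultFormatter {
  def format(ctx: HiveContext, value: Any, dataType: DataType): String =
    ctx.toHiveString((value, dataType))
}
```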
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 6f36a4f8cb..8489f2a34e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -416,10 +416,10 @@ class HiveQuerySuite extends HiveComparisonTest {
hql(s"set $testKey=$testVal")
assert(get(testKey, testVal + "_") == testVal)
- hql("set mapred.reduce.tasks=20")
- assert(get("mapred.reduce.tasks", "0") == "20")
- hql("set mapred.reduce.tasks = 40")
- assert(get("mapred.reduce.tasks", "0") == "40")
+ hql("set some.property=20")
+ assert(get("some.property", "0") == "20")
+ hql("set some.property = 40")
+ assert(get("some.property", "0") == "40")
hql(s"set $testKey=$testVal")
assert(get(testKey, "0") == testVal)
@@ -433,63 +433,61 @@ class HiveQuerySuite extends HiveComparisonTest {
val testKey = "spark.sql.key.usedfortestonly"
val testVal = "test.val.0"
val nonexistentKey = "nonexistent"
- def collectResults(rdd: SchemaRDD): Set[(String, String)] =
- rdd.collect().map { case Row(key: String, value: String) => key -> value }.toSet
clear()
// "set" itself returns all config variables currently specified in SQLConf.
assert(hql("SET").collect().size == 0)
- assertResult(Set(testKey -> testVal)) {
- collectResults(hql(s"SET $testKey=$testVal"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ hql(s"SET $testKey=$testVal").collect().map(_.getString(0))
}
assert(hiveconf.get(testKey, "") == testVal)
- assertResult(Set(testKey -> testVal)) {
- collectResults(hql("SET"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ hql(s"SET $testKey=$testVal").collect().map(_.getString(0))
}
hql(s"SET ${testKey + testKey}=${testVal + testVal}")
assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
- assertResult(Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
- collectResults(hql("SET"))
+ assertResult(Array(s"$testKey=$testVal", s"${testKey + testKey}=${testVal + testVal}")) {
+ hql(s"SET").collect().map(_.getString(0))
}
// "set key"
- assertResult(Set(testKey -> testVal)) {
- collectResults(hql(s"SET $testKey"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ hql(s"SET $testKey").collect().map(_.getString(0))
}
- assertResult(Set(nonexistentKey -> "<undefined>")) {
- collectResults(hql(s"SET $nonexistentKey"))
+ assertResult(Array(s"$nonexistentKey=<undefined>")) {
+ hql(s"SET $nonexistentKey").collect().map(_.getString(0))
}
// Assert that sql() should have the same effects as hql() by repeating the above using sql().
clear()
assert(sql("SET").collect().size == 0)
- assertResult(Set(testKey -> testVal)) {
- collectResults(sql(s"SET $testKey=$testVal"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ sql(s"SET $testKey=$testVal").collect().map(_.getString(0))
}
assert(hiveconf.get(testKey, "") == testVal)
- assertResult(Set(testKey -> testVal)) {
- collectResults(sql("SET"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ sql("SET").collect().map(_.getString(0))
}
sql(s"SET ${testKey + testKey}=${testVal + testVal}")
assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
- assertResult(Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
- collectResults(sql("SET"))
+ assertResult(Array(s"$testKey=$testVal", s"${testKey + testKey}=${testVal + testVal}")) {
+ sql("SET").collect().map(_.getString(0))
}
- assertResult(Set(testKey -> testVal)) {
- collectResults(sql(s"SET $testKey"))
+ assertResult(Array(s"$testKey=$testVal")) {
+ sql(s"SET $testKey").collect().map(_.getString(0))
}
- assertResult(Set(nonexistentKey -> "<undefined>")) {
- collectResults(sql(s"SET $nonexistentKey"))
+ assertResult(Array(s"$nonexistentKey=<undefined>")) {
+ sql(s"SET $nonexistentKey").collect().map(_.getString(0))
}
clear()
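The rewritten assertions above reflect a behavior change: `SET` now returns a single string column formatted as `key=value` (Hive's own convention for `SET` output) instead of the earlier two-column key/value rows. A minimal sketch of how a client might split that output back into a pair; `splitSetOutput` is a hypothetical helper, not part of this commit.

```scala
// Hypothetical client-side helper: recovers (key, value) from the
// "key=value" strings that SET now returns as its single column.
def splitSetOutput(line: String): (String, String) =
  line.split("=", 2) match {
    case Array(key, value) => (key, value)
    case Array(key)        => (key, "") // no '=' present: value undefined
  }

// splitSetOutput("spark.sql.key.usedfortestonly=test.val.0")
//   == ("spark.sql.key.usedfortestonly", "test.val.0")
```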