aboutsummaryrefslogtreecommitdiff
path: root/sql/hive-thriftserver/src/main
diff options
context:
space:
mode:
authorCheng Lian <lian.cs.zju@gmail.com>2014-11-02 15:18:29 -0800
committerMichael Armbrust <michael@databricks.com>2014-11-02 15:18:29 -0800
commitc9f840046f8c45b1137f0289eeb0c980de72ea5e (patch)
treec867149b1f31ab6845ccbb412b73980b1c65b2a2 /sql/hive-thriftserver/src/main
parent495a132031ae002c787371f2fd0ba4be2437e7c8 (diff)
downloadspark-c9f840046f8c45b1137f0289eeb0c980de72ea5e.tar.gz
spark-c9f840046f8c45b1137f0289eeb0c980de72ea5e.tar.bz2
spark-c9f840046f8c45b1137f0289eeb0c980de72ea5e.zip
[SPARK-3791][SQL] Provides Spark version and Hive version in HiveThriftServer2
This PR overrides the `GetInfo` Hive Thrift API to provide correct version information. Another property `spark.sql.hive.version` is added to reveal the underlying Hive version. These are generally useful for Spark SQL ODBC driver providers. The Spark version information is extracted from the jar manifest. Also took the chance to remove the `SET -v` hack, which was a workaround for Simba ODBC driver connectivity. TODO - [x] Find a general way to figure out Hive (or even any dependency) version. This [blog post](http://blog.soebes.de/blog/2014/01/02/version-information-into-your-appas-with-maven/) suggests several methods to inspect application version. In the case of Spark, this can be tricky because the chosen method: 1. must applies to both Maven build and SBT build For Maven builds, we can retrieve the version information from the META-INF/maven directory within the assembly jar. But this doesn't work for SBT builds. 2. must not rely on the original jars of dependencies to extract specific dependency version, because Spark uses assembly jar. This implies we can't read Hive version from Hive jar files since standard Spark distribution doesn't include them. 3. should play well with `SPARK_PREPEND_CLASSES` to ease local testing during development. `SPARK_PREPEND_CLASSES` prevents classes to be loaded from the assembly jar, thus we can't locate the jar file and read its manifest. Given these, maybe the only reliable method is to generate a source file containing version information at build time. pwendell Do you have any suggestions from the perspective of the build process? **Update** Hive version is now retrieved from the newly introduced `HiveShim` object. Author: Cheng Lian <lian.cs.zju@gmail.com> Author: Cheng Lian <lian@databricks.com> Closes #2843 from liancheng/get-info and squashes the following commits: a873d0f [Cheng Lian] Updates test case 53f43cd [Cheng Lian] Retrieves underlying Hive verson via HiveShim 1d282b8 [Cheng Lian] Removes the Simba ODBC "SET -v" hack f857fce [Cheng Lian] Overrides Hive GetInfo Thrift API and adds Hive version property
Diffstat (limited to 'sql/hive-thriftserver/src/main')
-rw-r--r--sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala14
-rw-r--r--sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala11
2 files changed, 20 insertions, 5 deletions
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala
index a78311fc48..ecfb74473e 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.hive.thriftserver
+import java.util.jar.Attributes.Name
+
import scala.collection.JavaConversions._
import java.io.IOException
@@ -29,11 +31,12 @@ import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.ShimLoader
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
-import org.apache.hive.service.cli.CLIService
+import org.apache.hive.service.cli._
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
+import org.apache.spark.util.Utils
private[hive] class SparkSQLCLIService(hiveContext: HiveContext)
extends CLIService
@@ -60,6 +63,15 @@ private[hive] class SparkSQLCLIService(hiveContext: HiveContext)
initCompositeService(hiveConf)
}
+
+ override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
+ getInfoType match {
+ case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
+ case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
+ case GetInfoType.CLI_DBMS_VER => new GetInfoValue(Utils.sparkVersion)
+ case _ => super.getInfo(sessionHandle, getInfoType)
+ }
+ }
}
private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
index 5042586351..89732c939b 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
@@ -17,10 +17,11 @@
package org.apache.spark.sql.hive.thriftserver
+import scala.collection.JavaConversions._
+
import org.apache.spark.scheduler.StatsReportListener
-import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.sql.hive.{HiveShim, HiveContext}
import org.apache.spark.{Logging, SparkConf, SparkContext}
-import scala.collection.JavaConversions._
/** A singleton object for the master program. The slaves should not access this. */
private[hive] object SparkSQLEnv extends Logging {
@@ -31,8 +32,10 @@ private[hive] object SparkSQLEnv extends Logging {
def init() {
if (hiveContext == null) {
- sparkContext = new SparkContext(new SparkConf()
- .setAppName(s"SparkSQL::${java.net.InetAddress.getLocalHost.getHostName}"))
+ val sparkConf = new SparkConf()
+ .setAppName(s"SparkSQL::${java.net.InetAddress.getLocalHost.getHostName}")
+ .set("spark.sql.hive.version", HiveShim.version)
+ sparkContext = new SparkContext(sparkConf)
sparkContext.addSparkListener(new StatsReportListener())
hiveContext = new HiveContext(sparkContext)