author    Yin Huai <yhuai@databricks.com>  2015-05-28 17:12:30 -0700
committer Yin Huai <yhuai@databricks.com>  2015-05-28 17:12:30 -0700
commit    572b62cafe4bc7b1d464c9dcfb449c9d53456826 (patch)
tree      11dd95bd2a9584cac258d5b660e635f33e329485
parent    0077af22ca5fcb2e50dcf7daa4f6804ae722bfbe (diff)
[SPARK-7853] [SQL] Fix HiveContext in Spark Shell
https://issues.apache.org/jira/browse/SPARK-7853

This fixes the problem introduced by my change in https://github.com/apache/spark/pull/6435, which caused HiveContext to fail to be created in the spark shell because of a class loader issue.

Author: Yin Huai <yhuai@databricks.com>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.
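The core of the fix is to stop requiring that the context class loader itself be a URLClassLoader and instead walk the whole class loader chain, collecting jar URLs from every URLClassLoader along the way. The sketch below is a self-contained illustration of that walk which can be pasted into a Scala REPL or spark-shell; the patch itself starts from Utils.getContextOrSparkClassLoader, while this sketch uses the current thread's context class loader.

import java.net.{URL, URLClassLoader}

// Collect jar URLs from every URLClassLoader in the chain, continuing through
// parent loaders even when an intermediate loader is not a URLClassLoader.
def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
  case null => Array.empty[URL]
  case urlClassLoader: URLClassLoader =>
    urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
  case other => allJars(other.getParent)
}

// List the jars visible from the current thread's context class loader.
allJars(Thread.currentThread().getContextClassLoader).foreach(println)

Note that this relies on the loaders in the chain being URLClassLoaders, which holds on the Java 7/8 runtimes Spark supported at the time; on Java 9 and later the application class loader is no longer a URLClassLoader, so such a walk can come up empty.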
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala           | 35
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala  | 12
2 files changed, 25 insertions, 22 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fdcce..2ed71d3d52 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
"Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " +
s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
}
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }

-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+            "Please set spark.sql.hive.metastore.jars.")
       }

       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfere with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }

   /* A catalyst metadata catalog that points to the Hive Metastore. */
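The comment added in the hunk above describes an initialization-order hazard: merely touching hiveconf creates a default Hive session, and if that happens before the lazy executionHive has been forced, executionHive no longer initializes against a clean environment. A minimal sketch of that kind of hazard, with hypothetical stand-in names rather than Spark's actual members:

class MiniContext {
  private var defaultSessionCreated = false

  // Stand-in for hiveconf.set: its first use creates a default session as a side effect.
  def hiveconfSet(key: String, value: String): Unit = {
    defaultSessionCreated = true
  }

  // Stand-in for executionHive: a lazy val that must be initialized before any
  // default session exists.
  lazy val executionHive: String = {
    require(!defaultSessionCreated, "executionHive created after a default session already exists")
    "isolated execution client"
  }

  def setConf(key: String, value: String): Unit = {
    val _ = executionHive   // force the lazy val first
    hiveconfSet(key, value) // only then touch the session-creating state
  }
}

new MiniContext().setConf("spark.sql.shuffle.partitions", "10")

Swapping the two lines in this sketch's setConf makes the require fail, which is the shape of the interference the real change avoids by calling hiveconf.set last.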
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 425a4005aa..95117f7a68 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
     hiveQlTable.getMetadata
   )

-  implicit class SchemaAttribute(f: FieldSchema) {
+  implicit class SchemaAttribute(f: HiveColumn) {
     def toAttribute: AttributeReference = AttributeReference(
-      f.getName,
-      HiveMetastoreTypes.toDataType(f.getType),
+      f.name,
+      HiveMetastoreTypes.toDataType(f.hiveType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
     )(qualifiers = Seq(alias.getOrElse(tableName)))
   }

-  // Must be a stable value since new attributes are born here.
-  val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+  /** PartitionKey attributes */
+  val partitionKeys = table.partitionColumns.map(_.toAttribute)

   /** Non-partitionKey attributes */
-  val attributes = hiveQlTable.getCols.map(_.toAttribute)
+  val attributes = table.schema.map(_.toAttribute)

   val output = attributes ++ partitionKeys
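In HiveMetastoreCatalog.scala the SchemaAttribute enrichment now takes Spark's own HiveColumn instead of Hive's FieldSchema, and partitionKeys and attributes are derived from the table value rather than hiveQlTable, so building the relation's output no longer calls into Hive's metastore utility classes. Below is a simplified, self-contained sketch of the enrich-with-toAttribute pattern, using made-up stand-in types (Column, Attribute, Relation) rather than Catalyst's real classes:

case class Column(name: String, hiveType: String)
case class Attribute(name: String, dataType: String, nullable: Boolean, qualifier: String)

class Relation(tableName: String, schema: Seq[Column], partitionColumns: Seq[Column]) {
  // Enrich a plain column descriptor with a toAttribute conversion so schema and
  // partition columns are mapped to attributes the same way.
  implicit class SchemaAttribute(c: Column) {
    // Data may be written with no validation, so every attribute is treated as nullable.
    def toAttribute: Attribute = Attribute(c.name, c.hiveType, nullable = true, tableName)
  }

  val partitionKeys: Seq[Attribute] = partitionColumns.map(_.toAttribute)
  val attributes: Seq[Attribute] = schema.map(_.toAttribute)
  val output: Seq[Attribute] = attributes ++ partitionKeys
}

new Relation("t", Seq(Column("id", "int")), Seq(Column("ds", "string"))).output.foreach(println)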