From 8f4a86eaa1cad9a2a7607fd5446105c93e5e424e Mon Sep 17 00:00:00 2001
From: Yin Huai
Date: Thu, 28 May 2015 17:12:30 -0700
Subject: [SPARK-7853] [SQL] Fix HiveContext in Spark Shell

https://issues.apache.org/jira/browse/SPARK-7853

This fixes a problem introduced by my change in
https://github.com/apache/spark/pull/6435, which caused HiveContext
creation to fail in the Spark shell because of a class loader issue.

Author: Yin Huai

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.

(cherry picked from commit 572b62cafe4bc7b1d464c9dcfb449c9d53456826)
Signed-off-by: Yin Huai
---
 .../org/apache/spark/sql/hive/HiveContext.scala    | 35 ++++++++++++----------
 .../spark/sql/hive/HiveMetastoreCatalog.scala      | 12 ++++----
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fdcce..2ed71d3d52 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           "Specify a valid path to the correct hive jars using $HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }

-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+          "Please set spark.sql.hive.metastore.jars.")
       }

       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {

   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfere with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }

   /* A catalyst metadata catalog that points to the Hive Metastore. */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 425a4005aa..95117f7a68 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
     hiveQlTable.getMetadata
   )

-  implicit class SchemaAttribute(f: FieldSchema) {
+  implicit class SchemaAttribute(f: HiveColumn) {
     def toAttribute: AttributeReference = AttributeReference(
-      f.getName,
-      HiveMetastoreTypes.toDataType(f.getType),
+      f.name,
+      HiveMetastoreTypes.toDataType(f.hiveType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
     )(qualifiers = Seq(alias.getOrElse(tableName)))
   }

-  // Must be a stable value since new attributes are born here.
-  val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+  /** PartitionKey attributes */
+  val partitionKeys = table.partitionColumns.map(_.toAttribute)

   /** Non-partitionKey attributes */
-  val attributes = hiveQlTable.getCols.map(_.toAttribute)
+  val attributes = table.schema.map(_.toAttribute)

   val output = attributes ++ partitionKeys
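For reference, the jar-discovery logic from the first hunk can be exercised standalone. The sketch below lifts allJars out of HiveContext; the ClassLoaderJars object and the main harness are illustrative additions, not Spark code. Note that on Java 9+ the application class loader is no longer a URLClassLoader, so the walk can legitimately come up empty there.

import java.net.{URL, URLClassLoader}

object ClassLoaderJars {
  // Recursively collect jar URLs along the class loader chain. Unlike the
  // old addJars, hitting a non-URLClassLoader link (e.g. the Spark shell's
  // translating class loader) no longer ends the search or throws; we just
  // keep climbing through its parent until we reach null.
  def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
    case null => Array.empty[URL]
    case urlClassLoader: URLClassLoader =>
      urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
    case other => allJars(other.getParent)
  }

  def main(args: Array[String]): Unit = {
    val jars = allJars(Thread.currentThread().getContextClassLoader)
    if (jars.isEmpty) {
      // Mirrors the patch: fail loudly when no jars can be located.
      sys.error("No URLClassLoader found on the chain (expected on Java 9+).")
    }
    jars.foreach(println)
  }
}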
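The SchemaAttribute change in HiveMetastoreCatalog.scala is an instance of Scala's implicit-class enrichment pattern: the patch swaps the receiver type from Hive's FieldSchema to Spark's HiveColumn without touching either class. Below is a self-contained sketch of that pattern; HiveColumn and AttributeReference here are simplified stand-ins, not the real Spark or Catalyst classes.

object SchemaAttributeSketch {
  case class HiveColumn(name: String, hiveType: String)
  case class AttributeReference(name: String, dataType: String, nullable: Boolean)

  // Enrich a plain column descriptor with a toAttribute conversion
  // without modifying its definition.
  implicit class SchemaAttribute(f: HiveColumn) {
    // Everything is nullable, since Hive data can be loaded with no validation.
    def toAttribute: AttributeReference =
      AttributeReference(f.name, f.hiveType, nullable = true)
  }

  def main(args: Array[String]): Unit = {
    val schema = Seq(HiveColumn("id", "int"), HiveColumn("name", "string"))
    val partitionColumns = Seq(HiveColumn("ds", "string"))
    // Mirrors the patched MetastoreRelation: attributes from the schema,
    // partition keys from the partition columns, output is their union.
    val attributes = schema.map(_.toAttribute)
    val partitionKeys = partitionColumns.map(_.toAttribute)
    println(attributes ++ partitionKeys)
  }
}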