Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala               2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala     11
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala          7
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala  47
4 files changed, 27 insertions, 40 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 2770552050..6e7c1bc133 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -289,7 +289,7 @@ object SQLConf {
val DEFAULT_DATA_SOURCE_NAME = SQLConfigBuilder("spark.sql.sources.default")
.doc("The default data source to use in input/output.")
.stringConf
- .createWithDefault("org.apache.spark.sql.parquet")
+ .createWithDefault("parquet")
// This is used to control the when we will split a schema's JSON string to multiple pieces
// in order to fit the JSON string in metastore's table property (by default, the value has
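The change above lets spark.sql.sources.default accept the short name "parquet" instead of a fully qualified class name. A minimal sketch of how the setting is used (assumes a SQLContext named sqlContext; nothing outside this diff, the identifiers are illustrative):

    // Both calls below now resolve to the built-in Parquet source.
    sqlContext.setConf("spark.sql.sources.default", "parquet")
    sqlContext.read.load("/path/to/data")                     // picks up the default source
    sqlContext.read.format("parquet").load("/path/to/data")   // equivalent, explicit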
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 33a926e4d2..c20b022e84 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -24,7 +24,6 @@ import com.google.common.base.Objects
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.hadoop.hive.common.StatsSetupConst
-import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.{TableType => HiveTableType}
import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.ql.metadata.{Table => HiveTable, _}
@@ -46,6 +45,7 @@ import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => Parq
import org.apache.spark.sql.hive.client._
import org.apache.spark.sql.hive.execution.HiveNativeCommand
import org.apache.spark.sql.hive.orc.{DefaultSource => OrcDefaultSource}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
private[hive] case class HiveSerDe(
@@ -59,10 +59,10 @@ private[hive] object HiveSerDe {
*
* @param source Currently the source abbreviation can be one of the following:
* SequenceFile, RCFile, ORC, PARQUET, and case insensitive.
- * @param hiveConf Hive Conf
+ * @param conf SQLConf
* @return HiveSerDe associated with the specified source
*/
- def sourceToSerDe(source: String, hiveConf: HiveConf): Option[HiveSerDe] = {
+ def sourceToSerDe(source: String, conf: SQLConf): Option[HiveSerDe] = {
val serdeMap = Map(
"sequencefile" ->
HiveSerDe(
@@ -73,7 +73,8 @@ private[hive] object HiveSerDe {
HiveSerDe(
inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"),
outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"),
- serde = Option(hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE))),
+ serde = Option(conf.getConfString("hive.default.rcfile.serde",
+ "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"))),
"orc" ->
HiveSerDe(
@@ -297,7 +298,7 @@ private[hive] class HiveMetastoreCatalog(hive: SQLContext) extends Logging {
CatalogTableType.MANAGED_TABLE
}
- val maybeSerDe = HiveSerDe.sourceToSerDe(provider, hiveconf)
+ val maybeSerDe = HiveSerDe.sourceToSerDe(provider, conf)
val dataSource =
DataSource(
hive,
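With the new signature, callers resolve serdes through SQLConf rather than a HiveConf. A rough sketch of the lookup under the changed API (assumes code sitting inside Spark's sql packages, since SQLConf is internal):

    import org.apache.spark.sql.internal.SQLConf

    val conf = new SQLConf
    // "rcfile" now reads its serde from the "hive.default.rcfile.serde" entry,
    // falling back to LazyBinaryColumnarSerDe when the key is not set.
    HiveSerDe.sourceToSerDe("rcfile", conf).flatMap(_.serde)
    // => Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")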
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 09297c27dc..2c360cb7ca 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -53,11 +53,6 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
*/
lazy val metadataHive: HiveClient = sharedState.metadataHive.newSession()
- /**
- * A Hive helper class for substituting variables in a SQL statement.
- */
- lazy val substitutor = new VariableSubstitution
-
override lazy val conf: SQLConf = new SQLConf {
override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
}
@@ -114,7 +109,7 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
/**
* Parser for HiveQl query texts.
*/
- override lazy val sqlParser: ParserInterface = new HiveSqlParser(substitutor, hiveconf)
+ override lazy val sqlParser: ParserInterface = new HiveSqlParser(conf, hiveconf)
/**
* Planner that takes into account Hive-specific strategies.
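The session state no longer owns a VariableSubstitution; the parser constructs its own. A hedged sketch of how the parser is now built (conf and hiveconf are whatever the session already holds):

    // HiveSqlParser now takes the SQLConf plus the HiveConf used for variable substitution.
    val parser = new HiveSqlParser(conf, hiveconf)
    val plan = parser.parsePlan("SELECT 1")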
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
index 4ff02cdbd0..90f10d5ebd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
@@ -17,13 +17,12 @@
package org.apache.spark.sql.hive.execution
import scala.collection.JavaConverters._
+import scala.util.Try
import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars
-import org.apache.hadoop.hive.ql.parse.{EximUtil, VariableSubstitution}
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution
import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.parser._
@@ -32,18 +31,16 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkSqlAstBuilder
import org.apache.spark.sql.execution.command.{CreateTable, CreateTableLike}
import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView, HiveSerDe}
-import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveMetastoreTypes, HiveSerDe}
-import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
+import org.apache.spark.sql.internal.SQLConf
/**
* Concrete parser for HiveQl statements.
*/
-class HiveSqlParser(
- substitutor: VariableSubstitution,
- hiveconf: HiveConf)
- extends AbstractSqlParser {
+class HiveSqlParser(conf: SQLConf, hiveconf: HiveConf) extends AbstractSqlParser {
- val astBuilder = new HiveSqlAstBuilder(hiveconf)
+ val astBuilder = new HiveSqlAstBuilder(conf)
+
+ lazy val substitutor = new VariableSubstitution
protected override def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
super.parse(substitutor.substitute(hiveconf, command))(toResult)
@@ -57,7 +54,7 @@ class HiveSqlParser(
/**
* Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier.
*/
-class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
+class HiveSqlAstBuilder(conf: SQLConf) extends SparkSqlAstBuilder {
import ParserUtils._
/**
@@ -184,8 +181,8 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
// Storage format
val defaultStorage: CatalogStorageFormat = {
- val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT)
- val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType, hiveConf)
+ val defaultStorageType = conf.getConfString("hive.default.fileformat", "textfile")
+ val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType, conf)
CatalogStorageFormat(
locationUri = None,
inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
@@ -323,7 +320,7 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
// Decode and input/output format.
type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String])
- def format(fmt: RowFormatContext, confVar: ConfVars): Format = fmt match {
+ def format(fmt: RowFormatContext, configKey: String): Format = fmt match {
case c: RowFormatDelimitedContext =>
// TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema
// expects a seq of pairs in which the old parsers' token names are used as keys.
@@ -345,8 +342,8 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
val CatalogStorageFormat(None, None, None, Some(name), props) = visitRowFormatSerde(c)
// SPARK-10310: Special cases LazySimpleSerDe
- val recordHandler = if (name == classOf[LazySimpleSerDe].getCanonicalName) {
- Option(hiveConf.getVar(confVar))
+ val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") {
+ Try(conf.getConfString(configKey)).toOption
} else {
None
}
@@ -354,17 +351,18 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
case null =>
// Use default (serde) format.
- val name = hiveConf.getVar(ConfVars.HIVESCRIPTSERDE)
+ val name = conf.getConfString("hive.script.serde",
+ "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")
val props = Seq(serdeConstants.FIELD_DELIM -> "\t")
- val recordHandler = Option(hiveConf.getVar(confVar))
+ val recordHandler = Try(conf.getConfString(configKey)).toOption
(Nil, Option(name), props, recordHandler)
}
val (inFormat, inSerdeClass, inSerdeProps, reader) =
- format(inRowFormat, ConfVars.HIVESCRIPTRECORDREADER)
+ format(inRowFormat, "hive.script.recordreader")
val (outFormat, outSerdeClass, outSerdeProps, writer) =
- format(inRowFormat, ConfVars.HIVESCRIPTRECORDWRITER)
+ format(outRowFormat, "hive.script.recordwriter")
HiveScriptIOSchema(
inFormat, outFormat,
@@ -374,13 +372,6 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
schemaLess)
}
- /**
- * Create location string.
- */
- override def visitLocationSpec(ctx: LocationSpecContext): String = {
- EximUtil.relativeToAbsolutePath(hiveConf, super.visitLocationSpec(ctx))
- }
-
/** Empty storage format for default values and copies. */
private val EmptyStorageFormat = CatalogStorageFormat(None, None, None, None, Map.empty)
@@ -402,7 +393,7 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
override def visitGenericFileFormat(
ctx: GenericFileFormatContext): CatalogStorageFormat = withOrigin(ctx) {
val source = ctx.identifier.getText
- HiveSerDe.sourceToSerDe(source, hiveConf) match {
+ HiveSerDe.sourceToSerDe(source, conf) match {
case Some(s) =>
EmptyStorageFormat.copy(
inputFormat = s.inputFormat,
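Several of the rewritten lookups rely on SQLConf.getConfString(key) throwing when the key is absent, so Try(...).toOption turns a missing entry into None rather than a failure. A small sketch of that idiom (the helper name is illustrative, not from the patch):

    import scala.util.Try
    import org.apache.spark.sql.internal.SQLConf

    def optionalConf(conf: SQLConf, key: String): Option[String] =
      Try(conf.getConfString(key)).toOption   // None when the key has no value and no default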