author     Reynold Xin <rxin@databricks.com>          2015-01-13 13:30:35 -0800
committer  Michael Armbrust <michael@databricks.com>  2015-01-13 13:30:35 -0800
commit     14e3f114efb906937b2d7b7ac04484b2814a3b48 (patch)
tree       5f20050f930c68d7f9a52a5ee696b0f633fc4ea9 /sql/hive
parent     6463e0b9e8067cce70602c5c9006a2546856a9d6 (diff)
[SPARK-5168] Make SQLConf a field rather than mixin in SQLContext
This change should be binary- and source-backward-compatible, since no user-facing APIs were changed (a sketch of the pattern follows the commit list below).
Author: Reynold Xin <rxin@databricks.com>
Closes #3965 from rxin/SPARK-5168-sqlconf and squashes the following commits:
42eec09 [Reynold Xin] Fix default conf value.
0ef86cc [Reynold Xin] Fix constructor ordering.
4d7f910 [Reynold Xin] Properly override config.
ccc8e6a [Reynold Xin] [SPARK-5168] Make SQLConf a field rather than mixin in SQLContext
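To make the refactoring concrete, here is a minimal, self-contained Scala sketch of the pattern (not the actual Spark sources; class shapes and config keys are simplified for illustration): configuration moves from a mixin into an overridable `conf` field, so subclasses such as HiveContext customize defaults by overriding that one field instead of overriding individual accessor methods scattered across the context.

package example.sql

import scala.collection.mutable

// Simplified stand-in for org.apache.spark.sql.SQLConf: every setting is a
// `def` reading from a mutable key/value store, so an anonymous subclass can
// override just the default value of a single setting.
class SQLConf {
  protected val settings = mutable.Map[String, String]()

  def setConf(key: String, value: String): Unit = settings(key) = value
  def getConf(key: String, default: String): String = settings.getOrElse(key, default)
  def clear(): Unit = settings.clear()

  def dialect: String = getConf("spark.sql.dialect", "sql")
  def numShufflePartitions: Int = getConf("spark.sql.shuffle.partitions", "200").toInt
}

// Before SPARK-5168 (schematically): `class SQLContext extends ... with SQLConf`,
// so config accessors lived directly on the context. After: the conf is a lazy
// field, and internal callers read `ctx.conf.dialect` instead of `ctx.dialect`.
class SQLContext {
  private[sql] lazy val conf: SQLConf = new SQLConf
}

// HiveContext changes the default dialect by overriding the field — exactly
// the shape of the HiveContext.scala hunk in the diff below.
class HiveContext extends SQLContext {
  private[sql] override lazy val conf: SQLConf = new SQLConf {
    override def dialect: String = getConf("spark.sql.dialect", "hiveql")
  }
}

With this shape, TestHiveContext can likewise override `numShufflePartitions` to 5 inside its own `conf`, as the TestHive.scala hunk below shows.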
Diffstat (limited to 'sql/hive')
7 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 23283fd3fe..0d934620ac 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -36,8 +36,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
   private val originalTimeZone = TimeZone.getDefault
   private val originalLocale = Locale.getDefault
-  private val originalColumnBatchSize = TestHive.columnBatchSize
-  private val originalInMemoryPartitionPruning = TestHive.inMemoryPartitionPruning
+  private val originalColumnBatchSize = TestHive.conf.columnBatchSize
+  private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning
 
   def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 09ff4cc5ab..9aeebd7e54 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -71,8 +71,9 @@ class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   self =>
 
-  // Change the default SQL dialect to HiveQL
-  override private[spark] def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+  private[sql] override lazy val conf: SQLConf = new SQLConf {
+    override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+  }
 
   /**
    * When true, enables an experimental feature where metastore tables that use the parquet SerDe
@@ -87,12 +88,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   override def sql(sqlText: String): SchemaRDD = {
     // TODO: Create a framework for registering parsers instead of just hardcoding if statements.
-    if (dialect == "sql") {
+    if (conf.dialect == "sql") {
       super.sql(sqlText)
-    } else if (dialect == "hiveql") {
+    } else if (conf.dialect == "hiveql") {
       new SchemaRDD(this, ddlParser(sqlText).getOrElse(HiveQl.parseSql(sqlText)))
     } else {
-      sys.error(s"Unsupported SQL dialect: $dialect. Try 'sql' or 'hiveql'")
+      sys.error(s"Unsupported SQL dialect: ${conf.dialect}. Try 'sql' or 'hiveql'")
     }
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index daeabb6c8b..785a6a14f4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -515,7 +515,7 @@ private[hive] case class MetastoreRelation
       // if the size is still less than zero, we use default size
       Option(totalSize).map(_.toLong).filter(_ > 0)
         .getOrElse(Option(rawDataSize).map(_.toLong).filter(_ > 0)
-        .getOrElse(sqlContext.defaultSizeInBytes)))
+        .getOrElse(sqlContext.conf.defaultSizeInBytes)))
     }
   )
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 31c7ce9639..52e1f0d94f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -102,8 +102,10 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
     new this.QueryExecution { val logical = plan }
 
   /** Fewer partitions to speed up testing. */
-  override private[spark] def numShufflePartitions: Int =
-    getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
+  private[sql] override lazy val conf: SQLConf = new SQLConf {
+    override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
+    override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+  }
 
   /**
    * Returns the value of specified environmental variable as a [[java.io.File]] after checking
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala
index 1817c78324..038f63f6c7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala
@@ -31,12 +31,12 @@ class JavaHiveContext(sqlContext: SQLContext) extends JavaSQLContext(sqlContext)
 
   override def sql(sqlText: String): JavaSchemaRDD = {
     // TODO: Create a framework for registering parsers instead of just hardcoding if statements.
-    if (sqlContext.dialect == "sql") {
+    if (sqlContext.conf.dialect == "sql") {
       super.sql(sqlText)
-    } else if (sqlContext.dialect == "hiveql") {
+    } else if (sqlContext.conf.dialect == "hiveql") {
       new JavaSchemaRDD(sqlContext, HiveQl.parseSql(sqlText))
     } else {
-      sys.error(s"Unsupported SQL dialect: ${sqlContext.dialect}. Try 'sql' or 'hiveql'")
+      sys.error(s"Unsupported SQL dialect: ${sqlContext.conf.dialect}. Try 'sql' or 'hiveql'")
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index a758f921e0..0b4e76c9d3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -81,7 +81,7 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
 
     // TODO: How does it works? needs to add it back for other hive version.
     if (HiveShim.version =="0.12.0") {
-      assert(queryTotalSize("analyzeTable") === defaultSizeInBytes)
+      assert(queryTotalSize("analyzeTable") === conf.defaultSizeInBytes)
     }
     sql("ANALYZE TABLE analyzeTable COMPUTE STATISTICS noscan")
 
@@ -110,7 +110,7 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
         |SELECT * FROM src
       """.stripMargin).collect()
 
-    assert(queryTotalSize("analyzeTable_part") === defaultSizeInBytes)
+    assert(queryTotalSize("analyzeTable_part") === conf.defaultSizeInBytes)
 
     sql("ANALYZE TABLE analyzeTable_part COMPUTE STATISTICS noscan")
 
@@ -151,8 +151,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
     val sizes = rdd.queryExecution.analyzed.collect {
       case r if ct.runtimeClass.isAssignableFrom(r.getClass) => r.statistics.sizeInBytes
     }
-    assert(sizes.size === 2 && sizes(0) <= autoBroadcastJoinThreshold
-      && sizes(1) <= autoBroadcastJoinThreshold,
+    assert(sizes.size === 2 && sizes(0) <= conf.autoBroadcastJoinThreshold
+      && sizes(1) <= conf.autoBroadcastJoinThreshold,
       s"query should contain two relations, each of which has size smaller than autoConvertSize")
 
     // Using `sparkPlan` because for relevant patterns in HashJoin to be
@@ -163,8 +163,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
     checkAnswer(rdd, expectedAnswer) // check correctness of output
 
-    TestHive.settings.synchronized {
-      val tmp = autoBroadcastJoinThreshold
+    TestHive.conf.settings.synchronized {
+      val tmp = conf.autoBroadcastJoinThreshold
 
       sql(s"""SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD}=-1""")
       rdd = sql(query)
@@ -207,8 +207,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
           .isAssignableFrom(r.getClass) => r.statistics.sizeInBytes
     }
-    assert(sizes.size === 2 && sizes(1) <= autoBroadcastJoinThreshold
-      && sizes(0) <= autoBroadcastJoinThreshold,
+    assert(sizes.size === 2 && sizes(1) <= conf.autoBroadcastJoinThreshold
+      && sizes(0) <= conf.autoBroadcastJoinThreshold,
       s"query should contain two relations, each of which has size smaller than autoConvertSize")
 
     // Using `sparkPlan` because for relevant patterns in HashJoin to be
@@ -221,8 +221,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
     checkAnswer(rdd, answer) // check correctness of output
 
-    TestHive.settings.synchronized {
-      val tmp = autoBroadcastJoinThreshold
+    TestHive.conf.settings.synchronized {
+      val tmp = conf.autoBroadcastJoinThreshold
 
       sql(s"SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD}=-1")
       rdd = sql(leftSemiJoinQuery)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 4decd15485..c14f0d24e0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -847,7 +847,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
       case Row(key: String, value: String) => key -> value
       case Row(KV(key, value)) => key -> value
     }.toSet
-    clear()
+    conf.clear()
 
     // "SET" itself returns all config variables currently specified in SQLConf.
     // TODO: Should we be listing the default here always? probably...
@@ -879,7 +879,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
       collectResults(sql(s"SET $nonexistentKey"))
     }
 
-    clear()
+    conf.clear()
   }
 
   createQueryTest("select from thrift based table",
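As a follow-up illustration of the compatibility claim in the commit message, here is a hedged sketch building on the `SQLConf` sketch above. It assumes, as the commit message implies, that SQLContext's public setters and getters keep their signatures and merely forward to the new `conf` field; the class and demo below are hypothetical, not verbatim from the Spark sources.

package example.sql

// Public entry points keep their old signatures and delegate to the field,
// so existing user code compiles (source compatibility) and links against
// the same method symbols (binary compatibility).
class CompatSQLContext {
  private[sql] lazy val conf: SQLConf = new SQLConf

  def setConf(key: String, value: String): Unit = conf.setConf(key, value)
  def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue)
}

object CompatDemo {
  def main(args: Array[String]): Unit = {
    val ctx = new CompatSQLContext
    ctx.setConf("spark.sql.shuffle.partitions", "10") // pre-existing user code, unchanged
    println(ctx.conf.numShufflePartitions)            // internals now read via the field: prints 10
  }
}

Only the `private[spark]` accessors that internal code used (e.g. `dialect`, `numShufflePartitions`) moved behind `conf`, which is why the diff above touches only Spark's own sources and tests.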