path: root/sql/core/src
author     Cheng Lian <lian.cs.zju@gmail.com>           2014-10-09 18:25:06 -0700
committer  Michael Armbrust <michael@databricks.com>    2014-10-09 18:25:06 -0700
commit     edf02da389f75df5a42465d41f035d6b65599848 (patch)
tree       1530a6ae85e804ff8ed0498e69a971bc569ee5ca /sql/core/src
parent     363baacaded56047bcc63276d729ab911e0336cf (diff)
download   spark-edf02da389f75df5a42465d41f035d6b65599848.tar.gz
spark-edf02da389f75df5a42465d41f035d6b65599848.tar.bz2
spark-edf02da389f75df5a42465d41f035d6b65599848.zip
[SPARK-3654][SQL] Unifies SQL and HiveQL parsers
This PR is a follow-up of #2590 and introduces a top-level SQL parser entry point for all SQL dialects supported by Spark SQL.

A top-level parser, `SparkSQLParser`, handles the syntaxes that every SQL dialect should recognize (e.g. `CACHE TABLE`, `UNCACHE TABLE`, and `SET`). For any syntax it doesn't recognize directly, it falls back to a specified function that tries to parse arbitrary input into a `LogicalPlan`; this function is typically another parser combinator, such as `SqlParser`. The DDL syntaxes introduced in #2475 can be moved here. The `ExtendedHiveQlParser` now only handles Hive-specific extensions.

This change also takes the chance to refactor/reformat `SqlParser` for better readability.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #2698 from liancheng/gen-sql-parser and squashes the following commits:

ceada76 [Cheng Lian] Minor styling fixes
9738934 [Cheng Lian] Minor refactoring, removes optional trailing ";" in the parser
bb2ab12 [Cheng Lian] SET property value can be empty string
ce8860b [Cheng Lian] Passes test suites
e86968e [Cheng Lian] Removes debugging code
8bcace5 [Cheng Lian] Replaces digit.+ to rep1(digit) (Scala style checking doesn't like it)
d15d54f [Cheng Lian] Unifies SQL and HiveQL parsers
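To illustrate the "top-level parser with fallback" idea described above, here is a minimal, self-contained Scala sketch. It is not Spark's actual implementation: `Plan`, `TopLevelParser`, `CacheTable`, `SetProperty`, and `FallbackPlan` are hypothetical stand-ins for `LogicalPlan` and the real `SparkSQLParser`/`SqlParser` shown in the diff below, and it assumes the scala-parser-combinators library is on the classpath.

import scala.util.parsing.combinator.RegexParsers

// Hypothetical stand-ins for Spark's LogicalPlan nodes, used only in this sketch.
sealed trait Plan
case class CacheTable(name: String) extends Plan
case class UncacheTable(name: String) extends Plan
case class SetProperty(kv: Option[(String, Option[String])]) extends Plan
case class FallbackPlan(sql: String) extends Plan

// A top-level parser that recognizes a few dialect-independent commands and
// delegates everything else to the supplied fallback function, mirroring the
// `new catalyst.SparkSQLParser(fallback(_))` wiring in the SQLContext diff below.
class TopLevelParser(fallback: String => Plan) extends RegexParsers {
  private val ident   = """[a-zA-Z_]\w*""".r
  private val propKey = """[\w.]+""".r

  private def cache: Parser[Plan] =
    "(?i)CACHE".r ~> "(?i)TABLE".r ~> ident ^^ CacheTable

  private def uncache: Parser[Plan] =
    "(?i)UNCACHE".r ~> "(?i)TABLE".r ~> ident ^^ UncacheTable

  // SET, SET key, or SET key=value; the value may be an empty string (commit bb2ab12).
  private def set: Parser[Plan] =
    "(?i)SET".r ~> opt(propKey ~ opt("=" ~> ".*".r)) ^^ {
      case None        => SetProperty(None)
      case Some(k ~ v) => SetProperty(Some((k, v)))
    }

  // Anything the top-level parser does not understand goes to the fallback.
  private def others: Parser[Plan] = ".*".r ^^ fallback

  def apply(input: String): Plan =
    parseAll(cache | uncache | set | others, input) match {
      case Success(plan, _) => plan
      case failure          => sys.error(failure.toString)
    }
}

object TopLevelParserDemo extends App {
  val parse = new TopLevelParser(sql => FallbackPlan(sql))
  println(parse("CACHE TABLE logs"))                      // CacheTable(logs)
  println(parse("SET spark.sql.shuffle.partitions=10"))   // SetProperty(Some((spark.sql.shuffle.partitions,Some(10))))
  println(parse("SELECT * FROM logs"))                    // FallbackPlan(SELECT * FROM logs)
}

The design point is the constructor argument: the common-command parser takes a plain String => Plan function, so any dialect-specific parser (SQL, HiveQL, or a future DDL parser) can be plugged in as the fallback without the top-level parser knowing about it.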
Diffstat (limited to 'sql/core/src')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala                |  9
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala |  4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala        | 34
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala          |  2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala             |  4
5 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 014e1e2826..23e7b2d270 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -66,12 +66,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
@transient
protected[sql] lazy val analyzer: Analyzer =
new Analyzer(catalog, functionRegistry, caseSensitive = true)
+
@transient
protected[sql] val optimizer = Optimizer
+
@transient
- protected[sql] val parser = new catalyst.SqlParser
+ protected[sql] val sqlParser = {
+ val fallback = new catalyst.SqlParser
+ new catalyst.SparkSQLParser(fallback(_))
+ }
- protected[sql] def parseSql(sql: String): LogicalPlan = parser(sql)
+ protected[sql] def parseSql(sql: String): LogicalPlan = sqlParser(sql)
protected[sql] def executeSql(sql: String): this.QueryExecution = executePlan(parseSql(sql))
protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
new this.QueryExecution { val logical = plan }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index bbf17b9fad..4f1af7234d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -304,8 +304,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case class CommandStrategy(context: SQLContext) extends Strategy {
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
- case logical.SetCommand(key, value) =>
- Seq(execution.SetCommand(key, value, plan.output)(context))
+ case logical.SetCommand(kv) =>
+ Seq(execution.SetCommand(kv, plan.output)(context))
case logical.ExplainCommand(logicalPlan, extended) =>
Seq(execution.ExplainCommand(logicalPlan, plan.output, extended)(context))
case logical.CacheTableCommand(tableName, optPlan, isLazy) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index d49633c24a..5859eba408 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -48,29 +48,28 @@ trait Command {
* :: DeveloperApi ::
*/
@DeveloperApi
-case class SetCommand(
- key: Option[String], value: Option[String], output: Seq[Attribute])(
+case class SetCommand(kv: Option[(String, Option[String])], output: Seq[Attribute])(
@transient context: SQLContext)
extends LeafNode with Command with Logging {
- override protected lazy val sideEffectResult: Seq[Row] = (key, value) match {
- // Set value for key k.
- case (Some(k), Some(v)) =>
- if (k == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
+ override protected lazy val sideEffectResult: Seq[Row] = kv match {
+ // Set value for the key.
+ case Some((key, Some(value))) =>
+ if (key == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.")
- context.setConf(SQLConf.SHUFFLE_PARTITIONS, v)
- Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$v"))
+ context.setConf(SQLConf.SHUFFLE_PARTITIONS, value)
+ Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value"))
} else {
- context.setConf(k, v)
- Seq(Row(s"$k=$v"))
+ context.setConf(key, value)
+ Seq(Row(s"$key=$value"))
}
- // Query the value bound to key k.
- case (Some(k), _) =>
+ // Query the value bound to the key.
+ case Some((key, None)) =>
// TODO (lian) This is just a workaround to make the Simba ODBC driver work.
// Should remove this once we get the ODBC driver updated.
- if (k == "-v") {
+ if (key == "-v") {
val hiveJars = Seq(
"hive-exec-0.12.0.jar",
"hive-service-0.12.0.jar",
@@ -84,23 +83,20 @@ case class SetCommand(
Row("system:java.class.path=" + hiveJars),
Row("system:sun.java.command=shark.SharkServer2"))
} else {
- if (k == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
+ if (key == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
s"showing ${SQLConf.SHUFFLE_PARTITIONS} instead.")
Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=${context.numShufflePartitions}"))
} else {
- Seq(Row(s"$k=${context.getConf(k, "<undefined>")}"))
+ Seq(Row(s"$key=${context.getConf(key, "<undefined>")}"))
}
}
// Query all key-value pairs that are set in the SQLConf of the context.
- case (None, None) =>
+ case _ =>
context.getAllConfs.map { case (k, v) =>
Row(s"$k=$v")
}.toSeq
-
- case _ =>
- throw new IllegalArgumentException()
}
override def otherCopyArgs = context :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index 1e624f9700..c87ded81fd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -69,7 +69,7 @@ class CachedTableSuite extends QueryTest {
test("calling .unpersist() should drop in-memory columnar cache") {
table("testData").cache()
table("testData").count()
- table("testData").unpersist(true)
+ table("testData").unpersist(blocking = true)
assertCached(table("testData"), 0)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 79de1bb855..a94022c0cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -42,7 +42,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
TimeZone.setDefault(origZone)
}
-
test("SPARK-3176 Added Parser of SQL ABS()") {
checkAnswer(
sql("SELECT ABS(-1.3)"),
@@ -61,7 +60,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
4)
}
-
test("SPARK-2041 column name equals tablename") {
checkAnswer(
sql("SELECT tableName FROM tableName"),
@@ -694,6 +692,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
test("SPARK-3813 CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END") {
checkAnswer(
- sql("SELECT CASE WHEN key=1 THEN 1 ELSE 2 END FROM testData WHERE key = 1 group by key"), 1)
+ sql("SELECT CASE WHEN key = 1 THEN 1 ELSE 2 END FROM testData WHERE key = 1 group by key"), 1)
}
}