path: root/sql/core
author    Michael Armbrust <michael@databricks.com>    2014-08-03 12:28:29 -0700
committer Michael Armbrust <michael@databricks.com>    2014-08-03 12:28:29 -0700
commit    236dfac6769016e433b2f6517cda2d308dea74bc (patch)
tree      101bf2d0e0a964901a2299378cf053a38b4095e5 /sql/core
parent    2998e38a942351974da36cb619e863c6f0316e7a (diff)
download  spark-236dfac6769016e433b2f6517cda2d308dea74bc.tar.gz
          spark-236dfac6769016e433b2f6517cda2d308dea74bc.tar.bz2
          spark-236dfac6769016e433b2f6517cda2d308dea74bc.zip
[SPARK-2784][SQL] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'
Many users have reported being confused by the distinction between the `sql` and `hql` methods. Specifically, many users think that `sql(...)` cannot be used to read Hive tables. In this PR I introduce a new configuration option, `spark.sql.dialect`, that picks which dialect will be used for parsing. For `SQLContext` this must be set to `sql`. In `HiveContext` it defaults to `hiveql` but can also be set to `sql`.

The `hql` and `hiveql` methods continue to act the same but are now marked as deprecated. **This is a possibly breaking change for some users unless they set the dialect manually, though this is unlikely.** For example: `hiveContext.sql("SELECT 1")` will now throw a parsing exception by default.

Author: Michael Armbrust <michael@databricks.com>

Closes #1746 from marmbrus/sqlLanguageConf and squashes the following commits:

ad375cc [Michael Armbrust] Merge remote-tracking branch 'apache/master' into sqlLanguageConf
20c43f8 [Michael Armbrust] override function instead of just setting the value
7e4ae93 [Michael Armbrust] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'
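To make the change concrete, here is a minimal Scala sketch (not part of the patch; `hiveContext` is an assumed pre-existing HiveContext):

    // Under the default HiveContext dialect ('hiveql'), a query that only
    // Spark SQL's own parser understands now fails to parse:
    hiveContext.sql("SELECT 1")                  // throws a parsing exception

    // Setting the dialect restores the pre-patch sql() behavior:
    hiveContext.sql("SET spark.sql.dialect=sql")
    hiveContext.sql("SELECT 1")                  // parsed by Spark SQL's parser

    // The deprecated methods continue to work unchanged for now:
    hiveContext.hql("SELECT key FROM src")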
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala                  | 17
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala               | 11
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala  | 14
3 files changed, 36 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 2d407077be..40bfd55e95 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -29,6 +29,7 @@ object SQLConf {
val SHUFFLE_PARTITIONS = "spark.sql.shuffle.partitions"
val JOIN_BROADCAST_TABLES = "spark.sql.join.broadcastTables"
val CODEGEN_ENABLED = "spark.sql.codegen"
+ val DIALECT = "spark.sql.dialect"
object Deprecated {
val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
@@ -39,7 +40,7 @@ object SQLConf {
* A trait that enables the setting and getting of mutable config parameters/hints.
*
* In the presence of a SQLContext, these can be set and queried by passing SET commands
- * into Spark SQL's query functions (sql(), hql(), etc.). Otherwise, users of this trait can
+ * into Spark SQL's query functions (i.e. sql()). Otherwise, users of this trait can
* modify the hints by programmatically calling the setters and getters of this trait.
*
* SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads).
@@ -53,6 +54,20 @@ trait SQLConf {
/** ************************ Spark SQL Params/Hints ******************* */
// TODO: refactor so that these hints accessors don't pollute the name space of SQLContext?
+ /**
+ * The SQL dialect that is used when parsing queries. This defaults to 'sql' which uses
+ * a simple SQL parser provided by Spark SQL. This is currently the only option for users of
+ * SQLContext.
+ *
+ * When using a HiveContext, this value defaults to 'hiveql', which uses the Hive 0.12.0 HiveQL
+ * parser. Users can change this to 'sql' if they want to run queries that aren't supported by
+ * HiveQL (e.g., SELECT 1).
+ *
+ * Note that the choice of dialect does not affect things like what tables are available or
+ * how query execution is performed.
+ */
+ private[spark] def dialect: String = get(DIALECT, "sql")
+
/** When true tables cached using the in-memory columnar caching will be compressed. */
private[spark] def useCompression: Boolean = get(COMPRESS_CACHED, "false").toBoolean
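For illustration, a short sketch (not from the patch) of how the new setting is exercised through the SET commands described in the comment above; `sqlContext` is an assumed existing SQLContext:

    // The dialect is an ordinary SQLConf entry, adjustable at runtime:
    sqlContext.sql("SET spark.sql.dialect=sql")

    // It can also be read back through the public conf getter, using the
    // same default as the private dialect accessor above:
    val currentDialect = sqlContext.get("spark.sql.dialect", "sql")  // "sql"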
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 567f4dca99..ecd5fbaa0b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -248,11 +248,18 @@ class SQLContext(@transient val sparkContext: SparkContext)
}
/**
- * Executes a SQL query using Spark, returning the result as a SchemaRDD.
+ * Executes a SQL query using Spark, returning the result as a SchemaRDD. The dialect that is
+ * used for SQL parsing can be configured with 'spark.sql.dialect'.
*
* @group userf
*/
- def sql(sqlText: String): SchemaRDD = new SchemaRDD(this, parseSql(sqlText))
+ def sql(sqlText: String): SchemaRDD = {
+ if (dialect == "sql") {
+ new SchemaRDD(this, parseSql(sqlText))
+ } else {
+ sys.error(s"Unsupported SQL dialect: $dialect")
+ }
+ }
/** Returns the specified table as a SchemaRDD */
def table(tableName: String): SchemaRDD =
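A brief sketch of the new guard in action (assumed setup: an existing SQLContext named `sqlContext`):

    // SQLContext defaults to the 'sql' dialect, so parsing works as before:
    val ones = sqlContext.sql("SELECT 1 AS x")

    // Any other dialect is rejected, since SQLContext ships only one parser:
    sqlContext.sql("SET spark.sql.dialect=hiveql")
    sqlContext.sql("SELECT 1")   // sys.error: "Unsupported SQL dialect: hiveql"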
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
index dbaa16e8b0..150ff8a420 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
@@ -39,10 +39,18 @@ class JavaSQLContext(val sqlContext: SQLContext) extends UDFRegistration {
def this(sparkContext: JavaSparkContext) = this(new SQLContext(sparkContext.sc))
/**
- * Executes a query expressed in SQL, returning the result as a JavaSchemaRDD
+ * Executes a SQL query using Spark, returning the result as a JavaSchemaRDD. The dialect that is
+ * used for SQL parsing can be configured with 'spark.sql.dialect'.
+ *
+ * @group userf
*/
- def sql(sqlQuery: String): JavaSchemaRDD =
- new JavaSchemaRDD(sqlContext, sqlContext.parseSql(sqlQuery))
+ def sql(sqlText: String): JavaSchemaRDD = {
+ if (sqlContext.dialect == "sql") {
+ new JavaSchemaRDD(sqlContext, sqlContext.parseSql(sqlText))
+ } else {
+ sys.error(s"Unsupported SQL dialect: $sqlContext.dialect")
+ }
+ }
/**
* :: Experimental ::
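For completeness, the same behavior through the Java API wrapper, sketched here in Scala for brevity (`javaCtx` is an assumed existing JavaSQLContext):

    // JavaSQLContext consults the dialect of its underlying SQLContext, so
    // the same rules apply:
    val rdd: JavaSchemaRDD = javaCtx.sql("SELECT 1 AS x")

    // A non-'sql' dialect produces the same error as in the Scala API:
    javaCtx.sqlContext.sql("SET spark.sql.dialect=hiveql")
    javaCtx.sql("SELECT 1")      // sys.error: "Unsupported SQL dialect: hiveql"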