From c5ed1deba6b3f3e597554a8d0f93f402ae62fab9 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sun, 3 Aug 2014 12:28:29 -0700
Subject: [SPARK-2784][SQL] Deprecate hql() method in favor of a config option,
 'spark.sql.dialect'

Many users have reported being confused by the distinction between the `sql` and `hql` methods.  Specifically, many users think that `sql(...)` cannot be used to read hive tables.  In this PR I introduce a new configuration option `spark.sql.dialect` that picks which dialect with be used for parsing.  For SQLContext this must be set to `sql`.  In `HiveContext` it defaults to `hiveql` but can also be set to `sql`.

The `hql` and `hiveql` methods continue to act the same but are now marked as deprecated.

**This is a possibly breaking change for some users unless they set the dialect manually, though this is unlikely.**

For example: `hiveContex.sql("SELECT 1")` will now throw a parsing exception by default.

Author: Michael Armbrust <michael@databricks.com>

Closes #1746 from marmbrus/sqlLanguageConf and squashes the following commits:

ad375cc [Michael Armbrust] Merge remote-tracking branch 'apache/master' into sqlLanguageConf
20c43f8 [Michael Armbrust] override function instead of just setting the value
7e4ae93 [Michael Armbrust] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'

(cherry picked from commit 236dfac6769016e433b2f6517cda2d308dea74bc)
Signed-off-by: Michael Armbrust <michael@databricks.com>
---
 python/pyspark/sql.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'python')

diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 42b738e112..1a829c6faf 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -1291,16 +1291,20 @@ class HiveContext(SQLContext):
 
     def hiveql(self, hqlQuery):
         """
-        Runs a query expressed in HiveQL, returning the result as
-        a L{SchemaRDD}.
+        DEPRECATED: Use sql()
         """
+        warnings.warn("hiveql() is deprecated as the sql function now parses using HiveQL by" +
+                      "default. The SQL dialect for parsing can be set using 'spark.sql.dialect'",
+                      DeprecationWarning)
         return SchemaRDD(self._ssql_ctx.hiveql(hqlQuery), self)
 
     def hql(self, hqlQuery):
         """
-        Runs a query expressed in HiveQL, returning the result as
-        a L{SchemaRDD}.
+        DEPRECATED: Use sql()
         """
+        warnings.warn("hql() is deprecated as the sql function now parses using HiveQL by" +
+                      "default. The SQL dialect for parsing can be set using 'spark.sql.dialect'",
+                      DeprecationWarning)
         return self.hiveql(hqlQuery)
 
 
@@ -1313,16 +1317,16 @@ class LocalHiveContext(HiveContext):
     >>> import os
     >>> hiveCtx = LocalHiveContext(sc)
     >>> try:
-    ...     supress = hiveCtx.hql("DROP TABLE src")
+    ...     supress = hiveCtx.sql("DROP TABLE src")
     ... except Exception:
     ...     pass
     >>> kv1 = os.path.join(os.environ["SPARK_HOME"],
     ...        'examples/src/main/resources/kv1.txt')
-    >>> supress = hiveCtx.hql(
+    >>> supress = hiveCtx.sql(
     ...     "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
-    >>> supress = hiveCtx.hql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src"
+    >>> supress = hiveCtx.sql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src"
     ...        % kv1)
-    >>> results = hiveCtx.hql("FROM src SELECT value"
+    >>> results = hiveCtx.sql("FROM src SELECT value"
     ...      ).map(lambda r: int(r.value.split('_')[1]))
     >>> num = results.count()
     >>> reduce_sum = results.reduce(lambda x, y: x + y)
-- 
cgit v1.2.3