author     Michael Armbrust <michael@databricks.com>  2014-08-03 12:28:29 -0700
committer  Michael Armbrust <michael@databricks.com>  2014-08-03 12:29:05 -0700
commit     c5ed1deba6b3f3e597554a8d0f93f402ae62fab9
tree       101bf2d0e0a964901a2299378cf053a38b4095e5
parent     eaa93555a7f935b00a2f94a7fa50a12e11578bd7
[SPARK-2784][SQL] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'
Many users have reported being confused by the distinction between the `sql` and `hql` methods. Specifically, many users think that `sql(...)` cannot be used to read Hive tables. In this PR I introduce a new configuration option, `spark.sql.dialect`, that picks which dialect will be used for parsing. For `SQLContext` this must be set to `sql`. In `HiveContext` it defaults to `hiveql` but can also be set to `sql`. The `hql` and `hiveql` methods continue to act the same but are now marked as deprecated.

**This is a possibly breaking change for some users unless they set the dialect manually, though this is unlikely.** For example, `hiveContext.sql("SELECT 1")` will now throw a parsing exception by default.

Author: Michael Armbrust <michael@databricks.com>

Closes #1746 from marmbrus/sqlLanguageConf and squashes the following commits:

ad375cc [Michael Armbrust] Merge remote-tracking branch 'apache/master' into sqlLanguageConf
20c43f8 [Michael Armbrust] override function instead of just setting the value
7e4ae93 [Michael Armbrust] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'

(cherry picked from commit 236dfac6769016e433b2f6517cda2d308dea74bc)
Signed-off-by: Michael Armbrust <michael@databricks.com>
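A minimal sketch of the behavior described above, assuming a Hive-enabled Spark 1.x build and an existing SparkContext `sc`; the `SET spark.sql.dialect=...` statement is an assumed way of changing the option at runtime:

    from pyspark.sql import HiveContext

    hiveCtx = HiveContext(sc)  # `sc`: an existing SparkContext (assumed)

    # sql() now parses HiveQL by default in a HiveContext, so Hive tables
    # can be queried without hql():
    rows = hiveCtx.sql("SELECT key, value FROM src")

    # The breaking case called out above now fails to parse by default:
    #   hiveCtx.sql("SELECT 1")  # parsing exception under the 'hiveql' dialect

    # Switching dialects restores the old sql() behavior:
    hiveCtx.sql("SET spark.sql.dialect=sql")
    hiveCtx.sql("SELECT 1")  # parsed by the simple SQL dialect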
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 42b738e112..1a829c6faf 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -1291,16 +1291,20 @@ class HiveContext(SQLContext):
     def hiveql(self, hqlQuery):
         """
-        Runs a query expressed in HiveQL, returning the result as
-        a L{SchemaRDD}.
+        DEPRECATED: Use sql()
         """
+        warnings.warn("hiveql() is deprecated as the sql function now parses using HiveQL by " +
+                      "default. The SQL dialect for parsing can be set using 'spark.sql.dialect'",
+                      DeprecationWarning)
         return SchemaRDD(self._ssql_ctx.hiveql(hqlQuery), self)

     def hql(self, hqlQuery):
         """
-        Runs a query expressed in HiveQL, returning the result as
-        a L{SchemaRDD}.
+        DEPRECATED: Use sql()
         """
+        warnings.warn("hql() is deprecated as the sql function now parses using HiveQL by " +
+                      "default. The SQL dialect for parsing can be set using 'spark.sql.dialect'",
+                      DeprecationWarning)
         return self.hiveql(hqlQuery)
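Note that CPython hides DeprecationWarning in non-interactive code by default, so callers may not see the message the hunk above emits. A hypothetical snippet (not part of the commit) showing how a user would surface it, assuming an existing HiveContext `hiveCtx`:

    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("always", DeprecationWarning)
        hiveCtx.hql("SHOW TABLES")  # now prints the deprecation message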
@@ -1313,16 +1317,16 @@ class LocalHiveContext(HiveContext):
     >>> import os
     >>> hiveCtx = LocalHiveContext(sc)
     >>> try:
-    ...     supress = hiveCtx.hql("DROP TABLE src")
+    ...     supress = hiveCtx.sql("DROP TABLE src")
     ... except Exception:
     ...     pass
     >>> kv1 = os.path.join(os.environ["SPARK_HOME"],
     ...     'examples/src/main/resources/kv1.txt')
-    >>> supress = hiveCtx.hql(
+    >>> supress = hiveCtx.sql(
     ...     "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
-    >>> supress = hiveCtx.hql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src"
+    >>> supress = hiveCtx.sql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src"
     ...     % kv1)
-    >>> results = hiveCtx.hql("FROM src SELECT value"
+    >>> results = hiveCtx.sql("FROM src SELECT value"
     ...     ).map(lambda r: int(r.value.split('_')[1]))
     >>> num = results.count()
     >>> reduce_sum = results.reduce(lambda x, y: x + y)
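For reference, a self-contained sketch of the updated doctest flow outside of a doctest, assuming SPARK_HOME points at a Hive-enabled Spark 1.x installation with the bundled kv1.txt sample data:

    import os
    from pyspark import SparkContext
    from pyspark.sql import LocalHiveContext

    sc = SparkContext("local", "dialect-example")
    hiveCtx = LocalHiveContext(sc)

    try:
        hiveCtx.sql("DROP TABLE src")
    except Exception:
        pass  # ignore if the table does not exist yet

    kv1 = os.path.join(os.environ["SPARK_HOME"],
                       'examples/src/main/resources/kv1.txt')
    hiveCtx.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    hiveCtx.sql("LOAD DATA LOCAL INPATH '%s' INTO TABLE src" % kv1)

    results = hiveCtx.sql("FROM src SELECT value").map(
        lambda r: int(r.value.split('_')[1]))
    print(results.count())                     # number of rows loaded
    print(results.reduce(lambda x, y: x + y))  # sum of the parsed values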