about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorAndrew Or <andrew@databricks.com>2016-05-04 17:39:30 -0700
committerAndrew Or <andrew@databricks.com>2016-05-04 17:39:30 -0700
commitfa79d346e1a79ceda6ccd20e74eb850e769556ea (patch)
tree32167c123cbed108caed8773b4985f8918de2b2e /python
parentb28137764716f56fa1a923c4278624a56364a505 (diff)
downloadspark-fa79d346e1a79ceda6ccd20e74eb850e769556ea.tar.gz
spark-fa79d346e1a79ceda6ccd20e74eb850e769556ea.tar.bz2
spark-fa79d346e1a79ceda6ccd20e74eb850e769556ea.zip
[SPARK-14896][SQL] Deprecate HiveContext in python
## What changes were proposed in this pull request?

See title.

## How was this patch tested?

PySpark tests.

Author: Andrew Or <andrew@databricks.com>

Closes #12917 from andrewor14/deprecate-hive-context-python.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/column.py2
-rw-r--r--python/pyspark/sql/context.py9
-rw-r--r--python/pyspark/sql/streaming.py2
3 files changed, 9 insertions, 4 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 43e9baece2..90fb76f9b5 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -418,8 +418,6 @@ class Column(object):
>>> window = Window.partitionBy("name").orderBy("age").rowsBetween(-1, 1)
>>> from pyspark.sql.functions import rank, min
>>> # df.select(rank().over(window), min('age').over(window))
-
- .. note:: Window functions is only supported with HiveContext in 1.4
"""
from pyspark.sql.window import WindowSpec
if not isinstance(window, WindowSpec):
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 2096236d7f..78ab2e81bf 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -17,6 +17,7 @@
from __future__ import print_function
import sys
+import warnings
if sys.version >= '3':
basestring = unicode = str
@@ -434,7 +435,6 @@ class SQLContext(object):
return ContinuousQueryManager(self._ssql_ctx.streams())
-# TODO(andrew): deprecate this
class HiveContext(SQLContext):
"""A variant of Spark SQL that integrates with data stored in Hive.
@@ -444,8 +444,15 @@ class HiveContext(SQLContext):
:param sparkContext: The SparkContext to wrap.
:param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
:class:`HiveContext` in the JVM, instead we make all calls to this object.
+
+ .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate().
"""
+ warnings.warn(
+ "HiveContext is deprecated in Spark 2.0.0. Please use " +
+ "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
+ DeprecationWarning)
+
def __init__(self, sparkContext, jhiveContext=None):
if jhiveContext is None:
sparkSession = SparkSession.withHiveSupport(sparkContext)
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index bf03fdca91..8238b8e7cd 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -221,7 +221,7 @@ def _test():
globs['os'] = os
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
- globs['hiveContext'] = HiveContext(sc)
+ globs['hiveContext'] = HiveContext._createForTesting(sc)
globs['df'] = \
globs['sqlContext'].read.format('text').stream('python/test_support/sql/streaming')