author      Davies Liu <davies@databricks.com>          2015-02-17 15:44:37 -0800
committer   Michael Armbrust <michael@databricks.com>   2015-02-17 15:44:37 -0800
commit      4d4cc760fa9687ce563320094557ef9144488676 (patch)
tree        4293fdd83f5c6872783b9fc8a377b019ce847318 /python/pyspark/shell.py
parent      3df85dccbc8fd1ba19bbcdb8d359c073b1494d98 (diff)
[SPARK-5872] [SQL] create a sqlCtx in pyspark shell
The sqlCtx will be a HiveContext if Hive is built into the assembly jar, or a SQLContext if not. It also skips the Hive tests in pyspark.sql.tests if Hive is not available.

Author: Davies Liu <davies@databricks.com>

Closes #4659 from davies/sqlctx and squashes the following commits:

0e6629a [Davies Liu] sqlCtx in pyspark
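The pyspark.sql.tests change mentioned above is outside this page's diff, which is limited to shell.py. As context only, a minimal sketch of that kind of guard, assuming a unittest-based suite; the class and test names are illustrative, not the actual pyspark.sql.tests code:

import unittest

from py4j.protocol import Py4JError
from pyspark.context import SparkContext
from pyspark.sql import HiveContext


class HiveTestsSketch(unittest.TestCase):
    """Illustrative only: skip Hive-dependent tests when Hive is unavailable."""

    @classmethod
    def setUpClass(cls):
        cls.sc = SparkContext("local[2]", "hive-availability-sketch")
        try:
            # Same probe as shell.py below: raises if the Hive classes are missing
            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
        except Py4JError:
            cls.sc.stop()
            raise unittest.SkipTest("Hive is not available")
        cls.sqlCtx = HiveContext(cls.sc)

    @classmethod
    def tearDownClass(cls):
        cls.sc.stop()

    def test_hive_context_exists(self):
        self.assertIsNotNone(self.sqlCtx)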
Diffstat (limited to 'python/pyspark/shell.py')
-rw-r--r--  python/pyspark/shell.py | 13
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index 4cf4b89ccf..1a02fece9c 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -31,8 +31,12 @@ if sys.version_info[0] != 2:
 import atexit
 import os
 import platform
+
+import py4j
+
 import pyspark
 from pyspark.context import SparkContext
+from pyspark.sql import SQLContext, HiveContext
 from pyspark.storagelevel import StorageLevel
 
 # this is the deprecated equivalent of ADD_JARS
@@ -46,6 +50,13 @@ if os.environ.get("SPARK_EXECUTOR_URI"):
 sc = SparkContext(appName="PySparkShell", pyFiles=add_files)
 atexit.register(lambda: sc.stop())
 
+try:
+    # Try to access HiveConf, it will raise exception if Hive is not added
+    sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+    sqlCtx = HiveContext(sc)
+except py4j.protocol.Py4JError:
+    sqlCtx = SQLContext(sc)
+
 print("""Welcome to
       ____              __
      / __/__  ___ _____/ /__
@@ -57,7 +68,7 @@ print("Using Python version %s (%s, %s)" % (
     platform.python_version(),
     platform.python_build()[0],
     platform.python_build()[1]))
-print("SparkContext available as sc.")
+print("SparkContext available as sc, %s available as sqlCtx." % sqlCtx.__class__.__name__)
 
 if add_files is not None:
     print("Warning: ADD_FILES environment variable is deprecated, use --py-files argument instead")