aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql.py')
-rw-r--r--python/pyspark/sql.py33
1 files changed, 21 insertions, 12 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 114644ab8b..3d5a281239 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -15,28 +15,37 @@
# limitations under the License.
#
+"""
+public classes of Spark SQL:
+
+ - L{SQLContext}
+ Main entry point for SQL functionality.
+ - L{SchemaRDD}
+ A Resilient Distributed Dataset (RDD) with Schema information for the data contained. In
+ addition to normal RDD operations, SchemaRDDs also support SQL.
+ - L{Row}
+ A Row of data returned by a Spark SQL query.
+ - L{HiveContext}
+ Main entry point for accessing data stored in Apache Hive.
+"""
-import sys
-import types
import itertools
-import warnings
import decimal
import datetime
import keyword
import warnings
from array import array
from operator import itemgetter
+from itertools import imap
+
+from py4j.protocol import Py4JError
+from py4j.java_collections import ListConverter, MapConverter
from pyspark.rdd import RDD
from pyspark.serializers import BatchedSerializer, PickleSerializer, CloudPickleSerializer
from pyspark.storagelevel import StorageLevel
from pyspark.traceback_utils import SCCallSiteSync
-from itertools import chain, ifilter, imap
-
-from py4j.protocol import Py4JError
-from py4j.java_collections import ListConverter, MapConverter
-
__all__ = [
"StringType", "BinaryType", "BooleanType", "TimestampType", "DecimalType",
@@ -899,8 +908,8 @@ class SQLContext(object):
def __init__(self, sparkContext, sqlContext=None):
"""Create a new SQLContext.
- @param sparkContext: The SparkContext to wrap.
- @param sqlContext: An optional JVM Scala SQLContext. If set, we do not instatiate a new
+ :param sparkContext: The SparkContext to wrap.
+ :param sqlContext: An optional JVM Scala SQLContext. If set, we do not instantiate a new
SQLContext in the JVM, instead we make all calls to this object.
>>> srdd = sqlCtx.inferSchema(rdd)
@@ -1325,8 +1334,8 @@ class HiveContext(SQLContext):
def __init__(self, sparkContext, hiveContext=None):
"""Create a new HiveContext.
- @param sparkContext: The SparkContext to wrap.
- @param hiveContext: An optional JVM Scala HiveContext. If set, we do not instatiate a new
+ :param sparkContext: The SparkContext to wrap.
+ :param hiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
HiveContext in the JVM, instead we make all calls to this object.
"""
SQLContext.__init__(self, sparkContext)