about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--python/docs/index.rst8
-rw-r--r--python/pyspark/sql/__init__.py14
-rw-r--r--python/pyspark/sql/dataframe.py9
3 files changed, 17 insertions, 14 deletions
diff --git a/python/docs/index.rst b/python/docs/index.rst
index d150de9d5c..f7eede9c3c 100644
--- a/python/docs/index.rst
+++ b/python/docs/index.rst
@@ -29,6 +29,14 @@ Core classes:
A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+ :class:`pyspark.sql.SQLContext`
+
+ Main entry point for DataFrame and SQL functionality.
+
+ :class:`pyspark.sql.DataFrame`
+
+ A distributed collection of data grouped into named columns.
+
Indices and tables
==================
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index b9ffd6945e..54a01631d8 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -19,17 +19,19 @@
public classes of Spark SQL:
- L{SQLContext}
- Main entry point for SQL functionality.
+ Main entry point for :class:`DataFrame` and SQL functionality.
- L{DataFrame}
- A Resilient Distributed Dataset (RDD) with Schema information for the data contained. In
- addition to normal RDD operations, DataFrames also support SQL.
+ A distributed collection of data grouped into named columns.
- L{GroupedData}
+ Aggregation methods, returned by :func:`DataFrame.groupBy`.
- L{Column}
- Column is a DataFrame with a single column.
+ A column expression in a :class:`DataFrame`.
- L{Row}
- A Row of data returned by a Spark SQL query.
+ A row of data in a :class:`DataFrame`.
- L{HiveContext}
- Main entry point for accessing data stored in Apache Hive..
+ Main entry point for accessing data stored in Apache Hive.
+ - L{functions}
+ List of built-in functions available for :class:`DataFrame`.
"""
from pyspark.sql.context import SQLContext, HiveContext
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index d51309f7ef..23c0e63e77 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -50,13 +50,6 @@ class DataFrame(object):
ageCol = people.age
- Note that the :class:`Column` type can also be manipulated
- through its various functions::
-
- # The following creates a new column that increases everybody's age by 10.
- people.age + 10
-
-
A more concrete example::
# To create DataFrame using SQLContext
@@ -77,7 +70,7 @@ class DataFrame(object):
@property
def rdd(self):
"""
- Return the content of the :class:`DataFrame` as an :class:`RDD`
+ Return the content of the :class:`DataFrame` as an :class:`pyspark.RDD`
of :class:`Row` s.
"""
if not hasattr(self, '_lazy_rdd'):