author    Reynold Xin <rxin@databricks.com>  2015-03-28 23:59:27 -0700
committer Reynold Xin <rxin@databricks.com>  2015-03-28 23:59:27 -0700
commit    5eef00d0c6c7cc5448aca7b1c2a2e289a4c43eb0 (patch)
tree      853e52d983b4b2e21d8bf2161121ad785e3d05ee /python
parent    f75f633b21faaf911f04aeff847f25749b1ecd89 (diff)
[DOC] Improvements to Python docs.
Author: Reynold Xin <rxin@databricks.com>

Closes #5238 from rxin/pyspark-docs and squashes the following commits:

c285951 [Reynold Xin] Reset deprecation warning.
8c1031e [Reynold Xin] inferSchema
dd91b1a [Reynold Xin] [DOC] Improvements to Python docs.
Diffstat (limited to 'python')
-rw-r--r--  python/docs/index.rst            |  8
-rw-r--r--  python/pyspark/sql/__init__.py   | 14
-rw-r--r--  python/pyspark/sql/dataframe.py  |  9

3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/python/docs/index.rst b/python/docs/index.rst
index d150de9d5c..f7eede9c3c 100644
--- a/python/docs/index.rst
+++ b/python/docs/index.rst
@@ -29,6 +29,14 @@ Core classes:
 
     A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
 
+    :class:`pyspark.sql.SQLContext`
+
+    Main entry point for DataFrame and SQL functionality.
+
+    :class:`pyspark.sql.DataFrame`
+
+    A distributed collection of data grouped into named columns.
+
 Indices and tables
 ==================
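
The two entries added to the index mirror the usual PySpark entry flow. As a
minimal sketch of that flow (assuming a 1.3-era PySpark shell, where a
SparkContext is already bound to ``sc``; the sample data is invented for
illustration)::

    from pyspark.sql import SQLContext, Row

    # SQLContext is the main entry point; it wraps an existing SparkContext.
    sqlContext = SQLContext(sc)

    # A DataFrame is a distributed collection of data grouped into named
    # columns, here built from an RDD of Row objects.
    people = sqlContext.createDataFrame(
        sc.parallelize([Row(name='Alice', age=1), Row(name='Bob', age=2)]))
    people.printSchema()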
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index b9ffd6945e..54a01631d8 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -19,17 +19,19 @@
 public classes of Spark SQL:
 
     - L{SQLContext}
-      Main entry point for SQL functionality.
+      Main entry point for :class:`DataFrame` and SQL functionality.
     - L{DataFrame}
-      A Resilient Distributed Dataset (RDD) with Schema information for the data contained. In
-      addition to normal RDD operations, DataFrames also support SQL.
+      A distributed collection of data grouped into named columns.
     - L{GroupedData}
+      Aggregation methods, returned by :func:`DataFrame.groupBy`.
     - L{Column}
-      Column is a DataFrame with a single column.
+      A column expression in a :class:`DataFrame`.
     - L{Row}
-      A Row of data returned by a Spark SQL query.
+      A row of data in a :class:`DataFrame`.
     - L{HiveContext}
-      Main entry point for accessing data stored in Apache Hive..
+      Main entry point for accessing data stored in Apache Hive.
+    - L{functions}
+      List of built-in functions available for :class:`DataFrame`.
 """
 
 from pyspark.sql.context import SQLContext, HiveContext
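
To see how the reworded class descriptions relate to one another, a short
sketch against the same 1.3-era API (hedged; ``people`` is the sample
DataFrame built above)::

    from pyspark.sql import functions as F

    ageCol = people.age                   # Column: a column expression
    people.filter(ageCol > 1).show()      # columns compose into predicates

    grouped = people.groupBy('name')      # GroupedData, from DataFrame.groupBy
    grouped.count().show()                # aggregation returns a DataFrame

    people.agg(F.max(people.age)).show()  # a built-in function from L{functions}

    row = people.first()                  # Row: one row of data
    print(row.name)                       # fields are accessible by name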
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index d51309f7ef..23c0e63e77 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -50,13 +50,6 @@ class DataFrame(object):
 
         ageCol = people.age
 
-    Note that the :class:`Column` type can also be manipulated
-    through its various functions::
-
-        # The following creates a new column that increases everybody's age by 10.
-        people.age + 10
-
-
     A more concrete example::
 
         # To create DataFrame using SQLContext
@@ -77,7 +70,7 @@ class DataFrame(object):
     @property
     def rdd(self):
         """
-        Return the content of the :class:`DataFrame` as an :class:`RDD`
+        Return the content of the :class:`DataFrame` as an :class:`pyspark.RDD`
         of :class:`Row` s.
         """
         if not hasattr(self, '_lazy_rdd'):
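
The reworded ``rdd`` property simply exposes the underlying representation; a
usage sketch (again assuming the ``people`` DataFrame from above)::

    # DataFrame.rdd hands back a pyspark.RDD of Row objects, so ordinary RDD
    # transformations apply from there.
    names = people.rdd.map(lambda row: row.name)
    names.collect()  # ['Alice', 'Bob'] for the sample data above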