From 3db08444bcb498cdd962f317c15bf6dfb34b0de0 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@databricks.com>
Date: Sat, 28 Mar 2015 23:59:27 -0700
Subject: [DOC] Improvements to Python docs.

Author: Reynold Xin <rxin@databricks.com>

Closes #5238 from rxin/pyspark-docs and squashes the following commits:

c285951 [Reynold Xin] Reset deprecation warning.
8c1031e [Reynold Xin] inferSchema
dd91b1a [Reynold Xin] [DOC] Improvements to Python docs.

(cherry picked from commit 5eef00d0c6c7cc5448aca7b1c2a2e289a4c43eb0)
Signed-off-by: Reynold Xin <rxin@databricks.com>
---
 python/docs/index.rst           |  8 ++++++++
 python/pyspark/sql/__init__.py  | 14 ++++++++------
 python/pyspark/sql/dataframe.py |  9 +--------
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/python/docs/index.rst b/python/docs/index.rst
index d150de9d5c..f7eede9c3c 100644
--- a/python/docs/index.rst
+++ b/python/docs/index.rst
@@ -29,6 +29,14 @@ Core classes:
 
     A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
 
+    :class:`pyspark.sql.SQLContext`
+
+    Main entry point for DataFrame and SQL functionality.
+
+    :class:`pyspark.sql.DataFrame`
+
+    A distributed collection of data grouped into named columns.
+
 
 Indices and tables
 ==================
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index b9ffd6945e..54a01631d8 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -19,17 +19,19 @@
 public classes of Spark SQL:
 
     - L{SQLContext}
-      Main entry point for SQL functionality.
+      Main entry point for :class:`DataFrame` and SQL functionality.
     - L{DataFrame}
-      A Resilient Distributed Dataset (RDD) with Schema information for the data contained. In
-      addition to normal RDD operations, DataFrames also support SQL.
+      A distributed collection of data grouped into named columns.
     - L{GroupedData}
+      Aggregation methods, returned by :func:`DataFrame.groupBy`.
     - L{Column}
-      Column is a DataFrame with a single column.
+      A column expression in a :class:`DataFrame`.
     - L{Row}
-      A Row of data returned by a Spark SQL query.
+      A row of data in a :class:`DataFrame`.
     - L{HiveContext}
-      Main entry point for accessing data stored in Apache Hive..
+      Main entry point for accessing data stored in Apache Hive.
+    - L{functions}
+      List of built-in functions available for :class:`DataFrame`.
 """
 
 from pyspark.sql.context import SQLContext, HiveContext
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index d51309f7ef..23c0e63e77 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -50,13 +50,6 @@ class DataFrame(object):
 
         ageCol = people.age
 
-    Note that the :class:`Column` type can also be manipulated
-    through its various functions::
-
-        # The following creates a new column that increases everybody's age by 10.
-        people.age + 10
-
-
     A more concrete example::
 
         # To create DataFrame using SQLContext
@@ -77,7 +70,7 @@ class DataFrame(object):
     @property
     def rdd(self):
         """
-        Return the content of the :class:`DataFrame` as an :class:`RDD`
+        Return the content of the :class:`DataFrame` as a :class:`pyspark.RDD`
         of :class:`Row` s.
         """
         if not hasattr(self, '_lazy_rdd'):
-- 
cgit v1.2.3