Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r--  python/pyspark/sql/dataframe.py | 56
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 6f746d136b..6d42410020 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -96,7 +96,7 @@ class DataFrame(object):
return self._lazy_rdd
def toJSON(self, use_unicode=False):
- """Convert a DataFrame into a MappedRDD of JSON documents; one document per row.
+ """Convert a :class:`DataFrame` into a MappedRDD of JSON documents; one document per row.
>>> df.toJSON().first()
'{"age":2,"name":"Alice"}'
@@ -108,7 +108,7 @@ class DataFrame(object):
"""Save the contents as a Parquet file, preserving the schema.
Files that are written out using this method can be read back in as
- a DataFrame using the L{SQLContext.parquetFile} method.
+ a :class:`DataFrame` using the L{SQLContext.parquetFile} method.
>>> import tempfile, shutil
>>> parquetFile = tempfile.mkdtemp()
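[Editor's note] The doctest continues beyond this excerpt; a hedged sketch of the round trip it sets up, assuming the doctests' SQLContext is bound to `sqlCtx`:

    # saveAsParquetFile will not overwrite an existing path, so the
    # freshly created mkdtemp directory is removed first.
    shutil.rmtree(parquetFile)
    df.saveAsParquetFile(parquetFile)
    df2 = sqlCtx.parquetFile(parquetFile)       # read back as a DataFrame
    sorted(df.collect()) == sorted(df2.collect())   # True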
@@ -139,7 +139,7 @@ class DataFrame(object):
self.registerTempTable(name)
def insertInto(self, tableName, overwrite=False):
- """Inserts the contents of this DataFrame into the specified table.
+ """Inserts the contents of this :class:`DataFrame` into the specified table.
Optionally overwriting any existing data.
"""
@@ -165,7 +165,7 @@ class DataFrame(object):
return jmode
def saveAsTable(self, tableName, source=None, mode="append", **options):
- """Saves the contents of the DataFrame to a data source as a table.
+ """Saves the contents of the :class:`DataFrame` to a data source as a table.
The data source is specified by the `source` and a set of `options`.
If `source` is not specified, the default data source configured by
@@ -174,12 +174,13 @@ class DataFrame(object):
Additionally, mode is used to specify the behavior of the saveAsTable operation when
table already exists in the data source. There are four modes:
- * append: Contents of this DataFrame are expected to be appended to existing table.
- * overwrite: Data in the existing table is expected to be overwritten by the contents of \
- this DataFrame.
+ * append: Contents of this :class:`DataFrame` are expected to be appended \
+ to existing table.
+ * overwrite: Data in the existing table is expected to be overwritten by \
+ the contents of this :class:`DataFrame`.
* error: An exception is expected to be thrown.
- * ignore: The save operation is expected to not save the contents of the DataFrame and \
- to not change the existing table.
+ * ignore: The save operation is expected to not save the contents of the \
+ :class:`DataFrame` and to not change the existing table.
"""
if source is None:
source = self.sql_ctx.getConf("spark.sql.sources.default",
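[Editor's note] The four modes map onto calls like the following hedged sketch (table name and source are illustrative):

    df.saveAsTable("people")                   # mode defaults to "append"
    df.saveAsTable("people", source="parquet", mode="overwrite")
    df.saveAsTable("people", mode="ignore")    # no-op if the table exists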
@@ -190,7 +191,7 @@ class DataFrame(object):
self._jdf.saveAsTable(tableName, source, jmode, joptions)
def save(self, path=None, source=None, mode="append", **options):
- """Saves the contents of the DataFrame to a data source.
+ """Saves the contents of the :class:`DataFrame` to a data source.
The data source is specified by the `source` and a set of `options`.
If `source` is not specified, the default data source configured by
@@ -199,11 +200,11 @@ class DataFrame(object):
Additionally, mode is used to specify the behavior of the save operation when
data already exists in the data source. There are four modes:
- * append: Contents of this DataFrame are expected to be appended to existing data.
+ * append: Contents of this :class:`DataFrame` are expected to be appended to existing data.
* overwrite: Existing data is expected to be overwritten by the contents of this DataFrame.
* error: An exception is expected to be thrown.
- * ignore: The save operation is expected to not save the contents of the DataFrame and \
- to not change the existing data.
+ * ignore: The save operation is expected to not save the contents of \
+ the :class:`DataFrame` and to not change the existing data.
"""
if path is not None:
options["path"] = path
@@ -217,7 +218,7 @@ class DataFrame(object):
@property
def schema(self):
- """Returns the schema of this DataFrame (represented by
+ """Returns the schema of this :class:`DataFrame` (represented by
a L{StructType}).
>>> df.schema
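[Editor's note] The expected doctest output is truncated in this excerpt; a hedged sketch of inspecting the returned L{StructType}:

    s = df.schema                  # a StructType
    [f.name for f in s.fields]    # ['age', 'name']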
@@ -275,12 +276,12 @@ class DataFrame(object):
"""
Print the first 20 rows.
+ >>> df
+ DataFrame[age: int, name: string]
>>> df.show()
age name
2 Alice
5 Bob
- >>> df
- DataFrame[age: int, name: string]
"""
print self._jdf.showString().encode('utf8', 'ignore')
@@ -481,8 +482,8 @@ class DataFrame(object):
def join(self, other, joinExprs=None, joinType=None):
"""
- Join with another DataFrame, using the given join expression.
- The following performs a full outer join between `df1` and `df2`::
+ Join with another :class:`DataFrame`, using the given join expression.
+ The following performs a full outer join between `df1` and `df2`.
:param other: Right side of the join
:param joinExprs: Join expression
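[Editor's note] A hedged sketch of the full outer join the docstring describes, assuming a second frame `df2` that shares the `name` column and carries a `height` column:

    df.join(df2, df.name == df2.name, "outer").select(df.name, df2.height)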
@@ -582,8 +583,6 @@ class DataFrame(object):
def select(self, *cols):
""" Selecting a set of expressions.
- >>> df.select().collect()
- [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
>>> df.select('*').collect()
[Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
>>> df.select('name', 'age').collect()
@@ -591,8 +590,6 @@ class DataFrame(object):
>>> df.select(df.name, (df.age + 10).alias('age')).collect()
[Row(name=u'Alice', age=12), Row(name=u'Bob', age=15)]
"""
- if not cols:
- cols = ["*"]
jcols = ListConverter().convert([_to_java_column(c) for c in cols],
self._sc._gateway._gateway_client)
jdf = self._jdf.select(self.sql_ctx._sc._jvm.PythonUtils.toSeq(jcols))
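[Editor's note] With the implicit default removed by this hunk, the projection is spelled out explicitly; a brief sketch:

    df.select('*')       # what a bare df.select() used to expand to
    df.select(df.name, (df.age + 10).alias('age'))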
@@ -612,7 +609,7 @@ class DataFrame(object):
def filter(self, condition):
""" Filtering rows using the given condition, which could be
- Column expression or string of SQL expression.
+ :class:`Column` expression or string of SQL expression.
where() is an alias for filter().
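[Editor's note] Both accepted forms side by side, as a minimal sketch:

    df.filter(df.age > 3)   # Column expression
    df.filter("age > 3")    # SQL expression string
    df.where(df.age > 3)    # where() is the alias noted above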
@@ -666,7 +663,7 @@ class DataFrame(object):
return self.groupBy().agg(*exprs)
def unionAll(self, other):
- """ Return a new DataFrame containing union of rows in this
+ """ Return a new :class:`DataFrame` containing union of rows in this
frame and another frame.
This is equivalent to `UNION ALL` in SQL.
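[Editor's note] A minimal sketch, assuming `df2` has the same schema as `df`:

    df.unionAll(df2)              # duplicates kept, as in UNION ALL
    df.unionAll(df2).distinct()   # plain UNION semantics instead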
@@ -919,9 +916,10 @@ class Column(object):
"""
A column in a DataFrame.
- `Column` instances can be created by::
+ :class:`Column` instances can be created by::
# 1. Select a column out of a DataFrame
+
df.colName
df["colName"]
@@ -975,7 +973,7 @@ class Column(object):
def substr(self, startPos, length):
"""
- Return a Column which is a substring of the column
+ Return a :class:`Column` which is a substring of the column
:param startPos: start position (int or Column)
:param length: length of the substring (int or Column)
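[Editor's note] A short sketch using the doctest frame's `name` column (note the second argument is a length, not an end index):

    df.name.substr(1, 3)   # first three characters, 1-based start
    df.name[1:3]           # same call via the __getslice__ alias below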
@@ -996,8 +994,10 @@ class Column(object):
__getslice__ = substr
# order
- asc = _unary_op("asc")
- desc = _unary_op("desc")
+ asc = _unary_op("asc", "Returns a sort expression based on the"
+ " ascending order of the given column name.")
+ desc = _unary_op("desc", "Returns a sort expression based on the"
+ " descending order of the given column name.")
isNull = _unary_op("isNull", "True if the current expression is null.")
isNotNull = _unary_op("isNotNull", "True if the current expression is not null.")
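[Editor's note] A hedged sketch of the newly documented sort expressions, together with the null predicates, in use:

    df.sort(df.age.desc()).collect()   # oldest first
    df.sort(df.name.asc())             # ascending by name
    df.filter(df.name.isNull())        # isNull/isNotNull as filters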