aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-03-26 12:26:13 -0700
committerReynold Xin <rxin@databricks.com>2015-03-26 12:26:13 -0700
commit784fcd532784fcfd9bf0a1db71c9f71c469ee716 (patch)
treed38d70a2d3c2b8aa14187e7b6dec0f7f8783374a /python
parentc3a52a08248db08eade29b265f02483144a282d6 (diff)
downloadspark-784fcd532784fcfd9bf0a1db71c9f71c469ee716.tar.gz
spark-784fcd532784fcfd9bf0a1db71c9f71c469ee716.tar.bz2
spark-784fcd532784fcfd9bf0a1db71c9f71c469ee716.zip
[SPARK-6117] [SQL] Improvements to DataFrame.describe()
1. Slight modifications to the code to make it more readable. 2. Added Python implementation. 3. Updated the documentation to state that we don't guarantee the output schema for this function and it should only be used for exploratory data analysis. Author: Reynold Xin <rxin@databricks.com> Closes #5201 from rxin/df-describe and squashes the following commits: 25a7834 [Reynold Xin] Reset run-tests. 6abdfee [Reynold Xin] [SPARK-6117] [SQL] Improvements to DataFrame.describe()
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/dataframe.py19
1 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index bf7c47b726..d51309f7ef 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -520,6 +520,25 @@ class DataFrame(object):
orderBy = sort
+ def describe(self, *cols):
+ """Computes statistics for numeric columns.
+
+ This includes count, mean, stddev, min, and max. If no columns are
+ given, this function computes statistics for all numerical columns.
+
+ The schema of the returned DataFrame is not guaranteed; this function
+ is intended for exploratory data analysis only.
+
+ :param cols: the columns to compute statistics for (presumably column
+ names given as strings -- confirm against the JVM-side ``describe``).
+ :return: a new :class:`DataFrame` wrapping the result of the JVM-side
+ ``describe`` call, bound to the same ``sql_ctx`` as this one.
+
+ >>> df.describe().show()
+ summary age
+ count 2
+ mean 3.5
+ stddev 1.5
+ min 2
+ max 5
+ """
+ # Convert the varargs tuple with ListConverter, using the gateway client
+ # of this DataFrame's context, so it can be handed to the JVM below.
+ cols = ListConverter().convert(cols,
+ self.sql_ctx._sc._gateway._gateway_client)
+ # PythonUtils.toSeq turns the converted list into the sequence argument
+ # passed to describe() on the underlying _jdf object.
+ jdf = self._jdf.describe(self.sql_ctx._sc._jvm.PythonUtils.toSeq(cols))
+ return DataFrame(jdf, self.sql_ctx)
+
def head(self, n=None):
""" Return the first `n` rows or the first row if n is None.