[SPARK-11467][SQL] add Python API for stddev/variance

Add Python API for stddev/stddev_pop/stddev_samp/variance/var_pop/var_samp/skewness/kurtosis Author: Davies Liu <davies@databricks.com> Closes #9424 from davies/py_var.
author: Davies Liu <davies@databricks.com> 2015-11-03 13:33:46 -0800
committer: Reynold Xin <rxin@databricks.com> 2015-11-03 13:33:46 -0800
commit: 1d04dc95c0d3caa485936e65b0493bcc9719f27e (patch)
tree: ace98f4fe1c54db7c61d867aade0e4210514f553 /python/pyspark/sql/group.py
parent: a9676cc7107c5df6c62a58668c4d95ced1238370 (diff)
download: spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.tar.gz
spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.tar.bz2
spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.zip
1 files changed, 88 insertions, 0 deletions
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 71c0bccc5e..946b53e71c 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -167,6 +167,94 @@ class GroupedData(object):
         [Row(sum(age)=7, sum(height)=165)]
         """
 
+    @df_varargs_api
+    @since(1.6)
+    def stddev(self, *cols):
+        """Compute the sample standard deviation for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().stddev('age', 'height').collect()
+        [Row(STDDEV(age)=2.12..., STDDEV(height)=3.53...)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def stddev_samp(self, *cols):
+        """Compute the sample standard deviation for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().stddev_samp('age', 'height').collect()
+        [Row(STDDEV_SAMP(age)=2.12..., STDDEV_SAMP(height)=3.53...)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def stddev_pop(self, *cols):
+        """Compute the population standard deviation for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().stddev_pop('age', 'height').collect()
+        [Row(STDDEV_POP(age)=1.5, STDDEV_POP(height)=2.5)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def variance(self, *cols):
+        """Compute the sample variance for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().variance('age', 'height').collect()
+        [Row(VARIANCE(age)=2.25, VARIANCE(height)=6.25)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def var_pop(self, *cols):
+        """Compute the sample variance for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().var_pop('age', 'height').collect()
+        [Row(VAR_POP(age)=2.25, VAR_POP(height)=6.25)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def var_samp(self, *cols):
+        """Compute the sample variance for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().var_samp('age', 'height').collect()
+        [Row(VAR_SAMP(age)=4.5, VAR_SAMP(height)=12.5)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def skewness(self, *cols):
+        """Compute the skewness for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().skewness('age', 'height').collect()
+        [Row(SKEWNESS(age)=0.0, SKEWNESS(height)=0.0)]
+        """
+
+    @df_varargs_api
+    @since(1.6)
+    def kurtosis(self, *cols):
+        """Compute the kurtosis for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df3.groupBy().kurtosis('age', 'height').collect()
+        [Row(KURTOSIS(age)=-2.0, KURTOSIS(height)=-2.0)]
+        """
+
 
 def _test():
     import doctest
author	Davies Liu <davies@databricks.com>	2015-11-03 13:33:46 -0800
committer	Reynold Xin <rxin@databricks.com>	2015-11-03 13:33:46 -0800
commit	1d04dc95c0d3caa485936e65b0493bcc9719f27e (patch)
tree	ace98f4fe1c54db7c61d867aade0e4210514f553 /python/pyspark/sql/group.py
parent	a9676cc7107c5df6c62a58668c4d95ced1238370 (diff)
download	spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.tar.gz spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.tar.bz2 spark-1d04dc95c0d3caa485936e65b0493bcc9719f27e.zip