3 files changed, 28 insertions, 207 deletions
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 946b53e71c..71c0bccc5e 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -167,94 +167,6 @@ class GroupedData(object):
         [Row(sum(age)=7, sum(height)=165)]
         """
 
-    @df_varargs_api
-    @since(1.6)
-    def stddev(self, *cols):
-        """Compute the sample standard deviation for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().stddev('age', 'height').collect()
-        [Row(STDDEV(age)=2.12..., STDDEV(height)=3.53...)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def stddev_samp(self, *cols):
-        """Compute the sample standard deviation for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().stddev_samp('age', 'height').collect()
-        [Row(STDDEV_SAMP(age)=2.12..., STDDEV_SAMP(height)=3.53...)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def stddev_pop(self, *cols):
-        """Compute the population standard deviation for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().stddev_pop('age', 'height').collect()
-        [Row(STDDEV_POP(age)=1.5, STDDEV_POP(height)=2.5)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def variance(self, *cols):
-        """Compute the sample variance for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().variance('age', 'height').collect()
-        [Row(VARIANCE(age)=2.25, VARIANCE(height)=6.25)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def var_pop(self, *cols):
-        """Compute the sample variance for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().var_pop('age', 'height').collect()
-        [Row(VAR_POP(age)=2.25, VAR_POP(height)=6.25)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def var_samp(self, *cols):
-        """Compute the sample variance for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().var_samp('age', 'height').collect()
-        [Row(VAR_SAMP(age)=4.5, VAR_SAMP(height)=12.5)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def skewness(self, *cols):
-        """Compute the skewness for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().skewness('age', 'height').collect()
-        [Row(SKEWNESS(age)=0.0, SKEWNESS(height)=0.0)]
-        """
-
-    @df_varargs_api
-    @since(1.6)
-    def kurtosis(self, *cols):
-        """Compute the kurtosis for each numeric columns for each group.
-
-        :param cols: list of column names (string). Non-numeric columns are ignored.
-
-        >>> df3.groupBy().kurtosis('age', 'height').collect()
-        [Row(KURTOSIS(age)=-2.0, KURTOSIS(height)=-2.0)]
-        """
-
 
 def _test():
     import doctest
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
index dc96384a4d..c2b2a4013d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
@@ -26,42 +26,14 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate}
 import org.apache.spark.sql.types.NumericType
 
-/**
- * Companion object for GroupedData
- */
-private[sql] object GroupedData {
-  def apply(
-      df: DataFrame,
-      groupingExprs: Seq[Expression],
-      groupType: GroupType): GroupedData = {
-    new GroupedData(df, groupingExprs, groupType: GroupType)
-  }
-
-  /**
-   * The Grouping Type
-   */
-  private[sql] trait GroupType
-
-  /**
-   * To indicate it's the GroupBy
-   */
-  private[sql] object GroupByType extends GroupType
-
-  /**
-   * To indicate it's the CUBE
-   */
-  private[sql] object CubeType extends GroupType
-
-  /**
-   * To indicate it's the ROLLUP
-   */
-  private[sql] object RollupType extends GroupType
-}
 
 /**
  * :: Experimental ::
  * A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]].
  *
+ * The main method is the agg function, which has multiple variants. This class also contains
+ * convenience some first order statistics such as mean, sum for convenience.
+ *
  * @since 1.3.0
  */
 @Experimental
@@ -124,7 +96,7 @@ class GroupedData protected[sql](
       case "avg" | "average" | "mean" => Average
       case "max" => Max
       case "min" => Min
-      case "stddev" => Stddev
+      case "stddev" | "std" => Stddev
       case "stddev_pop" => StddevPop
       case "stddev_samp" => StddevSamp
       case "variance" => Variance
@@ -256,30 +228,6 @@ class GroupedData protected[sql](
   }
 
   /**
-   * Compute the skewness for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the skewness values for them.
-   *
-   * @since 1.6.0
-   */
-  @scala.annotation.varargs
-  def skewness(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(Skewness)
-  }
-
-  /**
-   * Compute the kurtosis for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the kurtosis values for them.
-   *
-   * @since 1.6.0
-   */
-  @scala.annotation.varargs
-  def kurtosis(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(Kurtosis)
-  }
-
-  /**
    * Compute the max value for each numeric columns for each group.
    * The resulting [[DataFrame]] will also contain the grouping columns.
    * When specified columns are given, only compute the max values for them.
@@ -316,86 +264,48 @@ class GroupedData protected[sql](
   }
 
   /**
-   * Compute the sample standard deviation for each numeric columns for each group.
+   * Compute the sum for each numeric columns for each group.
    * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the stddev for them.
+   * When specified columns are given, only compute the sum for them.
    *
-   * @since 1.6.0
+   * @since 1.3.0
    */
   @scala.annotation.varargs
-  def stddev(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(Stddev)
+  def sum(colNames: String*): DataFrame = {
+    aggregateNumericColumns(colNames : _*)(Sum)
   }
+}
 
-  /**
-   * Compute the population standard deviation for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the stddev for them.
-   *
-   * @since 1.6.0
-   */
-  @scala.annotation.varargs
-  def stddev_pop(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(StddevPop)
-  }
 
-  /**
-   * Compute the sample standard deviation for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the stddev for them.
-   *
-   * @since 1.6.0
-   */
-  @scala.annotation.varargs
-  def stddev_samp(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(StddevSamp)
+/**
+ * Companion object for GroupedData.
+ */
+private[sql] object GroupedData {
+
+  def apply(
+      df: DataFrame,
+      groupingExprs: Seq[Expression],
+      groupType: GroupType): GroupedData = {
+    new GroupedData(df, groupingExprs, groupType: GroupType)
   }
 
   /**
-   * Compute the sum for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the sum for them.
-   *
-   * @since 1.3.0
+   * The Grouping Type
    */
-  @scala.annotation.varargs
-  def sum(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(Sum)
-  }
+  private[sql] trait GroupType
 
   /**
-   * Compute the sample variance for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the variance for them.
-   *
-   * @since 1.6.0
+   * To indicate it's the GroupBy
    */
-  @scala.annotation.varargs
-  def variance(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(Variance)
-  }
+  private[sql] object GroupByType extends GroupType
 
   /**
-   * Compute the population variance for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the variance for them.
-   *
-   * @since 1.6.0
+   * To indicate it's the CUBE
    */
-  @scala.annotation.varargs
-  def var_pop(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(VariancePop)
-  }
+  private[sql] object CubeType extends GroupType
 
   /**
-   * Compute the sample variance for each numeric columns for each group.
-   * The resulting [[DataFrame]] will also contain the grouping columns.
-   * When specified columns are given, only compute the variance for them.
-   *
-   * @since 1.6.0
+   * To indicate it's the ROLLUP
    */
-  @scala.annotation.varargs
-  def var_samp(colNames: String*): DataFrame = {
-    aggregateNumericColumns(colNames : _*)(VarianceSamp)
-  }
+  private[sql] object RollupType extends GroupType
 }
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index a1a3fdbb48..49f516e86d 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -91,7 +91,6 @@ public class JavaDataFrameSuite {
     df.groupBy().mean("key");
     df.groupBy().max("key");
     df.groupBy().min("key");
-    df.groupBy().stddev("key");
     df.groupBy().sum("key");
 
     // Varargs in column expressions