diff options
author | Reynold Xin <rxin@databricks.com> | 2015-05-11 18:07:12 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-05-11 18:07:12 -0700 |
commit | 3a9b6997df3fef1052d8c410f32319018c52acff (patch) | |
tree | 8e5d13c68e929737bbed48119576fb1571a31d64 /sql | |
parent | 57255dcd794222f4db5df1e549ebc7b896cebfdc (diff) | |
download | spark-3a9b6997df3fef1052d8c410f32319018c52acff.tar.gz spark-3a9b6997df3fef1052d8c410f32319018c52acff.tar.bz2 spark-3a9b6997df3fef1052d8c410f32319018c52acff.zip |
[SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames.
Author: Reynold Xin <rxin@databricks.com>
Closes #6062 from rxin/agg-retain-doc and squashes the following commits:
43e511e [Reynold Xin] [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 003a620dcc..543320e471 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -146,11 +146,21 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * // Scala: * import org.apache.spark.sql.functions._ - * df.groupBy("department").agg($"department", max($"age"), sum($"expense")) + * df.groupBy("department").agg(max("age"), sum("expense")) * * // Java: * import static org.apache.spark.sql.functions.*; - * df.groupBy("department").agg(col("department"), max(col("age")), sum(col("expense"))); + * df.groupBy("department").agg(max("age"), sum("expense")); + * }}} + * + * Note that before Spark 1.4, the default behavior was to NOT retain grouping columns. To revert + * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`. + * {{{ + * // Scala, 1.3.x: + * df.groupBy("department").agg($"department", max("age"), sum("expense")) + * + * // Java, 1.3.x: + * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} */ @scala.annotation.varargs |