From dcaa016610ac2c11d7dd01803f3515b02ab32e64 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 19 Mar 2016 11:23:14 -0700 Subject: [SPARK-13897][SQL] RelationalGroupedDataset and KeyValueGroupedDataset ## What changes were proposed in this pull request? Previously, Dataset.groupBy returned a GroupedData, and Dataset.groupByKey returned a GroupedDataset. The naming is very similar, and unfortunately does not convey the real differences between the two. Assume we are grouping by some keys (K). groupByKey is a key-value style group by, in which the schema of the returned dataset is a tuple of just two fields: key and value. groupBy, on the other hand, is a relational style group by, in which the schema of the returned dataset is flattened and contains |K| + |V| fields. This pull request also removes the experimental tag from RelationalGroupedDataset. It has been with DataFrame since 1.3, and we have enough confidence now to stabilize it. ## How was this patch tested? This is a rename to improve API understandability. Should be covered by all existing tests. Author: Reynold Xin Closes #11841 from rxin/SPARK-13897. 
--- .../src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'sql/core/src/test/java') diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 79b6e61767..4b8b0d9d4f 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -169,7 +169,7 @@ public class JavaDatasetSuite implements Serializable { public void testGroupBy() { List data = Arrays.asList("a", "foo", "bar"); Dataset ds = context.createDataset(data, Encoders.STRING()); - GroupedDataset grouped = ds.groupByKey(new MapFunction() { + KeyValueGroupedDataset grouped = ds.groupByKey(new MapFunction() { @Override public Integer call(String v) throws Exception { return v.length(); @@ -217,7 +217,7 @@ public class JavaDatasetSuite implements Serializable { List data2 = Arrays.asList(2, 6, 10); Dataset ds2 = context.createDataset(data2, Encoders.INT()); - GroupedDataset grouped2 = ds2.groupByKey(new MapFunction() { + KeyValueGroupedDataset grouped2 = ds2.groupByKey(new MapFunction() { @Override public Integer call(Integer v) throws Exception { return v / 2; @@ -249,7 +249,7 @@ public class JavaDatasetSuite implements Serializable { public void testGroupByColumn() { List data = Arrays.asList("a", "foo", "bar"); Dataset ds = context.createDataset(data, Encoders.STRING()); - GroupedDataset grouped = + KeyValueGroupedDataset grouped = ds.groupByKey(length(col("value"))).keyAs(Encoders.INT()); Dataset mapped = grouped.mapGroups( @@ -410,7 +410,7 @@ public class JavaDatasetSuite implements Serializable { Arrays.asList(tuple2("a", 1), tuple2("a", 2), tuple2("b", 3)); Dataset> ds = context.createDataset(data, encoder); - GroupedDataset> grouped = ds.groupByKey( + KeyValueGroupedDataset> grouped = ds.groupByKey( new MapFunction, 
String>() { @Override public String call(Tuple2 value) throws Exception { -- cgit v1.2.3