about summary refs log tree commit diff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-03-14 00:43:33 -0700
committerReynold Xin <rxin@databricks.com>2015-03-14 00:43:33 -0700
commitb38e073fee794188d5267f1812b095e51874839e (patch)
tree99325920b4c42688ac22c9e524e0b20d458fd87b /python/pyspark/sql/dataframe.py
parente360d5e4adf287444c10e72f8e4d57548839bf6e (diff)
downloadspark-b38e073fee794188d5267f1812b095e51874839e.tar.gz
spark-b38e073fee794188d5267f1812b095e51874839e.tar.bz2
spark-b38e073fee794188d5267f1812b095e51874839e.zip
[SPARK-6210] [SQL] use prettyString as column name in agg()
use prettyString instead of toString() (which includes the id of the expression) as the column name in agg(). Author: Davies Liu <davies@databricks.com> Closes #5006 from davies/prettystring and squashes the following commits: cb1fdcf [Davies Liu] use prettyString as column name in agg()
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- python/pyspark/sql/dataframe.py 32
1 file changed, 16 insertions, 16 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index e8ce454745..94001aec37 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -631,11 +631,11 @@ class DataFrame(object):
for all the available aggregate functions.
>>> df.groupBy().avg().collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df.groupBy('name').agg({'age': 'mean'}).collect()
- [Row(name=u'Bob', AVG(age#0)=5.0), Row(name=u'Alice', AVG(age#0)=2.0)]
+ [Row(name=u'Bob', AVG(age)=5.0), Row(name=u'Alice', AVG(age)=2.0)]
>>> df.groupBy(df.name).avg().collect()
- [Row(name=u'Bob', AVG(age#0)=5.0), Row(name=u'Alice', AVG(age#0)=2.0)]
+ [Row(name=u'Bob', AVG(age)=5.0), Row(name=u'Alice', AVG(age)=2.0)]
"""
jcols = ListConverter().convert([_to_java_column(c) for c in cols],
self._sc._gateway._gateway_client)
@@ -647,10 +647,10 @@ class DataFrame(object):
(shorthand for df.groupBy.agg()).
>>> df.agg({"age": "max"}).collect()
- [Row(MAX(age#0)=5)]
+ [Row(MAX(age)=5)]
>>> from pyspark.sql import functions as F
>>> df.agg(F.min(df.age)).collect()
- [Row(MIN(age#0)=2)]
+ [Row(MIN(age)=2)]
"""
return self.groupBy().agg(*exprs)
@@ -766,7 +766,7 @@ class GroupedData(object):
>>> from pyspark.sql import functions as F
>>> gdf.agg(F.min(df.age)).collect()
- [Row(MIN(age#0)=5), Row(MIN(age#0)=2)]
+ [Row(MIN(age)=5), Row(MIN(age)=2)]
"""
assert exprs, "exprs should not be empty"
if len(exprs) == 1 and isinstance(exprs[0], dict):
@@ -795,9 +795,9 @@ class GroupedData(object):
for each group. This is an alias for `avg`.
>>> df.groupBy().mean('age').collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df3.groupBy().mean('age', 'height').collect()
- [Row(AVG(age#4L)=3.5, AVG(height#5L)=82.5)]
+ [Row(AVG(age)=3.5, AVG(height)=82.5)]
"""
@df_varargs_api
@@ -806,9 +806,9 @@ class GroupedData(object):
for each group.
>>> df.groupBy().avg('age').collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df3.groupBy().avg('age', 'height').collect()
- [Row(AVG(age#4L)=3.5, AVG(height#5L)=82.5)]
+ [Row(AVG(age)=3.5, AVG(height)=82.5)]
"""
@df_varargs_api
@@ -817,9 +817,9 @@ class GroupedData(object):
each group.
>>> df.groupBy().max('age').collect()
- [Row(MAX(age#0)=5)]
+ [Row(MAX(age)=5)]
>>> df3.groupBy().max('age', 'height').collect()
- [Row(MAX(age#4L)=5, MAX(height#5L)=85)]
+ [Row(MAX(age)=5, MAX(height)=85)]
"""
@df_varargs_api
@@ -828,9 +828,9 @@ class GroupedData(object):
each group.
>>> df.groupBy().min('age').collect()
- [Row(MIN(age#0)=2)]
+ [Row(MIN(age)=2)]
>>> df3.groupBy().min('age', 'height').collect()
- [Row(MIN(age#4L)=2, MIN(height#5L)=80)]
+ [Row(MIN(age)=2, MIN(height)=80)]
"""
@df_varargs_api
@@ -839,9 +839,9 @@ class GroupedData(object):
group.
>>> df.groupBy().sum('age').collect()
- [Row(SUM(age#0)=7)]
+ [Row(SUM(age)=7)]
>>> df3.groupBy().sum('age', 'height').collect()
- [Row(SUM(age#4L)=7, SUM(height#5L)=165)]
+ [Row(SUM(age)=7, SUM(height)=165)]
"""