aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-03-14 00:43:33 -0700
committerReynold Xin <rxin@databricks.com>2015-03-14 00:43:33 -0700
commitb38e073fee794188d5267f1812b095e51874839e (patch)
tree99325920b4c42688ac22c9e524e0b20d458fd87b
parente360d5e4adf287444c10e72f8e4d57548839bf6e (diff)
downloadspark-b38e073fee794188d5267f1812b095e51874839e.tar.gz
spark-b38e073fee794188d5267f1812b095e51874839e.tar.bz2
spark-b38e073fee794188d5267f1812b095e51874839e.zip
[SPARK-6210] [SQL] use prettyString as column name in agg()
Use prettyString instead of toString() (which includes the id of the expression) as the column name in agg(). Author: Davies Liu <davies@databricks.com> Closes #5006 from davies/prettystring and squashes the following commits: cb1fdcf [Davies Liu] use prettyString as column name in agg()
-rw-r--r--python/pyspark/sql/dataframe.py32
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala2
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala8
3 files changed, 21 insertions, 21 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index e8ce454745..94001aec37 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -631,11 +631,11 @@ class DataFrame(object):
for all the available aggregate functions.
>>> df.groupBy().avg().collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df.groupBy('name').agg({'age': 'mean'}).collect()
- [Row(name=u'Bob', AVG(age#0)=5.0), Row(name=u'Alice', AVG(age#0)=2.0)]
+ [Row(name=u'Bob', AVG(age)=5.0), Row(name=u'Alice', AVG(age)=2.0)]
>>> df.groupBy(df.name).avg().collect()
- [Row(name=u'Bob', AVG(age#0)=5.0), Row(name=u'Alice', AVG(age#0)=2.0)]
+ [Row(name=u'Bob', AVG(age)=5.0), Row(name=u'Alice', AVG(age)=2.0)]
"""
jcols = ListConverter().convert([_to_java_column(c) for c in cols],
self._sc._gateway._gateway_client)
@@ -647,10 +647,10 @@ class DataFrame(object):
(shorthand for df.groupBy.agg()).
>>> df.agg({"age": "max"}).collect()
- [Row(MAX(age#0)=5)]
+ [Row(MAX(age)=5)]
>>> from pyspark.sql import functions as F
>>> df.agg(F.min(df.age)).collect()
- [Row(MIN(age#0)=2)]
+ [Row(MIN(age)=2)]
"""
return self.groupBy().agg(*exprs)
@@ -766,7 +766,7 @@ class GroupedData(object):
>>> from pyspark.sql import functions as F
>>> gdf.agg(F.min(df.age)).collect()
- [Row(MIN(age#0)=5), Row(MIN(age#0)=2)]
+ [Row(MIN(age)=5), Row(MIN(age)=2)]
"""
assert exprs, "exprs should not be empty"
if len(exprs) == 1 and isinstance(exprs[0], dict):
@@ -795,9 +795,9 @@ class GroupedData(object):
for each group. This is an alias for `avg`.
>>> df.groupBy().mean('age').collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df3.groupBy().mean('age', 'height').collect()
- [Row(AVG(age#4L)=3.5, AVG(height#5L)=82.5)]
+ [Row(AVG(age)=3.5, AVG(height)=82.5)]
"""
@df_varargs_api
@@ -806,9 +806,9 @@ class GroupedData(object):
for each group.
>>> df.groupBy().avg('age').collect()
- [Row(AVG(age#0)=3.5)]
+ [Row(AVG(age)=3.5)]
>>> df3.groupBy().avg('age', 'height').collect()
- [Row(AVG(age#4L)=3.5, AVG(height#5L)=82.5)]
+ [Row(AVG(age)=3.5, AVG(height)=82.5)]
"""
@df_varargs_api
@@ -817,9 +817,9 @@ class GroupedData(object):
each group.
>>> df.groupBy().max('age').collect()
- [Row(MAX(age#0)=5)]
+ [Row(MAX(age)=5)]
>>> df3.groupBy().max('age', 'height').collect()
- [Row(MAX(age#4L)=5, MAX(height#5L)=85)]
+ [Row(MAX(age)=5, MAX(height)=85)]
"""
@df_varargs_api
@@ -828,9 +828,9 @@ class GroupedData(object):
each group.
>>> df.groupBy().min('age').collect()
- [Row(MIN(age#0)=2)]
+ [Row(MIN(age)=2)]
>>> df3.groupBy().min('age', 'height').collect()
- [Row(MIN(age#4L)=2, MIN(height#5L)=80)]
+ [Row(MIN(age)=2, MIN(height)=80)]
"""
@df_varargs_api
@@ -839,9 +839,9 @@ class GroupedData(object):
group.
>>> df.groupBy().sum('age').collect()
- [Row(SUM(age#0)=7)]
+ [Row(SUM(age)=7)]
>>> df3.groupBy().sum('age', 'height').collect()
- [Row(SUM(age#4L)=7, SUM(height#5L)=165)]
+ [Row(SUM(age)=7, SUM(height)=165)]
"""
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 7e191ad031..f84ffe4e17 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -343,7 +343,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
}.mkString(", ")
/** String representation of this node without any children */
- def simpleString = s"$nodeName $argString"
+ def simpleString = s"$nodeName $argString".trim
override def toString: String = treeString
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
index d001752659..45a63ae26e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
@@ -37,7 +37,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
private[this] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = {
val namedGroupingExprs = groupingExprs.map {
case expr: NamedExpression => expr
- case expr: Expression => Alias(expr, expr.toString)()
+ case expr: Expression => Alias(expr, expr.prettyString)()
}
DataFrame(
df.sqlContext, Aggregate(groupingExprs, namedGroupingExprs ++ aggExprs, df.logicalPlan))
@@ -63,7 +63,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
}
columnExprs.map { c =>
val a = f(c)
- Alias(a, a.toString)()
+ Alias(a, a.prettyString)()
}
}
@@ -115,7 +115,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
def agg(exprs: Map[String, String]): DataFrame = {
exprs.map { case (colName, expr) =>
val a = strToExpr(expr)(df(colName).expr)
- Alias(a, a.toString)()
+ Alias(a, a.prettyString)()
}.toSeq
}
@@ -159,7 +159,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
def agg(expr: Column, exprs: Column*): DataFrame = {
val aggExprs = (expr +: exprs).map(_.expr).map {
case expr: NamedExpression => expr
- case expr: Expression => Alias(expr, expr.toString)()
+ case expr: Expression => Alias(expr, expr.prettyString)()
}
DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan))
}