diff options
author | Wenchen Fan <cloud0fan@163.com> | 2015-10-21 13:22:35 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-10-21 13:22:35 -0700 |
commit | 7c74ebca05f40a2d8fe8f10f24a10486ce4f76c0 (patch) | |
tree | d2ed06edd870eec7fe1f572a5f4950b93fdf9fab /sql | |
parent | 49ea0e9d7ce805d312d94a5b2936eec2053bc052 (diff) | |
download | spark-7c74ebca05f40a2d8fe8f10f24a10486ce4f76c0.tar.gz spark-7c74ebca05f40a2d8fe8f10f24a10486ce4f76c0.tar.bz2 spark-7c74ebca05f40a2d8fe8f10f24a10486ce4f76c0.zip |
[SPARK-10743][SQL] Keep the name of the expression if possible when casting
Author: Wenchen Fan <cloud0fan@163.com>
Closes #8859 from cloud-fan/cast.
Diffstat (limited to 'sql')
4 files changed, 23 insertions, 25 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9237f2f3dd..016dc293f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -141,32 +141,31 @@ class Analyzer( */ object ResolveAliases extends Rule[LogicalPlan] { private def assignAliases(exprs: Seq[NamedExpression]) = { - // The `UnresolvedAlias`s will appear only at root of a expression tree, we don't need - // to traverse the whole tree. exprs.zipWithIndex.map { - case (u @ UnresolvedAlias(child), i) => - child match { - case _: UnresolvedAttribute => u - case ne: NamedExpression => ne - case g: Generator if g.resolved && g.elementTypes.size > 1 => MultiAlias(g, Nil) - case e if !e.resolved => u - case other => Alias(other, s"_c$i")() + case (expr, i) => + expr transform { + case u @ UnresolvedAlias(child) => child match { + case ne: NamedExpression => ne + case e if !e.resolved => u + case g: Generator if g.elementTypes.size > 1 => MultiAlias(g, Nil) + case c @ Cast(ne: NamedExpression, _) => Alias(c, ne.name)() + case other => Alias(other, s"_c$i")() + } } - case (other, _) => other - } + }.asInstanceOf[Seq[NamedExpression]] } + private def hasUnresolvedAlias(exprs: Seq[NamedExpression]) = + exprs.exists(_.find(_.isInstanceOf[UnresolvedAlias]).isDefined) + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case Aggregate(groups, aggs, child) - if child.resolved && aggs.exists(_.isInstanceOf[UnresolvedAlias]) => + case Aggregate(groups, aggs, child) if child.resolved && hasUnresolvedAlias(aggs) => Aggregate(groups, assignAliases(aggs), child) - case g: GroupingAnalytics - if g.child.resolved && g.aggregations.exists(_.isInstanceOf[UnresolvedAlias]) => + case g: GroupingAnalytics if g.child.resolved && 
hasUnresolvedAlias(g.aggregations) => g.withNewAggs(assignAliases(g.aggregations)) - case Project(projectList, child) - if child.resolved && projectList.exists(_.isInstanceOf[UnresolvedAlias]) => + case Project(projectList, child) if child.resolved && hasUnresolvedAlias(projectList) => Project(assignAliases(projectList), child) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 1f826887ac..37d559c8e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -835,8 +835,8 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @since 1.3.0 */ def cast(to: DataType): Column = expr match { - // Lift alias out of cast so we can support col.as("name").cast(IntegerType) - case Alias(childExpr, name) => Alias(Cast(childExpr, to), name)() + // keeps the name of expression if possible when do cast. + case ne: NamedExpression => UnresolvedAlias(Cast(expr, to)) case _ => Cast(expr, to) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 4e988f074b..fa559c9c64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -588,12 +588,6 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext { } } - test("lift alias out of cast") { - compareExpressions( - col("1234").as("name").cast("int").expr, - col("1234").cast("int").as("name").expr) - } - test("columns can be compared") { assert('key.desc == 'key.desc) assert('key.desc != 'key.asc) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 832ea02cb6..6424f1f1d9 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -975,4 +975,9 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { expected(except) ) } + + test("SPARK-10743: keep the name of expression if possible when do cast") { + val df = (1 to 10).map(Tuple1.apply).toDF("i").as("src") + assert(df.select($"src.i".cast(StringType)).columns.head === "i") + } } |