aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2015-11-05 14:53:16 -0800
committerYin Huai <yhuai@databricks.com>2015-11-05 14:53:16 -0800
commitd9e30c59cede7f57786bb19e64ba422eda43bdcb (patch)
tree6914b0d2abfb5c0c1fd3513a1ad673b60541165b /sql
parentb9455d1f1810e1e3f472014f665ad3ad3122bcc0 (diff)
downloadspark-d9e30c59cede7f57786bb19e64ba422eda43bdcb.tar.gz
spark-d9e30c59cede7f57786bb19e64ba422eda43bdcb.tar.bz2
spark-d9e30c59cede7f57786bb19e64ba422eda43bdcb.zip
[SPARK-10656][SQL] completely support special chars in DataFrame
The main problem is that we interpret column names with special handling of `.` for DataFrame. This enables us to write something like `df("a.b")` to get the field `b` of `a`. However, we don't need this feature in `DataFrame.apply("*")` or `DataFrame.withColumnRenamed`. In these 2 cases, the column name is already the final name, so no extra processing is needed to interpret it. The solution is simple: use `queryExecution.analyzed.output` to get the resolved columns directly, instead of using `DataFrame.resolve`. close https://github.com/apache/spark/pull/8811 Author: Wenchen Fan <wenchen@databricks.com> Closes #9462 from cloud-fan/special-chars.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala16
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala6
2 files changed, 16 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 6336dee7be..f2d4db5550 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -698,7 +698,7 @@ class DataFrame private[sql](
*/
def col(colName: String): Column = colName match {
case "*" =>
- Column(ResolvedStar(schema.fieldNames.map(resolve)))
+ Column(ResolvedStar(queryExecution.analyzed.output))
case _ =>
val expr = resolve(colName)
Column(expr)
@@ -1259,13 +1259,17 @@ class DataFrame private[sql](
*/
def withColumnRenamed(existingName: String, newName: String): DataFrame = {
val resolver = sqlContext.analyzer.resolver
- val shouldRename = schema.exists(f => resolver(f.name, existingName))
+ val output = queryExecution.analyzed.output
+ val shouldRename = output.exists(f => resolver(f.name, existingName))
if (shouldRename) {
- val colNames = schema.map { field =>
- val name = field.name
- if (resolver(name, existingName)) Column(name).as(newName) else Column(name)
+ val columns = output.map { col =>
+ if (resolver(col.name, existingName)) {
+ Column(col).as(newName)
+ } else {
+ Column(col)
+ }
}
- select(colNames : _*)
+ select(columns : _*)
} else {
this
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 84a616d0b9..f3a7aa2803 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1128,4 +1128,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
}
}
}
+
+ test("SPARK-10656: completely support special chars") {
+ val df = Seq(1 -> "a").toDF("i_$.a", "d^'a.")
+ checkAnswer(df.select(df("*")), Row(1, "a"))
+ checkAnswer(df.withColumnRenamed("d^'a.", "a"), Row(1, "a"))
+ }
}