diff options
author | Wenchen Fan <cloud0fan@outlook.com> | 2015-05-13 12:47:48 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-05-13 12:47:48 -0700 |
commit | 213a6f30fee4a1c416ea76b678c71877fd36ef18 (patch) | |
tree | 10d0647738d67c28ebdb2f4748454430da778b21 /sql/core | |
parent | 7ff16e8abef9fbf4a4855e23c256b22e62e560a6 (diff) | |
download | spark-213a6f30fee4a1c416ea76b678c71877fd36ef18.tar.gz spark-213a6f30fee4a1c416ea76b678c71877fd36ef18.tar.bz2 spark-213a6f30fee4a1c416ea76b678c71877fd36ef18.zip |
[SPARK-7551][DataFrame] support backticks for DataFrame attribute resolution
Author: Wenchen Fan <cloud0fan@outlook.com>
Closes #6074 from cloud-fan/7551 and squashes the following commits:
e6f579e [Wenchen Fan] allow space
2b86699 [Wenchen Fan] handle blank
e218d99 [Wenchen Fan] address comments
54c4209 [Wenchen Fan] fix 7551
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 4 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 27 |
2 files changed, 29 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c820a67357..4fd5105c27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -160,7 +160,7 @@ class DataFrame private[sql]( } protected[sql] def resolve(colName: String): NamedExpression = { - queryExecution.analyzed.resolve(colName.split("\\."), sqlContext.analyzer.resolver).getOrElse { + queryExecution.analyzed.resolveQuoted(colName, sqlContext.analyzer.resolver).getOrElse { throw new AnalysisException( s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""") } @@ -168,7 +168,7 @@ class DataFrame private[sql]( protected[sql] def numericColumns: Seq[Expression] = { schema.fields.filter(_.dataType.isInstanceOf[NumericType]).map { n => - queryExecution.analyzed.resolve(n.name.split("\\."), sqlContext.analyzer.resolver).get + queryExecution.analyzed.resolveQuoted(n.name, sqlContext.analyzer.resolver).get } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 52aa1f6558..1d5f6b3aad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -459,6 +459,33 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) } + test("SPARK-7551: support backticks for DataFrame attribute resolution") { + val df = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + """{"a.b": {"c": {"d..e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df.select(df("`a.b`.c.`d..e`.`f`")), + Row(1) + ) + + val df2 = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + """{"a b": {"c": {"d e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df2.select(df2("`a b`.c.d e.f")), + Row(1) + ) + + def checkError(testFun: => Unit): Unit = { + val e = intercept[org.apache.spark.sql.AnalysisException] { + testFun + } + assert(e.getMessage.contains("syntax error in attribute name:")) + } + checkError(df("`abc.`c`")) + checkError(df("`abc`..d")) + checkError(df("`a`.b.")) + checkError(df("`a.b`.c.`d")) + } + test("SPARK-7324 dropDuplicates") { val testData = TestSQLContext.sparkContext.parallelize( (2, 1, 2) :: (1, 1, 1) :: |