diff options
author | Josh Rosen <joshrosen@databricks.com> | 2015-01-29 18:23:05 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-01-29 18:23:05 -0800 |
commit | 22271f969363fd139e6cfb5a2d95a2607fb4e572 (patch) | |
tree | 04ee988f3c772ea327ecce6962f1353a8bc2cd62 | |
parent | 5c746eedda8cff2fc1692cf6dce376f4b0ca6fac (diff) | |
download | spark-22271f969363fd139e6cfb5a2d95a2607fb4e572.tar.gz spark-22271f969363fd139e6cfb5a2d95a2607fb4e572.tar.bz2 spark-22271f969363fd139e6cfb5a2d95a2607fb4e572.zip |
[SPARK-5462] [SQL] Use analyzed query plan in DataFrame.apply()
This patch changes DataFrame's `apply()` method to use an analyzed query plan when resolving column names. This fixes a bug where `apply` would throw "invalid call to qualifiers on unresolved object" errors when called on DataFrames constructed via `SQLContext.sql()`.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #4282 from JoshRosen/SPARK-5462 and squashes the following commits:
b9e6da2 [Josh Rosen] [SPARK-5462] Use analyzed query plan in DataFrame.apply().
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 8 |
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 4 |
2 files changed, 9 insertions, 3 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 1ff25adcf8..2694e81eac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -111,14 +111,16 @@ class DataFrame protected[sql](

   /** Returns the list of numeric columns, useful for doing aggregation. */
   protected[sql] def numericColumns: Seq[Expression] = {
     schema.fields.filter(_.dataType.isInstanceOf[NumericType]).map { n =>
-      logicalPlan.resolve(n.name, sqlContext.analyzer.resolver).get
+      queryExecution.analyzed.resolve(n.name, sqlContext.analyzer.resolver).get
     }
   }

   /** Resolves a column name into a Catalyst [[NamedExpression]]. */
   protected[sql] def resolve(colName: String): NamedExpression = {
-    logicalPlan.resolve(colName, sqlContext.analyzer.resolver).getOrElse(throw new RuntimeException(
-      s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})"""))
+    queryExecution.analyzed.resolve(colName, sqlContext.analyzer.resolver).getOrElse {
+      throw new RuntimeException(
+        s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""")
+    }
   }

   /** Left here for compatibility reasons. */
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index db83a906d9..df343adc79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -276,5 +276,9 @@ class DataFrameSuite extends QueryTest {
     )
   }

+  test("apply on query results (SPARK-5462)") {
+    val df = testData.sqlContext.sql("select key from testData")
+    checkAnswer(df("key"), testData.select('key).collect().toSeq)
+  }
 }