author      Josh Rosen <joshrosen@databricks.com>    2015-01-29 18:23:05 -0800
committer   Reynold Xin <rxin@databricks.com>        2015-01-29 18:23:05 -0800
commit      22271f969363fd139e6cfb5a2d95a2607fb4e572 (patch)
tree        04ee988f3c772ea327ecce6962f1353a8bc2cd62 /sql
parent      5c746eedda8cff2fc1692cf6dce376f4b0ca6fac (diff)
[SPARK-5462] [SQL] Use analyzed query plan in DataFrame.apply()
This patch changes DataFrame's `apply()` method to use an analyzed query plan when resolving column names. This fixes a bug where `apply` would throw "invalid call to qualifiers on unresolved object" errors when called on DataFrames constructed via `SQLContext.sql()`.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #4282 from JoshRosen/SPARK-5462 and squashes the following commits:

b9e6da2 [Josh Rosen] [SPARK-5462] Use analyzed query plan in DataFrame.apply().
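For context, a rough sketch of the failure mode described above: a DataFrame produced by `SQLContext.sql()` starts from a plan that is only resolved by the analyzer, so resolving a column via `apply()` against the unresolved `logicalPlan` could fail, while resolving against `queryExecution.analyzed` (as this patch does) succeeds. This is an illustration assuming the Spark 1.3-era DataFrame API; the table name, schema, and data below are hypothetical and not part of this commit.

  // Illustrative sketch only; setup names are hypothetical.
  import org.apache.spark.{SparkConf, SparkContext}
  import org.apache.spark.sql.SQLContext

  case class Record(key: Int, value: String)

  val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("spark-5462-sketch"))
  val sqlContext = new SQLContext(sc)

  // Register a small table so that SQLContext.sql() has something to query.
  val data = sc.parallelize(Seq(Record(1, "a"), Record(2, "b")))
  sqlContext.createDataFrame(data).registerTempTable("testData")

  // A DataFrame built from sql() carries a plan that is only resolved during analysis.
  val df = sqlContext.sql("SELECT key FROM testData")

  // Before this patch, df("key") resolved the column name against the unresolved
  // logicalPlan and could throw "invalid call to qualifiers on unresolved object";
  // resolving against queryExecution.analyzed makes this succeed.
  val keyColumn = df("key")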
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala      | 8
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 4
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 1ff25adcf8..2694e81eac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -111,14 +111,16 @@ class DataFrame protected[sql](
   /** Returns the list of numeric columns, useful for doing aggregation. */
   protected[sql] def numericColumns: Seq[Expression] = {
     schema.fields.filter(_.dataType.isInstanceOf[NumericType]).map { n =>
-      logicalPlan.resolve(n.name, sqlContext.analyzer.resolver).get
+      queryExecution.analyzed.resolve(n.name, sqlContext.analyzer.resolver).get
     }
   }
 
   /** Resolves a column name into a Catalyst [[NamedExpression]]. */
   protected[sql] def resolve(colName: String): NamedExpression = {
-    logicalPlan.resolve(colName, sqlContext.analyzer.resolver).getOrElse(throw new RuntimeException(
-      s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})"""))
+    queryExecution.analyzed.resolve(colName, sqlContext.analyzer.resolver).getOrElse {
+      throw new RuntimeException(
+        s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""")
+    }
   }
 
   /** Left here for compatibility reasons. */
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index db83a906d9..df343adc79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -276,5 +276,9 @@ class DataFrameSuite extends QueryTest {
     )
   }
 
+  test("apply on query results (SPARK-5462)") {
+    val df = testData.sqlContext.sql("select key from testData")
+    checkAnswer(df("key"), testData.select('key).collect().toSeq)
+  }
 }