aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
author: Reynold Xin <rxin@databricks.com> 2015-01-29 19:09:08 -0800
committer: Reynold Xin <rxin@databricks.com> 2015-01-29 19:09:08 -0800
commit: 80def9deb3bfc30d5b622b32aecb0322341a7f62 (patch)
tree: 9b42a7be87468d451aa3f7e3c3d06438b5b007e5 /sql/core
parent: 22271f969363fd139e6cfb5a2d95a2607fb4e572 (diff)
download: spark-80def9deb3bfc30d5b622b32aecb0322341a7f62.tar.gz
spark-80def9deb3bfc30d5b622b32aecb0322341a7f62.tar.bz2
spark-80def9deb3bfc30d5b622b32aecb0322341a7f62.zip
[SQL] Support df("*") to select all columns in a data frame.
This PR makes Star a trait, and provides two implementations: UnresolvedStar (used for *, tblName.*) and ResolvedStar (used for df("*")).
Author: Reynold Xin <rxin@databricks.com>
Closes #4283 from rxin/df-star and squashes the following commits:
c9cba3e [Reynold Xin] Removed mapFunction in UnresolvedStar.
1a3a1d7 [Reynold Xin] [SQL] Support df("*") to select all columns in a data frame.
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/Column.scala 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 8
3 files changed, 11 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 68c9cb0c02..174c403059 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql
import scala.language.implicitConversions
import org.apache.spark.sql.Dsl.lit
-import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, Star}
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedStar, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan}
import org.apache.spark.sql.types._
@@ -71,8 +71,8 @@ class Column(
* - "df.*" becomes an expression selecting all columns in data frame "df".
*/
def this(name: String) = this(name match {
- case "*" => Star(None)
- case _ if name.endsWith(".*") => Star(Some(name.substring(0, name.length - 2)))
+ case "*" => UnresolvedStar(None)
+ case _ if name.endsWith(".*") => UnresolvedStar(Some(name.substring(0, name.length - 2)))
case _ => UnresolvedAttribute(name)
})
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 2694e81eac..1096e39659 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -31,7 +31,7 @@ import org.apache.spark.api.python.SerDeUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedRelation}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.{JoinType, Inner}
import org.apache.spark.sql.catalyst.plans.logical._
@@ -265,7 +265,7 @@ class DataFrame protected[sql](
*/
override def apply(colName: String): Column = colName match {
case "*" =>
- Column("*")
+ new Column(ResolvedStar(schema.fieldNames.map(resolve)))
case _ =>
val expr = resolve(colName)
new Column(Some(sqlContext), Some(Project(Seq(expr), logicalPlan)), expr)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 6428554ec7..2d464c2b53 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -31,10 +31,14 @@ class ColumnExpressionSuite extends QueryTest {
checkAnswer(testData.select($"*"), testData.collect().toSeq)
}
- ignore("star qualified by data frame object") {
+ test("star qualified by data frame object") {
// This is not yet supported.
val df = testData.toDataFrame
- checkAnswer(df.select(df("*")), df.collect().toSeq)
+ val goldAnswer = df.collect().toSeq
+ checkAnswer(df.select(df("*")), goldAnswer)
+
+ val df1 = df.select(df("*"), lit("abcd").as("litCol"))
+ checkAnswer(df1.select(df("*")), goldAnswer)
}
test("star qualified by table name") {