aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authortedyu <yuzhihong@gmail.com>2015-12-07 14:58:09 -0800
committerMichael Armbrust <michael@databricks.com>2015-12-07 14:58:09 -0800
commit84b809445f39b9030f272528bdaa39d1559cbc6e (patch)
tree55ad638904ed6b205ae9297c8fd787d1f3f8c908 /sql
parent871e85d9c14c6b19068cc732951a8ae8db61b411 (diff)
downloadspark-84b809445f39b9030f272528bdaa39d1559cbc6e.tar.gz
spark-84b809445f39b9030f272528bdaa39d1559cbc6e.tar.bz2
spark-84b809445f39b9030f272528bdaa39d1559cbc6e.zip
[SPARK-11884] Drop multiple columns in the DataFrame API
See the thread Ben started: http://search-hadoop.com/m/q3RTtveEuhjsr7g/ This PR adds drop() method to DataFrame which accepts multiple column names Author: tedyu <yuzhihong@gmail.com> Closes #9862 from ted-yu/master.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala24
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala7
2 files changed, 23 insertions, 8 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index eb87003692..243a8c853f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1261,16 +1261,24 @@ class DataFrame private[sql](
* @since 1.4.0
*/
def drop(colName: String): DataFrame = {
+ drop(Seq(colName) : _*)
+ }
+
+ /**
+ * Returns a new [[DataFrame]] with columns dropped.
+ * This is a no-op if schema doesn't contain column name(s).
+ * @group dfops
+ * @since 1.6.0
+ */
+ @scala.annotation.varargs
+ def drop(colNames: String*): DataFrame = {
val resolver = sqlContext.analyzer.resolver
- val shouldDrop = schema.exists(f => resolver(f.name, colName))
- if (shouldDrop) {
- val colsAfterDrop = schema.filter { field =>
- val name = field.name
- !resolver(name, colName)
- }.map(f => Column(f.name))
- select(colsAfterDrop : _*)
- } else {
+ val remainingCols =
+ schema.filter(f => colNames.forall(n => !resolver(f.name, n))).map(f => Column(f.name))
+ if (remainingCols.size == this.schema.size) {
this
+ } else {
+ this.select(remainingCols: _*)
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 76e9648aa7..605a6549dd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -378,6 +378,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df.schema.map(_.name) === Seq("value"))
}
+ test("drop columns using drop") {
+ val src = Seq((0, 2, 3)).toDF("a", "b", "c")
+ val df = src.drop("a", "b")
+ checkAnswer(df, Row(3))
+ assert(df.schema.map(_.name) === Seq("c"))
+ }
+
test("drop unknown column (no-op)") {
val df = testData.drop("random")
checkAnswer(