about summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
authorYu ISHIKAWA <yuu.ishikawa@gmail.com>2015-06-18 23:13:05 -0700
committerReynold Xin <rxin@databricks.com>2015-06-18 23:13:05 -0700
commit754929b153aba3a8f8fbafa1581957da4ccc18be (patch)
treedf00538b6ef544095e1c60da6ff8eb78a1f3cfb6 /sql
parenta71cbbdea581573192a59bf8472861c463c40fcb (diff)
downloadspark-754929b153aba3a8f8fbafa1581957da4ccc18be.tar.gz
spark-754929b153aba3a8f8fbafa1581957da4ccc18be.tar.bz2
spark-754929b153aba3a8f8fbafa1581957da4ccc18be.zip
[SPARK-8348][SQL] Add in operator to DataFrame Column
I have added it for only Scala. TODO: we should also support `in` operator in Python. Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #6824 from yu-iskw/SPARK-8348 and squashes the following commits: e76d02f [Yu ISHIKAWA] Not use infix notation 6f744ac [Yu ISHIKAWA] Fit the test cases because these used the old test data set. 00077d3 [Yu ISHIKAWA] [SPARK-8348][SQL] Add in operator to DataFrame Column
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/Column.scala2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala16
2 files changed, 17 insertions, 1 deletion
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index d3efa83380..b4e008a6e8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -621,7 +621,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* @since 1.3.0
*/
@scala.annotation.varargs
- def in(list: Column*): Column = In(expr, list.map(_.expr))
+ def in(list: Any*): Column = In(expr, list.map(lit(_).expr))
/**
* SQL like expression.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 5a08578e7b..88bb743ab0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -296,6 +296,22 @@ class ColumnExpressionSuite extends QueryTest {
checkAnswer(testData.filter($"a".between($"b", $"c")), expectAnswer)
}
+ test("in") {
+ val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
+ checkAnswer(df.filter($"a".in(1, 2)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".in(3, 2)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
+ checkAnswer(df.filter($"a".in(3, 1)),
+ df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
+ checkAnswer(df.filter($"b".in("y", "x")),
+ df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".in("z", "x")),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
+ checkAnswer(df.filter($"b".in("z", "y")),
+ df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
+ }
+
val booleanData = ctx.createDataFrame(ctx.sparkContext.parallelize(
Row(false, false) ::
Row(false, true) ::