aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorTakuya UESHIN <ueshin@happy-camper.st>2014-07-10 19:27:24 -0700
committerMichael Armbrust <michael@databricks.com>2014-07-10 19:27:24 -0700
commit10b59ba230cb426f2a5d43cd0a4964a556e24c3f (patch)
tree0bcdcf0307775fa6b1bb5e383a11ba50c58d77df /sql
parentf5abd271292f5c98eb8b1974c1df31d08ed388dd (diff)
downloadspark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.tar.gz
spark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.tar.bz2
spark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.zip
[SPARK-2428][SQL] Add except and intersect methods to SchemaRDD.
Author: Takuya UESHIN <ueshin@happy-camper.st> Closes #1355 from ueshin/issues/SPARK-2428 and squashes the following commits: b6fa264 [Takuya UESHIN] Add except and intersect methods to SchemaRDD.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala20
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala21
2 files changed, 41 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 8bcfc7c064..0c95b66854 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -257,6 +257,26 @@ class SchemaRDD(
new SchemaRDD(sqlContext, Union(logicalPlan, otherPlan.logicalPlan))
/**
+ * Performs a relational except on two SchemaRDDs
+ *
+ * @param otherPlan the [[SchemaRDD]] that should be excepted from this one.
+ *
+ * @group Query
+ */
+ def except(otherPlan: SchemaRDD): SchemaRDD =
+ new SchemaRDD(sqlContext, Except(logicalPlan, otherPlan.logicalPlan))
+
+ /**
+ * Performs a relational intersect on two SchemaRDDs
+ *
+ * @param otherPlan the [[SchemaRDD]] that should be intersected with this one.
+ *
+ * @group Query
+ */
+ def intersect(otherPlan: SchemaRDD): SchemaRDD =
+ new SchemaRDD(sqlContext, Intersect(logicalPlan, otherPlan.logicalPlan))
+
+ /**
* Filters tuples using a function over the value of the specified column.
*
* {{{
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
index 04ac008682..68dae58728 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
@@ -168,4 +168,25 @@ class DslQuerySuite extends QueryTest {
test("zero count") {
assert(emptyTableData.count() === 0)
}
+
+ test("except") {
+ checkAnswer(
+ lowerCaseData.except(upperCaseData),
+ (1, "a") ::
+ (2, "b") ::
+ (3, "c") ::
+ (4, "d") :: Nil)
+ checkAnswer(lowerCaseData.except(lowerCaseData), Nil)
+ checkAnswer(upperCaseData.except(upperCaseData), Nil)
+ }
+
+ test("intersect") {
+ checkAnswer(
+ lowerCaseData.intersect(lowerCaseData),
+ (1, "a") ::
+ (2, "b") ::
+ (3, "c") ::
+ (4, "d") :: Nil)
+ checkAnswer(lowerCaseData.intersect(upperCaseData), Nil)
+ }
}