diff options
author | Takuya UESHIN <ueshin@happy-camper.st> | 2014-07-10 19:27:24 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-07-10 19:27:24 -0700 |
commit | 10b59ba230cb426f2a5d43cd0a4964a556e24c3f (patch) | |
tree | 0bcdcf0307775fa6b1bb5e383a11ba50c58d77df /sql | |
parent | f5abd271292f5c98eb8b1974c1df31d08ed388dd (diff) | |
download | spark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.tar.gz spark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.tar.bz2 spark-10b59ba230cb426f2a5d43cd0a4964a556e24c3f.zip |
[SPARK-2428][SQL] Add except and intersect methods to SchemaRDD.
Author: Takuya UESHIN <ueshin@happy-camper.st>
Closes #1355 from ueshin/issues/SPARK-2428 and squashes the following commits:
b6fa264 [Takuya UESHIN] Add except and intersect methods to SchemaRDD.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala | 20 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala | 21 |
2 files changed, 41 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala index 8bcfc7c064..0c95b66854 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala @@ -257,6 +257,26 @@ class SchemaRDD( new SchemaRDD(sqlContext, Union(logicalPlan, otherPlan.logicalPlan)) /** + * Performs a relational except on two SchemaRDDs + * + * @param otherPlan the [[SchemaRDD]] that should be excepted from this one. + * + * @group Query + */ + def except(otherPlan: SchemaRDD): SchemaRDD = + new SchemaRDD(sqlContext, Except(logicalPlan, otherPlan.logicalPlan)) + + /** + * Performs a relational intersect on two SchemaRDDs + * + * @param otherPlan the [[SchemaRDD]] that should be intersected with this one. + * + * @group Query + */ + def intersect(otherPlan: SchemaRDD): SchemaRDD = + new SchemaRDD(sqlContext, Intersect(logicalPlan, otherPlan.logicalPlan)) + + /** * Filters tuples using a function over the value of the specified column. * * {{{ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala index 04ac008682..68dae58728 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala @@ -168,4 +168,25 @@ class DslQuerySuite extends QueryTest { test("zero count") { assert(emptyTableData.count() === 0) } + + test("except") { + checkAnswer( + lowerCaseData.except(upperCaseData), + (1, "a") :: + (2, "b") :: + (3, "c") :: + (4, "d") :: Nil) + checkAnswer(lowerCaseData.except(lowerCaseData), Nil) + checkAnswer(upperCaseData.except(upperCaseData), Nil) + } + + test("intersect") { + checkAnswer( + lowerCaseData.intersect(lowerCaseData), + (1, "a") :: + (2, "b") :: + (3, "c") :: + (4, "d") :: Nil) + checkAnswer(lowerCaseData.intersect(upperCaseData), Nil) + } } |