aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDaoyuan <daoyuan.wang@intel.com>2014-06-09 11:31:36 -0700
committerMichael Armbrust <michael@databricks.com>2014-06-09 11:31:36 -0700
commit0cf600280167a94faec75736223256e8f2e48085 (patch)
tree94d0bd6d1c02263ac0d3e9dbd9bd793e452944af /sql
parent35630c86ff0e27862c9d902887eb0a24d25867ae (diff)
downloadspark-0cf600280167a94faec75736223256e8f2e48085.tar.gz
spark-0cf600280167a94faec75736223256e8f2e48085.tar.bz2
spark-0cf600280167a94faec75736223256e8f2e48085.zip
[SPARK-1495][SQL]add support for left semi join
Just submit another solution for #395 Author: Daoyuan <daoyuan.wang@intel.com> Author: Michael Armbrust <michael@databricks.com> Author: Daoyuan Wang <daoyuan.wang@intel.com> Closes #837 from adrian-wang/left-semi-join-support and squashes the following commits: d39cd12 [Daoyuan Wang] Merge pull request #1 from marmbrus/pr/837 6713c09 [Michael Armbrust] Better debugging for failed query tests. 035b73e [Michael Armbrust] Add test for left semi that can't be done with a hash join. 5ec6fa4 [Michael Armbrust] Add left semi to SQL Parser. 4c726e5 [Daoyuan] improvement according to Michael 8d4a121 [Daoyuan] add golden files for leftsemijoin 83a3c8a [Daoyuan] scala style fix 14cff80 [Daoyuan] add support for left semi join
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala5
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala1
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala9
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala1
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala16
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala131
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala7
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala1
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala1
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-0-80b6466213face7fbcb0de044611e1f50
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-1-d1f6a3dea28a5f0fee08026bf33d91290
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea4
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-11-80b6466213face7fbcb0de044611e1f50
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-12-d1f6a3dea28a5f0fee08026bf33d91290
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-2-43d53504df013e6b35f81811138a167a1
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-3-b07d292423312aafa5e5762a579decd20
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-4-3ac2226efe7cb5d999c1c5e4ac2114be0
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-5-9c307c0559d735960ce77efa95b2b17b0
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-6-82921fc96eef547ec0f71027ee88298c0
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-7-b30aa3b4a45db6b64bb46b4d9bd32ff00
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a8570134
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin-9-c5efa6b8771a51610d655be461670e1e2
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-0-7087fb6281a34d00f1812d2ff4ba8b750
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-1-aa3f07f028027ffd13ab5535dc8215930
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-10-9914f44ecb6ae7587b62e5349ff60d041
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-11-2027ecb1495d5550c5d56abf6b95b0a72
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-2-3f65953ae60375156367c545339787820
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-3-645cf8b871c9b27418d6fa1d1bda9a520
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-4-333895fe6abca27c8edb5c91bfe10d2f2
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-5-896d0948c1df849df9764a6d8ad8fff920
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-6-b1e2ade89ae898650f0be4f796d8947b1
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-7-8e9c2969b999557363e40f9ebb3f6d7c1
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-8-c61b972d4409babe41d8963e841af45b1
-rw-r--r--sql/hive/src/test/resources/golden/leftsemijoin_mr-9-2027ecb1495d5550c5d56abf6b95b0a72
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala2
37 files changed, 216 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index a404e7441a..cc650128c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -131,6 +131,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
protected val OUTER = Keyword("OUTER")
protected val RIGHT = Keyword("RIGHT")
protected val SELECT = Keyword("SELECT")
+ protected val SEMI = Keyword("SEMI")
protected val STRING = Keyword("STRING")
protected val SUM = Keyword("SUM")
protected val TRUE = Keyword("TRUE")
@@ -241,6 +242,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
protected lazy val joinType: Parser[JoinType] =
INNER ^^^ Inner |
+ LEFT ~ SEMI ^^^ LeftSemi |
LEFT ~ opt(OUTER) ^^^ LeftOuter |
RIGHT ~ opt(OUTER) ^^^ RightOuter |
FULL ~ opt(OUTER) ^^^ FullOuter
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
index 4544b32958..820ecfb78b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
@@ -119,6 +119,11 @@ object HashFilteredJoin extends Logging with PredicateHelper {
case FilteredOperation(predicates, join @ Join(left, right, Inner, condition)) =>
logger.debug(s"Considering hash inner join on: ${predicates ++ condition}")
splitPredicates(predicates ++ condition, join)
+ // All predicates can be evaluated for left semi join (those that are in the WHERE
+ // clause can only from left table, so they can all be pushed down.)
+ case FilteredOperation(predicates, join @ Join(left, right, LeftSemi, condition)) =>
+ logger.debug(s"Considering hash left semi join on: ${predicates ++ condition}")
+ splitPredicates(predicates ++ condition, join)
case join @ Join(left, right, joinType, condition) =>
logger.debug(s"Considering hash join on: $condition")
splitPredicates(condition.toSeq, join)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
index ae8d7d3e42..613f4bb09d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
@@ -22,3 +22,4 @@ case object Inner extends JoinType
case object LeftOuter extends JoinType
case object RightOuter extends JoinType
case object FullOuter extends JoinType
+case object LeftSemi extends JoinType
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 732708e146..d3347b622f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.JoinType
+import org.apache.spark.sql.catalyst.plans.{LeftSemi, JoinType}
import org.apache.spark.sql.catalyst.types._
case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extends UnaryNode {
@@ -81,7 +81,12 @@ case class Join(
condition: Option[Expression]) extends BinaryNode {
def references = condition.map(_.references).getOrElse(Set.empty)
- def output = left.output ++ right.output
+ def output = joinType match {
+ case LeftSemi =>
+ left.output
+ case _ =>
+ left.output ++ right.output
+ }
}
case class InsertIntoTable(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 043be58edc..e371c82d81 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -193,6 +193,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
val strategies: Seq[Strategy] =
TakeOrdered ::
PartialAggregation ::
+ LeftSemiJoin ::
HashJoin ::
ParquetOperations ::
BasicOperators ::
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index cfa8bdae58..6463f47510 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -28,6 +28,22 @@ import org.apache.spark.sql.parquet._
private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
self: SQLContext#SparkPlanner =>
+ object LeftSemiJoin extends Strategy with PredicateHelper {
+ def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+ // Find left semi joins where at least some predicates can be evaluated by matching hash
+ // keys using the HashFilteredJoin pattern.
+ case HashFilteredJoin(LeftSemi, leftKeys, rightKeys, condition, left, right) =>
+ val semiJoin = execution.LeftSemiJoinHash(
+ leftKeys, rightKeys, planLater(left), planLater(right))
+ condition.map(Filter(_, semiJoin)).getOrElse(semiJoin) :: Nil
+ // no predicate can be evaluated by matching hash keys
+ case logical.Join(left, right, LeftSemi, condition) =>
+ execution.LeftSemiJoinBNL(
+ planLater(left), planLater(right), condition)(sparkContext) :: Nil
+ case _ => Nil
+ }
+ }
+
object HashJoin extends Strategy with PredicateHelper {
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
// Find inner joins where at least some predicates can be evaluated by matching hash keys
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
index 31cc26962a..88ff3d49a7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
@@ -142,6 +142,137 @@ case class HashJoin(
/**
* :: DeveloperApi ::
+ * Build the right table's join keys into a HashSet, and iteratively go through the left
+ * table, to find the if join keys are in the Hash set.
+ */
+@DeveloperApi
+case class LeftSemiJoinHash(
+ leftKeys: Seq[Expression],
+ rightKeys: Seq[Expression],
+ left: SparkPlan,
+ right: SparkPlan) extends BinaryNode {
+
+ override def outputPartitioning: Partitioning = left.outputPartitioning
+
+ override def requiredChildDistribution =
+ ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil
+
+ val (buildPlan, streamedPlan) = (right, left)
+ val (buildKeys, streamedKeys) = (rightKeys, leftKeys)
+
+ def output = left.output
+
+ @transient lazy val buildSideKeyGenerator = new Projection(buildKeys, buildPlan.output)
+ @transient lazy val streamSideKeyGenerator =
+ () => new MutableProjection(streamedKeys, streamedPlan.output)
+
+ def execute() = {
+
+ buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) =>
+ val hashTable = new java.util.HashSet[Row]()
+ var currentRow: Row = null
+
+ // Create a Hash set of buildKeys
+ while (buildIter.hasNext) {
+ currentRow = buildIter.next()
+ val rowKey = buildSideKeyGenerator(currentRow)
+ if(!rowKey.anyNull) {
+ val keyExists = hashTable.contains(rowKey)
+ if (!keyExists) {
+ hashTable.add(rowKey)
+ }
+ }
+ }
+
+ new Iterator[Row] {
+ private[this] var currentStreamedRow: Row = _
+ private[this] var currentHashMatched: Boolean = false
+
+ private[this] val joinKeys = streamSideKeyGenerator()
+
+ override final def hasNext: Boolean =
+ streamIter.hasNext && fetchNext()
+
+ override final def next() = {
+ currentStreamedRow
+ }
+
+ /**
+ * Searches the streamed iterator for the next row that has at least one match in hashtable.
+ *
+ * @return true if the search is successful, and false the streamed iterator runs out of
+ * tuples.
+ */
+ private final def fetchNext(): Boolean = {
+ currentHashMatched = false
+ while (!currentHashMatched && streamIter.hasNext) {
+ currentStreamedRow = streamIter.next()
+ if (!joinKeys(currentStreamedRow).anyNull) {
+ currentHashMatched = hashTable.contains(joinKeys.currentValue)
+ }
+ }
+ currentHashMatched
+ }
+ }
+ }
+ }
+}
+
+/**
+ * :: DeveloperApi ::
+ * Using BroadcastNestedLoopJoin to calculate left semi join result when there's no join keys
+ * for hash join.
+ */
+@DeveloperApi
+case class LeftSemiJoinBNL(
+ streamed: SparkPlan, broadcast: SparkPlan, condition: Option[Expression])
+ (@transient sc: SparkContext)
+ extends BinaryNode {
+ // TODO: Override requiredChildDistribution.
+
+ override def outputPartitioning: Partitioning = streamed.outputPartitioning
+
+ override def otherCopyArgs = sc :: Nil
+
+ def output = left.output
+
+ /** The Streamed Relation */
+ def left = streamed
+ /** The Broadcast relation */
+ def right = broadcast
+
+ @transient lazy val boundCondition =
+ InterpretedPredicate(
+ condition
+ .map(c => BindReferences.bindReference(c, left.output ++ right.output))
+ .getOrElse(Literal(true)))
+
+
+ def execute() = {
+ val broadcastedRelation = sc.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq)
+
+ streamed.execute().mapPartitions { streamedIter =>
+ val joinedRow = new JoinedRow
+
+ streamedIter.filter(streamedRow => {
+ var i = 0
+ var matched = false
+
+ while (i < broadcastedRelation.value.size && !matched) {
+ val broadcastedRow = broadcastedRelation.value(i)
+ if (boundCondition(joinedRow(streamedRow, broadcastedRow))) {
+ matched = true
+ }
+ i += 1
+ }
+ matched
+ })
+ }
+ }
+}
+
+/**
+ * :: DeveloperApi ::
*/
@DeveloperApi
case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNode {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index d6072b402a..d7f6abaf5d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -44,7 +44,7 @@ class QueryTest extends FunSuite {
fail(
s"""
|Exception thrown while executing query:
- |${rdd.logicalPlan}
+ |${rdd.queryExecution}
|== Exception ==
|$e
""".stripMargin)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index aa0c426f6f..d651b967a6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -40,6 +40,13 @@ class SQLQuerySuite extends QueryTest {
arrayData.map(d => (d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect().toSeq)
}
+ test("left semi greater than predicate") {
+ checkAnswer(
+ sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2"),
+ Seq((3,1), (3,2))
+ )
+ }
+
test("index into array of arrays") {
checkAnswer(
sql(
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b21f24dad7..fbab2ac16b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -224,6 +224,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
DataSinks,
Scripts,
PartialAggregation,
+ LeftSemiJoin,
HashJoin,
BasicOperators,
CartesianProduct,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 7e91c16c6b..cc9e24a057 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -685,6 +685,7 @@ private[hive] object HiveQl {
case "TOK_RIGHTOUTERJOIN" => RightOuter
case "TOK_LEFTOUTERJOIN" => LeftOuter
case "TOK_FULLOUTERJOIN" => FullOuter
+ case "TOK_LEFTSEMIJOIN" => LeftSemi
}
assert(other.size <= 1, "Unhandled join clauses.")
Join(nodeToRelation(relation1),
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-0-80b6466213face7fbcb0de044611e1f5 b/sql/hive/src/test/resources/golden/leftsemijoin-0-80b6466213face7fbcb0de044611e1f5
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-0-80b6466213face7fbcb0de044611e1f5
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-1-d1f6a3dea28a5f0fee08026bf33d9129 b/sql/hive/src/test/resources/golden/leftsemijoin-1-d1f6a3dea28a5f0fee08026bf33d9129
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-1-d1f6a3dea28a5f0fee08026bf33d9129
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
new file mode 100644
index 0000000000..25ce912507
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
@@ -0,0 +1,4 @@
+Hank 2
+Hank 2
+Joe 2
+Joe 2
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-11-80b6466213face7fbcb0de044611e1f5 b/sql/hive/src/test/resources/golden/leftsemijoin-11-80b6466213face7fbcb0de044611e1f5
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-11-80b6466213face7fbcb0de044611e1f5
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-12-d1f6a3dea28a5f0fee08026bf33d9129 b/sql/hive/src/test/resources/golden/leftsemijoin-12-d1f6a3dea28a5f0fee08026bf33d9129
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-12-d1f6a3dea28a5f0fee08026bf33d9129
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-2-43d53504df013e6b35f81811138a167a b/sql/hive/src/test/resources/golden/leftsemijoin-2-43d53504df013e6b35f81811138a167a
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-2-43d53504df013e6b35f81811138a167a
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-3-b07d292423312aafa5e5762a579decd2 b/sql/hive/src/test/resources/golden/leftsemijoin-3-b07d292423312aafa5e5762a579decd2
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-3-b07d292423312aafa5e5762a579decd2
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-4-3ac2226efe7cb5d999c1c5e4ac2114be b/sql/hive/src/test/resources/golden/leftsemijoin-4-3ac2226efe7cb5d999c1c5e4ac2114be
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-4-3ac2226efe7cb5d999c1c5e4ac2114be
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-5-9c307c0559d735960ce77efa95b2b17b b/sql/hive/src/test/resources/golden/leftsemijoin-5-9c307c0559d735960ce77efa95b2b17b
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-5-9c307c0559d735960ce77efa95b2b17b
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-6-82921fc96eef547ec0f71027ee88298c b/sql/hive/src/test/resources/golden/leftsemijoin-6-82921fc96eef547ec0f71027ee88298c
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-6-82921fc96eef547ec0f71027ee88298c
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-7-b30aa3b4a45db6b64bb46b4d9bd32ff0 b/sql/hive/src/test/resources/golden/leftsemijoin-7-b30aa3b4a45db6b64bb46b4d9bd32ff0
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-7-b30aa3b4a45db6b64bb46b4d9bd32ff0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013 b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
new file mode 100644
index 0000000000..25ce912507
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
@@ -0,0 +1,4 @@
+Hank 2
+Hank 2
+Joe 2
+Joe 2
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-9-c5efa6b8771a51610d655be461670e1e b/sql/hive/src/test/resources/golden/leftsemijoin-9-c5efa6b8771a51610d655be461670e1e
new file mode 100644
index 0000000000..f1470bad57
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-9-c5efa6b8771a51610d655be461670e1e
@@ -0,0 +1,2 @@
+2 Tie
+2 Tie
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-0-7087fb6281a34d00f1812d2ff4ba8b75 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-0-7087fb6281a34d00f1812d2ff4ba8b75
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-0-7087fb6281a34d00f1812d2ff4ba8b75
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-1-aa3f07f028027ffd13ab5535dc821593 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-1-aa3f07f028027ffd13ab5535dc821593
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-1-aa3f07f028027ffd13ab5535dc821593
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-10-9914f44ecb6ae7587b62e5349ff60d04 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-10-9914f44ecb6ae7587b62e5349ff60d04
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-10-9914f44ecb6ae7587b62e5349ff60d04
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-11-2027ecb1495d5550c5d56abf6b95b0a7 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-11-2027ecb1495d5550c5d56abf6b95b0a7
new file mode 100644
index 0000000000..6ed281c757
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-11-2027ecb1495d5550c5d56abf6b95b0a7
@@ -0,0 +1,2 @@
+1
+1
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-2-3f65953ae60375156367c54533978782 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-2-3f65953ae60375156367c54533978782
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-2-3f65953ae60375156367c54533978782
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-3-645cf8b871c9b27418d6fa1d1bda9a52 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-3-645cf8b871c9b27418d6fa1d1bda9a52
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-3-645cf8b871c9b27418d6fa1d1bda9a52
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-4-333895fe6abca27c8edb5c91bfe10d2f b/sql/hive/src/test/resources/golden/leftsemijoin_mr-4-333895fe6abca27c8edb5c91bfe10d2f
new file mode 100644
index 0000000000..6ed281c757
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-4-333895fe6abca27c8edb5c91bfe10d2f
@@ -0,0 +1,2 @@
+1
+1
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-5-896d0948c1df849df9764a6d8ad8fff9 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-5-896d0948c1df849df9764a6d8ad8fff9
new file mode 100644
index 0000000000..179ef0e020
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-5-896d0948c1df849df9764a6d8ad8fff9
@@ -0,0 +1,20 @@
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-6-b1e2ade89ae898650f0be4f796d8947b b/sql/hive/src/test/resources/golden/leftsemijoin_mr-6-b1e2ade89ae898650f0be4f796d8947b
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-6-b1e2ade89ae898650f0be4f796d8947b
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-7-8e9c2969b999557363e40f9ebb3f6d7c b/sql/hive/src/test/resources/golden/leftsemijoin_mr-7-8e9c2969b999557363e40f9ebb3f6d7c
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-7-8e9c2969b999557363e40f9ebb3f6d7c
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-8-c61b972d4409babe41d8963e841af45b b/sql/hive/src/test/resources/golden/leftsemijoin_mr-8-c61b972d4409babe41d8963e841af45b
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-8-c61b972d4409babe41d8963e841af45b
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin_mr-9-2027ecb1495d5550c5d56abf6b95b0a7 b/sql/hive/src/test/resources/golden/leftsemijoin_mr-9-2027ecb1495d5550c5d56abf6b95b0a7
new file mode 100644
index 0000000000..6ed281c757
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/leftsemijoin_mr-9-2027ecb1495d5550c5d56abf6b95b0a7
@@ -0,0 +1,2 @@
+1
+1
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 9031abf733..fb8f272d5a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -480,6 +480,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"lateral_view",
"lateral_view_cp",
"lateral_view_ppd",
+ "leftsemijoin",
+ "leftsemijoin_mr",
"lineage1",
"literal_double",
"literal_ints",