author    Steve Loughran <stevel@hortonworks.com>  2015-04-01 16:26:54 +0100
committer Sean Owen <sowen@cloudera.com>           2015-04-01 16:26:54 +0100
commit    ee11be258251adf900680927ba200bf46512cc04 (patch)
tree      80c1ce4eea558679dc22bd27f926ee7c215af3de /sql
parent    d36c5fca7b9227c4c6e1b0c1455269b5fd8d4852 (diff)
SPARK-6433 hive tests to import spark-sql test JAR for QueryTest access
1. Test JARs are built & published.
2. The log4j.properties resource is explicitly excluded. Without this, downstream test-run logging depends on the order in which the JARs are listed/loaded.
3. sql/hive pulls in the spark-sql & spark-catalyst test JARs for its test runs.
4. The copied-in test classes were removed, and a test was edited to drop its now-duplicate assert method.
5. Spark streaming is now built with the same plugin/phase as the rest, but its shade-plugin declaration is kept (so it differs from the rest of the test plugins). Due to (2), its test JAR no longer includes its log4j file.

Outstanding issues:
* Should the JARs be shaded? `spark-streaming-test.jar` is, but given these are test JARs for developers only, especially within the same Spark source tree, it's hard to justify.
* `maven-jar-plugin` v2.6 was explicitly selected; without this, the version inherited from the Apache parent POM (2.4) is chosen.
* Are there any other resources to exclude?

Author: Steve Loughran <stevel@hortonworks.com>

Closes #5119 from steveloughran/stevel/patches/SPARK-6433-test-jars and squashes the following commits:

81ceb01 [Steve Loughran] SPARK-6433 add a clearer comment explaining what the plugin is doing & why
a6dca33 [Steve Loughran] SPARK-6433 : pull configuration section from archive plugin
c2b5f89 [Steve Loughran] SPARK-6433 omit "jar" goal from jar plugin
fdac51b [Steve Loughran] SPARK-6433 -002; indentation & delegate plugin version to parent
650f442 [Steve Loughran] SPARK-6433 patch 001: test JARs are built; sql/hive pulls in spark-sql & spark-catalyst for its test runs
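For reference, the plugin stanza the message describes looks roughly like the sketch below. This is a hypothetical reconstruction: the authoritative stanza lives in the parent/module POMs, which fall outside this sql/-limited diff, and the exact execution binding and resource path are assumptions.

<!-- Hypothetical sketch of the test-jar configuration described above. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <!-- Pinned explicitly; otherwise the Apache parent POM's 2.4 would be used. -->
  <version>2.6</version>
  <executions>
    <execution>
      <goals>
        <goal>test-jar</goal>
      </goals>
      <configuration>
        <!-- Keep log4j.properties out of the test JAR so downstream test-run
             logging does not depend on classpath ordering. -->
        <excludes>
          <exclude>log4j.properties</exclude>
        </excludes>
      </configuration>
    </execution>
  </executions>
</plugin>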
Diffstat (limited to 'sql')
-rw-r--r--  sql/hive/pom.xml                                                             |  14
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/QueryTest.scala                 | 140
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala  |  57
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala    |  15
4 files changed, 14 insertions, 212 deletions
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index a9816f6c38..04440076a2 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -89,6 +89,20 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<profiles>
<profile>
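The `<type>test-jar</type>` declarations above resolve the artifacts produced by the jar plugin's `test-jar` goal. A near-equivalent way to express the same dependency, shown here only as a hypothetical alternative (it is not part of this commit), is the `tests` classifier:

<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_${scala.binary.version}</artifactId>
  <version>${project.version}</version>
  <!-- test JARs are published under the "tests" classifier -->
  <classifier>tests</classifier>
  <scope>test</scope>
</dependency>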
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/QueryTest.scala
deleted file mode 100644
index 0270e63557..0000000000
--- a/sql/hive/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql
-
-import scala.collection.JavaConversions._
-
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.util._
-
-
-/**
- * *** DUPLICATED FROM sql/core. ***
- *
- * It is hard to have Maven allow one subproject to depend on another subproject's test code.
- * So, we duplicate this code here.
- */
-class QueryTest extends PlanTest {
-
- /**
- * Runs the plan and makes sure the answer contains all of the keywords, or that
- * none of the keywords are listed in the answer
- * @param rdd the [[DataFrame]] to be executed
- * @param exists true to make sure the keywords are listed in the output; otherwise,
- *               to make sure none of the keywords are listed in the output
- * @param keywords keyword in string array
- */
- def checkExistence(rdd: DataFrame, exists: Boolean, keywords: String*) {
- val outputs = rdd.collect().map(_.mkString).mkString
- for (key <- keywords) {
- if (exists) {
- assert(outputs.contains(key), s"Failed for $rdd ($key doesn't exist in result)")
- } else {
- assert(!outputs.contains(key), s"Failed for $rdd ($key existed in the result)")
- }
- }
- }
-
- /**
- * Runs the plan and makes sure the answer matches the expected result.
- * @param rdd the [[DataFrame]] to be executed
- * @param expectedAnswer the expected result in a [[Seq]] of [[Row]]s.
- */
- protected def checkAnswer(rdd: DataFrame, expectedAnswer: Seq[Row]): Unit = {
- QueryTest.checkAnswer(rdd, expectedAnswer) match {
- case Some(errorMessage) => fail(errorMessage)
- case None =>
- }
- }
-
- protected def checkAnswer(rdd: DataFrame, expectedAnswer: Row): Unit = {
- checkAnswer(rdd, Seq(expectedAnswer))
- }
-
- def sqlTest(sqlString: String, expectedAnswer: Seq[Row])(implicit sqlContext: SQLContext): Unit = {
- test(sqlString) {
- checkAnswer(sqlContext.sql(sqlString), expectedAnswer)
- }
- }
-}
-
-object QueryTest {
- /**
- * Runs the plan and makes sure the answer matches the expected result.
- * If an exception is thrown during execution, or the contents of the DataFrame do not
- * match the expected result, an error message will be returned. Otherwise, [[None]] will
- * be returned.
- * @param rdd the [[DataFrame]] to be executed
- * @param expectedAnswer the expected result in a [[Seq]] of [[Row]]s.
- */
- def checkAnswer(rdd: DataFrame, expectedAnswer: Seq[Row]): Option[String] = {
- val isSorted = rdd.logicalPlan.collect { case s: logical.Sort => s }.nonEmpty
- def prepareAnswer(answer: Seq[Row]): Seq[Row] = {
- // Converts data to types that we can do equality comparison using Scala collections.
- // For BigDecimal type, the Scala type has a better definition of equality test (similar to
- // Java's java.math.BigDecimal.compareTo).
- val converted: Seq[Row] = answer.map { s =>
- Row.fromSeq(s.toSeq.map {
- case d: java.math.BigDecimal => BigDecimal(d)
- case o => o
- })
- }
- if (!isSorted) converted.sortBy(_.toString) else converted
- }
- val sparkAnswer = try rdd.collect().toSeq catch {
- case e: Exception =>
- val errorMessage =
- s"""
- |Exception thrown while executing query:
- |${rdd.queryExecution}
- |== Exception ==
- |$e
- |${org.apache.spark.sql.catalyst.util.stackTraceToString(e)}
- """.stripMargin
- return Some(errorMessage)
- }
-
- if (prepareAnswer(expectedAnswer) != prepareAnswer(sparkAnswer)) {
- val errorMessage =
- s"""
- |Results do not match for query:
- |${rdd.logicalPlan}
- |== Analyzed Plan ==
- |${rdd.queryExecution.analyzed}
- |== Physical Plan ==
- |${rdd.queryExecution.executedPlan}
- |== Results ==
- |${sideBySide(
- s"== Correct Answer - ${expectedAnswer.size} ==" +:
- prepareAnswer(expectedAnswer).map(_.toString),
- s"== Spark Answer - ${sparkAnswer.size} ==" +:
- prepareAnswer(sparkAnswer).map(_.toString)).mkString("\n")}
- """.stripMargin
- return Some(errorMessage)
- }
-
- return None
- }
-
- def checkAnswer(rdd: DataFrame, expectedAnswer: java.util.List[Row]): String = {
- checkAnswer(rdd, expectedAnswer.toSeq) match {
- case Some(errorMessage) => errorMessage
- case None => null
- }
- }
-}
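With the duplicated class above deleted, hive suites compile against the canonical QueryTest published in the spark-sql test JAR. A minimal, hypothetical sketch of a suite written against the shared helpers (the suite name and query are illustrative, not from this commit):

package org.apache.spark.sql.hive

import org.apache.spark.sql.{QueryTest, Row}  // now resolved from the spark-sql test JAR
import org.apache.spark.sql.hive.test.TestHive

class SharedQueryTestExampleSuite extends QueryTest {
  test("checkAnswer is inherited from the shared QueryTest") {
    // Same checkAnswer(df, Row) API the deleted copy provided.
    checkAnswer(TestHive.sql("SELECT 1 + 1"), Row(2))
  }
}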
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
deleted file mode 100644
index 98f1c0e69e..0000000000
--- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans
-
-import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util._
-import org.scalatest.FunSuite
-
-/**
- * *** DUPLICATED FROM sql/catalyst/plans. ***
- *
- * It is hard to have Maven allow one subproject to depend on another subproject's test code.
- * So, we duplicate this code here.
- */
-class PlanTest extends FunSuite {
-
- /**
- * Since attribute references are given globally unique ids during analysis,
- * we must normalize them to check if two different queries are identical.
- */
- protected def normalizeExprIds(plan: LogicalPlan) = {
- plan transformAllExpressions {
- case a: AttributeReference =>
- AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
- case a: Alias =>
- Alias(a.child, a.name)(exprId = ExprId(0))
- }
- }
-
- /** Fails the test if the two plans do not match */
- protected def comparePlans(plan1: LogicalPlan, plan2: LogicalPlan) {
- val normalized1 = normalizeExprIds(plan1)
- val normalized2 = normalizeExprIds(plan2)
- if (normalized1 != normalized2)
- fail(
- s"""
- |== FAIL: Plans do not match ===
- |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")}
- """.stripMargin)
- }
-}
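comparePlans above only works because normalizeExprIds rewrites every AttributeReference and Alias to ExprId(0): analysis assigns globally unique expression ids, so two analyses of the same query otherwise produce unequal attributes. A hypothetical sketch of the effect, using the same constructor form as the deleted code:

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId}
import org.apache.spark.sql.types.IntegerType

// The "same" column produced by two separate analysis runs carries distinct ids:
val a1 = AttributeReference("id", IntegerType, nullable = true)(exprId = ExprId(1))
val a2 = AttributeReference("id", IntegerType, nullable = true)(exprId = ExprId(2))
assert(a1 != a2)  // identical name and type, unequal because the exprIds differ

// Normalizing both to ExprId(0), as normalizeExprIds does, restores equality:
val n1 = AttributeReference(a1.name, a1.dataType, a1.nullable)(exprId = ExprId(0))
val n2 = AttributeReference(a2.name, a2.dataType, a2.nullable)(exprId = ExprId(0))
assert(n1 == n2)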
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 221a0c263d..c188264072 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -24,21 +24,6 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest}
import org.apache.spark.storage.RDDBlockId

class CachedTableSuite extends QueryTest {
- /**
- * Throws a test failed exception when the number of cached tables differs from the expected
- * number.
- */
- def assertCached(query: DataFrame, numCachedTables: Int = 1): Unit = {
- val planWithCaching = query.queryExecution.withCachedData
- val cachedData = planWithCaching collect {
- case cached: InMemoryRelation => cached
- }
-
- assert(
- cachedData.size == numCachedTables,
- s"Expected query to contain $numCachedTables, but it actually had ${cachedData.size}\n" +
- planWithCaching)
- }
def rddIdOf(tableName: String): Int = {
val executedPlan = table(tableName).queryExecution.executedPlan
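With the local duplicate removed, assertCached resolves from the shared QueryTest in the spark-sql test JAR. A hypothetical usage sketch (the table name is illustrative; cacheTable, table, and uncacheTable come from TestHive):

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.hive.test.TestHive._

class CachedTableExampleSuite extends QueryTest {
  test("a cached table is backed by an InMemoryRelation") {
    cacheTable("src")           // "src" is the stock Hive test table; illustrative here
    assertCached(table("src"))  // inherited helper; by default expects exactly one cached relation
    uncacheTable("src")
  }
}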