From 3619fec1ec395a66ad5ae1f614ce67fe173cf159 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 24 Mar 2016 22:34:55 -0700
Subject: [SPARK-14142][SQL] Replace internal use of unionAll with union

## What changes were proposed in this pull request?

unionAll has been deprecated in SPARK-14088.

## How was this patch tested?

Should be covered by all existing tests.

Author: Reynold Xin

Closes #11946 from rxin/SPARK-14142.
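The rename is mechanical: `union` keeps the exact `UNION ALL` semantics of the deprecated `unionAll`, so call sites only swap the method name, and callers that want SQL-style set union still follow it with `distinct()`. For illustration, a minimal sketch of the pattern (a hypothetical standalone driver using the era's `SQLContext` API; not code from this diff):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object UnionSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("union-sketch").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val df = sc.parallelize(1 to 3).toDF("id")

    // Old spelling, deprecated by SPARK-14088:
    //   df.unionAll(df)
    // New spelling, identical UNION ALL semantics (duplicates kept):
    assert(df.union(df).count() == 6)

    // SQL-style UNION (deduplicated) is union followed by distinct:
    assert(df.union(df).distinct().count() == 3)

    sc.stop()
  }
}
```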
""" - return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.union(other._jdf), self.sql_ctx) @since(1.3) def unionAll(self, other): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 9722e9e9ca..83ef76c13c 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -599,7 +599,7 @@ class SQLTests(ReusedPySparkTestCase): point = df1.head().point self.assertEqual(point, PythonOnlyPoint(1.0, 2.0)) - def test_unionAll_with_udt(self): + def test_union_with_udt(self): from pyspark.sql.tests import ExamplePoint, ExamplePointUDT row1 = (1.0, ExamplePoint(1.0, 2.0)) row2 = (2.0, ExamplePoint(3.0, 4.0)) @@ -608,7 +608,7 @@ class SQLTests(ReusedPySparkTestCase): df1 = self.sqlCtx.createDataFrame([row1], schema) df2 = self.sqlCtx.createDataFrame([row2], schema) - result = df1.unionAll(df2).orderBy("label").collect() + result = df1.union(df2).orderBy("label").collect() self.assertEqual( result, [ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index dc5264e266..3540014c3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -280,7 +280,7 @@ package object dsl { def intersect(otherPlan: LogicalPlan): LogicalPlan = Intersect(logicalPlan, otherPlan) - def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) + def union(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) def generate( generator: Generator, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index c87a2e24bd..a90dfc5039 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -250,7 +250,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "union with unequal number of columns", - testRelation.unionAll(testRelation2), + testRelation.union(testRelation2), "union" :: "number of columns" :: testRelation2.output.length.toString :: testRelation.output.length.toString :: Nil) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 8b568b6dd6..9563f43259 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -32,7 +32,7 @@ class AnalysisSuite extends AnalysisTest { val plan = (1 to 100) .map(_ => testRelation) .fold[LogicalPlan](testRelation) { (a, b) => - a.select(UnresolvedStar(None)).select('a).unionAll(b.select(UnresolvedStar(None))) + a.select(UnresolvedStar(None)).select('a).union(b.select(UnresolvedStar(None))) } assertAnalysisSuccess(plan) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala index 0ee7cf9209..14fb72a8a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala +++ 
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala
@@ -60,8 +60,8 @@ class PruneFiltersSuite extends PlanTest {

     val query = tr1
       .where('a.attr > 10)
-      .unionAll(tr2.where('d.attr > 10)
-      .unionAll(tr3.where('g.attr > 10)))
+      .union(tr2.where('d.attr > 10)
+      .union(tr3.where('g.attr > 10)))

     val queryWithUselessFilter = query.where('a.attr > 10)
     val optimized = Optimize.execute(queryWithUselessFilter.analyze)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index a9375a740d..f3ab026192 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -109,14 +109,14 @@ class ConstraintPropagationSuite extends SparkFunSuite {

     assert(tr1
       .where('a.attr > 10)
-      .unionAll(tr2.where('e.attr > 10)
-      .unionAll(tr3.where('i.attr > 10)))
+      .union(tr2.where('e.attr > 10)
+      .union(tr3.where('i.attr > 10)))
       .analyze.constraints.isEmpty)

     verifyConstraints(tr1
       .where('a.attr > 10)
-      .unionAll(tr2.where('d.attr > 10)
-      .unionAll(tr3.where('g.attr > 10)))
+      .union(tr2.where('d.attr > 10)
+      .union(tr3.where('g.attr > 10)))
       .analyze.constraints,
       ExpressionSet(Seq(resolveColumn(tr1, "a") > 10,
         IsNotNull(resolveColumn(tr1, "a")))))

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
index 8bc8bcaa96..0f91e59e04 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
@@ -97,7 +97,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext)
       s"MemoryBatch [$startOrdinal, $endOrdinal]: ${newBlocks.flatMap(_.collect()).mkString(", ")}")
     newBlocks
       .map(_.toDF())
-      .reduceOption(_ unionAll _)
+      .reduceOption(_ union _)
       .getOrElse {
         sys.error("No data selected!")
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index efa2eeaf4d..82b79c791d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -363,7 +363,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
   }

   test("A cached table preserves the partitioning and ordering of its cached SparkPlan") {
-    val table3x = testData.unionAll(testData).unionAll(testData)
+    val table3x = testData.union(testData).union(testData)
     table3x.registerTempTable("testData3x")

     sql("SELECT key, value FROM testData3x ORDER BY key").registerTempTable("orderedTable")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index fe12aa8099..0ea7727e45 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -57,7 +57,7 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
     val splits = data.randomSplit(Array[Double](1, 2, 3), seed)
     assert(splits.length == 3, "wrong number of splits")
-    assert(splits.reduce((a, b) => a.unionAll(b)).sort("id").collect().toList ==
+    assert(splits.reduce((a, b) => a.union(b)).sort("id").collect().toList ==
       data.collect().toList, "incomplete or wrong split")
     val s = splits.map(_.count())
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ec4e7b2042..86c6405522 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -94,8 +94,8 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
   }

   test("union all") {
-    val unionDF = testData.unionAll(testData).unionAll(testData)
-      .unionAll(testData).unionAll(testData)
+    val unionDF = testData.union(testData).union(testData)
+      .union(testData).union(testData)

     // Before optimizer, Union should be combined.
     assert(unionDF.queryExecution.analyzed.collect {
@@ -107,7 +107,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     )
   }

-  test("unionAll should union DataFrames with UDTs (SPARK-13410)") {
+  test("union should union DataFrames with UDTs (SPARK-13410)") {
     val rowRDD1 = sparkContext.parallelize(Seq(Row(1, new ExamplePoint(1.0, 2.0))))
     val schema1 = StructType(Array(StructField("label", IntegerType, false),
       StructField("point", new ExamplePointUDT(), false)))
@@ -118,7 +118,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     val df2 = sqlContext.createDataFrame(rowRDD2, schema2)

     checkAnswer(
-      df1.unionAll(df2).orderBy("label"),
+      df1.union(df2).orderBy("label"),
       Seq(Row(1, new ExamplePoint(1.0, 2.0)), Row(2, new ExamplePoint(3.0, 4.0)))
     )
   }
@@ -636,7 +636,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       val jsonDF = sqlContext.read.json(jsonDir)
       assert(parquetDF.inputFiles.nonEmpty)

-      val unioned = jsonDF.unionAll(parquetDF).inputFiles.sorted
+      val unioned = jsonDF.union(parquetDF).inputFiles.sorted
       val allFiles = (jsonDF.inputFiles ++ parquetDF.inputFiles).distinct.sorted
       assert(unioned === allFiles)
     }
@@ -1104,7 +1104,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       }
     }

-    val union = df1.unionAll(df2)
+    val union = df1.union(df2)
     checkAnswer(
       union.filter('i < rand(7) * 10),
       expected(union)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index dfffa4bc8b..5af1a4fcd7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -184,7 +184,7 @@ class JoinSuite extends QueryTest with SharedSQLContext {
   }

   test("big inner join, 4 matches per row") {
-    val bigData = testData.unionAll(testData).unionAll(testData).unionAll(testData)
+    val bigData = testData.union(testData).union(testData).union(testData)
     val bigDataX = bigData.as("x")
     val bigDataY = bigData.as("y")

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 61358fda76..077e579931 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -251,8 +251,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   test("aggregation with codegen") {
     // Prepare a table that we can group some rows.
sqlContext.table("testData") - .unionAll(sqlContext.table("testData")) - .unionAll(sqlContext.table("testData")) + .union(sqlContext.table("testData")) + .union(sqlContext.table("testData")) .registerTempTable("testData3x") try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala index 4f01e46633..01d485ce2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala @@ -342,7 +342,7 @@ class ExchangeCoordinatorSuite extends SparkFunSuite with BeforeAndAfterAll { sqlContext .range(0, 1000) .selectExpr("id % 500 as key", "id as value") - .unionAll(sqlContext.range(0, 1000).selectExpr("id % 500 as key", "id as value")) + .union(sqlContext.range(0, 1000).selectExpr("id % 500 as key", "id as value")) checkAnswer( join, expectedAnswer.collect()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 27b02d6e1a..a9b1970a7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -44,7 +44,7 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { path.delete() val base = sqlContext.range(100) - val df = base.unionAll(base).select($"id", lit(1).as("data")) + val df = base.union(base).select($"id", lit(1).as("data")) df.write.partitionBy("id").save(path.getCanonicalPath) checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 588f6e268f..bb2c54aa64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -122,7 +122,7 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA // verify the append mode df.write.mode(SaveMode.Append).json(path.toString) - val df2 = df.unionAll(df) + val df2 = df.union(df) df2.registerTempTable("jsonTable2") checkLoad(df2, "jsonTable2") diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java index 63fb4b7cf7..397421ae92 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java @@ -82,7 +82,7 @@ public class JavaDataFrameSuite { @Test public void testUDAF() { - Dataset df = hc.range(0, 100).unionAll(hc.range(0, 100)).select(col("id").as("value")); + Dataset df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value")); UserDefinedAggregateFunction udaf = new MyDoubleSum(); UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf); // Create Columns for the UDAF. 
    // Create Columns for the UDAF. For now, callUDF does not take an argument to specify if
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 656c1317c1..11384a0275 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -186,7 +186,7 @@ class CachedTableSuite extends QueryTest with TestHiveSingleton {
     assertCached(table("refreshTable"))
     checkAnswer(
       table("refreshTable"),
-      table("src").unionAll(table("src")).collect())
+      table("src").union(table("src")).collect())

     // Drop the table and create it again.
     sql("DROP TABLE refreshTable")
@@ -198,7 +198,7 @@ class CachedTableSuite extends QueryTest with TestHiveSingleton {
     sql("REFRESH TABLE refreshTable")
     checkAnswer(
       table("refreshTable"),
-      table("src").unionAll(table("src")).collect())
+      table("src").union(table("src")).collect())
     // It is not cached.
     assert(!isCached("refreshTable"), "refreshTable should not be cached.")

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
index d275190744..f3af60a018 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala
@@ -113,11 +113,11 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
       df.write.mode(SaveMode.Overwrite).saveAsTable("t")
       df.write.mode(SaveMode.Append).saveAsTable("t")
       assert(sqlContext.tableNames().contains("t"))
-      checkAnswer(sqlContext.table("t"), df.unionAll(df))
+      checkAnswer(sqlContext.table("t"), df.union(df))
     }

     assert(sqlContext.tableNames(db).contains("t"))
-    checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+    checkAnswer(sqlContext.table(s"$db.t"), df.union(df))

     checkTablePath(db, "t")
   }
@@ -128,7 +128,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
       df.write.mode(SaveMode.Overwrite).saveAsTable(s"$db.t")
       df.write.mode(SaveMode.Append).saveAsTable(s"$db.t")
       assert(sqlContext.tableNames(db).contains("t"))
-      checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+      checkAnswer(sqlContext.table(s"$db.t"), df.union(df))

       checkTablePath(db, "t")
     }
@@ -141,7 +141,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         assert(sqlContext.tableNames().contains("t"))

         df.write.insertInto(s"$db.t")
-        checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+        checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
       }
     }
   }
@@ -156,7 +156,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
       assert(sqlContext.tableNames(db).contains("t"))

       df.write.insertInto(s"$db.t")
-      checkAnswer(sqlContext.table(s"$db.t"), df.unionAll(df))
+      checkAnswer(sqlContext.table(s"$db.t"), df.union(df))
     }
   }

@@ -220,7 +220,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         hiveContext.refreshTable("t")
         checkAnswer(
           sqlContext.table("t"),
-          df.withColumn("p", lit(1)).unionAll(df.withColumn("p", lit(2))))
+          df.withColumn("p", lit(1)).union(df.withColumn("p", lit(2))))
       }
     }
   }
@@ -252,7 +252,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle
         hiveContext.refreshTable(s"$db.t")
         checkAnswer(
           sqlContext.table(s"$db.t"),
-          df.withColumn("p", lit(1)).unionAll(df.withColumn("p", lit(2))))
+          df.withColumn("p", lit(1)).union(df.withColumn("p", lit(2))))
       }
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index 1e5dbd991e..a15bd227a9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -137,7 +137,7 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
       fs.delete(commonSummaryPath, true)

       df.write.mode(SaveMode.Append).parquet(path)
-      checkAnswer(sqlContext.read.parquet(path), df.unionAll(df))
+      checkAnswer(sqlContext.read.parquet(path), df.union(df))

       assert(fs.exists(summaryPath))
       assert(fs.exists(commonSummaryPath))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
index e842caf5be..ea7e905742 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
@@ -60,7 +60,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
       p2 <- Seq("foo", "bar")
     } yield (i, s"val_$i", 2, p2)).toDF("a", "b", "p1", "p2")

-  lazy val partitionedTestDF = partitionedTestDF1.unionAll(partitionedTestDF2)
+  lazy val partitionedTestDF = partitionedTestDF1.union(partitionedTestDF2)

   def checkQueries(df: DataFrame): Unit = {
     // Selects everything
@@ -191,7 +191,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
         sqlContext.read.format(dataSourceName)
           .option("dataSchema", dataSchema.json)
           .load(file.getCanonicalPath).orderBy("a"),
-        testDF.unionAll(testDF).orderBy("a").collect())
+        testDF.union(testDF).orderBy("a").collect())
     }
   }

@@ -268,7 +268,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
         sqlContext.read.format(dataSourceName)
           .option("dataSchema", dataSchema.json)
           .load(file.getCanonicalPath),
-        partitionedTestDF.unionAll(partitionedTestDF).collect())
+        partitionedTestDF.union(partitionedTestDF).collect())
     }
   }

@@ -332,7 +332,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
     testDF.write.format(dataSourceName).mode(SaveMode.Append).saveAsTable("t")

     withTable("t") {
-      checkAnswer(sqlContext.table("t"), testDF.unionAll(testDF).orderBy("a").collect())
+      checkAnswer(sqlContext.table("t"), testDF.union(testDF).orderBy("a").collect())
     }
   }

@@ -415,7 +415,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
       .saveAsTable("t")

     withTable("t") {
-      checkAnswer(sqlContext.table("t"), partitionedTestDF.unionAll(partitionedTestDF).collect())
+      checkAnswer(sqlContext.table("t"), partitionedTestDF.union(partitionedTestDF).collect())
     }
   }

@@ -625,7 +625,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
         .format(dataSourceName)
         .option("dataSchema", df.schema.json)
         .load(dir.getCanonicalPath),
-      df.unionAll(df))
+      df.union(df))

     // This will fail because AlwaysFailOutputCommitter is used when we do append.
     intercept[Exception] {
-- 
cgit v1.2.3