From 7d789e117d6ddaf66159e708db600f2d8db8d787 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 4 Feb 2015 23:44:34 -0800 Subject: [SPARK-5612][SQL] Move DataFrame implicit functions into SQLContext.implicits. Author: Reynold Xin Closes #4386 from rxin/df-implicits and squashes the following commits: 9d96606 [Reynold Xin] style fix edd296b [Reynold Xin] ReplSuite 1c946ab [Reynold Xin] [SPARK-5612][SQL] Move DataFrame implicit functions into SQLContext.implicits. --- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala | 1 + sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala | 2 ++ sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 3 ++- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 ++ .../scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala | 3 +++ sql/core/src/test/scala/org/apache/spark/sql/TestData.scala | 3 +-- .../src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala | 3 ++- .../org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala | 2 ++ .../org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala | 2 ++ .../src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala | 2 ++ .../scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala | 2 ++ .../scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 3 +++ .../scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 2 ++ .../org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala | 3 ++- .../test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala | 4 +++- .../scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 3 +++ .../src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala | 2 ++ 18 files changed, 38 insertions(+), 8 deletions(-) (limited to 'sql') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 5ab5494f80..01620aa0ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -166,6 +166,7 @@ class SQLContext(@transient val sparkContext: SparkContext) // scalastyle:off // Disable style checker so "implicits" object can start with lowercase i /** + * (Scala-specific) * Implicit methods available in Scala for converting common Scala objects into [[DataFrame]]s. */ object implicits { @@ -192,8 +193,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - // TODO: Remove implicit here. - implicit def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { + def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { SparkPlan.currentContext.set(self) val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] val attributeSeq = schema.toAttributes diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala index 9d6c529574..8d3e094e33 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala @@ -88,6 +88,7 @@ trait ParquetTest { protected def withParquetFile[T <: Product: ClassTag: TypeTag] (data: Seq[T]) (f: String => Unit): Unit = { + import sqlContext.implicits._ withTempPath { file => sparkContext.parallelize(data).saveAsParquetFile(file.getCanonicalPath) f(file.getCanonicalPath) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index acb5677c4b..1318750a4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -34,6 +34,8 @@ case class BigData(s: String) class CachedTableSuite extends QueryTest { TestData // Load test tables. + import org.apache.spark.sql.test.TestSQLContext.implicits._ + def rddIdOf(tableName: String): Int = { val executedPlan = table(tableName).queryExecution.executedPlan executedPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index e588555ad0..74c29459d2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -21,7 +21,8 @@ import org.apache.spark.sql.Dsl._ import org.apache.spark.sql.types._ /* Implicits */ -import org.apache.spark.sql.test.TestSQLContext.{createDataFrame, logicalPlanToSparkQuery} +import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery +import org.apache.spark.sql.test.TestSQLContext.implicits._ import scala.language.postfixOps diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 8f3d4265a2..dc8ee41712 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -35,6 +35,8 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { // Make sure the tables are loaded. TestData + import org.apache.spark.sql.test.TestSQLContext.implicits._ + var origZone: TimeZone = _ override protected def beforeAll() { origZone = TimeZone.getDefault diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala index f26fcc0385..9378261982 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala @@ -75,6 +75,9 @@ case class ComplexReflectData( dataField: Data) class ScalaReflectionRelationSuite extends FunSuite { + + import org.apache.spark.sql.test.TestSQLContext.implicits._ + test("query case class RDD") { val data = ReflectData("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true, new java.math.BigDecimal(1), new Date(12345), new Timestamp(12345), Seq(1,2,3)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index dd781169ca..0ed437edd0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -22,9 +22,8 @@ import java.sql.Timestamp import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.Dsl._ import org.apache.spark.sql.test._ +import org.apache.spark.sql.test.TestSQLContext.implicits._ -/* Implicits */ -import org.apache.spark.sql.test.TestSQLContext._ case class TestData(key: Int, value: String) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 117a511734..3c1657cd5f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -22,7 +22,8 @@ import scala.beans.{BeanInfo, BeanProperty} import org.apache.spark.rdd.RDD import org.apache.spark.sql.Dsl._ import org.apache.spark.sql.test.TestSQLContext -import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} +import org.apache.spark.sql.test.TestSQLContext.{sparkContext, sql} +import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index 3d33484ab0..86b1b5fda1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -28,6 +28,8 @@ class InMemoryColumnarQuerySuite extends QueryTest { // Make sure the tables are loaded. TestData + import org.apache.spark.sql.test.TestSQLContext.implicits._ + test("simple columnar query") { val plan = executePlan(testData.logicalPlan).executedPlan val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala index fe9a69edbb..55a9f735b3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala @@ -26,6 +26,8 @@ class PartitionBatchPruningSuite extends FunSuite with BeforeAndAfterAll with Be val originalColumnBatchSize = conf.columnBatchSize val originalInMemoryPartitionPruning = conf.inMemoryPartitionPruning + import org.apache.spark.sql.test.TestSQLContext.implicits._ + override protected def beforeAll(): Unit = { // Make a table with 5 partitions, 2 batches per partition, 10 elements per batch setConf(SQLConf.COLUMN_BATCH_SIZE, "10") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index d9ab16baf9..0bc246c645 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -98,6 +98,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest { } test("fixed-length decimals") { + import org.apache.spark.sql.test.TestSQLContext.implicits._ + def makeDecimalRDD(decimal: DecimalType): DataFrame = sparkContext .parallelize(0 to 1000) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index 4dd96bd5a1..869d01eb39 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -30,6 +30,8 @@ import org.apache.spark.sql.hive.test.TestHive._ case class TestData(key: Int, value: String) class InsertIntoHiveTableSuite extends QueryTest { + import org.apache.spark.sql.hive.test.TestHive.implicits._ + val testData = TestHive.sparkContext.parallelize( (1 to 100).map(i => TestData(i, i.toString))) testData.registerTempTable("testData") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 85795acb65..c23575fe96 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -36,6 +36,9 @@ import org.apache.spark.sql.hive.test.TestHive._ * Tests for persisting tables created though the data sources API into the metastore. */ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { + + import org.apache.spark.sql.hive.test.TestHive.implicits._ + override def afterEach(): Unit = { reset() if (ctasPath.exists()) Utils.deleteRecursively(ctasPath) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index a321452cef..407d6058c3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -43,6 +43,8 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { private val originalTimeZone = TimeZone.getDefault private val originalLocale = Locale.getDefault + import org.apache.spark.sql.hive.test.TestHive.implicits._ + override def beforeAll() { TestHive.cacheTables = true // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index 422e843d2b..178ece4477 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.hive.test.TestHive -import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.{sparkContext, sql} +import org.apache.spark.sql.hive.test.TestHive.implicits._ case class Nested(a: Int, B: Int) case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala index dd0df1a9f6..1e99003d3e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala @@ -47,7 +47,9 @@ case class ListStringCaseClass(l: Seq[String]) * A test suite for Hive custom UDFs. */ class HiveUdfSuite extends QueryTest { - import TestHive._ + + import TestHive.{udf, sql} + import TestHive.implicits._ test("spark sql udf test that returns a struct") { udf.register("getStruct", (_: Int) => Fields(1, 2, 3, 4, 5)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 268e5f6f01..22310ffadd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -35,6 +35,9 @@ case class Nested3(f3: Int) * valid, but Hive currently cannot execute it. */ class SQLQuerySuite extends QueryTest { + + import org.apache.spark.sql.hive.test.TestHive.implicits._ + test("SPARK-4512 Fix attribute reference resolution error when using SORT BY") { checkAnswer( sql("SELECT * FROM (SELECT key + key AS a FROM src SORT BY value) t ORDER BY t.a"), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala index 79fd99d9f8..30441bbbdf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala @@ -149,6 +149,8 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll var partitionedTableDir: File = null var partitionedTableDirWithKey: File = null + import org.apache.spark.sql.hive.test.TestHive.implicits._ + override def beforeAll(): Unit = { partitionedTableDir = File.createTempFile("parquettests", "sparksql") partitionedTableDir.delete() -- cgit v1.2.3