about | summary | refs | log | tree | commit | diff
path: root/sql/core
diff options
context:
space:
mode:
author: Michael Armbrust <michael@databricks.com> 2015-02-11 12:31:56 -0800
committer: Michael Armbrust <michael@databricks.com> 2015-02-11 12:31:56 -0800
commit: a60d2b70adff3a8fb3bdfac226b1d86fdb443da4 (patch)
tree: 92f50bd56e8ffc48b77d7845585d15327f169431 /sql/core
parent: 03bf704bf442ac7dd960795295b51957ce972491 (diff)
download: spark-a60d2b70adff3a8fb3bdfac226b1d86fdb443da4.tar.gz
          spark-a60d2b70adff3a8fb3bdfac226b1d86fdb443da4.tar.bz2
          spark-a60d2b70adff3a8fb3bdfac226b1d86fdb443da4.zip
[SPARK-5454] More robust handling of self joins
Also fixes a bunch of bad output in test cases.

Author: Michael Armbrust <michael@databricks.com>

Closes #4520 from marmbrus/selfJoin and squashes the following commits:

4f4a85c [Michael Armbrust] comments
49c8e26 [Michael Armbrust] fix tests
6fc38de [Michael Armbrust] fix style
55d64b3 [Michael Armbrust] fix dataframe selfjoins
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala  2
-rw-r--r--  sql/core/src/test/resources/log4j.properties  3
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala  10
3 files changed, 15 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 05ac1623d7..fd121ce056 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -122,6 +122,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
case _ =>
}
+ @transient
protected[sql] val cacheManager = new CacheManager(this)
/**
@@ -159,6 +160,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
* DataTypes.StringType);
* }}}
*/
+ @transient
val udf: UDFRegistration = new UDFRegistration(this)
/** Returns true if the table is currently cached in-memory. */
diff --git a/sql/core/src/test/resources/log4j.properties b/sql/core/src/test/resources/log4j.properties
index fbed0a782d..28e90b9520 100644
--- a/sql/core/src/test/resources/log4j.properties
+++ b/sql/core/src/test/resources/log4j.properties
@@ -39,6 +39,9 @@ log4j.appender.FA.Threshold = INFO
log4j.additivity.parquet.hadoop.ParquetRecordReader=false
log4j.logger.parquet.hadoop.ParquetRecordReader=OFF
+log4j.additivity.parquet.hadoop.ParquetOutputCommitter=false
+log4j.logger.parquet.hadoop.ParquetOutputCommitter=OFF
+
log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false
log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 02623f73c7..7be9215a44 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery
import org.apache.spark.sql.test.TestSQLContext.implicits._
+import org.apache.spark.sql.test.TestSQLContext.sql
class DataFrameSuite extends QueryTest {
@@ -88,6 +89,15 @@ class DataFrameSuite extends QueryTest {
testData.collect().toSeq)
}
+ test("self join") {
+ val df1 = testData.select(testData("key")).as('df1)
+ val df2 = testData.select(testData("key")).as('df2)
+
+ checkAnswer(
+ df1.join(df2, $"df1.key" === $"df2.key"),
+ sql("SELECT a.key, b.key FROM testData a JOIN testData b ON a.key = b.key").collect().toSeq)
+ }
+
test("selectExpr") {
checkAnswer(
testData.selectExpr("abs(key)", "value"),