aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorOleg Sidorkin <oleg.sidorkin@gmail.com>2015-05-10 01:31:34 -0700
committerReynold Xin <rxin@databricks.com>2015-05-10 01:31:34 -0700
commitd7a37bcaf123389fb0828eefb92659c6d9cb3460 (patch)
tree964d8b29f7f92d5c8b430a1124261ff0926fd71f /sql
parentbf7e81a51cd81706570615cd67362c86602dec88 (diff)
downloadspark-d7a37bcaf123389fb0828eefb92659c6d9cb3460.tar.gz
spark-d7a37bcaf123389fb0828eefb92659c6d9cb3460.tar.bz2
spark-d7a37bcaf123389fb0828eefb92659c6d9cb3460.zip
[SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector
Issue appears when one tries to create DataFrame using sqlContext.load("jdbc"...) statement when "dbtable" contains query with renamed columns. If original column is used in SQL query once the resulting DataFrame will contain non-renamed column. If original column is used in SQL query several times with different aliases, sqlContext.load will fail. Original implementation of JDBCRDD.resolveTable uses getColumnName to detect column names in RDD schema. Suggested implementation uses getColumnLabel to handle column renames in SQL statement which is aware of SQL "AS" statement. Readings: http://stackoverflow.com/questions/4271152/getcolumnlabel-vs-getcolumnname http://stackoverflow.com/questions/12259829/jdbc-getcolumnname-getcolumnlabel-db2 Official documentation unfortunately a bit misleading in definition of "suggested title" purpose however clearly defines behavior of AS keyword in SQL statement. http://docs.oracle.com/javase/7/docs/api/java/sql/ResultSetMetaData.html getColumnLabel - Gets the designated column's suggested title for use in printouts and displays. The suggested title is usually specified by the SQL AS clause. If a SQL AS is not specified, the value returned from getColumnLabel will be the same as the value returned by the getColumnName method. Author: Oleg Sidorkin <oleg.sidorkin@gmail.com> Closes #6032 from osidorkin/master and squashes the following commits: 10fc44b [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite (resolved scala style test error) 2aaf6f7 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite (renamed fields in JDBC query) b7d5b22 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite 09559a0 [Oleg Sidorkin] [SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala16
2 files changed, 17 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
index 1a5083dbe0..a03ade3881 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
@@ -109,7 +109,7 @@ private[sql] object JDBCRDD extends Logging {
val fields = new Array[StructField](ncols)
var i = 0
while (i < ncols) {
- val columnName = rsmd.getColumnName(i + 1)
+ val columnName = rsmd.getColumnLabel(i + 1)
val dataType = rsmd.getColumnType(i + 1)
val typeName = rsmd.getColumnTypeName(i + 1)
val fieldSize = rsmd.getPrecision(i + 1)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 021affafe3..2abfe7f167 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -204,6 +204,22 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
assert(ids(2) === 3)
}
+ test("Register JDBC query with renamed fields") {
+ // Regression test for bug SPARK-7345
+ sql(
+ s"""
+ |CREATE TEMPORARY TABLE renamed
+ |USING org.apache.spark.sql.jdbc
+ |OPTIONS (url '$url', dbtable '(select NAME as NAME1, NAME as NAME2 from TEST.PEOPLE)',
+ |user 'testUser', password 'testPass')
+ """.stripMargin.replaceAll("\n", " "))
+
+ val df = sql("SELECT * FROM renamed")
+ assert(df.schema.fields.size == 2)
+ assert(df.schema.fields(0).name == "NAME1")
+ assert(df.schema.fields(1).name == "NAME2")
+ }
+
test("Basic API") {
assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE").collect().size === 3)
}