author: sureshthalamati <suresh.thalamati@gmail.com> 2016-01-05 17:48:05 -0800
committer: Michael Armbrust <michael@databricks.com> 2016-01-05 17:48:05 -0800
commit: 0d42292f6a2dbe626e8f6a50e6c61dd79533f235 (patch)
tree: abe11b66b3b07bc74b397021a1b8c93b03a81246 /sql
parent: df8bd97520fc67dad95141c5a8cf2e0d5332e693 (diff)
[SPARK-12504][SQL] Masking credentials in the sql plan explain output for JDBC data sources.
This fix masks JDBC credentials in the explain output. Because the URL patterns for specifying credentials vary between databases, a new method was added to the dialect to mask credentials according to the database-specific URL pattern. While adding tests, I noticed the explain output included the raw array reference for the partitions ([Lorg.apache.spark.Partition;3ff74546). The code was modified to include the first and last partition information instead.

Author: sureshthalamati <suresh.thalamati@gmail.com>

Closes #10452 from sureshthalamati/mask_jdbc_credentials_spark-12504.
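The dialect-level masking mentioned above is not part of the hunks below (the diffstat is limited to 'sql'), but the idea can be sketched as follows. This is a minimal illustration only, not the patch's actual implementation; the CredentialMasker name and the regular expressions are assumptions:

    // Hypothetical sketch: redact user/password parameters embedded in a JDBC URL.
    object CredentialMasker {
      private val PasswordPattern = "(?i)(password)=[^&;]*".r
      private val UserPattern = "(?i)(user(?:name)?)=[^&;]*".r

      def maskCredentials(url: String): String = {
        // Replace each credential value with a fixed placeholder.
        val noPassword = PasswordPattern.replaceAllIn(url, m => m.group(1) + "=***")
        UserPattern.replaceAllIn(noPassword, m => m.group(1) + "=***")
      }
    }

    // CredentialMasker.maskCredentials("jdbc:h2:mem:testdb;user=testUser;password=testPass")
    // => "jdbc:h2:mem:testdb;user=***;password=***"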
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala |  5
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 22
2 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 1d40d23edc..572be823ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -108,4 +108,9 @@ private[sql] case class JDBCRelation(
.mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append)
.jdbc(url, table, properties)
}
+
+ override def toString: String = {
+ // Credentials should not be included in the plan output; table information is sufficient.
+ s"JDBCRelation(${table})"
+ }
}
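With the override in place, the relation renders as JDBCRelation(TEST.PEOPLE) in plan output rather than echoing the connection string. A quick way to observe this from inside the test suite (a usage sketch; urlWithUserAndPass is the H2 test URL the suite already defines, and java.util.Properties is already imported there):

    val df = sqlContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties)
    df.explain(true)
    // The printed plans now show JDBCRelation(TEST.PEOPLE) instead of the full
    // connection URL with its embedded user and password.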
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index dae72e8acb..73e548e00f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -27,6 +27,8 @@ import org.scalatest.PrivateMethodTester
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.ExplainCommand
+import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
@@ -551,4 +553,24 @@ class JDBCSuite extends SparkFunSuite
assert(rows(0).getAs[java.sql.Timestamp](2)
=== java.sql.Timestamp.valueOf("2002-02-20 11:22:33.543543"))
}
+
+ test("test credentials in the properties are not in plan output") {
+ val df = sql("SELECT * FROM parts")
+ val explain = ExplainCommand(df.queryExecution.logical, extended = true)
+ sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
+ r => assert(!List("testPass", "testUser").exists(r.toString.contains))
+ }
+ // Test the JDBCRelation toString output.
+ df.queryExecution.analyzed.collect {
+ case r: LogicalRelation => assert(r.relation.toString == "JDBCRelation(TEST.PEOPLE)")
+ }
+ }
+
+ test("test credentials in the connection url are not in the plan output") {
+ val df = sqlContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties)
+ val explain = ExplainCommand(df.queryExecution.logical, extended = true)
+ sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
+ r => assert(!List("testPass", "testUser").exists(r.toString.contains))
+ }
+ }
}
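Both tests drive explain through ExplainCommand directly, which produces the same plan text a user would see from EXPLAIN EXTENDED at the SQL level. An equivalent SQL-level check might look like this (a sketch, assuming the `parts` temporary table the suite registers):

    sql("EXPLAIN EXTENDED SELECT * FROM parts").collect().foreach { row =>
      assert(!List("testPass", "testUser").exists(row.getString(0).contains))
    }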