diff options
author | sureshthalamati <suresh.thalamati@gmail.com> | 2016-01-05 17:48:05 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-01-05 17:48:05 -0800 |
commit | 0d42292f6a2dbe626e8f6a50e6c61dd79533f235 (patch) | |
tree | abe11b66b3b07bc74b397021a1b8c93b03a81246 | |
parent | df8bd97520fc67dad95141c5a8cf2e0d5332e693 (diff) | |
download | spark-0d42292f6a2dbe626e8f6a50e6c61dd79533f235.tar.gz spark-0d42292f6a2dbe626e8f6a50e6c61dd79533f235.tar.bz2 spark-0d42292f6a2dbe626e8f6a50e6c61dd79533f235.zip |
[SPARK-12504][SQL] Masking credentials in the sql plan explain output for JDBC data sources.
This fix masks JDBC credentials in the explain output. URL patterns to specify credential seems to be vary between different databases. Added a new method to dialect to mask the credentials according to the database specific URL pattern.
While adding tests I noticed explain output includes array variable for partitions ([Lorg.apache.spark.Partition;3ff74546,). Modified the code to include the first, and last partition information.
Author: sureshthalamati <suresh.thalamati@gmail.com>
Closes #10452 from sureshthalamati/mask_jdbc_credentials_spark-12504.
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala | 5 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 22 |
2 files changed, 27 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index 1d40d23edc..572be823ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -108,4 +108,9 @@ private[sql] case class JDBCRelation( .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) .jdbc(url, table, properties) } + + override def toString: String = { + // credentials should not be included in the plan output, table information is sufficient. + s"JDBCRelation(${table})" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index dae72e8acb..73e548e00f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -27,6 +27,8 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.ExplainCommand +import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -551,4 +553,24 @@ class JDBCSuite extends SparkFunSuite assert(rows(0).getAs[java.sql.Timestamp](2) === java.sql.Timestamp.valueOf("2002-02-20 11:22:33.543543")) } + + test("test credentials in the properties are not in plan output") { + val df = sql("SELECT * FROM parts") + val explain = ExplainCommand(df.queryExecution.logical, extended = true) + sqlContext.executePlan(explain).executedPlan.executeCollect().foreach { + r => assert(!List("testPass", "testUser").exists(r.toString.contains)) + } + // test the JdbcRelation toString output + df.queryExecution.analyzed.collect { + case r: LogicalRelation => assert(r.relation.toString == "JDBCRelation(TEST.PEOPLE)") + } + } + + test("test credentials in the connection url are not in the plan output") { + val df = sqlContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties) + val explain = ExplainCommand(df.queryExecution.logical, extended = true) + sqlContext.executePlan(explain).executedPlan.executeCollect().foreach { + r => assert(!List("testPass", "testUser").exists(r.toString.contains)) + } + } } |