author: sureshthalamati <suresh.thalamati@gmail.com> 2016-01-05 17:48:05 -0800
committer: Michael Armbrust <michael@databricks.com> 2016-01-05 17:48:05 -0800
commit: 0d42292f6a2dbe626e8f6a50e6c61dd79533f235 (patch)
tree: abe11b66b3b07bc74b397021a1b8c93b03a81246 /sql
parent: df8bd97520fc67dad95141c5a8cf2e0d5332e693 (diff)
[SPARK-12504][SQL] Masking credentials in the sql plan explain output for JDBC data sources.
This fix masks JDBC credentials in the explain output. Because the URL patterns for specifying credentials vary between databases, a new method was added to the dialect to mask credentials according to the database-specific URL pattern. While adding tests, I noticed the explain output included the raw array reference for the partitions ([Lorg.apache.spark.Partition;3ff74546). The code was modified to include the first and last partition information instead.

Author: sureshthalamati <suresh.thalamati@gmail.com>

Closes #10452 from sureshthalamati/mask_jdbc_credentials_spark-12504.
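The dialect-level masking mentioned above is not part of the hunks below (the diffstat is limited to 'sql'), but the idea can be sketched as follows. This is a minimal illustration only, not the patch's actual implementation; the CredentialMasker name and the regular expressions are assumptions:

    // Hypothetical sketch: redact user/password parameters embedded in a JDBC URL.
    object CredentialMasker {
      private val PasswordPattern = "(?i)(password)=[^&;]*".r
      private val UserPattern = "(?i)(user(?:name)?)=[^&;]*".r

      def maskCredentials(url: String): String = {
        // Replace each credential value with a fixed placeholder.
        val noPassword = PasswordPattern.replaceAllIn(url, m => m.group(1) + "=***")
        UserPattern.replaceAllIn(noPassword, m => m.group(1) + "=***")
      }
    }

    // CredentialMasker.maskCredentials("jdbc:h2:mem:testdb;user=testUser;password=testPass")
    // => "jdbc:h2:mem:testdb;user=***;password=***"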
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala |  5
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 22
2 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 1d40d23edc..572be823ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -108,4 +108,9 @@ private[sql] case class JDBCRelation(
.mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append)
.jdbc(url, table, properties)
}
+
+ override def toString: String = {
+ // Credentials should not be included in the plan output; table information is sufficient.
+ s"JDBCRelation(${table})"
+ }
}
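With the override in place, the relation renders as JDBCRelation(TEST.PEOPLE) in plan output rather than echoing the connection string. A quick way to observe this from inside the test suite (a usage sketch; urlWithUserAndPass is the H2 test URL the suite already defines, and java.util.Properties is already imported there):

    val df = sqlContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties)
    df.explain(true)
    // The printed plans now show JDBCRelation(TEST.PEOPLE) instead of the full
    // connection URL with its embedded user and password.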
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index dae72e8acb..73e548e00f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -27,6 +27,8 @@ import org.scalatest.PrivateMethodTester
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.ExplainCommand
+import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
@@ -551,4 +553,24 @@ class JDBCSuite extends SparkFunSuite
assert(rows(0).getAs[java.sql.Timestamp](2)
=== java.sql.Timestamp.valueOf("2002-02-20 11:22:33.543543"))
}
+
+ test("test credentials in the properties are not in plan output") {
+ val df = sql("SELECT * FROM parts")
+ val explain = ExplainCommand(df.queryExecution.logical, extended = true)
+ sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
+ r => assert(!List("testPass", "testUser").exists(r.toString.contains))
+ }
+ // Test the JDBCRelation toString output.
+ df.queryExecution.analyzed.collect {
+ case r: LogicalRelation => assert(r.relation.toString == "JDBCRelation(TEST.PEOPLE)")
+ }
+ }
+
+ test("test credentials in the connection url are not in the plan output") {
+ val df = sqlContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties)
+ val explain = ExplainCommand(df.queryExecution.logical, extended = true)
+ sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
+ r => assert(!List("testPass", "testUser").exists(r.toString.contains))
+ }
+ }
}
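Both tests drive explain through ExplainCommand directly, which produces the same plan text a user would see from EXPLAIN EXTENDED at the SQL level. An equivalent SQL-level check might look like this (a sketch, assuming the `parts` temporary table the suite registers):

    sql("EXPLAIN EXTENDED SELECT * FROM parts").collect().foreach { row =>
      assert(!List("testPass", "testUser").exists(row.getString(0).contains))
    }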