author     Josh Rosen <joshrosen@databricks.com>    2015-07-14 16:08:17 -0700
committer  Reynold Xin <rxin@databricks.com>        2015-07-14 16:08:17 -0700
commit     11e5c372862ec00e57460b37ccfee51c6d93c5f7 (patch)
tree       e12b1ab4a40a6b3f090669490c1563970bb7a4aa /sql/hive
parent     740b034f1ca885a386f5a9ef7e0c81c714b047ff (diff)
[SPARK-8962] Add Scalastyle rule to ban direct use of Class.forName; fix existing uses
This pull request adds a Scalastyle regex rule that fails the style check whenever `Class.forName` is used directly. `Class.forName` resolves classes through the caller's default classloader, but in the majority of cases we should use Spark's own `Utils.classForName` instead: it loads classes through the current thread's context classloader, and falls back to the classloader that loaded Spark when no context classloader is defined.
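For reference, here is a minimal sketch of the fallback behavior described above. It paraphrases, and is not the exact body of, `org.apache.spark.util.Utils`; the object name and the use of `getClass.getClassLoader` as "the loader that loaded Spark" are illustrative stand-ins:

```scala
object ClassLoaderUtils {
  // Prefer the current thread's context classloader; when none is set,
  // fall back to the classloader that loaded this class (standing in
  // here for the classloader that loaded Spark).
  def getContextOrSparkClassLoader: ClassLoader =
    Option(Thread.currentThread().getContextClassLoader)
      .getOrElse(getClass.getClassLoader)

  // What Utils.classForName provides: initialize the class (second
  // argument = true) and resolve it against the loader chosen above,
  // rather than against the caller's defining classloader.
  def classForName(className: String): Class[_] =
    Class.forName(className, true, getContextOrSparkClassLoader)
}
```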
Author: Josh Rosen <joshrosen@databricks.com>
Closes #7350 from JoshRosen/ban-Class.forName and squashes the following commits:
e3e96f7 [Josh Rosen] Merge remote-tracking branch 'origin/master' into ban-Class.forName
c0b7885 [Josh Rosen] Hopefully fix the last two cases
d707ba7 [Josh Rosen] Fix uses of Class.forName that I missed in my first cleanup pass
046470d [Josh Rosen] Merge remote-tracking branch 'origin/master' into ban-Class.forName
62882ee [Josh Rosen] Fix uses of Class.forName or add exclusion.
d9abade [Josh Rosen] Add stylechecker rule to ban uses of Class.forName
Diffstat (limited to 'sql/hive')
 sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala          | 4 +---
 sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala | 9 ++++-----
 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala | 8 ++++----
3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index d65d29daac..dc35569085 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -78,9 +78,7 @@ class HadoopTableReader(
   override def makeRDDForTable(hiveTable: HiveTable): RDD[InternalRow] =
     makeRDDForTable(
       hiveTable,
-      Class.forName(
-        relation.tableDesc.getSerdeClassName, true, Utils.getContextOrSparkClassLoader)
-        .asInstanceOf[Class[Deserializer]],
+      Utils.classForName(relation.tableDesc.getSerdeClassName).asInstanceOf[Class[Deserializer]],
       filterOpt = None)
 
   /**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
index 1f280c6429..8adda54754 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala
@@ -21,9 +21,6 @@ import java.io.{File, PrintStream}
 import java.util.{Map => JMap}
 import javax.annotation.concurrent.GuardedBy
 
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.util.CircularBuffer
-
 import scala.collection.JavaConversions._
 import scala.language.reflectiveCalls
 
@@ -37,7 +34,9 @@ import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.ql.{Driver, metadata}
 
 import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.execution.QueryExecutionException
+import org.apache.spark.util.{CircularBuffer, Utils}
 
 
 /**
@@ -249,10 +248,10 @@ private[hive] class ClientWrapper(
   }
 
   private def toInputFormat(name: String) =
-    Class.forName(name).asInstanceOf[Class[_ <: org.apache.hadoop.mapred.InputFormat[_, _]]]
+    Utils.classForName(name).asInstanceOf[Class[_ <: org.apache.hadoop.mapred.InputFormat[_, _]]]
 
   private def toOutputFormat(name: String) =
-    Class.forName(name)
+    Utils.classForName(name)
       .asInstanceOf[Class[_ <: org.apache.hadoop.hive.ql.io.HiveOutputFormat[_, _]]]
 
   private def toQlTable(table: HiveTable): metadata.Table = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 917900e5f4..bee2ecbedb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -120,8 +120,8 @@ object SparkSubmitClassLoaderTest extends Logging {
     logInfo("Testing load classes at the driver side.")
     // First, we load classes at driver side.
     try {
-      Class.forName(args(0), true, Thread.currentThread().getContextClassLoader)
-      Class.forName(args(1), true, Thread.currentThread().getContextClassLoader)
+      Utils.classForName(args(0))
+      Utils.classForName(args(1))
     } catch {
       case t: Throwable =>
         throw new Exception("Could not load user class from jar:\n", t)
@@ -131,8 +131,8 @@ object SparkSubmitClassLoaderTest extends Logging {
     val result = df.mapPartitions { x =>
      var exception: String = null
      try {
-        Class.forName(args(0), true, Thread.currentThread().getContextClassLoader)
-        Class.forName(args(1), true, Thread.currentThread().getContextClassLoader)
+        Utils.classForName(args(0))
+        Utils.classForName(args(1))
      } catch {
        case t: Throwable =>
          exception = t + "\n" + t.getStackTraceString
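To illustrate why call sites like the ones above collapse to `Utils.classForName`, here is a self-contained sketch of the failure mode the rule guards against. The jar path and class name are hypothetical stand-ins for a user jar shipped with `--jars`, whose classes are visible only through the context classloader:

```scala
import java.net.{URL, URLClassLoader}

object ContextLoaderDemo {
  def main(args: Array[String]): Unit = {
    // Hypothetical user jar; its classes are reachable only through the
    // context classloader set below, not through this class's own loader.
    val userJar = new URL("file:/tmp/user-app.jar")
    Thread.currentThread().setContextClassLoader(
      new URLClassLoader(Array(userJar), getClass.getClassLoader))

    // Class.forName("com.example.UserClass") would resolve against this
    // class's defining classloader and throw ClassNotFoundException,
    // because the user jar is not on that loader's classpath. The
    // three-argument form wrapped by Utils.classForName honors the
    // context classloader and finds the class:
    val cls = Class.forName("com.example.UserClass", true,
      Thread.currentThread().getContextClassLoader)
    println(cls.getName)
  }
}
```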