author    Thomas Graves <tgraves@apache.org>   2016-01-08 14:38:19 -0600
committer Tom Graves <tgraves@yahoo-inc.com>   2016-01-08 14:38:19 -0600
commit    553fd7b912a32476b481fd3f80c1d0664b6c6484 (patch)
tree      e3cbc5f693c18175be7bb06ebce274bf757a9f57 /core/src/main/scala/org/apache
parent    8c70cb4c62a353bea99f37965dfc829c4accc391 (diff)
[SPARK-12654] sc.wholeTextFiles with spark.hadoop.cloneConf=true fails on secure Hadoop

https://issues.apache.org/jira/browse/SPARK-12654

The bug is that WholeTextFileRDD.getPartitions starts with:

    val conf = getConf

Inside getConf, when cloneConf=true, a new Hadoop Configuration is created and then used to build a new JobContext via newJobContext. The new JobContext copies credentials around, but credentials are only present in a JobConf, not in a plain Hadoop Configuration. So cloning the Hadoop configuration changes it from a JobConf to a Configuration and drops the credentials that were there. NewHadoopRDD works because its getPartitions uses the conf passed in directly rather than getConf.

Author: Thomas Graves <tgraves@staydecay.corp.gq1.yahoo.com>

Closes #10651 from tgravescs/SPARK-12654.
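For context, a minimal sketch (not part of the patch; the alias and secret are made up, and it assumes the Hadoop client libraries on the classpath) of why the clone type decides whether credentials survive: a JobConf carries a Credentials object, while a plain Configuration copy keeps only the key/value properties.

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.Text
    import org.apache.hadoop.mapred.JobConf

    // On secure Hadoop, delegation tokens and secret keys live in the JobConf.
    val jobConf = new JobConf()
    jobConf.getCredentials.addSecretKey(new Text("demo.alias"), "s3cr3t".getBytes("UTF-8"))

    // new JobConf(conf) carries the Credentials object over to the clone ...
    val goodClone = new JobConf(jobConf)
    assert(goodClone.getCredentials.getSecretKey(new Text("demo.alias")) != null)

    // ... but new Configuration(conf) copies only the configuration properties.
    // The result is no longer a JobConf, so the credentials are silently dropped,
    // which is what getConf did with spark.hadoop.cloneConf=true.
    val badClone = new Configuration(jobConf)
    assert(!badClone.isInstanceOf[JobConf])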
Diffstat (limited to 'core/src/main/scala/org/apache')
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala  |  9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index 146609ae39..7a11978304 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -24,6 +24,7 @@ import scala.reflect.ClassTag
 
 import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.io.Writable
+import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit}
 import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}
@@ -93,7 +94,13 @@ class NewHadoopRDD[K, V](
       // issues, this cloning is disabled by default.
       NewHadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
         logDebug("Cloning Hadoop Configuration")
-        new Configuration(conf)
+        // The Configuration passed in is actually a JobConf and possibly contains credentials.
+        // To keep those credentials properly we have to create a new JobConf not a Configuration.
+        if (conf.isInstanceOf[JobConf]) {
+          new JobConf(conf)
+        } else {
+          new Configuration(conf)
+        }
       }
     } else {
       conf
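Downstream, this is where the credentials were going missing: the JobContext that getPartitions builds from the clone only sees credentials when that clone is a JobConf. A rough sketch of that behavior (hypothetical alias, not from the patch; assumes the Hadoop MapReduce client on the classpath):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.Text
    import org.apache.hadoop.mapred.JobConf
    import org.apache.hadoop.mapreduce.JobID
    import org.apache.hadoop.mapreduce.task.JobContextImpl

    val secure = new JobConf()
    secure.getCredentials.addSecretKey(new Text("alias"), "secret".getBytes("UTF-8"))

    // Cloned as a JobConf, the credentials reach the new JobContext ...
    val kept = new JobContextImpl(new JobConf(secure), new JobID())
    assert(kept.getCredentials.getSecretKey(new Text("alias")) != null)

    // ... cloned as a plain Configuration, the JobContext starts with empty credentials.
    val lost = new JobContextImpl(new Configuration(secure), new JobID())
    assert(lost.getCredentials.getSecretKey(new Text("alias")) == null)

Note the shape of the fix: it clones to a JobConf only when the original is one, so callers that pass a plain Configuration still get back a plain Configuration clone.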