author     Yaron Weinsberg <wyaron@gmail.com>    2015-12-29 05:19:11 +0900
committer  Kousuke Saruta <sarutak@oss.nttdata.co.jp>    2015-12-29 05:19:11 +0900
commit     73b70f076d4e22396b7e145f2ce5974fbf788048 (patch)
tree       e9aaa63177a5d9529c3349dc2b52a217f288f4d3 /core
parent     fd50df413fbb3b7528cdff311cc040a6212340b9 (diff)
[SPARK-12517] add default RDD name for one created via sc.textFile
The feature was first added at commit 7b877b27053bfb7092e250e01a3b887e1b50a109 but was later removed (probably by mistake) at commit fc8b58195afa67fbb75b4c8303e022f703cbf007. This change sets the default name of RDDs created via sc.textFile(...) to the path argument.

Here is the symptom:

* Using spark-1.5.2-bin-hadoop2.6:

scala> sc.textFile("/home/root/.bashrc").name
res5: String = null

scala> sc.binaryFiles("/home/root/.bashrc").name
res6: String = /home/root/.bashrc

* While using Spark 1.3.1:

scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc

scala> sc.binaryFiles("/home/root/.bashrc").name
res1: String = /home/root/.bashrc

Author: Yaron Weinsberg <wyaron@gmail.com>
Author: yaron <yaron@il.ibm.com>

Closes #10456 from wyaron/master.
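For reference, a short spark-shell session (not part of the commit message) sketching the behavior this patch restores. It assumes a build that includes this change; any readable local file can stand in for /home/root/.bashrc, and the custom name below is only an illustration:

scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc

scala> // RDD.setName returns the RDD itself, so callers can still override the default name
scala> sc.textFile("/home/root/.bashrc").setName("custom-name").name
res1: String = custom-name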
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/SparkContext.scala      |  4
-rw-r--r--  core/src/test/scala/org/apache/spark/SparkContextSuite.scala | 25
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d506782b73..bbdc9158d8 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -836,7 +836,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
     assertNotStopped()
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
-      minPartitions).map(pair => pair._2.toString)
+      minPartitions).map(pair => pair._2.toString).setName(path)
   }
/**
@@ -885,7 +885,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       classOf[Text],
       classOf[Text],
       updateConf,
-      minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
+      minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
   }
/**
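A side note on the two hunks above (a reviewer observation, not part of the commit message): the textFile hunk adds the missing setName call, while the wholeTextFiles hunk moves setName after map. The move matters because RDD.map returns a new RDD and the parent's name is not inherited, so naming only the intermediate Hadoop RDD left the RDD actually returned to the caller unnamed. A minimal spark-shell sketch of that behavior, assuming a local SparkContext:

scala> sc.parallelize(1 to 3).setName("parent").map(_ + 1).name
res0: String = null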
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index d4f2ea8765..172ef050cc 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
}
}
+ test("Default path for file based RDDs is properly set (SPARK-12517)") {
+ sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+ // Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and
+ // newAPIHadoopFile for setting the default path as the RDD name
+ val mockPath = "default/path/for/"
+
+ var targetPath = mockPath + "textFile"
+ assert(sc.textFile(targetPath).name === targetPath)
+
+ targetPath = mockPath + "wholeTextFiles"
+ assert(sc.wholeTextFiles(targetPath).name === targetPath)
+
+ targetPath = mockPath + "binaryFiles"
+ assert(sc.binaryFiles(targetPath).name === targetPath)
+
+ targetPath = mockPath + "hadoopFile"
+ assert(sc.hadoopFile(targetPath).name === targetPath)
+
+ targetPath = mockPath + "newAPIHadoopFile"
+ assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
+
+ sc.stop()
+ }
+
test("calling multiple sc.stop() must not throw any exception") {
noException should be thrownBy {
sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))