[SPARK-13265][ML] Refactoring of basic ML import/export for other file system besides HDFS

jkbradley I tried to improve the function to export a model. When I tried to export a model to S3 under Spark 1.6, we couldn't do that. So, it should offer S3 besides HDFS. Can you review it when you have time? Thanks! Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #11151 from yu-iskw/SPARK-13265.
author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> 2016-02-11 15:00:23 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-11 15:00:23 -0800
commit: efb65e09bcfa4542348f5cd37fe5c14047b862e5 (patch)
tree: 96a689bcee02ad50e5966267749a3a0a80a58658 /mllib
parent: c86009ceb9613201b41319245526a13b1f0b5451 (diff)
download: spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.tar.gz
spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.tar.bz2
spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.zip
1 files changed, 7 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index 8484b1f801..7b2504361a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -19,10 +19,10 @@ package org.apache.spark.ml.util
 
 import java.io.IOException
 
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path
 import org.json4s._
-import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
+import org.json4s.JsonDSL._
 
 import org.apache.spark.{Logging, SparkContext}
 import org.apache.spark.annotation.{Experimental, Since}
@@ -75,13 +75,14 @@ abstract class MLWriter extends BaseReadWrite with Logging {
   @throws[IOException]("If the input path already exists but overwrite is not enabled.")
   def save(path: String): Unit = {
     val hadoopConf = sc.hadoopConfiguration
-    val fs = FileSystem.get(hadoopConf)
-    val p = new Path(path)
-    if (fs.exists(p)) {
+    val outputPath = new Path(path)
+    val fs = outputPath.getFileSystem(hadoopConf)
+    val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
+    if (fs.exists(qualifiedOutputPath)) {
       if (shouldOverwrite) {
         logInfo(s"Path $path already exists. It will be overwritten.")
         // TODO: Revert back to the original content if save is not successful.
-        fs.delete(p, true)
+        fs.delete(qualifiedOutputPath, true)
       } else {
         throw new IOException(
           s"Path $path already exists. Please use write.overwrite().save(path) to overwrite it.")
author	Yu ISHIKAWA <yuu.ishikawa@gmail.com>	2016-02-11 15:00:23 -0800
committer	Xiangrui Meng <meng@databricks.com>	2016-02-11 15:00:23 -0800
commit	efb65e09bcfa4542348f5cd37fe5c14047b862e5 (patch)
tree	96a689bcee02ad50e5966267749a3a0a80a58658 /mllib
parent	c86009ceb9613201b41319245526a13b1f0b5451 (diff)
download	spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.tar.gz spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.tar.bz2 spark-efb65e09bcfa4542348f5cd37fe5c14047b862e5.zip