diff options
author | hyukjinkwon <gurwls223@gmail.com> | 2016-09-07 19:24:03 +0900 |
---|---|---|
committer | Kousuke Saruta <sarutak@oss.nttdata.co.jp> | 2016-09-07 19:24:03 +0900 |
commit | 6b41195bca65de6236168d96758f93b85f1dd7ca (patch) | |
tree | 039cd03fcbf3901c2ca839030d6695c9fd8e8bca | |
parent | 3ce3a282c8463408f9a2db93c1748e8df8087e07 (diff) | |
download | spark-6b41195bca65de6236168d96758f93b85f1dd7ca.tar.gz spark-6b41195bca65de6236168d96758f93b85f1dd7ca.tar.bz2 spark-6b41195bca65de6236168d96758f93b85f1dd7ca.zip |
[SPARK-17339][SPARKR][CORE] Fix some R tests and use Path.toUri in SparkContext for Windows paths in SparkR
## What changes were proposed in this pull request?
This PR fixes the Windows path issues in several APIs. Please refer to https://issues.apache.org/jira/browse/SPARK-17339 for more details.
## How was this patch tested?
Tests via AppVeyor CI - https://ci.appveyor.com/project/HyukjinKwon/spark/build/82-SPARK-17339-fix-r
Also tested manually:
![2016-09-06 3 14 38](https://cloud.githubusercontent.com/assets/6477701/18263406/b93a98be-7444-11e6-9521-b28ee65a4771.png)
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #14960 from HyukjinKwon/SPARK-17339.
-rw-r--r-- | R/pkg/inst/tests/testthat/test_mllib.R | 14 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkContext.scala | 4 |
2 files changed, 12 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index ca25f2c7e8..ac896cfbcf 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -22,6 +22,11 @@ context("MLlib functions") # Tests for MLlib functions in SparkR sparkSession <- sparkR.session(enableHiveSupport = FALSE) +absoluteSparkPath <- function(x) { + sparkHome <- sparkR.conf("spark.home") + file.path(sparkHome, x) +} + test_that("formula of spark.glm", { training <- suppressWarnings(createDataFrame(iris)) # directly calling the spark API @@ -354,7 +359,8 @@ test_that("spark.kmeans", { }) test_that("spark.mlp", { - df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") + df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), + source = "libsvm") model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1) @@ -616,7 +622,7 @@ test_that("spark.gaussianMixture", { }) test_that("spark.lda with libsvm", { - text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm") + text <- read.df(absoluteSparkPath("data/mllib/sample_lda_libsvm_data.txt"), source = "libsvm") model <- spark.lda(text, optimizer = "em") stats <- summary(model, 10) @@ -652,7 +658,7 @@ test_that("spark.lda with libsvm", { }) test_that("spark.lda with text input", { - text <- read.text("data/mllib/sample_lda_data.txt") + text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt")) model <- spark.lda(text, optimizer = "online", features = "value") stats <- summary(model) @@ -688,7 +694,7 @@ test_that("spark.lda with text input", { }) test_that("spark.posterior and spark.perplexity", { - text <- read.text("data/mllib/sample_lda_data.txt") + text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt")) model <- spark.lda(text, features = "value", k = 3) # Assert perplexities are equal diff 
--git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 744d5d0f7a..4aa795a58a 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -992,7 +992,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. - FileSystem.get(new URI(path), hadoopConfiguration) + FileSystem.getLocal(hadoopConfiguration) // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration)) @@ -1081,7 +1081,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // This is a hack to enforce loading hdfs-site.xml. // See SPARK-11227 for details. - FileSystem.get(new URI(path), hadoopConfiguration) + FileSystem.getLocal(hadoopConfiguration) // The call to NewHadoopJob automatically adds security credentials to conf, // so we don't need to explicitly add them ourselves |