diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-04-26 13:58:29 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-04-26 13:58:29 -0700 |
commit | e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29 (patch) | |
tree | 035ddd1e085c4d558495150c34ba7a9fdd7f6294 | |
parent | 7131b03bcf00cdda99e350f697946d4020a0822f (diff) | |
download | spark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.tar.gz spark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.tar.bz2 spark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.zip |
[SPARK-14907][MLLIB] Use repartition in GLMRegressionModel.save
## What changes were proposed in this pull request?
This PR changes the `GLMRegressionModel.save` function as shown in the following code, making it consistent with the Parquet write path used by other algorithms.
```
- val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
- // TODO: repartition with 1 partition after SPARK-5532 gets fixed
- dataRDD.write.parquet(Loader.dataPath(path))
+ sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
```
## How was this patch tested?
Manual.
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #12676 from dongjoon-hyun/SPARK-14907.
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala | 5 |
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala index a6e1767fe2..7696fdf2dc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala @@ -48,7 +48,6 @@ private[regression] object GLMRegressionModel { weights: Vector, intercept: Double): Unit = { val sqlContext = SQLContext.getOrCreate(sc) - import sqlContext.implicits._ // Create JSON metadata. val metadata = compact(render( @@ -58,9 +57,7 @@ private[regression] object GLMRegressionModel { // Create Parquet data. val data = Data(weights, intercept) - val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - // TODO: repartition with 1 partition after SPARK-5532 gets fixed - dataRDD.write.parquet(Loader.dataPath(path)) + sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path)) } /** |