aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-04-26 13:58:29 -0700
committerJoseph K. Bradley <joseph@databricks.com>2016-04-26 13:58:29 -0700
commite4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29 (patch)
tree035ddd1e085c4d558495150c34ba7a9fdd7f6294
parent7131b03bcf00cdda99e350f697946d4020a0822f (diff)
downloadspark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.tar.gz
spark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.tar.bz2
spark-e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29.zip
[SPARK-14907][MLLIB] Use repartition in GLMRegressionModel.save
## What changes were proposed in this pull request? This PR changes `GLMRegressionModel.save` function like the following code that is similar to other algorithms' parquet write. ``` - val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - // TODO: repartition with 1 partition after SPARK-5532 gets fixed - dataRDD.write.parquet(Loader.dataPath(path)) + sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path)) ``` ## How was this patch tested? Manual. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12676 from dongjoon-hyun/SPARK-14907.
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala5
1 files changed, 1 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
index a6e1767fe2..7696fdf2dc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
@@ -48,7 +48,6 @@ private[regression] object GLMRegressionModel {
weights: Vector,
intercept: Double): Unit = {
val sqlContext = SQLContext.getOrCreate(sc)
- import sqlContext.implicits._
// Create JSON metadata.
val metadata = compact(render(
@@ -58,9 +57,7 @@ private[regression] object GLMRegressionModel {
// Create Parquet data.
val data = Data(weights, intercept)
- val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
- // TODO: repartition with 1 partition after SPARK-5532 gets fixed
- dataRDD.write.parquet(Loader.dataPath(path))
+ sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
}
/**