From e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Tue, 26 Apr 2016 13:58:29 -0700
Subject: [SPARK-14907][MLLIB] Use repartition in GLMRegressionModel.save

## What changes were proposed in this pull request?

This PR changes the `GLMRegressionModel.save` function as shown in the following code, making it consistent with other algorithms' parquet writes.

```
-    val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
-    // TODO: repartition with 1 partition after SPARK-5532 gets fixed
-    dataRDD.write.parquet(Loader.dataPath(path))
+    sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
```

## How was this patch tested?

Manual.

Author: Dongjoon Hyun

Closes #12676 from dongjoon-hyun/SPARK-14907.
---
 .../org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
index a6e1767fe2..7696fdf2dc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
@@ -48,7 +48,6 @@ private[regression] object GLMRegressionModel {
       weights: Vector,
       intercept: Double): Unit = {
     val sqlContext = SQLContext.getOrCreate(sc)
-    import sqlContext.implicits._
 
     // Create JSON metadata.
     val metadata = compact(render(
@@ -58,9 +57,7 @@ private[regression] object GLMRegressionModel {
 
     // Create Parquet data.
     val data = Data(weights, intercept)
-    val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
-    // TODO: repartition with 1 partition after SPARK-5532 gets fixed
-    dataRDD.write.parquet(Loader.dataPath(path))
+    sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
   }
 
   /**
-- 
cgit v1.2.3