about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Peter Rudenko <petro.rudenko@gmail.com> 2015-02-15 20:51:32 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-02-15 20:51:38 -0800
commit: 9cf7d7088d245b9b41ec78295cd2d6e3e395793d (patch)
tree: 8e23aac48197a870a20912293e13c15f5803aa17 /mllib
parent: db3c539f20e17e327b2f284bf6fbb3f1abd7fe64 (diff)
download: spark-9cf7d7088d245b9b41ec78295cd2d6e3e395793d.tar.gz
spark-9cf7d7088d245b9b41ec78295cd2d6e3e395793d.tar.bz2
spark-9cf7d7088d245b9b41ec78295cd2d6e3e395793d.zip
[Ml] SPARK-5796 Don't transform data on a last estimator in Pipeline
If it's a last estimator in Pipeline there's no need to transform data, since there's no next stage that would consume this data.

Author: Peter Rudenko <petro.rudenko@gmail.com>

Closes #4590 from petro-rudenko/patch-1 and squashes the following commits:

d13ec33 [Peter Rudenko] [Ml] SPARK-5796 Don't transform data on a last estimator in Pipeline

(cherry picked from commit c78a12c4cc4d4312c4ee1069d3b218882d32d678)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 4
1 files changed, 3 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index bb291e6e1f..5607ed21af 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -114,7 +114,9 @@ class Pipeline extends Estimator[PipelineModel] {
throw new IllegalArgumentException(
s"Do not support stage $stage of type ${stage.getClass}")
}
- curDataset = transformer.transform(curDataset, paramMap)
+ if (index < indexOfLastEstimator) {
+ curDataset = transformer.transform(curDataset, paramMap)
+ }
transformers += transformer
} else {
transformers += stage.asInstanceOf[Transformer]