From 09da43d514dc4487af88056404953a1f8fd8bee1 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley"
Date: Fri, 29 Apr 2016 20:51:24 -0700
Subject: [SPARK-13786][ML][PYTHON] Removed save/load for python tuning

## What changes were proposed in this pull request?

Per discussion on [https://github.com/apache/spark/pull/12604], this removes ML persistence for Python tuning (TrainValidationSplit, CrossValidator, and their Models) since they do not handle nesting easily. This support should be re-designed and added in the next release.

## How was this patch tested?

Removed unit test elements saving and loading the tuning algorithms, but kept tests to save and load their bestModel fields.

Author: Joseph K. Bradley

Closes #12782 from jkbradley/remove-python-tuning-saveload.
---
 python/pyspark/ml/tuning.py | 244 +-------------------------------------------------------
 1 file changed, 4 insertions(+), 240 deletions(-)

(limited to 'python/pyspark/ml/tuning.py')

diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 22f9680cab..eb1f029ebb 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -23,7 +23,6 @@ from pyspark import since, keyword_only from pyspark.ml import Estimator, Model from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasSeed -from pyspark.ml.util import JavaMLWriter, JavaMLReader, MLReadable, MLWritable from pyspark.ml.wrapper import JavaParams from pyspark.sql.functions import rand from pyspark.mllib.common import inherit_doc, _py2java @@ -141,37 +140,8 @@ class ValidatorParams(HasSeed): """ return self.getOrDefault(self.evaluator) - @classmethod - def _from_java_impl(cls, java_stage): - """ - Return Python estimator, estimatorParamMaps, and evaluator from a Java ValidatorParams. - """ - - # Load information from java_stage to the instance. 
- estimator = JavaParams._from_java(java_stage.getEstimator()) - evaluator = JavaParams._from_java(java_stage.getEvaluator()) - epms = [estimator._transfer_param_map_from_java(epm) - for epm in java_stage.getEstimatorParamMaps()] - return estimator, epms, evaluator - - def _to_java_impl(self): - """ - Return Java estimator, estimatorParamMaps, and evaluator from this Python instance. - """ - - gateway = SparkContext._gateway - cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap - - java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) - for idx, epm in enumerate(self.getEstimatorParamMaps()): - java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) - - java_estimator = self.getEstimator()._to_java() - java_evaluator = self.getEvaluator()._to_java() - return java_estimator, java_epms, java_evaluator - -class CrossValidator(Estimator, ValidatorParams, MLReadable, MLWritable): +class CrossValidator(Estimator, ValidatorParams): """ K-fold cross validation. @@ -288,58 +258,8 @@ class CrossValidator(Estimator, ValidatorParams, MLReadable, MLWritable): newCV.setEvaluator(self.getEvaluator().copy(extra)) return newCV - @since("2.0.0") - def write(self): - """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) - - @since("2.0.0") - def save(self, path): - """Save this ML instance to the given path, a shortcut of `write().save(path)`.""" - self.write().save(path) - - @classmethod - @since("2.0.0") - def read(cls): - """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) - @classmethod - def _from_java(cls, java_stage): - """ - Given a Java CrossValidator, create and return a Python wrapper of it. - Used for ML persistence. - """ - - estimator, epms, evaluator = super(CrossValidator, cls)._from_java_impl(java_stage) - numFolds = java_stage.getNumFolds() - seed = java_stage.getSeed() - # Create a new instance of this stage. 
- py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator, - numFolds=numFolds, seed=seed) - py_stage._resetUid(java_stage.uid()) - return py_stage - - def _to_java(self): - """ - Transfer this instance to a Java CrossValidator. Used for ML persistence. - - :return: Java object equivalent to this instance. - """ - - estimator, epms, evaluator = super(CrossValidator, self)._to_java_impl() - - _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidator", self.uid) - _java_obj.setEstimatorParamMaps(epms) - _java_obj.setEvaluator(evaluator) - _java_obj.setEstimator(estimator) - _java_obj.setSeed(self.getSeed()) - _java_obj.setNumFolds(self.getNumFolds()) - - return _java_obj - - -class CrossValidatorModel(Model, ValidatorParams, MLReadable, MLWritable): +class CrossValidatorModel(Model, ValidatorParams): """ Model from k-fold cross validation. @@ -372,59 +292,8 @@ class CrossValidatorModel(Model, ValidatorParams, MLReadable, MLWritable): avgMetrics = self.avgMetrics return CrossValidatorModel(bestModel, avgMetrics) - @since("2.0.0") - def write(self): - """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) - - @since("2.0.0") - def save(self, path): - """Save this ML instance to the given path, a shortcut of `write().save(path)`.""" - self.write().save(path) - @classmethod - @since("2.0.0") - def read(cls): - """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) - - @classmethod - def _from_java(cls, java_stage): - """ - Given a Java CrossValidatorModel, create and return a Python wrapper of it. - Used for ML persistence. - """ - - # Load information from java_stage to the instance. - bestModel = JavaParams._from_java(java_stage.bestModel()) - avgMetrics = list(java_stage.avgMetrics()) - estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage) - # Create a new instance of this stage. 
- py_stage = cls(bestModel=bestModel, avgMetrics=avgMetrics)\ - .setEstimator(estimator).setEstimatorParamMaps(epms).setEvaluator(evaluator) - py_stage._resetUid(java_stage.uid()) - return py_stage - - def _to_java(self): - """ - Transfer this instance to a Java CrossValidatorModel. Used for ML persistence. - - :return: Java object equivalent to this instance. - """ - - _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidatorModel", - self.uid, - self.bestModel._to_java(), - self.avgMetrics) - estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl() - - _java_obj.set("evaluator", evaluator) - _java_obj.set("estimator", estimator) - _java_obj.set("estimatorParamMaps", epms) - return _java_obj - - -class TrainValidationSplit(Estimator, ValidatorParams, MLReadable, MLWritable): +class TrainValidationSplit(Estimator, ValidatorParams): """ Train-Validation-Split. @@ -535,59 +404,8 @@ class TrainValidationSplit(Estimator, ValidatorParams, MLReadable, MLWritable): newTVS.setEvaluator(self.getEvaluator().copy(extra)) return newTVS - @since("2.0.0") - def write(self): - """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) - - @since("2.0.0") - def save(self, path): - """Save this ML instance to the given path, a shortcut of `write().save(path)`.""" - self.write().save(path) - - @classmethod - @since("2.0.0") - def read(cls): - """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) - - @classmethod - def _from_java(cls, java_stage): - """ - Given a Java TrainValidationSplit, create and return a Python wrapper of it. - Used for ML persistence. - """ - - estimator, epms, evaluator = super(TrainValidationSplit, cls)._from_java_impl(java_stage) - trainRatio = java_stage.getTrainRatio() - seed = java_stage.getSeed() - # Create a new instance of this stage. 
- py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator, - trainRatio=trainRatio, seed=seed) - py_stage._resetUid(java_stage.uid()) - return py_stage - - def _to_java(self): - """ - Transfer this instance to a Java TrainValidationSplit. Used for ML persistence. - - :return: Java object equivalent to this instance. - """ - - estimator, epms, evaluator = super(TrainValidationSplit, self)._to_java_impl() - - _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.TrainValidationSplit", - self.uid) - _java_obj.setEstimatorParamMaps(epms) - _java_obj.setEvaluator(evaluator) - _java_obj.setEstimator(estimator) - _java_obj.setTrainRatio(self.getTrainRatio()) - _java_obj.setSeed(self.getSeed()) - return _java_obj - - -class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable): +class TrainValidationSplitModel(Model, ValidatorParams): """ Model from train validation split. @@ -617,60 +435,6 @@ class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable): extra = dict() return TrainValidationSplitModel(self.bestModel.copy(extra)) - @since("2.0.0") - def write(self): - """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) - - @since("2.0.0") - def save(self, path): - """Save this ML instance to the given path, a shortcut of `write().save(path)`.""" - self.write().save(path) - - @classmethod - @since("2.0.0") - def read(cls): - """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) - - @classmethod - def _from_java(cls, java_stage): - """ - Given a Java TrainValidationSplitModel, create and return a Python wrapper of it. - Used for ML persistence. - """ - - # Load information from java_stage to the instance. - bestModel = JavaParams._from_java(java_stage.bestModel()) - estimator, epms, evaluator = \ - super(TrainValidationSplitModel, cls)._from_java_impl(java_stage) - # Create a new instance of this stage. 
- py_stage = cls(bestModel=bestModel)\ - .setEstimator(estimator).setEstimatorParamMaps(epms).setEvaluator(evaluator) - py_stage._resetUid(java_stage.uid()) - return py_stage - - def _to_java(self): - """ - Transfer this instance to a Java TrainValidationSplitModel. Used for ML persistence. - - :return: Java object equivalent to this instance. - """ - - sc = SparkContext._active_spark_context - - _java_obj = JavaParams._new_java_obj( - "org.apache.spark.ml.tuning.TrainValidationSplitModel", - self.uid, - self.bestModel._to_java(), - _py2java(sc, [])) - estimator, epms, evaluator = super(TrainValidationSplitModel, self)._to_java_impl() - - _java_obj.set("evaluator", evaluator) - _java_obj.set("estimator", estimator) - _java_obj.set("estimatorParamMaps", epms) - return _java_obj - if __name__ == "__main__": import doctest -- cgit v1.2.3