diff options
author | krishnakalyan3 <krishnakalyan3@gmail.com> | 2016-07-27 15:37:38 +0200 |
---|---|---|
committer | Nick Pentreath <nickp@za.ibm.com> | 2016-07-27 15:37:38 +0200 |
commit | 7e8279fde176b08687adf2b410693b35cfbd4b46 (patch) | |
tree | 9342eb9868f5f5a1ed7358a8883605c31a15d24d /python | |
parent | 045fc3606698b017a4addf5277808883e6fe76b6 (diff) | |
download | spark-7e8279fde176b08687adf2b410693b35cfbd4b46.tar.gz spark-7e8279fde176b08687adf2b410693b35cfbd4b46.tar.bz2 spark-7e8279fde176b08687adf2b410693b35cfbd4b46.zip |
[SPARK-15254][DOC] Improve ML pipeline Cross Validation Scaladoc & PyDoc
## What changes were proposed in this pull request?
Updated ML pipeline Cross Validation Scaladoc & PyDoc.
## How was this patch tested?
Documentation update
(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)
Author: krishnakalyan3 <krishnakalyan3@gmail.com>
Closes #13894 from krishnakalyan3/kfold-cv.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/ml/tuning.py | 13 |
1 file changed, 11 insertions, 2 deletions
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 298314d46c..7f967e5463 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -143,7 +143,13 @@ class ValidatorParams(HasSeed):
 
 class CrossValidator(Estimator, ValidatorParams):
     """
-    K-fold cross validation.
+
+    K-fold cross validation performs model selection by splitting the dataset into a set of
+    non-overlapping randomly partitioned folds which are used as separate training and test datasets
+    e.g., with k=3 folds, K-fold cross validation will generate 3 (training, test) dataset pairs,
+    each of which uses 2/3 of the data for training and 1/3 for testing. Each fold is used as the
+    test set exactly once.
+
 
     >>> from pyspark.ml.classification import LogisticRegression
     >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator
@@ -260,7 +266,10 @@ class CrossValidator(Estimator, ValidatorParams):
 
 class CrossValidatorModel(Model, ValidatorParams):
     """
-    Model from k-fold cross validation.
+
+    CrossValidatorModel contains the model with the highest average cross-validation
+    metric across folds and uses this model to transform input data. CrossValidatorModel
+    also tracks the metrics for each param map evaluated.
 
     .. versionadded:: 1.4.0
     """