From 50a4fa774a0e8a17d7743b33ce8941bf4041144d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 8 Sep 2014 18:59:57 -0700 Subject: [SPARK-3443][MLLIB] update default values of tree: Adjust the default values of decision tree, based on the memory requirement discussed in https://github.com/apache/spark/pull/2125 : 1. maxMemoryInMB: 128 -> 256 2. maxBins: 100 -> 32 3. maxDepth: 4 -> 5 (in some example code) jkbradley Author: Xiangrui Meng Closes #2322 from mengxr/tree-defaults and squashes the following commits: cda453a [Xiangrui Meng] fix tests 5900445 [Xiangrui Meng] update comments 8c81831 [Xiangrui Meng] update default values of tree: --- python/pyspark/mllib/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'python/pyspark/mllib/tree.py') diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index a2fade61e9..ccc000ac70 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -138,7 +138,7 @@ class DecisionTree(object): @staticmethod def trainClassifier(data, numClasses, categoricalFeaturesInfo, - impurity="gini", maxDepth=4, maxBins=100): + impurity="gini", maxDepth=5, maxBins=32): """ Train a DecisionTreeModel for classification. @@ -170,7 +170,7 @@ class DecisionTree(object): @staticmethod def trainRegressor(data, categoricalFeaturesInfo, - impurity="variance", maxDepth=4, maxBins=100): + impurity="variance", maxDepth=5, maxBins=32): """ Train a DecisionTreeModel for regression. -- cgit v1.2.3