aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala8
1 files changed, 6 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
index 380291ac22..9fe264656e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
@@ -128,9 +128,13 @@ private[spark] object DecisionTreeMetadata extends Logging {
// based on the number of training examples.
if (strategy.categoricalFeaturesInfo.nonEmpty) {
val maxCategoriesPerFeature = strategy.categoricalFeaturesInfo.values.max
+ val maxCategory =
+ strategy.categoricalFeaturesInfo.find(_._2 == maxCategoriesPerFeature).get._1
require(maxCategoriesPerFeature <= maxPossibleBins,
- s"DecisionTree requires maxBins (= $maxPossibleBins) >= max categories " +
- s"in categorical features (= $maxCategoriesPerFeature)")
+ s"DecisionTree requires maxBins (= $maxPossibleBins) to be at least as large as the " +
+ s"number of values in each categorical feature, but categorical feature $maxCategory " +
+ s"has $maxCategoriesPerFeature values. Considering remove this and other categorical " +
+ "features with a large number of values, or add more training examples.")
}
val unorderedFeatures = new mutable.HashSet[Int]()