about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-xexamples/src/main/python/mllib/decision_tree_runner.py4
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala3
-rw-r--r--python/pyspark/mllib/tree.py14
-rwxr-xr-xpython/run-tests1
4 files changed, 14 insertions, 8 deletions
diff --git a/examples/src/main/python/mllib/decision_tree_runner.py b/examples/src/main/python/mllib/decision_tree_runner.py
index 8efadb5223..db96a7cb37 100755
--- a/examples/src/main/python/mllib/decision_tree_runner.py
+++ b/examples/src/main/python/mllib/decision_tree_runner.py
@@ -124,7 +124,9 @@ if __name__ == "__main__":
(reindexedData, origToNewLabels) = reindexClassLabels(points)
# Train a classifier.
- model = DecisionTree.trainClassifier(reindexedData, numClasses=2)
+ categoricalFeaturesInfo = {}  # no categorical features
+ model = DecisionTree.trainClassifier(reindexedData, numClasses=2,
+ categoricalFeaturesInfo=categoricalFeaturesInfo)
# Print learned tree and stats.
print "Trained DecisionTree for classification:"
print " Model numNodes: %d\n" % model.numNodes()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index 6b9a8f72c2..5cdd258f6c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -22,6 +22,7 @@ import scala.collection.JavaConverters._
import org.apache.spark.annotation.Experimental
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.Logging
+import org.apache.spark.mllib.rdd.RDDFunctions._
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.configuration.Strategy
import org.apache.spark.mllib.tree.configuration.Algo._
@@ -826,7 +827,7 @@ object DecisionTree extends Serializable with Logging {
// Calculate bin aggregates.
timer.start("aggregation")
val binAggregates = {
- input.aggregate(Array.fill[Double](binAggregateLength)(0))(binSeqOp, binCombOp)
+ input.treeAggregate(Array.fill[Double](binAggregateLength)(0))(binSeqOp, binCombOp)
}
timer.stop("aggregation")
logDebug("binAggregates.length = " + binAggregates.length)
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index e1a4671709..e9d778df5a 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -88,7 +88,8 @@ class DecisionTree(object):
It will probably be modified for Spark v1.2.
Example usage:
- >>> from numpy import array, ndarray
+ >>> from numpy import array
+ >>> import sys
>>> from pyspark.mllib.regression import LabeledPoint
>>> from pyspark.mllib.tree import DecisionTree
>>> from pyspark.mllib.linalg import SparseVector
@@ -99,15 +100,15 @@ class DecisionTree(object):
... LabeledPoint(1.0, [2.0]),
... LabeledPoint(1.0, [3.0])
... ]
- >>>
- >>> model = DecisionTree.trainClassifier(sc.parallelize(data), numClasses=2)
- >>> print(model)
+ >>> categoricalFeaturesInfo = {} # no categorical features
+ >>> model = DecisionTree.trainClassifier(sc.parallelize(data), numClasses=2,
+ ... categoricalFeaturesInfo=categoricalFeaturesInfo)
>>> sys.stdout.write(str(model))
DecisionTreeModel classifier
If (feature 0 <= 0.5)
Predict: 0.0
Else (feature 0 > 0.5)
Predict: 1.0
-
>>> model.predict(array([1.0])) > 0
True
>>> model.predict(array([0.0])) == 0
@@ -119,7 +120,8 @@ class DecisionTree(object):
... LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
... ]
>>>
- >>> model = DecisionTree.trainRegressor(sc.parallelize(sparse_data))
+ >>> model = DecisionTree.trainRegressor(sc.parallelize(sparse_data),
+ ... categoricalFeaturesInfo=categoricalFeaturesInfo)
>>> model.predict(array([0.0, 1.0])) == 1
True
>>> model.predict(array([0.0, 0.0])) == 0
diff --git a/python/run-tests b/python/run-tests
index 1218edcbd7..a6271e0cf5 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -79,6 +79,7 @@ run_test "pyspark/mllib/random.py"
run_test "pyspark/mllib/recommendation.py"
run_test "pyspark/mllib/regression.py"
run_test "pyspark/mllib/tests.py"
+run_test "pyspark/mllib/tree.py"
run_test "pyspark/mllib/util.py"
if [[ $FAILED == 0 ]]; then