[SPARK-3381] [MLlib] Eliminate bins for unordered features in DecisionTrees

For unordered features, it is sufficient to use splits, since the threshold of the split corresponds to the threshold of the HighSplit of the bin and the LowSplit is never used. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #4231 from MechCoder/spark-3381 and squashes the following commits: 58c19a5 [MechCoder] COSMIT c274b74 [MechCoder] Remove unordered feature calculation in labeledPointToTreePoint b2b9b89 [MechCoder] COSMIT d3ee042 [MechCoder] [SPARK-3381] [MLlib] Eliminate bins for unordered features
author: MechCoder <manojkumarsivaraj334@gmail.com> 2015-02-17 11:19:23 -0800
committer: Joseph K. Bradley <joseph@databricks.com> 2015-02-17 11:19:23 -0800
commit: 9b746f380869b54d673e3758ca5e4475f76c864a (patch)
tree: 9ba48802eaa0e716419ad31e25880f47908eb306 /mllib/src/test
parent: b271c265b742fa6947522eda4592e9e6a7fd1f3a (diff)
download: spark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.gz
spark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.bz2
spark-9b746f380869b54d673e3758ca5e4475f76c864a.zip
1 files changed, 1 insertions, 36 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
index 7b1aed5ffe..4c162df810 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
@@ -190,7 +190,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext {
     assert(splits.length === 2)
     assert(bins.length === 2)
     assert(splits(0).length === 3)
-    assert(bins(0).length === 6)
+    assert(bins(0).length === 0)
 
     // Expecting 2^2 - 1 = 3 bins/splits
     assert(splits(0)(0).feature === 0)
@@ -228,41 +228,6 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext {
     assert(splits(1)(2).categories.contains(0.0))
     assert(splits(1)(2).categories.contains(1.0))
 
-    // Check bins.
-
-    assert(bins(0)(0).category === Double.MinValue)
-    assert(bins(0)(0).lowSplit.categories.length === 0)
-    assert(bins(0)(0).highSplit.categories.length === 1)
-    assert(bins(0)(0).highSplit.categories.contains(0.0))
-    assert(bins(1)(0).category === Double.MinValue)
-    assert(bins(1)(0).lowSplit.categories.length === 0)
-    assert(bins(1)(0).highSplit.categories.length === 1)
-    assert(bins(1)(0).highSplit.categories.contains(0.0))
-
-    assert(bins(0)(1).category === Double.MinValue)
-    assert(bins(0)(1).lowSplit.categories.length === 1)
-    assert(bins(0)(1).lowSplit.categories.contains(0.0))
-    assert(bins(0)(1).highSplit.categories.length === 1)
-    assert(bins(0)(1).highSplit.categories.contains(1.0))
-    assert(bins(1)(1).category === Double.MinValue)
-    assert(bins(1)(1).lowSplit.categories.length === 1)
-    assert(bins(1)(1).lowSplit.categories.contains(0.0))
-    assert(bins(1)(1).highSplit.categories.length === 1)
-    assert(bins(1)(1).highSplit.categories.contains(1.0))
-
-    assert(bins(0)(2).category === Double.MinValue)
-    assert(bins(0)(2).lowSplit.categories.length === 1)
-    assert(bins(0)(2).lowSplit.categories.contains(1.0))
-    assert(bins(0)(2).highSplit.categories.length === 2)
-    assert(bins(0)(2).highSplit.categories.contains(1.0))
-    assert(bins(0)(2).highSplit.categories.contains(0.0))
-    assert(bins(1)(2).category === Double.MinValue)
-    assert(bins(1)(2).lowSplit.categories.length === 1)
-    assert(bins(1)(2).lowSplit.categories.contains(1.0))
-    assert(bins(1)(2).highSplit.categories.length === 2)
-    assert(bins(1)(2).highSplit.categories.contains(1.0))
-    assert(bins(1)(2).highSplit.categories.contains(0.0))
-
   }
 
   test("Multiclass classification with ordered categorical features: split and bin calculations") {
author	MechCoder <manojkumarsivaraj334@gmail.com>	2015-02-17 11:19:23 -0800
committer	Joseph K. Bradley <joseph@databricks.com>	2015-02-17 11:19:23 -0800
commit	9b746f380869b54d673e3758ca5e4475f76c864a (patch)
tree	9ba48802eaa0e716419ad31e25880f47908eb306 /mllib/src/test
parent	b271c265b742fa6947522eda4592e9e6a7fd1f3a (diff)
download	spark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.gz spark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.bz2 spark-9b746f380869b54d673e3758ca5e4475f76c864a.zip