aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test
diff options
context:
space:
mode:
authorMechCoder <manojkumarsivaraj334@gmail.com>2015-02-17 11:19:23 -0800
committerJoseph K. Bradley <joseph@databricks.com>2015-02-17 11:19:23 -0800
commit9b746f380869b54d673e3758ca5e4475f76c864a (patch)
tree9ba48802eaa0e716419ad31e25880f47908eb306 /mllib/src/test
parentb271c265b742fa6947522eda4592e9e6a7fd1f3a (diff)
downloadspark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.gz
spark-9b746f380869b54d673e3758ca5e4475f76c864a.tar.bz2
spark-9b746f380869b54d673e3758ca5e4475f76c864a.zip
[SPARK-3381] [MLlib] Eliminate bins for unordered features in DecisionTrees
For unordered features, it is sufficient to use splits alone, since the threshold of a split corresponds to the threshold of the bin's HighSplit and the bin's LowSplit is never used. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #4231 from MechCoder/spark-3381 and squashes the following commits: 58c19a5 [MechCoder] COSMIT c274b74 [MechCoder] Remove unordered feature calculation in labeledPointToTreePoint b2b9b89 [MechCoder] COSMIT d3ee042 [MechCoder] [SPARK-3381] [MLlib] Eliminate bins for unordered features
Diffstat (limited to 'mllib/src/test')
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala37
1 files changed, 1 insertions, 36 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
index 7b1aed5ffe..4c162df810 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
@@ -190,7 +190,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext {
assert(splits.length === 2)
assert(bins.length === 2)
assert(splits(0).length === 3)
- assert(bins(0).length === 6)
+ assert(bins(0).length === 0)
// Expecting 2^2 - 1 = 3 bins/splits
assert(splits(0)(0).feature === 0)
@@ -228,41 +228,6 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext {
assert(splits(1)(2).categories.contains(0.0))
assert(splits(1)(2).categories.contains(1.0))
- // Check bins.
-
- assert(bins(0)(0).category === Double.MinValue)
- assert(bins(0)(0).lowSplit.categories.length === 0)
- assert(bins(0)(0).highSplit.categories.length === 1)
- assert(bins(0)(0).highSplit.categories.contains(0.0))
- assert(bins(1)(0).category === Double.MinValue)
- assert(bins(1)(0).lowSplit.categories.length === 0)
- assert(bins(1)(0).highSplit.categories.length === 1)
- assert(bins(1)(0).highSplit.categories.contains(0.0))
-
- assert(bins(0)(1).category === Double.MinValue)
- assert(bins(0)(1).lowSplit.categories.length === 1)
- assert(bins(0)(1).lowSplit.categories.contains(0.0))
- assert(bins(0)(1).highSplit.categories.length === 1)
- assert(bins(0)(1).highSplit.categories.contains(1.0))
- assert(bins(1)(1).category === Double.MinValue)
- assert(bins(1)(1).lowSplit.categories.length === 1)
- assert(bins(1)(1).lowSplit.categories.contains(0.0))
- assert(bins(1)(1).highSplit.categories.length === 1)
- assert(bins(1)(1).highSplit.categories.contains(1.0))
-
- assert(bins(0)(2).category === Double.MinValue)
- assert(bins(0)(2).lowSplit.categories.length === 1)
- assert(bins(0)(2).lowSplit.categories.contains(1.0))
- assert(bins(0)(2).highSplit.categories.length === 2)
- assert(bins(0)(2).highSplit.categories.contains(1.0))
- assert(bins(0)(2).highSplit.categories.contains(0.0))
- assert(bins(1)(2).category === Double.MinValue)
- assert(bins(1)(2).lowSplit.categories.length === 1)
- assert(bins(1)(2).lowSplit.categories.contains(1.0))
- assert(bins(1)(2).highSplit.categories.length === 2)
- assert(bins(1)(2).highSplit.categories.contains(1.0))
- assert(bins(1)(2).highSplit.categories.contains(0.0))
-
}
test("Multiclass classification with ordered categorical features: split and bin calculations") {