diff options
author | Sung Chung <schung@alpinenow.com> | 2014-11-01 16:58:26 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-11-01 16:58:26 -0700 |
commit | 56f2c61cde3f5d906c2a58e9af1a661222f2c679 (patch) | |
tree | caa90f8479249480567dd75cec39629ff4f865ab /examples/src | |
parent | d8176b1c2f22247ee724041aefa1af9118cf861d (diff) | |
download | spark-56f2c61cde3f5d906c2a58e9af1a661222f2c679.tar.gz spark-56f2c61cde3f5d906c2a58e9af1a661222f2c679.tar.bz2 spark-56f2c61cde3f5d906c2a58e9af1a661222f2c679.zip |
[SPARK-3161][MLLIB] Adding a node Id caching mechanism for training deci...
...sion trees. jkbradley mengxr chouqin Please review this.
Author: Sung Chung <schung@alpinenow.com>
Closes #2868 from codedeft/SPARK-3161 and squashes the following commits:
5f5a156 [Sung Chung] [SPARK-3161][MLLIB] Adding a node Id caching mechanism for training decision trees.
Diffstat (limited to 'examples/src')
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala | 25 |
1 files changed, 23 insertions, 2 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index f98730366b..49751a3049 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -62,7 +62,10 @@ object DecisionTreeRunner { minInfoGain: Double = 0.0, numTrees: Int = 1, featureSubsetStrategy: String = "auto", - fracTest: Double = 0.2) extends AbstractParams[Params] + fracTest: Double = 0.2, + useNodeIdCache: Boolean = false, + checkpointDir: Option[String] = None, + checkpointInterval: Int = 10) extends AbstractParams[Params] def main(args: Array[String]) { val defaultParams = Params() @@ -102,6 +105,21 @@ object DecisionTreeRunner { .text(s"fraction of data to hold out for testing. If given option testInput, " + s"this option is ignored. default: ${defaultParams.fracTest}") .action((x, c) => c.copy(fracTest = x)) + opt[Boolean]("useNodeIdCache") + .text(s"whether to use node Id cache during training, " + + s"default: ${defaultParams.useNodeIdCache}") + .action((x, c) => c.copy(useNodeIdCache = x)) + opt[String]("checkpointDir") + .text(s"checkpoint directory where intermediate node Id caches will be stored, " + + s"default: ${defaultParams.checkpointDir match { + case Some(strVal) => strVal + case None => "None" + }}") + .action((x, c) => c.copy(checkpointDir = Some(x))) + opt[Int]("checkpointInterval") + .text(s"how often to checkpoint the node Id cache, " + + s"default: ${defaultParams.checkpointInterval}") + .action((x, c) => c.copy(checkpointInterval = x)) opt[String]("testInput") .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") @@ -236,7 +254,10 @@ object DecisionTreeRunner { maxBins = params.maxBins, numClassesForClassification = numClasses, minInstancesPerNode = params.minInstancesPerNode, - minInfoGain = params.minInfoGain) + minInfoGain = params.minInfoGain, + useNodeIdCache = params.useNodeIdCache, + checkpointDir = params.checkpointDir, + checkpointInterval = params.checkpointInterval) if (params.numTrees == 1) { val startTime = System.nanoTime() val model = DecisionTree.train(training, strategy) |