[SPARK-6113] [ML] Small cleanups after original tree API PR

This does a few clean-ups. With this PR, all spark.ml tree components have ```private[ml]``` constructors. CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #5567 from jkbradley/dt-api-dt2 and squashes the following commits: 2263b5b [Joseph K. Bradley] Added note about tree example issue. bb9f610 [Joseph K. Bradley] Small cleanups after original tree API PR
author: Joseph K. Bradley <joseph@databricks.com> 2015-04-21 21:44:44 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-04-21 21:44:44 -0700
commit: 607eff0edfc10a1473fa9713a0500bf09f105c13 (patch)
tree: 534b2b61a9876750dd73fbc9016bf795427459c1 /examples
parent: 70f9f8ff38560967f2c84de77263a5455c45c495 (diff)
download: spark-607eff0edfc10a1473fa9713a0500bf09f105c13.tar.gz
spark-607eff0edfc10a1473fa9713a0500bf09f105c13.tar.bz2
spark-607eff0edfc10a1473fa9713a0500bf09f105c13.zip
1 files changed, 19 insertions, 6 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
index 921b396e79..2cd515c89d 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
@@ -44,6 +44,13 @@ import org.apache.spark.sql.{SQLContext, DataFrame}
  * {{{
  * ./bin/run-example ml.DecisionTreeExample [options]
  * }}}
+ * Note that Decision Trees can take a large amount of memory.  If the run-example command above
+ * fails, try running via spark-submit and specifying the amount of memory as at least 1g.
+ * For local mode, run
+ * {{{
+ * ./bin/spark-submit --class org.apache.spark.examples.ml.DecisionTreeExample --driver-memory 1g
+ *   [examples JAR path] [options]
+ * }}}
  * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
  */
 object DecisionTreeExample {
@@ -70,7 +77,7 @@ object DecisionTreeExample {
     val parser = new OptionParser[Params]("DecisionTreeExample") {
       head("DecisionTreeExample: an example decision tree app.")
       opt[String]("algo")
-        .text(s"algorithm (Classification, Regression), default: ${defaultParams.algo}")
+        .text(s"algorithm (classification, regression), default: ${defaultParams.algo}")
         .action((x, c) => c.copy(algo = x))
       opt[Int]("maxDepth")
         .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
@@ -222,18 +229,23 @@ object DecisionTreeExample {
     // (1) For classification, re-index classes.
     val labelColName = if (algo == "classification") "indexedLabel" else "label"
     if (algo == "classification") {
-      val labelIndexer = new StringIndexer().setInputCol("labelString").setOutputCol(labelColName)
+      val labelIndexer = new StringIndexer()
+        .setInputCol("labelString")
+        .setOutputCol(labelColName)
       stages += labelIndexer
     }
     // (2) Identify categorical features using VectorIndexer.
     //     Features with more than maxCategories values will be treated as continuous.
-    val featuresIndexer = new VectorIndexer().setInputCol("features")
-      .setOutputCol("indexedFeatures").setMaxCategories(10)
+    val featuresIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(10)
     stages += featuresIndexer
     // (3) Learn DecisionTree
     val dt = algo match {
       case "classification" =>
-        new DecisionTreeClassifier().setFeaturesCol("indexedFeatures")
+        new DecisionTreeClassifier()
+          .setFeaturesCol("indexedFeatures")
           .setLabelCol(labelColName)
           .setMaxDepth(params.maxDepth)
           .setMaxBins(params.maxBins)
@@ -242,7 +254,8 @@ object DecisionTreeExample {
           .setCacheNodeIds(params.cacheNodeIds)
           .setCheckpointInterval(params.checkpointInterval)
       case "regression" =>
-        new DecisionTreeRegressor().setFeaturesCol("indexedFeatures")
+        new DecisionTreeRegressor()
+          .setFeaturesCol("indexedFeatures")
           .setLabelCol(labelColName)
           .setMaxDepth(params.maxDepth)
           .setMaxBins(params.maxBins)
author	Joseph K. Bradley <joseph@databricks.com>	2015-04-21 21:44:44 -0700
committer	Xiangrui Meng <meng@databricks.com>	2015-04-21 21:44:44 -0700
commit	607eff0edfc10a1473fa9713a0500bf09f105c13 (patch)
tree	534b2b61a9876750dd73fbc9016bf795427459c1 /examples
parent	70f9f8ff38560967f2c84de77263a5455c45c495 (diff)
download	spark-607eff0edfc10a1473fa9713a0500bf09f105c13.tar.gz spark-607eff0edfc10a1473fa9713a0500bf09f105c13.tar.bz2 spark-607eff0edfc10a1473fa9713a0500bf09f105c13.zip