[SPARK-1439, SPARK-1440] Generate unified Scaladoc across projects and Javadocs

I used the sbt-unidoc plugin (https://github.com/sbt/sbt-unidoc) to create a unified Scaladoc of our public packages, and to generate Javadocs as well. One limitation is that I haven't found an easy way to exclude packages in the Javadoc; there is an SBT task that identifies Java sources to run javadoc on, but it's been very difficult to modify it from outside to change what is set in the unidoc package. Some SBT-savvy people should help with this. The Javadoc site also lacks package-level descriptions and things like that, so we may want to look into that. We may decide not to post these right now if it's too limited compared to the Scala one. Example of the built doc site: http://people.csail.mit.edu/matei/spark-unified-docs/ Author: Matei Zaharia <matei@databricks.com> This patch had conflicts when merged, resolved by Committer: Patrick Wendell <pwendell@gmail.com> Closes #457 from mateiz/better-docs and squashes the following commits: a63d4a3 [Matei Zaharia] Skip Java/Scala API docs for Python package 5ea1f43 [Matei Zaharia] Fix links to Java classes in Java guide, fix some JS for scrolling to anchors on page load f05abc0 [Matei Zaharia] Don't include java.lang package names 995e992 [Matei Zaharia] Skip internal packages and class names with $ in JavaDoc a14a93c [Matei Zaharia] typo 76ce64d [Matei Zaharia] Add groups to Javadoc index page, and a first package-info.java ed6f994 [Matei Zaharia] Generate JavaDoc as well, add titles, update doc site to use unified docs acb993d [Matei Zaharia] Add Unidoc plugin for the projects we want Unidoced
author: Matei Zaharia <matei@databricks.com> 2014-04-21 21:57:40 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-04-21 21:57:40 -0700
commit: fc7838470465474f777bd17791c1bb5f9c348521 (patch)
tree: 6809dfd66ebafa6dced2018585a3a1f9ba270d53 /project
parent: 04c37b6f749dc2418cc28c89964cdc687dfcbd51 (diff)
download: spark-fc7838470465474f777bd17791c1bb5f9c348521.tar.gz
spark-fc7838470465474f777bd17791c1bb5f9c348521.tar.bz2
spark-fc7838470465474f777bd17791c1bb5f9c348521.zip
2 files changed, 63 insertions, 12 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 33f9d644ca..f115f0d3a0 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -23,6 +23,8 @@ import AssemblyKeys._
 import scala.util.Properties
 import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
 import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
+import sbtunidoc.Plugin._
+import UnidocKeys._
 
 import scala.collection.JavaConversions._
 
@@ -31,6 +33,7 @@ import scala.collection.JavaConversions._
 
 object SparkBuild extends Build {
   val SPARK_VERSION = "1.0.0-SNAPSHOT"
+  val SPARK_VERSION_SHORT = SPARK_VERSION.replaceAll("-SNAPSHOT", "")
 
   // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
   // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
@@ -184,12 +187,17 @@ object SparkBuild extends Build {
     // Show full stack trace and duration in test cases.
     testOptions in Test += Tests.Argument("-oDF"),
     // Remove certain packages from Scaladoc
-    scalacOptions in (Compile,doc) := Seq("-groups", "-skip-packages", Seq(
-      "akka",
-      "org.apache.spark.network",
-      "org.apache.spark.deploy",
-      "org.apache.spark.util.collection"
-      ).mkString(":")),
+    scalacOptions in (Compile, doc) := Seq(
+      "-groups",
+      "-skip-packages", Seq(
+        "akka",
+        "org.apache.spark.api.python",
+        "org.apache.spark.network",
+        "org.apache.spark.deploy",
+        "org.apache.spark.util.collection"
+      ).mkString(":"),
+      "-doc-title", "Spark " + SPARK_VERSION_SHORT + " ScalaDoc"
+    ),
 
     // Only allow one test at a time, even across projects, since they run in the same JVM
     concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
@@ -283,7 +291,7 @@ object SparkBuild extends Build {
     publishMavenStyle in MavenCompile := true,
     publishLocal in MavenCompile <<= publishTask(publishLocalConfiguration in MavenCompile, deliverLocal),
     publishLocalBoth <<= Seq(publishLocal in MavenCompile, publishLocal).dependOn
-  ) ++ net.virtualvoid.sbt.graph.Plugin.graphSettings ++ ScalaStyleSettings
+  ) ++ net.virtualvoid.sbt.graph.Plugin.graphSettings ++ ScalaStyleSettings ++ genjavadocSettings
 
   val akkaVersion = "2.2.3-shaded-protobuf"
   val chillVersion = "0.3.1"
@@ -349,15 +357,57 @@ object SparkBuild extends Build {
     libraryDependencies ++= maybeAvro
   )
 
-  def rootSettings = sharedSettings ++ Seq(
-    publish := {}
+  // Create a colon-separated package list adding "org.apache.spark" in front of all of them,
+  // for easier specification of JavaDoc package groups
+  def packageList(names: String*): String = {
+    names.map(s => "org.apache.spark." + s).mkString(":")
+  }
+
+  def rootSettings = sharedSettings ++ scalaJavaUnidocSettings ++ Seq(
+    publish := {},
+
+    unidocProjectFilter in (ScalaUnidoc, unidoc) :=
+      inAnyProject -- inProjects(repl, examples, tools, yarn, yarnAlpha),
+    unidocProjectFilter in (JavaUnidoc, unidoc) :=
+      inAnyProject -- inProjects(repl, examples, bagel, graphx, catalyst, tools, yarn, yarnAlpha),
+
+    // Skip class names containing $ and some internal packages in Javadocs
+    unidocAllSources in (JavaUnidoc, unidoc) := {
+      (unidocAllSources in (JavaUnidoc, unidoc)).value
+        .map(_.filterNot(_.getName.contains("$")))
+        .map(_.filterNot(_.getCanonicalPath.contains("akka")))
+        .map(_.filterNot(_.getCanonicalPath.contains("deploy")))
+        .map(_.filterNot(_.getCanonicalPath.contains("network")))
+        .map(_.filterNot(_.getCanonicalPath.contains("executor")))
+        .map(_.filterNot(_.getCanonicalPath.contains("python")))
+        .map(_.filterNot(_.getCanonicalPath.contains("collection")))
+    },
+
+    // Javadoc options: create a window title, and group key packages on index page
+    javacOptions in doc := Seq(
+      "-windowtitle", "Spark " + SPARK_VERSION_SHORT + " JavaDoc",
+      "-public",
+      "-group", "Core Java API", packageList("api.java", "api.java.function"),
+      "-group", "Spark Streaming", packageList(
+        "streaming.api.java", "streaming.flume", "streaming.kafka",
+        "streaming.mqtt", "streaming.twitter", "streaming.zeromq"
+      ),
+      "-group", "MLlib", packageList(
+        "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg",
+        "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation",
+        "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration",
+        "mllib.tree.impurity", "mllib.tree.model", "mllib.util"
+      ),
+      "-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"),
+      "-noqualifier", "java.lang"
+    )
   )
 
   def replSettings = sharedSettings ++ Seq(
     name := "spark-repl",
-   libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "scala-compiler" % v ),
-   libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "jline"          % v ),
-   libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "scala-reflect"  % v )
+    libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "scala-compiler" % v),
+    libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "jline"          % v),
+    libraryDependencies <+= scalaVersion(v => "org.scala-lang"  % "scala-reflect"  % v)
   )
 
   def examplesSettings = sharedSettings ++ Seq(
diff --git a/project/plugins.sbt b/project/plugins.sbt
index c25a25863d..0cd16fd5be 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -23,3 +23,4 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6")
 
 addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0")
 
+addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.0")
author	Matei Zaharia <matei@databricks.com>	2014-04-21 21:57:40 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-04-21 21:57:40 -0700
commit	fc7838470465474f777bd17791c1bb5f9c348521 (patch)
tree	6809dfd66ebafa6dced2018585a3a1f9ba270d53 /project
parent	04c37b6f749dc2418cc28c89964cdc687dfcbd51 (diff)
download	spark-fc7838470465474f777bd17791c1bb5f9c348521.tar.gz spark-fc7838470465474f777bd17791c1bb5f9c348521.tar.bz2 spark-fc7838470465474f777bd17791c1bb5f9c348521.zip