author     Cheng Lian <lian@databricks.com>           2015-12-01 10:21:31 -0800
committer  Michael Armbrust <michael@databricks.com>  2015-12-01 10:21:31 -0800
commit     69dbe6b40df35d488d4ee343098ac70d00bbdafb (patch)
tree       a9e966eeb648874ba5c09d4c5855828d928d8f15 /core
parent     1401166576c7018c5f9c31e0a6703d5fb16ea339 (diff)
[SPARK-12046][DOC] Fixes various ScalaDoc/JavaDoc issues
This PR backports PR #10039 to master.

Author: Cheng Lian <lian@databricks.com>

Closes #10063 from liancheng/spark-12046.doc-fix.master.
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/java/org/apache/spark/api/java/function/Function4.java                         |  2
-rw-r--r--  core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java                      |  2
-rw-r--r--  core/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java                     |  2
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala                               | 16
-rw-r--r--  core/src/main/scala/org/apache/spark/memory/package.scala                                     | 14
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala                                   |  2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala                               |  6
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala                                    |  2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/Task.scala                                     |  5
-rw-r--r--  core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala                   | 13
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Vector.scala                                        |  1
-rw-r--r--  core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala                     | 30
-rw-r--r--  core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala  |  7
13 files changed, 53 insertions(+), 49 deletions(-)
diff --git a/core/src/main/java/org/apache/spark/api/java/function/Function4.java b/core/src/main/java/org/apache/spark/api/java/function/Function4.java
index fd727d6486..9c35a22ca9 100644
--- a/core/src/main/java/org/apache/spark/api/java/function/Function4.java
+++ b/core/src/main/java/org/apache/spark/api/java/function/Function4.java
@@ -23,5 +23,5 @@ import java.io.Serializable;
* A four-argument function that takes arguments of type T1, T2, T3 and T4 and returns an R.
*/
public interface Function4<T1, T2, T3, T4, R> extends Serializable {
- public R call(T1 v1, T2 v2, T3 v3, T4 v4) throws Exception;
+ R call(T1 v1, T2 v2, T3 v3, T4 v4) throws Exception;
}
diff --git a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
index 2a10435b75..f30d42ee57 100644
--- a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
+++ b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
@@ -23,5 +23,5 @@ import java.io.Serializable;
* A function with no return value.
*/
public interface VoidFunction<T> extends Serializable {
- public void call(T t) throws Exception;
+ void call(T t) throws Exception;
}
diff --git a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
index 6c576ab678..da9ae1c9c5 100644
--- a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
+++ b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
@@ -23,5 +23,5 @@ import java.io.Serializable;
* A two-argument function that takes arguments of type T1 and T2 with no return value.
*/
public interface VoidFunction2<T1, T2> extends Serializable {
- public void call(T1 v1, T2 v2) throws Exception;
+ void call(T1 v1, T2 v2) throws Exception;
}
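
The three interface tweaks above only drop the redundant `public` modifier (interface methods are implicitly public). For context, a minimal usage sketch, not part of the patch, implementing VoidFunction from Scala and passing it to the Java RDD API; the JavaSparkContext is assumed to already exist in the application:

    import java.util.Arrays
    import org.apache.spark.api.java.JavaSparkContext
    import org.apache.spark.api.java.function.VoidFunction

    val jsc: JavaSparkContext = ???   // assumed to be provided by the application
    val lines = jsc.parallelize(Arrays.asList("a", "b", "c"))
    lines.foreach(new VoidFunction[String] {
      // call is implicitly public on the interface, hence the dropped modifier above
      override def call(s: String): Unit = println(s)
    })
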
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 0b0c6e5bb8..87deaf20e2 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -215,13 +215,13 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
/**
* Generic function to combine the elements for each key using a custom set of aggregation
* functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
- * "combined type" C * Note that V and C can be different -- for example, one might group an
+ * "combined type" C. Note that V and C can be different -- for example, one might group an
* RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
* functions:
*
- * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
- * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
- * - `mergeCombiners`, to combine two C's into a single one.
+ * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
+ * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
+ * - `mergeCombiners`, to combine two C's into a single one.
*
* In addition, users can control the partitioning of the output RDD, the serializer that is use
* for the shuffle, and whether to perform map-side aggregation (if a mapper can produce multiple
@@ -247,13 +247,13 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
/**
* Generic function to combine the elements for each key using a custom set of aggregation
* functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
- * "combined type" C * Note that V and C can be different -- for example, one might group an
+ * "combined type" C. Note that V and C can be different -- for example, one might group an
* RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
* functions:
*
- * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
- * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
- * - `mergeCombiners`, to combine two C's into a single one.
+ * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
+ * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
+ * - `mergeCombiners`, to combine two C's into a single one.
*
* In addition, users can control the partitioning of the output RDD. This method automatically
* uses map-side aggregation in shuffling the RDD.
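
For context, the combineByKey contract documented above can be exercised through the Java API as well. The following is a minimal sketch, not part of the patch, written in Scala against JavaPairRDD; the input RDD is assumed to be supplied by the caller, and the three anonymous classes play the roles of createCombiner, mergeValue and mergeCombiners:

    import org.apache.spark.api.java.JavaPairRDD
    import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2}

    def perKeyLists(pairs: JavaPairRDD[Int, Int]): JavaPairRDD[Int, List[Int]] = {
      pairs.combineByKey(
        new JFunction[Int, List[Int]] {                      // createCombiner: V => C
          override def call(v: Int): List[Int] = List(v)
        },
        new JFunction2[List[Int], Int, List[Int]] {          // mergeValue: (C, V) => C
          override def call(c: List[Int], v: Int): List[Int] = v :: c
        },
        new JFunction2[List[Int], List[Int], List[Int]] {    // mergeCombiners: (C, C) => C
          override def call(c1: List[Int], c2: List[Int]): List[Int] = c1 ::: c2
        })
    }
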
diff --git a/core/src/main/scala/org/apache/spark/memory/package.scala b/core/src/main/scala/org/apache/spark/memory/package.scala
index 564e30d2ff..3d00cd9cb6 100644
--- a/core/src/main/scala/org/apache/spark/memory/package.scala
+++ b/core/src/main/scala/org/apache/spark/memory/package.scala
@@ -21,13 +21,13 @@ package org.apache.spark
* This package implements Spark's memory management system. This system consists of two main
* components, a JVM-wide memory manager and a per-task manager:
*
- * - [[org.apache.spark.memory.MemoryManager]] manages Spark's overall memory usage within a JVM.
- * This component implements the policies for dividing the available memory across tasks and for
- * allocating memory between storage (memory used caching and data transfer) and execution (memory
- * used by computations, such as shuffles, joins, sorts, and aggregations).
- * - [[org.apache.spark.memory.TaskMemoryManager]] manages the memory allocated by individual tasks.
- * Tasks interact with TaskMemoryManager and never directly interact with the JVM-wide
- * MemoryManager.
+ * - [[org.apache.spark.memory.MemoryManager]] manages Spark's overall memory usage within a JVM.
+ * This component implements the policies for dividing the available memory across tasks and for
+ * allocating memory between storage (memory used caching and data transfer) and execution
+ * (memory used by computations, such as shuffles, joins, sorts, and aggregations).
+ * - [[org.apache.spark.memory.TaskMemoryManager]] manages the memory allocated by individual
+ * tasks. Tasks interact with TaskMemoryManager and never directly interact with the JVM-wide
+ * MemoryManager.
*
* Internally, each of these components have additional abstractions for memory bookkeeping:
*
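
At the user level, the storage/execution split that MemoryManager implements is tuned through SparkConf. A hedged sketch, not part of the patch; the keys below are the unified memory manager settings of this generation of Spark, and the values are purely illustrative:

    import org.apache.spark.SparkConf

    val conf = new SparkConf()
      .setAppName("memory-tuning-sketch")
      .set("spark.memory.fraction", "0.75")        // share of the heap handed to MemoryManager
      .set("spark.memory.storageFraction", "0.5")  // portion of that share reserved for storage
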
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index 935c3babd8..3a0ca1d813 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -70,7 +70,7 @@ private[spark] class CoGroupPartition(
*
* Note: This is an internal API. We recommend users use RDD.cogroup(...) instead of
* instantiating this directly.
-
+ *
* @param rdds parent RDDs.
* @param part partitioner used to partition the shuffle output
*/
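
Since the comment steers users toward RDD.cogroup(...) rather than instantiating CoGroupedRDD directly, here is a minimal sketch of that entry point (not part of the patch; assumes an existing SparkContext named sc):

    val ages   = sc.parallelize(Seq(("alice", 30), ("bob", 25)))
    val cities = sc.parallelize(Seq(("alice", "Paris")))
    // cogrouped: RDD[(String, (Iterable[Int], Iterable[String]))]
    val cogrouped = ages.cogroup(cities)
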
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index c6181902ac..44d195587a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -65,9 +65,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* Note that V and C can be different -- for example, one might group an RDD of type
* (Int, Int) into an RDD of type (Int, Seq[Int]). Users provide three functions:
*
- * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
- * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
- * - `mergeCombiners`, to combine two C's into a single one.
+ * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
+ * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
+ * - `mergeCombiners`, to combine two C's into a single one.
*
* In addition, users can control the partitioning of the output RDD, and whether to perform
* map-side aggregation (if a mapper can produce multiple items with the same key).
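
For reference, a minimal sketch of the three functions named in the comment above, building per-key lists from an RDD[(Int, Int)] (not part of the patch; assumes an existing SparkContext named sc):

    val pairs = sc.parallelize(Seq((1, 10), (1, 20), (2, 30)))
    val grouped = pairs.combineByKey[List[Int]](
      (v: Int) => List(v),                          // createCombiner: start a one-element list
      (c: List[Int], v: Int) => v :: c,             // mergeValue: fold a value into a list
      (c1: List[Int], c2: List[Int]) => c1 ::: c2)  // mergeCombiners: concatenate two lists
    // grouped: RDD[(Int, List[Int])], e.g. (1, List(20, 10)) and (2, List(30))
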
diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
index a013c3f66a..3ef506e156 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -86,7 +86,7 @@ class ShuffledRDD[K: ClassTag, V: ClassTag, C: ClassTag](
Array.tabulate[Partition](part.numPartitions)(i => new ShuffledRDDPartition(i))
}
- override def getPreferredLocations(partition: Partition): Seq[String] = {
+ override protected def getPreferredLocations(partition: Partition): Seq[String] = {
val tracker = SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
val dep = dependencies.head.asInstanceOf[ShuffleDependency[K, V, C]]
tracker.getPreferredLocationsForShuffle(dep, partition.index)
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
index 4fb32ba8cb..2fcd5aa57d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -33,8 +33,9 @@ import org.apache.spark.util.Utils
/**
* A unit of execution. We have two kinds of Task's in Spark:
- * - [[org.apache.spark.scheduler.ShuffleMapTask]]
- * - [[org.apache.spark.scheduler.ResultTask]]
+ *
+ * - [[org.apache.spark.scheduler.ShuffleMapTask]]
+ * - [[org.apache.spark.scheduler.ResultTask]]
*
* A Spark job consists of one or more stages. The very last stage in a job consists of multiple
* ResultTasks, while earlier stages consist of ShuffleMapTasks. A ResultTask executes the task
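
A minimal user-level sketch of the two task kinds described above (not part of the patch; assumes an existing SparkContext named sc): the shuffle introduced by reduceByKey yields an upstream stage of ShuffleMapTasks, and the collect() action runs a final stage of ResultTasks:

    val counts = sc.parallelize(Seq("a", "b", "a"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)   // shuffle boundary: the upstream stage runs ShuffleMapTasks
      .collect()            // the final stage runs ResultTasks, returning results to the driver
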
diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
index a1b1e1631e..e2951d8a3e 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
@@ -53,12 +53,13 @@ private[spark] object SerializationDebugger extends Logging {
/**
* Find the path leading to a not serializable object. This method is modeled after OpenJDK's
* serialization mechanism, and handles the following cases:
- * - primitives
- * - arrays of primitives
- * - arrays of non-primitive objects
- * - Serializable objects
- * - Externalizable objects
- * - writeReplace
+ *
+ * - primitives
+ * - arrays of primitives
+ * - arrays of non-primitive objects
+ * - Serializable objects
+ * - Externalizable objects
+ * - writeReplace
*
* It does not yet handle writeObject override, but that shouldn't be too hard to do either.
*/
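
For context, a minimal sketch (not part of the patch) of the user-facing failure this debugger annotates: a task closure that captures a non-serializable object fails at job submission, and the serialization path reported back is produced by code paths like the method documented above. Assumes an existing SparkContext named sc:

    class Registry(val name: String)   // deliberately NOT Serializable
    val registry = new Registry("prod")

    // Fails with org.apache.spark.SparkException: Task not serializable; the wrapped
    // NotSerializableException message typically carries the offending reference chain.
    sc.parallelize(1 to 3).map(i => registry.name + i).collect()
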
diff --git a/core/src/main/scala/org/apache/spark/util/Vector.scala b/core/src/main/scala/org/apache/spark/util/Vector.scala
index 2ed827eab4..6b3fa84919 100644
--- a/core/src/main/scala/org/apache/spark/util/Vector.scala
+++ b/core/src/main/scala/org/apache/spark/util/Vector.scala
@@ -122,6 +122,7 @@ class Vector(val elements: Array[Double]) extends Serializable {
override def toString: String = elements.mkString("(", ", ", ")")
}
+@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
object Vector {
def apply(elements: Array[Double]): Vector = new Vector(elements)
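
The replacement recommended by the new @deprecated message, sketched here for reference (not part of the patch):

    import org.apache.spark.mllib.linalg.Vectors

    val v = Vectors.dense(1.0, 2.0, 3.0)   // preferred over org.apache.spark.util.Vector
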
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
index 2440139ac9..44b1d90667 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
@@ -67,24 +67,24 @@ import org.apache.spark.storage.{BlockId, DiskBlockObjectWriter}
*
* At a high level, this class works internally as follows:
*
- * - We repeatedly fill up buffers of in-memory data, using either a PartitionedAppendOnlyMap if
- * we want to combine by key, or a PartitionedPairBuffer if we don't.
- * Inside these buffers, we sort elements by partition ID and then possibly also by key.
- * To avoid calling the partitioner multiple times with each key, we store the partition ID
- * alongside each record.
+ * - We repeatedly fill up buffers of in-memory data, using either a PartitionedAppendOnlyMap if
+ * we want to combine by key, or a PartitionedPairBuffer if we don't.
+ * Inside these buffers, we sort elements by partition ID and then possibly also by key.
+ * To avoid calling the partitioner multiple times with each key, we store the partition ID
+ * alongside each record.
*
- * - When each buffer reaches our memory limit, we spill it to a file. This file is sorted first
- * by partition ID and possibly second by key or by hash code of the key, if we want to do
- * aggregation. For each file, we track how many objects were in each partition in memory, so we
- * don't have to write out the partition ID for every element.
+ * - When each buffer reaches our memory limit, we spill it to a file. This file is sorted first
+ * by partition ID and possibly second by key or by hash code of the key, if we want to do
+ * aggregation. For each file, we track how many objects were in each partition in memory, so we
+ * don't have to write out the partition ID for every element.
*
- * - When the user requests an iterator or file output, the spilled files are merged, along with
- * any remaining in-memory data, using the same sort order defined above (unless both sorting
- * and aggregation are disabled). If we need to aggregate by key, we either use a total ordering
- * from the ordering parameter, or read the keys with the same hash code and compare them with
- * each other for equality to merge values.
+ * - When the user requests an iterator or file output, the spilled files are merged, along with
+ * any remaining in-memory data, using the same sort order defined above (unless both sorting
+ * and aggregation are disabled). If we need to aggregate by key, we either use a total ordering
+ * from the ordering parameter, or read the keys with the same hash code and compare them with
+ * each other for equality to merge values.
*
- * - Users are expected to call stop() at the end to delete all the intermediate files.
+ * - Users are expected to call stop() at the end to delete all the intermediate files.
*/
private[spark] class ExternalSorter[K, V, C](
context: TaskContext,
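
To make the spill-and-merge description above concrete, here is a toy, self-contained sketch of the same idea in plain Scala. It is emphatically not ExternalSorter (no partitioning, no aggregation, no streaming k-way merge); it only shows the buffer / spill-sorted-runs / merge shape the comment describes:

    import java.io.{File, PrintWriter}
    import scala.collection.mutable.ArrayBuffer
    import scala.io.Source

    object ToySpillSort {
      /** Sort (key, value) records, spilling sorted runs to temp files past maxInMemory. */
      def sort(records: Iterator[(Int, String)], maxInMemory: Int): Seq[(Int, String)] = {
        val buffer = ArrayBuffer[(Int, String)]()
        val spills = ArrayBuffer[File]()

        def spill(): Unit = {
          val file = File.createTempFile("toy-spill", ".txt")
          val out = new PrintWriter(file)
          buffer.sortBy(_._1).foreach { case (k, v) => out.println(s"$k\t$v") }
          out.close()
          spills += file
          buffer.clear()
        }

        records.foreach { record =>
          buffer += record
          if (buffer.size >= maxInMemory) spill()   // hit the "memory limit": spill a sorted run
        }

        // Merge step: read every spilled run back, combine with the remaining in-memory data,
        // and re-sort. A real implementation would stream a k-way merge instead of re-sorting.
        val spilled = spills.flatMap { file =>
          Source.fromFile(file).getLines().toList.map { line =>
            val Array(k, v) = line.split("\t", 2)
            (k.toInt, v)
          }
        }
        (spilled ++ buffer).sortBy(_._1)
      }
    }

    // Example: ToySpillSort.sort(Iterator((3, "c"), (1, "a"), (2, "b")), maxInMemory = 2)
    // returns the records ordered as (1, "a"), (2, "b"), (3, "c").
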
diff --git a/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala b/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala
index 38848e9018..5232c2bd8d 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala
@@ -23,9 +23,10 @@ import org.apache.spark.storage.DiskBlockObjectWriter
/**
* A common interface for size-tracking collections of key-value pairs that
- * - Have an associated partition for each key-value pair.
- * - Support a memory-efficient sorted iterator
- * - Support a WritablePartitionedIterator for writing the contents directly as bytes.
+ *
+ * - Have an associated partition for each key-value pair.
+ * - Support a memory-efficient sorted iterator
+ * - Support a WritablePartitionedIterator for writing the contents directly as bytes.
*/
private[spark] trait WritablePartitionedPairCollection[K, V] {
/**