author     hyukjinkwon <gurwls223@gmail.com>    2016-11-19 11:24:15 +0000
committer  Sean Owen <sowen@cloudera.com>       2016-11-19 11:24:15 +0000
commit     d5b1d5fc80153571c308130833d0c0774de62c92 (patch)
tree       a194c154699e2edb55c146232c66251d4ac77f18 /external
parent     db9fb9baacbf8640dd37a507b7450db727c7e6ea (diff)
[SPARK-18445][BUILD][DOCS] Fix the markdown for `Note:`/`NOTE:`/`Note that`/`'''Note:'''` across Scala/Java API documentation
## What changes were proposed in this pull request?

It seems the following forms are used interchangeably for notes in the Scala/Java API documentation:

- `Note:`
- `NOTE:`
- `Note that`
- `'''Note:'''`
- `@note`

This PR proposes to fix those to use the `@note` tag consistently.

**Before**

- Scala

  ![2016-11-17 6 16 39](https://cloud.githubusercontent.com/assets/6477701/20383180/1a7aed8c-acf2-11e6-9611-5eaf6d52c2e0.png)

- Java

  ![2016-11-17 6 14 41](https://cloud.githubusercontent.com/assets/6477701/20383096/c8ffc680-acf1-11e6-914a-33460bf1401d.png)

**After**

- Scala

  ![2016-11-17 6 16 44](https://cloud.githubusercontent.com/assets/6477701/20383167/09940490-acf2-11e6-937a-0d5e1dc2cadf.png)

- Java

  ![2016-11-17 6 13 39](https://cloud.githubusercontent.com/assets/6477701/20383132/e7c2a57e-acf1-11e6-9c47-b849674d4d88.png)

## How was this patch tested?

The notes were found via

```bash
grep -r "NOTE: " . | \            # Note:|NOTE:|Note that|'''Note:'''
grep -v "// NOTE: " | \           # lines starting with // do not appear in API documentation.
grep -E '.scala|.java' | \        # Java/Scala files
grep -v Suite | \                 # exclude tests
grep -v Test | \                  # exclude tests
grep -e 'org.apache.spark.api.java' \          # packages that appear in API documentation
     -e 'org.apache.spark.api.java.function' \ # note that this is a regular expression, so actual matches were mostly `org/apache/spark/api/java/functions ...`
     -e 'org.apache.spark.api.r' \
     ...
```

```bash
grep -r "Note that " . | \        # Note:|NOTE:|Note that|'''Note:'''
grep -v "// Note that " | \       # lines starting with // do not appear in API documentation.
grep -E '.scala|.java' | \        # Java/Scala files
grep -v Suite | \                 # exclude tests
grep -v Test | \                  # exclude tests
grep -e 'org.apache.spark.api.java' \ # packages that appear in API documentation
     -e 'org.apache.spark.api.java.function' \
     -e 'org.apache.spark.api.r' \
     ...
```

```bash
grep -r "Note: " . | \            # Note:|NOTE:|Note that|'''Note:'''
grep -v "// Note: " | \           # lines starting with // do not appear in API documentation.
grep -E '.scala|.java' | \        # Java/Scala files
grep -v Suite | \                 # exclude tests
grep -v Test | \                  # exclude tests
grep -e 'org.apache.spark.api.java' \ # packages that appear in API documentation
     -e 'org.apache.spark.api.java.function' \
     -e 'org.apache.spark.api.r' \
     ...
```

```bash
grep -r "'''Note:'''" . | \       # Note:|NOTE:|Note that|'''Note:'''
grep -v "// '''Note:''' " | \     # lines starting with // do not appear in API documentation.
grep -E '.scala|.java' | \        # Java/Scala files
grep -v Suite | \                 # exclude tests
grep -v Test | \                  # exclude tests
grep -e 'org.apache.spark.api.java' \ # packages that appear in API documentation
     -e 'org.apache.spark.api.java.function' \
     -e 'org.apache.spark.api.r' \
     ...
```

Each occurrence was then fixed one by one, comparing against the generated API documentation and the access modifiers. After that, the result was manually checked via `jekyll build`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #15889 from HyukjinKwon/SPARK-18437.
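For readers skimming the diff below, here is a minimal, self-contained sketch (hypothetical object and method names, not taken from the patch) of the doc-comment transformation this commit applies: free-form `Note:`/`NOTE:`/`Note that` prose inside a Scaladoc comment is replaced with the `@note` tag, placed after the parameter tags, so that both the Scala and Java API docs render it as a dedicated note block.

```scala
// Hypothetical example; only the doc-comment style matters here.
object DocStyleExample {

  /**
   * Create an input stream that pulls messages from a streaming source.
   *
   * (Before this change the caveat below would have been written inline as
   * "Note: ..." prose at the top of the comment.)
   *
   * @param appName name used to identify this consumer application
   * @param endpointUrl endpoint of the streaming service to connect to
   * @note The AWS credentials will be discovered using the
   *       DefaultAWSCredentialsProviderChain on the workers.
   */
  def createStream(appName: String, endpointUrl: String): Unit = {
    // Body intentionally trivial; only the Scaladoc above is of interest.
    println(s"createStream($appName, $endpointUrl)")
  }
}
```

The Kinesis and Kafka hunks below follow exactly this pattern.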
Diffstat (limited to 'external')
-rw-r--r--  external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala                2
-rw-r--r--  external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala                   8
-rw-r--r--  external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala            60
-rw-r--r--  external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala  2
4 files changed, 34 insertions, 38 deletions
diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala
index 5bcc5124b0..341081a338 100644
--- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala
+++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala
@@ -279,7 +279,7 @@ private[kafka010] case class KafkaSource(
}
}.toArray
- // Create a RDD that reads from Kafka and get the (key, value) pair as byte arrays.
+ // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays.
val rdd = new KafkaSourceRDD(
sc, executorKafkaParams, offsetRanges, pollTimeoutMs).map { cr =>
Row(cr.key, cr.value, cr.topic, cr.partition, cr.offset, cr.timestamp, cr.timestampType.id)
diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
index b17e198077..56f0cb0b16 100644
--- a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
+++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
@@ -223,7 +223,7 @@ object KafkaUtils {
}
/**
- * Create a RDD from Kafka using offset ranges for each topic and partition.
+ * Create an RDD from Kafka using offset ranges for each topic and partition.
*
* @param sc SparkContext object
* @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
@@ -255,7 +255,7 @@ object KafkaUtils {
}
/**
- * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you
+ * Create an RDD from Kafka using offset ranges for each topic and partition. This allows you
* specify the Kafka leader to connect to (to optimize fetching) and access the message as well
* as the metadata.
*
@@ -303,7 +303,7 @@ object KafkaUtils {
}
/**
- * Create a RDD from Kafka using offset ranges for each topic and partition.
+ * Create an RDD from Kafka using offset ranges for each topic and partition.
*
* @param jsc JavaSparkContext object
* @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
@@ -340,7 +340,7 @@ object KafkaUtils {
}
/**
- * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you
+ * Create an RDD from Kafka using offset ranges for each topic and partition. This allows you
* specify the Kafka leader to connect to (to optimize fetching) and access the message as well
* as the metadata.
*
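As context for the `createRDD` doc comments touched above, a hedged usage sketch follows. It is not part of this patch; the broker address, topic name, and offset values are placeholders, and an existing `SparkContext` is assumed to be passed in.

```scala
import kafka.serializer.StringDecoder
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka.{KafkaUtils, OffsetRange}

// Read a fixed slice of a Kafka topic as an RDD of (key, value) pairs.
// Placeholder values: adjust the broker list, topic, and offsets as needed.
def readSlice(sc: SparkContext): RDD[(String, String)] = {
  val kafkaParams = Map("metadata.broker.list" -> "broker1:9092")
  val offsetRanges = Array(OffsetRange("events", 0, 0L, 100L))
  KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder](
    sc, kafkaParams, offsetRanges)
}
```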
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala
index a0007d33d6..b2daffa34c 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala
@@ -33,10 +33,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
- * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
- * gets the AWS credentials.
- *
* @param ssc StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -57,6 +53,10 @@ object KinesisUtils {
* StorageLevel.MEMORY_AND_DISK_2 is recommended.
* @param messageHandler A custom message handler that can generate a generic output from a
* Kinesis `Record`, which contains both message data, and metadata.
+ *
+ * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
+ * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
+ * gets the AWS credentials.
*/
def createStream[T: ClassTag](
ssc: StreamingContext,
@@ -81,10 +81,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note:
- * The given AWS credentials will get saved in DStream checkpoints if checkpointing
- * is enabled. Make sure that your checkpoint directory is secure.
- *
* @param ssc StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -107,6 +103,9 @@ object KinesisUtils {
* Kinesis `Record`, which contains both message data, and metadata.
* @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain)
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
+ *
+ * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing
+ * is enabled. Make sure that your checkpoint directory is secure.
*/
// scalastyle:off
def createStream[T: ClassTag](
@@ -134,10 +133,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
- * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
- * gets the AWS credentials.
- *
* @param ssc StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -156,6 +151,10 @@ object KinesisUtils {
* details on the different types of checkpoints.
* @param storageLevel Storage level to use for storing the received objects.
* StorageLevel.MEMORY_AND_DISK_2 is recommended.
+ *
+ * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
+ * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
+ * gets the AWS credentials.
*/
def createStream(
ssc: StreamingContext,
@@ -178,10 +177,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note:
- * The given AWS credentials will get saved in DStream checkpoints if checkpointing
- * is enabled. Make sure that your checkpoint directory is secure.
- *
* @param ssc StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -202,6 +197,9 @@ object KinesisUtils {
* StorageLevel.MEMORY_AND_DISK_2 is recommended.
* @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain)
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
+ *
+ * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing
+ * is enabled. Make sure that your checkpoint directory is secure.
*/
def createStream(
ssc: StreamingContext,
@@ -225,10 +223,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
- * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
- * gets the AWS credentials.
- *
* @param jssc Java StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -250,6 +244,10 @@ object KinesisUtils {
* @param messageHandler A custom message handler that can generate a generic output from a
* Kinesis `Record`, which contains both message data, and metadata.
* @param recordClass Class of the records in DStream
+ *
+ * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
+ * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
+ * gets the AWS credentials.
*/
def createStream[T](
jssc: JavaStreamingContext,
@@ -272,10 +270,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note:
- * The given AWS credentials will get saved in DStream checkpoints if checkpointing
- * is enabled. Make sure that your checkpoint directory is secure.
- *
* @param jssc Java StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -299,6 +293,9 @@ object KinesisUtils {
* @param recordClass Class of the records in DStream
* @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain)
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
+ *
+ * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing
+ * is enabled. Make sure that your checkpoint directory is secure.
*/
// scalastyle:off
def createStream[T](
@@ -326,10 +323,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
- * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
- * gets the AWS credentials.
- *
* @param jssc Java StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -348,6 +341,10 @@ object KinesisUtils {
* details on the different types of checkpoints.
* @param storageLevel Storage level to use for storing the received objects.
* StorageLevel.MEMORY_AND_DISK_2 is recommended.
+ *
+ * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
+ * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain
+ * gets the AWS credentials.
*/
def createStream(
jssc: JavaStreamingContext,
@@ -367,10 +364,6 @@ object KinesisUtils {
* Create an input stream that pulls messages from a Kinesis stream.
* This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
*
- * Note:
- * The given AWS credentials will get saved in DStream checkpoints if checkpointing
- * is enabled. Make sure that your checkpoint directory is secure.
- *
* @param jssc Java StreamingContext object
* @param kinesisAppName Kinesis application name used by the Kinesis Client Library
* (KCL) to update DynamoDB
@@ -391,6 +384,9 @@ object KinesisUtils {
* StorageLevel.MEMORY_AND_DISK_2 is recommended.
* @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain)
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
+ *
+ * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing
+ * is enabled. Make sure that your checkpoint directory is secure.
*/
def createStream(
jssc: JavaStreamingContext,
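Similarly, for the `createStream` variants whose documentation is updated above, here is a hedged usage sketch (not part of this patch). The application name, stream name, endpoint, and region are placeholders, an existing `StreamingContext` is assumed to be passed in, and credentials are left to the DefaultAWSCredentialsProviderChain, as the `@note` tags describe.

```scala
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kinesis.KinesisUtils

// Build a byte-array DStream from a Kinesis stream; all string values are placeholders.
def kinesisStream(ssc: StreamingContext): ReceiverInputDStream[Array[Byte]] =
  KinesisUtils.createStream(
    ssc,
    "myKinesisApp",                             // KCL application name (used for the DynamoDB table)
    "myStream",                                 // Kinesis stream name
    "https://kinesis.us-east-1.amazonaws.com",  // endpoint URL
    "us-east-1",                                // region name
    InitialPositionInStream.LATEST,             // where to start reading
    Seconds(10),                                // KCL checkpoint interval
    StorageLevel.MEMORY_AND_DISK_2)             // recommended storage level
```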
diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
index 905c33834d..a4d81a6809 100644
--- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
+++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
@@ -221,7 +221,7 @@ abstract class KinesisBackedBlockRDDTests(aggregateTestData: Boolean)
assert(collectedData.toSet === testData.toSet)
// Verify that the block fetching is skipped when isBlockValid is set to false.
- // This is done by using a RDD whose data is only in memory but is set to skip block fetching
+ // This is done by using an RDD whose data is only in memory but is set to skip block fetching
// Using that RDD will throw exception, as it skips block fetching even if the blocks are in
// in BlockManager.
if (testIsBlockValid) {