From d4cd975718716be11a42ce92a47c45be1a46bd60 Mon Sep 17 00:00:00 2001
From: CodingCat
Date: Tue, 7 Feb 2017 20:25:18 -0800
Subject: [SPARK-19499][SS] Add more notes in the comments of Sink.addBatch()

## What changes were proposed in this pull request?

The addBatch method in the Sink trait is supposed to be synchronous so that it coordinates with the fault-tolerance design in StreamExecution (unlike the compute() method in DStream).

We need to add more notes in the comments of this method to remind developers.

## How was this patch tested?

existing tests

Author: CodingCat

Closes #16840 from CodingCat/SPARK-19499.
---
 .../main/scala/org/apache/spark/sql/execution/streaming/Sink.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
index 2571b59be5..d10cd3044e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
@@ -31,8 +31,11 @@ trait Sink {
    * this method is called more than once with the same batchId (which will happen in the case of
    * failures), then `data` should only be added once.
    *
-   * Note: You cannot apply any operators on `data` except consuming it (e.g., `collect/foreach`).
+   * Note 1: You cannot apply any operators on `data` except consuming it (e.g., `collect/foreach`).
    * Otherwise, you may get a wrong result.
+   *
+   * Note 2: The method is supposed to be executed synchronously, i.e. the method should only return
+   * after data is consumed by sink successfully.
    */
   def addBatch(batchId: Long, data: DataFrame): Unit
 }
--
cgit v1.2.3
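
For context beyond the patch itself, below is a minimal sketch of a custom `Sink` that follows both notes; the class name `ConsoleLikeSink`, the batch-tracking field, and the `println`-based consumption are illustrative assumptions, not part of Spark or of this change:

```scala
// Illustrative sketch only (not part of this patch): a minimal Sink implementation
// that respects the documented contract of addBatch().
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.streaming.Sink

class ConsoleLikeSink extends Sink {

  // Hypothetical bookkeeping: remember the last committed batch so that a replayed
  // batchId (which happens after failures) is only added once.
  @volatile private var latestBatchId: Long = -1L

  override def addBatch(batchId: Long, data: DataFrame): Unit = {
    if (batchId > latestBatchId) {
      // Note 1: only consume `data` (e.g. via collect/foreach); do not apply other operators.
      // Note 2: consume it synchronously -- do not hand `data` off to another thread and
      // return early, or the fault-tolerance guarantees of StreamExecution may be violated.
      data.collect().foreach(println)
      latestBatchId = batchId
    }
    // Otherwise the engine replayed an already-committed batch; skipping it keeps the
    // sink idempotent, as the contract requires.
  }
}
```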