From 439e361010e51d2213c92ccabed5093be92a72ee Mon Sep 17 00:00:00 2001
From: François Garillot
Date: Tue, 3 May 2016 11:42:47 -0700
Subject: [SPARK-9819][STREAMING][DOCUMENTATION] Clarify doc for invReduceFunc in incremental versions of reduceByWindow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- that reduceFunc and invReduceFunc should be associative
- that the intermediate result in iterated applications of inverseReduceFunc is its first argument

Author: François Garillot

Closes #8103 from huitseeker/issue/invReduceFuncDoc.
---
 python/pyspark/streaming/dstream.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index 2056663872..67a0819601 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -454,7 +454,9 @@ class DStream(object):
         This is more efficient than `invReduceFunc` is None.
 
         @param reduceFunc: associative and commutative reduce function
-        @param invReduceFunc: inverse reduce function of `reduceFunc`
+        @param invReduceFunc: inverse reduce function of `reduceFunc`; such that for all y,
+                              and invertible x:
+                              `invReduceFunc(reduceFunc(x, y), x) = y`
         @param windowDuration: width of the window; must be a multiple of this DStream's
                                batching interval
         @param slideDuration: sliding interval of the window (i.e., the interval after which
-- 
cgit v1.2.3