From 80bf48f437939ddc3bb82c8c7530c8ae419f8427 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Wed, 20 Apr 2016 10:32:01 -0700 Subject: [SPARK-14555] First cut of Python API for Structured Streaming ## What changes were proposed in this pull request? This patch provides a first cut of python APIs for structured streaming. This PR provides the new classes: - ContinuousQuery - Trigger - ProcessingTime in pyspark under `pyspark.sql.streaming`. In addition, it contains the new methods added under: - `DataFrameWriter` a) `startStream` b) `trigger` c) `queryName` - `DataFrameReader` a) `stream` - `DataFrame` a) `isStreaming` This PR doesn't contain all methods exposed for `ContinuousQuery`, for example: - `exception` - `sourceStatuses` - `sinkStatus` They may be added in a follow up. This PR also contains some very minor doc fixes in the Scala side. ## How was this patch tested? Python doc tests TODO: - [ ] verify Python docs look good Author: Burak Yavuz Author: Burak Yavuz Closes #12320 from brkyvz/stream-python. --- python/pyspark/sql/dataframe.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'python/pyspark/sql/dataframe.py') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 328bda6601..bbe15f5f90 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -197,6 +197,18 @@ class DataFrame(object): """ return self._jdf.isLocal() + @property + @since(2.0) + def isStreaming(self): + """Returns true if this :class:`Dataset` contains one or more sources that continuously + return data as it arrives. A :class:`Dataset` that reads data from a streaming source + must be executed as a :class:`ContinuousQuery` using the :func:`startStream` method in + :class:`DataFrameWriter`. Methods that return a single answer, (e.g., :func:`count` or + :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming + source present. + """ + return self._jdf.isStreaming() + @since(1.3) def show(self, n=20, truncate=True): """Prints the first ``n`` rows to the console. -- cgit v1.2.3