From 37c617e4f580482b59e1abbe3c0c27c7125cf605 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Mon, 23 May 2016 14:19:25 -0700
Subject: [MINOR][SQL][DOCS] Add notes of the deterministic assumption on UDF
 functions

## What changes were proposed in this pull request?

Spark assumes that user-defined functions (UDFs) are deterministic. This PR adds an explicit note about that assumption to the docstring of `udf` in `python/pyspark/sql/functions.py`.

## How was this patch tested?

This is a documentation-only change; it only adds text to a docstring.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #13087 from dongjoon-hyun/SPARK-15282.
---
 python/pyspark/sql/functions.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'python')

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index dac842c0ce..716b16fdc9 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1756,6 +1756,9 @@ class UserDefinedFunction(object):
 @since(1.3)
 def udf(f, returnType=StringType()):
     """Creates a :class:`Column` expression representing a user defined function (UDF).
+    Note that the user-defined functions must be deterministic. Due to optimization,
+    duplicate invocations may be eliminated or the function may even be invoked more times than
+    it is present in the query.
 
     >>> from pyspark.sql.types import IntegerType
     >>> slen = udf(lambda s: len(s), IntegerType())
-- 
cgit v1.2.3