author    Michael Patterson <map222@gmail.com>  2017-04-22 19:58:54 -0700
committer Holden Karau <holden@us.ibm.com>      2017-04-22 19:58:54 -0700
commit    8765bc17d0439032d0378686c4f2b17df2432abc (patch)
tree      0bd4babba524ee46881558bde6a62df379fc2c01
parent    b3c572a6b332b79fef72c309b9038b3c939dcba2 (diff)
[SPARK-20132][DOCS] Add documentation for column string functions
## What changes were proposed in this pull request?

Add docstrings to column.py for the Column functions `rlike`, `like`, `startswith`, and `endswith`, and pass these docstrings through `_bin_op`. There may be a better place to put the docstrings; I put them immediately above the Column class.

## How was this patch tested?

I ran `make html` on my local computer to rebuild the documentation and verified that the HTML pages display the docstrings correctly. I also tried running `dev-tests`: the formatting tests passed, but my mvn build did not work, I think due to issues on my computer.

These docstrings are my original work and freely licensed. davies has done the most recent work reorganizing `_bin_op`.

Author: Michael Patterson <map222@gmail.com>

Closes #17469 from map222/patterson-documentation.
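For readers unfamiliar with the pattern, the commit relies on the fact that `_bin_op` builds the method object at class-definition time, so it can also attach that method's `__doc__`. Below is a minimal, self-contained sketch of the idea; it is not the Spark implementation, and the stub `_bin_op` only records its arguments instead of delegating to the JVM-backed Column.

```python
# Minimal sketch of the "pass the docstring through _bin_op" pattern.
# Illustrative only: the real _bin_op in python/pyspark/sql/column.py
# delegates to the underlying JVM Column object, which is stubbed out here.

def _bin_op(name, doc="binary operator"):
    """Create a binary-operator method named `name` carrying docstring `doc`."""
    def _(self, other):
        # Placeholder for the real implementation, which would invoke the
        # JVM-side method `name` with `other`; here we just record the call.
        return (name, self, other)
    _.__doc__ = doc
    _.__name__ = name
    return _


_rlike_doc = """Return a Boolean Column based on a regex match."""


class Column(object):
    # The generated method now exposes the docstring to help() and Sphinx.
    rlike = _bin_op("rlike", _rlike_doc)


print(Column.rlike.__doc__)  # -> Return a Boolean Column based on a regex match.
```

Because the docstring is attached to the generated function itself, tools that introspect `__doc__` (Sphinx autodoc, `help()`) pick it up without any further wiring.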
-rw-r--r--  python/pyspark/sql/column.py  70
1 file changed, 64 insertions(+), 6 deletions(-)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index ec05c18d4f..46c1707cb6 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -250,11 +250,50 @@ class Column(object):
raise TypeError("Column is not iterable")
# string methods
+ _rlike_doc = """
+ Return a Boolean :class:`Column` based on a regex match.
+
+ :param other: an extended regex expression
+
+ >>> df.filter(df.name.rlike('ice$')).collect()
+ [Row(age=2, name=u'Alice')]
+ """
+ _like_doc = """
+ Return a Boolean :class:`Column` based on a SQL LIKE match.
+
+ :param other: a SQL LIKE pattern
+
+ See :func:`rlike` for a regex version
+
+ >>> df.filter(df.name.like('Al%')).collect()
+ [Row(age=2, name=u'Alice')]
+ """
+ _startswith_doc = """
+ Return a Boolean :class:`Column` based on a string match.
+
+ :param other: string at start of line (do not use a regex `^`)
+
+ >>> df.filter(df.name.startswith('Al')).collect()
+ [Row(age=2, name=u'Alice')]
+ >>> df.filter(df.name.startswith('^Al')).collect()
+ []
+ """
+ _endswith_doc = """
+ Return a Boolean :class:`Column` based on matching end of string.
+
+ :param other: string at end of line (do not use a regex `$`)
+
+ >>> df.filter(df.name.endswith('ice')).collect()
+ [Row(age=2, name=u'Alice')]
+ >>> df.filter(df.name.endswith('ice$')).collect()
+ []
+ """
+
contains = _bin_op("contains")
- rlike = _bin_op("rlike")
- like = _bin_op("like")
- startswith = _bin_op("startsWith")
- endswith = _bin_op("endsWith")
+ rlike = ignore_unicode_prefix(_bin_op("rlike", _rlike_doc))
+ like = ignore_unicode_prefix(_bin_op("like", _like_doc))
+ startswith = ignore_unicode_prefix(_bin_op("startsWith", _startswith_doc))
+ endswith = ignore_unicode_prefix(_bin_op("endsWith", _endswith_doc))
@ignore_unicode_prefix
@since(1.3)
@@ -303,8 +342,27 @@ class Column(object):
desc = _unary_op("desc", "Returns a sort expression based on the"
" descending order of the given column name.")
- isNull = _unary_op("isNull", "True if the current expression is null.")
- isNotNull = _unary_op("isNotNull", "True if the current expression is not null.")
+ _isNull_doc = """
+ True if the current expression is null. Often combined with
+ :func:`DataFrame.filter` to select rows with null values.
+
+ >>> from pyspark.sql import Row
+ >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
+ >>> df2.filter(df2.height.isNull()).collect()
+ [Row(height=None, name=u'Alice')]
+ """
+ _isNotNull_doc = """
+ True if the current expression is not null. Often combined with
+ :func:`DataFrame.filter` to select rows with non-null values.
+
+ >>> from pyspark.sql import Row
+ >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
+ >>> df2.filter(df2.height.isNotNull()).collect()
+ [Row(height=80, name=u'Tom')]
+ """
+
+ isNull = ignore_unicode_prefix(_unary_op("isNull", _isNull_doc))
+ isNotNull = ignore_unicode_prefix(_unary_op("isNotNull", _isNotNull_doc))
@since(1.3)
def alias(self, *alias, **kwargs):