| field | value | date |
|---|---|---|
| author | hyukjinkwon <gurwls223@gmail.com> | 2016-11-05 21:47:33 -0700 |
| committer | Felix Cheung <felixcheung@apache.org> | 2016-11-05 21:47:33 -0700 |
| commit | 15d392688456ad9f963417843c52a7b610f771d2 (patch) | |
| tree | 4a969525408e41c1bc971c9b79ab20ad741a3962 /python/pyspark/sql | |
| parent | 9a87c313859a6557bbf7bca7239043cb77ea23be (diff) | |
[MINOR][DOCUMENTATION] Fix some minor descriptions in functions consistently with expressions
## What changes were proposed in this pull request?
This PR proposes to improve the documentation, applying to `functions.py` the same minor description fixes identified in https://github.com/apache/spark/pull/15677.
It also changes `Note:` and `NOTE:` to the `.. note::` reST directive, consistently with the other docstrings, so that notes render properly in the generated documentation.
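For illustration, here is a minimal before/after sketch of that markup change; the function and its docstring are made up for this example and are not part of the patch:

```python
# Old style: a plain "Note:" prefix is rendered by Sphinx as ordinary
# body text inside the docstring.
def clamp_old(value, lo, hi):
    """Restrict value to the range [lo, hi].

    Note: lo must be less than or equal to hi.
    """
    return max(lo, min(value, hi))


# New style: the reST ".. note::" directive is rendered as a highlighted
# admonition box in the generated API documentation.
def clamp_new(value, lo, hi):
    """Restrict value to the range [lo, hi].

    .. note:: lo must be less than or equal to hi.
    """
    return max(lo, min(value, hi))
```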
## How was this patch tested?
Jenkins tests and manual verification.
For PySpark, changing `Note:` and `NOTE:` to `.. note::` renders the documentation as below:
**From**
![2016-11-04 6 53 35](https://cloud.githubusercontent.com/assets/6477701/20002648/42989922-a2c5-11e6-8a32-b73eda49e8c3.png)
![2016-11-04 6 53 45](https://cloud.githubusercontent.com/assets/6477701/20002650/429fb310-a2c5-11e6-926b-e030d7eb0185.png)
![2016-11-04 6 54 11](https://cloud.githubusercontent.com/assets/6477701/20002649/429d570a-a2c5-11e6-9e7e-44090f337e32.png)
![2016-11-04 6 53 51](https://cloud.githubusercontent.com/assets/6477701/20002647/4297fc74-a2c5-11e6-801a-b89fbcbfca44.png)
![2016-11-04 6 53 51](https://cloud.githubusercontent.com/assets/6477701/20002697/749f5780-a2c5-11e6-835f-022e1f2f82e3.png)
**To**
![2016-11-04 7 03 48](https://cloud.githubusercontent.com/assets/6477701/20002659/4961b504-a2c5-11e6-9ee0-ef0751482f47.png)
![2016-11-04 7 04 03](https://cloud.githubusercontent.com/assets/6477701/20002660/49871d3a-a2c5-11e6-85ea-d9a5d11efeff.png)
![2016-11-04 7 04 28](https://cloud.githubusercontent.com/assets/6477701/20002662/498e0f14-a2c5-11e6-803d-c0c5aeda4153.png)
![2016-11-04 7 33 39](https://cloud.githubusercontent.com/assets/6477701/20002731/a76e30d2-a2c5-11e6-993b-0481b8342d6b.png)
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #15765 from HyukjinKwon/minor-function-doc.
Diffstat (limited to 'python/pyspark/sql')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | python/pyspark/sql/functions.py | 35 |

1 file changed, 20 insertions, 15 deletions
```diff
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 245357a4ba..46a092f16d 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -359,8 +359,8 @@ def grouping_id(*cols):
 
         (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
 
-    Note: the list of columns should match with grouping columns exactly, or empty (means all the
-    grouping columns).
+    .. note:: the list of columns should match with grouping columns exactly, or empty (means all
+        the grouping columns).
 
     >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show()
     +-----+-------------+--------+
@@ -457,7 +457,8 @@ def nanvl(col1, col2):
 
 @since(1.4)
 def rand(seed=None):
-    """Generates a random column with i.i.d. samples from U[0.0, 1.0].
+    """Generates a random column with independent and identically distributed (i.i.d.) samples
+    from U[0.0, 1.0].
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
@@ -469,7 +470,8 @@ def rand(seed=None):
 
 @since(1.4)
 def randn(seed=None):
-    """Generates a column with i.i.d. samples from the standard normal distribution.
+    """Generates a column with independent and identically distributed (i.i.d.) samples from
+    the standard normal distribution.
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
@@ -518,7 +520,7 @@ def shiftLeft(col, numBits):
 
 @since(1.5)
 def shiftRight(col, numBits):
-    """Shift the given value numBits right.
+    """(Signed) shift the given value numBits right.
 
     >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect()
     [Row(r=21)]
@@ -777,8 +779,8 @@ def date_format(date, format):
     A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
     pattern letters of the Java class `java.text.SimpleDateFormat` can be used.
 
-    NOTE: Use when ever possible specialized functions like `year`. These benefit from a
-    specialized implementation.
+    .. note:: Use when ever possible specialized functions like `year`. These benefit from a
+        specialized implementation.
 
     >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
     >>> df.select(date_format('a', 'MM/dd/yyy').alias('date')).collect()
@@ -1059,7 +1061,8 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'):
 @since(1.5)
 def from_utc_timestamp(timestamp, tz):
     """
-    Assumes given timestamp is UTC and converts to given timezone.
+    Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp
+    that corresponds to the same time of day in the given timezone.
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(from_utc_timestamp(df.t, "PST").alias('t')).collect()
@@ -1072,7 +1075,8 @@
 @since(1.5)
 def to_utc_timestamp(timestamp, tz):
     """
-    Assumes given timestamp is in given timezone and converts to UTC.
+    Given a timestamp, which corresponds to a certain time of day in the given timezone, returns
+    another timestamp that corresponds to the same time of day in UTC.
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_utc_timestamp(df.t, "PST").alias('t')).collect()
@@ -1314,8 +1318,8 @@ def instr(str, substr):
     Locate the position of the first occurrence of substr column in the given string.
     Returns null if either of the arguments are null.
 
-    NOTE: The position is not zero based, but 1 based index, returns 0 if substr
-    could not be found in str.
+    .. note:: The position is not zero based, but 1 based index. Returns 0 if substr
+        could not be found in str.
 
     >>> df = spark.createDataFrame([('abcd',)], ['s',])
     >>> df.select(instr(df.s, 'b').alias('s')).collect()
@@ -1379,8 +1383,8 @@ def locate(substr, str, pos=1):
     """
     Locate the position of the first occurrence of substr in a string column, after position pos.
 
-    NOTE: The position is not zero based, but 1 based index. returns 0 if substr
-    could not be found in str.
+    .. note:: The position is not zero based, but 1 based index. Returns 0 if substr
+        could not be found in str.
 
     :param substr: a string
     :param str: a Column of :class:`pyspark.sql.types.StringType`
@@ -1442,7 +1446,7 @@ def split(str, pattern):
     """
     Splits str around pattern (pattern is a regular expression).
 
-    NOTE: pattern is a string represent the regular expression.
+    .. note:: pattern is a string represent the regular expression.
 
     >>> df = spark.createDataFrame([('ab12cd',)], ['s',])
     >>> df.select(split(df.s, '[0-9]+').alias('s')).collect()
@@ -1785,7 +1789,8 @@ def size(col):
 
 @since(1.5)
 def sort_array(col, asc=True):
     """
-    Collection function: sorts the input array for the given column in ascending order.
+    Collection function: sorts the input array in ascending or descending order according
+    to the natural ordering of the array elements.
 
     :param col: name of column or expression
```
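The clarified docstrings are easiest to sanity-check interactively. Below is a minimal sketch exercising the 1-based indexing of `instr`/`locate`, the descending mode of `sort_array`, and the `from_utc_timestamp` doctest input from the hunk above; it assumes only a locally created `SparkSession` and is not part of the patch:

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import instr, locate, sort_array, from_utc_timestamp

# A throwaway local session purely for checking the documented behaviors.
spark = SparkSession.builder.master("local[1]").appName("doc-check").getOrCreate()

df = spark.createDataFrame([('abcd', [3, 1, 2])], ['s', 'arr'])

# instr/locate positions are 1-based; 0 (not null) signals "not found".
df.select(
    instr(df.s, 'b').alias('pos'),                       # 2, not 1
    locate('zz', df.s).alias('missing'),                 # 0
    sort_array(df.arr, asc=False).alias('desc_sorted'),  # [3, 2, 1]
).show()

# The reworded from_utc_timestamp semantics: the same wall-clock time of day,
# re-expressed in the target timezone (input taken from the doctest above).
tdf = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
tdf.select(from_utc_timestamp(tdf.t, "PST").alias('t')).collect()
# Expected per the PySpark docs: [Row(t=datetime.datetime(1997, 2, 28, 2, 30))]

spark.stop()
```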