From 9c4405e8e801cbab3a5c78c9f4334775925dfcc4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 14 Feb 2017 09:42:24 -0800 Subject: [SPARK-19453][PYTHON][SQL][DOC] Correct and extend DataFrame.replace docstring ## What changes were proposed in this pull request? - Provides correct description of the semantics of a `dict` argument passed as `to_replace`. - Describes type requirements for collection arguments. - Describes behavior with `to_replace: List[T]` and `value: T` ## How was this patch tested? Manual testing, documentation build. Author: zero323 Closes #16792 from zero323/SPARK-19453. --- python/pyspark/sql/dataframe.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'python/pyspark/sql') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 50373b8585..188808b431 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1271,16 +1271,22 @@ class DataFrame(object): """Returns a new :class:`DataFrame` replacing a value with another value. :func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are aliases of each other. - - :param to_replace: int, long, float, string, or list. + Values to_replace and value should contain either all numerics, all booleans, + or all strings. When replacing, the new value will be cast + to the type of the existing column. + For numeric replacements all values to be replaced should have unique + floating point representation. In case of conflicts (for example with `{42: -1, 42.0: 1}`) + an arbitrary replacement will be used. + + :param to_replace: bool, int, long, float, string, list or dict. Value to be replaced. If the value is a dict, then `value` is ignored and `to_replace` must be a - mapping from column name (string) to replacement value. The value to be - replaced must be an int, long, float, or string. + mapping between a value and a replacement. :param value: int, long, float, string, or list. 
- Value to use to replace holes. The replacement value must be an int, long, float, or string. If `value` is a - list or tuple, `value` should be of the same length with `to_replace`. + list, `value` should be of the same length and type as `to_replace`. + If `value` is a scalar and `to_replace` is a sequence, then `value` is + used as a replacement for each item in `to_replace`. :param subset: optional list of column names to consider. Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, -- cgit v1.2.3