diff options
author | Xiangrui Meng <meng@databricks.com> | 2015-05-18 08:35:14 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-18 08:35:14 -0700 |
commit | 1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4 (patch) | |
tree | 8c42b42af34ef1f2e484212d74ed42e20a6388a0 /python | |
parent | e32c0f69f38ad729e25c2d5f90eb73b4453f8279 (diff) | |
download | spark-1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4.tar.gz spark-1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4.tar.bz2 spark-1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4.zip |
[SPARK-6657] [PYSPARK] Fix doc warnings
Fixed the following warnings in `make clean html` under `python/docs`:
~~~
/Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:3: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:4: WARNING: Block quote ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:3: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:4: WARNING: Block quote ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/sql/__init__.py:docstring of pyspark.sql.DataFrame.replace:16: WARNING: Field list ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:8: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:9: WARNING: Block quote ends without a blank line; unexpected unindent.
~~~
davies
Author: Xiangrui Meng <meng@databricks.com>
Closes #6221 from mengxr/SPARK-6657 and squashes the following commits:
e3f83fe [Xiangrui Meng] fix sql and streaming doc warnings
2b4371e [Xiangrui Meng] fix mllib python doc warnings
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/mllib/evaluation.py | 5 | ||||
-rw-r--r-- | python/pyspark/mllib/fpm.py | 12 | ||||
-rw-r--r-- | python/pyspark/sql/dataframe.py | 1 | ||||
-rw-r--r-- | python/pyspark/streaming/kafka.py | 3 |
4 files changed, 11 insertions, 10 deletions
```diff
diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py
index 4c777f2180..a5e5ddc8fe 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -334,11 +334,10 @@ class RankingMetrics(JavaModelWrapper):
         """
         Compute the average NDCG value of all the queries, truncated at ranking position k.
         The discounted cumulative gain at position k is computed as:
-            sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1),
+        sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1),
         and the NDCG is obtained by dividing the DCG value on the ground truth set.
         In the current implementation, the relevance value is binary.
-
-        If a query has an empty ground truth set, zero will be used as ndcg together with
+        If a query has an empty ground truth set, zero will be used as NDCG together with
         a log warning.
         """
         return self.call("ndcgAt", int(k))
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index d8df02bdba..bdc4a132b1 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -61,12 +61,12 @@ class FPGrowth(object):
     def train(cls, data, minSupport=0.3, numPartitions=-1):
         """
         Computes an FP-Growth model that contains frequent itemsets.
-        :param data: The input data set, each element
-                     contains a transaction.
-        :param minSupport: The minimal support level
-                           (default: `0.3`).
-        :param numPartitions: The number of partitions used by parallel
-                              FP-growth (default: same as input data).
+
+        :param data: The input data set, each element contains a
+            transaction.
+        :param minSupport: The minimal support level (default: `0.3`).
+        :param numPartitions: The number of partitions used by
+            parallel FP-growth (default: same as input data).
         """
         model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions))
         return FPGrowthModel(model)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 96d927b9ba..e4a191a9ef 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -943,6 +943,7 @@ class DataFrame(object):
             Columns specified in subset that do not have matching data type are ignored.
             For example, if `value` is a string, and subset contains a non-string column,
             then the non-string column is simply ignored.
+
         >>> df4.replace(10, 20).show()
         +----+------+-----+
         | age|height| name|
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index e278b29003..10a859a532 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -132,11 +132,12 @@ class KafkaUtils(object):
         .. note:: Experimental
 
         Create a RDD from Kafka using offset ranges for each topic and partition.
+
         :param sc: SparkContext object
         :param kafkaParams: Additional params for Kafka
         :param offsetRanges: list of offsetRange to specify topic:partition:[start, end) to consume
         :param leaders: Kafka brokers for each TopicAndPartition in offsetRanges. May be an empty
-                map, in which case leaders will be looked up on the driver.
+            map, in which case leaders will be looked up on the driver.
         :param keyDecoder: A function used to decode key (default is utf8_decoder)
         :param valueDecoder: A function used to decode value (default is utf8_decoder)
         :return: A RDD object
```