author     Xiangrui Meng <meng@databricks.com>    2015-05-18 08:35:14 -0700
committer  Xiangrui Meng <meng@databricks.com>    2015-05-18 08:35:14 -0700
commit     1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4 (patch)
tree       8c42b42af34ef1f2e484212d74ed42e20a6388a0 /python
parent     e32c0f69f38ad729e25c2d5f90eb73b4453f8279 (diff)
[SPARK-6657] [PYSPARK] Fix doc warnings
Fixed the following warnings in `make clean html` under `python/docs`:

~~~
/Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:3: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:4: WARNING: Block quote ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:3: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:4: WARNING: Block quote ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/sql/__init__.py:docstring of pyspark.sql.DataFrame.replace:16: WARNING: Field list ends without a blank line; unexpected unindent.
/Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:8: ERROR: Unexpected indentation.
/Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:9: WARNING: Block quote ends without a blank line; unexpected unindent.
~~~

davies

Author: Xiangrui Meng <meng@databricks.com>

Closes #6221 from mengxr/SPARK-6657 and squashes the following commits:

e3f83fe [Xiangrui Meng] fix sql and streaming doc warnings
2b4371e [Xiangrui Meng] fix mllib python doc warnings
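All of these warnings stem from docstrings where an indented continuation, field list, or doctest follows a paragraph without a separating blank line, so the reST parser misreads the indentation. A minimal sketch of the pattern and of the style of fix used in this patch (illustrative only, not code from the patch; the function and parameter names are made up):

~~~
def bad(data, minSupport=0.3):
    """Computes a model.
    :param data: the input data set, each element
        contains a transaction.
    :param minSupport: the minimal support level.
    """
    # Without a blank line after the summary sentence, Sphinx does not see a
    # field list; the wrapped continuation of ":param data:" then looks like
    # an indented block and triggers "ERROR: Unexpected indentation", followed
    # by "WARNING: Block quote ends without a blank line; unexpected unindent."


def good(data, minSupport=0.3):
    """Computes a model.

    :param data: the input data set, each element contains a
        transaction.
    :param minSupport: the minimal support level.
    """
    # A blank line before the field list (and consistent continuation
    # indentation) lets reST parse the ":param" entries cleanly.
~~~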
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/mllib/evaluation.py  |  5
-rw-r--r--  python/pyspark/mllib/fpm.py         | 12
-rw-r--r--  python/pyspark/sql/dataframe.py     |  1
-rw-r--r--  python/pyspark/streaming/kafka.py   |  3
4 files changed, 11 insertions, 10 deletions
diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py
index 4c777f2180..a5e5ddc8fe 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -334,11 +334,10 @@ class RankingMetrics(JavaModelWrapper):
"""
Compute the average NDCG value of all the queries, truncated at ranking position k.
The discounted cumulative gain at position k is computed as:
- sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1),
+ sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1),
and the NDCG is obtained by dividing the DCG value on the ground truth set.
In the current implementation, the relevance value is binary.
-
- If a query has an empty ground truth set, zero will be used as ndcg together with
+ If a query has an empty ground truth set, zero will be used as NDCG together with
a log warning.
"""
return self.call("ndcgAt", int(k))
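As an aside, the wiki-style markup in that docstring corresponds to the usual DCG/NDCG definition. The rendering below is only a reading of the formula as stated there (with IDCG@k denoting the DCG of the ideal, ground-truth ordering), not part of the patch:

~~~
% DCG truncated at rank k; rel_i is the relevance of the i-th item (binary in
% the current implementation). NDCG divides by the DCG of the ground-truth set.
\[
  \mathrm{DCG}@k = \sum_{i=1}^{k} \frac{2^{\mathrm{rel}_i} - 1}{\log(i + 1)},
  \qquad
  \mathrm{NDCG}@k = \frac{\mathrm{DCG}@k}{\mathrm{IDCG}@k}
\]
~~~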
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index d8df02bdba..bdc4a132b1 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -61,12 +61,12 @@ class FPGrowth(object):
     def train(cls, data, minSupport=0.3, numPartitions=-1):
         """
         Computes an FP-Growth model that contains frequent itemsets.
-        :param data: The input data set, each element
-                     contains a transaction.
-        :param minSupport: The minimal support level
-                           (default: `0.3`).
-        :param numPartitions: The number of partitions used by parallel
-                              FP-growth (default: same as input data).
+
+        :param data: The input data set, each element contains a
+            transaction.
+        :param minSupport: The minimal support level (default: `0.3`).
+        :param numPartitions: The number of partitions used by
+            parallel FP-growth (default: same as input data).
         """
         model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions))
         return FPGrowthModel(model)
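For reference, a rough usage sketch of the API whose docstring is reformatted above; the transactions and parameter values are made up for illustration:

~~~
from pyspark import SparkContext
from pyspark.mllib.fpm import FPGrowth

sc = SparkContext(appName="FPGrowthDocExample")
# Each element of the RDD is one transaction (a list of items).
data = sc.parallelize([["a", "b", "c"], ["a", "b"], ["a", "c"], ["b", "c"]])
model = FPGrowth.train(data, minSupport=0.5, numPartitions=2)
for itemset in model.freqItemsets().collect():
    print(itemset)
sc.stop()
~~~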
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 96d927b9ba..e4a191a9ef 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -943,6 +943,7 @@ class DataFrame(object):
         Columns specified in subset that do not have matching data type are ignored.
         For example, if `value` is a string, and subset contains a non-string column,
         then the non-string column is simply ignored.
+
         >>> df4.replace(10, 20).show()
         +----+------+-----+
         | age|height| name|
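A short sketch of the behaviour that docstring describes; `df4` lives in the module's doctest globals, so a standalone DataFrame is built here instead (column names and values are illustrative):

~~~
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext(appName="ReplaceDocExample")
sqlContext = SQLContext(sc)
df = sqlContext.createDataFrame(
    [(10, 80, 'Alice'), (5, 92, 'Bob')], ['age', 'height', 'name'])

# The value 10 is numeric, so the string column 'name' listed in subset is
# simply ignored; only 'age' is affected.
df.replace(10, 20, subset=['age', 'name']).show()
sc.stop()
~~~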
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index e278b29003..10a859a532 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -132,11 +132,12 @@ class KafkaUtils(object):
         .. note:: Experimental

         Create a RDD from Kafka using offset ranges for each topic and partition.
+
         :param sc: SparkContext object
         :param kafkaParams: Additional params for Kafka
         :param offsetRanges: list of offsetRange to specify topic:partition:[start, end) to consume
         :param leaders: Kafka brokers for each TopicAndPartition in offsetRanges. May be an empty
-            map, in which case leaders will be looked up on the driver.
+                        map, in which case leaders will be looked up on the driver.
         :param keyDecoder: A function used to decode key (default is utf8_decoder)
         :param valueDecoder: A function used to decode value (default is utf8_decoder)
         :return: A RDD object
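Finally, a rough sketch of the createRDD API documented above; the broker address, topic name, and offsets are placeholders, not values from the patch:

~~~
from pyspark import SparkContext
from pyspark.streaming.kafka import KafkaUtils, OffsetRange

sc = SparkContext(appName="KafkaRDDDocExample")
kafkaParams = {"metadata.broker.list": "localhost:9092"}
# Consume partition 0 of "my_topic" over the offset range [0, 100).
offsetRanges = [OffsetRange("my_topic", 0, 0, 100)]
rdd = KafkaUtils.createRDD(sc, kafkaParams, offsetRanges)
print(rdd.count())
sc.stop()
~~~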