about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2014-08-19 16:06:48 -0700
committer Xiangrui Meng <meng@databricks.com> 2014-08-19 16:06:48 -0700
commit 825d4fe47b9c4d48de88622dd48dcf83beb8b80a (patch)
tree d51775e9f88bff51458e57a5ec16de6e0b93b91a /python
parent d7e80c2597d4a9cae2e0cb35a86f7889323f4cbb (diff)
download spark-825d4fe47b9c4d48de88622dd48dcf83beb8b80a.tar.gz
spark-825d4fe47b9c4d48de88622dd48dcf83beb8b80a.tar.bz2
spark-825d4fe47b9c4d48de88622dd48dcf83beb8b80a.zip
[SPARK-3136][MLLIB] Create Java-friendly methods in RandomRDDs
Though we don't use default arguments for methods in RandomRDDs, they are still not easy for Java users to use because the output type is either `RDD[Double]` or `RDD[Vector]`. Java users should expect `JavaDoubleRDD` and `JavaRDD[Vector]`, respectively. We should create dedicated methods for Java users, and allow default arguments in Scala methods in RandomRDDs, to make life easier for both Java and Scala users. This PR also contains documentation for random data generation.

brkyvz

Author: Xiangrui Meng <meng@databricks.com>

Closes #2041 from mengxr/stat-doc and squashes the following commits:

fc5eedf [Xiangrui Meng] add missing comma
ffde810 [Xiangrui Meng] address comments
aef6d07 [Xiangrui Meng] add doc for random data generation
b99d94b [Xiangrui Meng] add java-friendly methods to RandomRDDs
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/random.py 20
1 files changed, 10 insertions, 10 deletions
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py
index 3f3b19053d..4dc1a4a912 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/random.py
@@ -35,10 +35,10 @@ class RandomRDDs:
def uniformRDD(sc, size, numPartitions=None, seed=None):
"""
Generates an RDD comprised of i.i.d. samples from the
- uniform distribution on [0.0, 1.0].
+ uniform distribution U(0.0, 1.0).
- To transform the distribution in the generated RDD from U[0.0, 1.0]
- to U[a, b], use
+ To transform the distribution in the generated RDD from U(0.0, 1.0)
+ to U(a, b), use
C{RandomRDDs.uniformRDD(sc, n, p, seed)\
.map(lambda v: a + (b - a) * v)}
@@ -60,11 +60,11 @@ class RandomRDDs:
@staticmethod
def normalRDD(sc, size, numPartitions=None, seed=None):
"""
- Generates an RDD comprised of i.i.d samples from the standard normal
+ Generates an RDD comprised of i.i.d. samples from the standard normal
distribution.
To transform the distribution in the generated RDD from standard normal
- to some other normal N(mean, sigma), use
+ to some other normal N(mean, sigma^2), use
C{RandomRDDs.normal(sc, n, p, seed)\
.map(lambda v: mean + sigma * v)}
@@ -84,7 +84,7 @@ class RandomRDDs:
@staticmethod
def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
"""
- Generates an RDD comprised of i.i.d samples from the Poisson
+ Generates an RDD comprised of i.i.d. samples from the Poisson
distribution with the input mean.
>>> mean = 100.0
@@ -105,8 +105,8 @@ class RandomRDDs:
@staticmethod
def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
"""
- Generates an RDD comprised of vectors containing i.i.d samples drawn
- from the uniform distribution on [0.0 1.0].
+ Generates an RDD comprised of vectors containing i.i.d. samples drawn
+ from the uniform distribution U(0.0, 1.0).
>>> import numpy as np
>>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
@@ -125,7 +125,7 @@ class RandomRDDs:
@staticmethod
def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
"""
- Generates an RDD comprised of vectors containing i.i.d samples drawn
+ Generates an RDD comprised of vectors containing i.i.d. samples drawn
from the standard normal distribution.
>>> import numpy as np
@@ -145,7 +145,7 @@ class RandomRDDs:
@staticmethod
def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
"""
- Generates an RDD comprised of vectors containing i.i.d samples drawn
+ Generates an RDD comprised of vectors containing i.i.d. samples drawn
from the Poisson distribution with the input mean.
>>> import numpy as np