about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2014-08-16 15:14:43 -0700
committer Xiangrui Meng <meng@databricks.com> 2014-08-16 15:14:52 -0700
commit a12d3ae3223535e6e4c774e4a289b8b2f2e5228b (patch)
tree 38d3aa38fe2ad2720b0a9000de0235e21ce1f3c2 /python
parent 0b354be2f9ec35547a60591acf4f4773a4869690 (diff)
download spark-a12d3ae3223535e6e4c774e4a289b8b2f2e5228b.tar.gz
spark-a12d3ae3223535e6e4c774e4a289b8b2f2e5228b.tar.bz2
spark-a12d3ae3223535e6e4c774e4a289b8b2f2e5228b.zip
[SPARK-3081][MLLIB] rename RandomRDDGenerators to RandomRDDs
`RandomRDDGenerators` means a factory for `RandomRDDGenerator`. However, its methods return RDDs, not RDDGenerators, so a more appropriate (and shorter) name would be `RandomRDDs`.

dorx brkyvz

Author: Xiangrui Meng <meng@databricks.com>

Closes #1979 from mengxr/randomrdds and squashes the following commits:

b161a2d [Xiangrui Meng] rename RandomRDDGenerators to RandomRDDs

(cherry picked from commit ac6411c6e75906997c78de23dfdbc8d225b87cfd)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/random.py 25
1 files changed, 12 insertions, 13 deletions
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py
index eb496688b6..3f3b19053d 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/random.py
@@ -25,8 +25,7 @@ from pyspark.mllib._common import _deserialize_double, _deserialize_double_vecto
from pyspark.serializers import NoOpSerializer
-class RandomRDDGenerators:
-
+class RandomRDDs:
"""
Generator methods for creating RDDs comprised of i.i.d samples from
some distribution.
@@ -40,17 +39,17 @@ class RandomRDDGenerators:
To transform the distribution in the generated RDD from U[0.0, 1.0]
to U[a, b], use
- C{RandomRDDGenerators.uniformRDD(sc, n, p, seed)\
+ C{RandomRDDs.uniformRDD(sc, n, p, seed)\
.map(lambda v: a + (b - a) * v)}
- >>> x = RandomRDDGenerators.uniformRDD(sc, 100).collect()
+ >>> x = RandomRDDs.uniformRDD(sc, 100).collect()
>>> len(x)
100
>>> max(x) <= 1.0 and min(x) >= 0.0
True
- >>> RandomRDDGenerators.uniformRDD(sc, 100, 4).getNumPartitions()
+ >>> RandomRDDs.uniformRDD(sc, 100, 4).getNumPartitions()
4
- >>> parts = RandomRDDGenerators.uniformRDD(sc, 100, seed=4).getNumPartitions()
+ >>> parts = RandomRDDs.uniformRDD(sc, 100, seed=4).getNumPartitions()
>>> parts == sc.defaultParallelism
True
"""
@@ -66,10 +65,10 @@ class RandomRDDGenerators:
To transform the distribution in the generated RDD from standard normal
to some other normal N(mean, sigma), use
- C{RandomRDDGenerators.normal(sc, n, p, seed)\
+ C{RandomRDDs.normal(sc, n, p, seed)\
.map(lambda v: mean + sigma * v)}
- >>> x = RandomRDDGenerators.normalRDD(sc, 1000, seed=1L)
+ >>> x = RandomRDDs.normalRDD(sc, 1000, seed=1L)
>>> stats = x.stats()
>>> stats.count()
1000L
@@ -89,7 +88,7 @@ class RandomRDDGenerators:
distribution with the input mean.
>>> mean = 100.0
- >>> x = RandomRDDGenerators.poissonRDD(sc, mean, 1000, seed=1L)
+ >>> x = RandomRDDs.poissonRDD(sc, mean, 1000, seed=1L)
>>> stats = x.stats()
>>> stats.count()
1000L
@@ -110,12 +109,12 @@ class RandomRDDGenerators:
from the uniform distribution on [0.0 1.0].
>>> import numpy as np
- >>> mat = np.matrix(RandomRDDGenerators.uniformVectorRDD(sc, 10, 10).collect())
+ >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
>>> mat.shape
(10, 10)
>>> mat.max() <= 1.0 and mat.min() >= 0.0
True
- >>> RandomRDDGenerators.uniformVectorRDD(sc, 10, 10, 4).getNumPartitions()
+ >>> RandomRDDs.uniformVectorRDD(sc, 10, 10, 4).getNumPartitions()
4
"""
jrdd = sc._jvm.PythonMLLibAPI() \
@@ -130,7 +129,7 @@ class RandomRDDGenerators:
from the standard normal distribution.
>>> import numpy as np
- >>> mat = np.matrix(RandomRDDGenerators.normalVectorRDD(sc, 100, 100, seed=1L).collect())
+ >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1L).collect())
>>> mat.shape
(100, 100)
>>> abs(mat.mean() - 0.0) < 0.1
@@ -151,7 +150,7 @@ class RandomRDDGenerators:
>>> import numpy as np
>>> mean = 100.0
- >>> rdd = RandomRDDGenerators.poissonVectorRDD(sc, mean, 100, 100, seed=1L)
+ >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1L)
>>> mat = np.mat(rdd.collect())
>>> mat.shape
(100, 100)