about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/ml/feature.py20
-rw-r--r--python/pyspark/ml/recommendation.py6
-rw-r--r--python/pyspark/mllib/recommendation.py4
-rw-r--r--python/pyspark/sql/dataframe.py6
4 files changed, 18 insertions, 18 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index c7b6dd926c..b02d41b52a 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1788,21 +1788,21 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
+----+--------------------+
|word| vector|
+----+--------------------+
- | a|[-0.3511952459812...|
- | b|[0.29077222943305...|
- | c|[0.02315592765808...|
+ | a|[0.09461779892444...|
+ | b|[1.15474212169647...|
+ | c|[-0.3794820010662...|
+----+--------------------+
...
>>> model.findSynonyms("a", 2).show()
- +----+-------------------+
- |word| similarity|
- +----+-------------------+
- | b|0.29255685145799626|
- | c|-0.5414068302988307|
- +----+-------------------+
+ +----+--------------------+
+ |word| similarity|
+ +----+--------------------+
+ | b| 0.16782984556103436|
+ | c|-0.46761559092107646|
+ +----+--------------------+
...
>>> model.transform(doc).head().model
- DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276])
+ DenseVector([0.5524, -0.4995, -0.3599, 0.0241, 0.3461])
.. versionadded:: 1.4.0
"""
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index ec5748a1cf..b44c66f73c 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -76,11 +76,11 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
>>> test = sqlContext.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"])
>>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0])
>>> predictions[0]
- Row(user=0, item=2, prediction=0.39...)
+ Row(user=0, item=2, prediction=-0.13807615637779236)
>>> predictions[1]
- Row(user=1, item=0, prediction=3.19...)
+ Row(user=1, item=0, prediction=2.6258413791656494)
>>> predictions[2]
- Row(user=2, item=0, prediction=-1.15...)
+ Row(user=2, item=0, prediction=-1.5018409490585327)
.. versionadded:: 1.4.0
"""
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index b9442b0d16..93e47a797f 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -101,12 +101,12 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
>>> model = ALS.train(ratings, 1, nonnegative=True, seed=10)
>>> model.predict(2, 2)
- 3.8...
+ 3.73...
>>> df = sqlContext.createDataFrame([Rating(1, 1, 1.0), Rating(1, 2, 2.0), Rating(2, 1, 2.0)])
>>> model = ALS.train(df, 1, nonnegative=True, seed=10)
>>> model.predict(2, 2)
- 3.8...
+ 3.73...
>>> model = ALS.trainImplicit(ratings, 1, nonnegative=True, seed=10)
>>> model.predict(2, 2)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 3baff81477..765a4511b6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -436,7 +436,7 @@ class DataFrame(object):
"""Returns a sampled subset of this :class:`DataFrame`.
>>> df.sample(False, 0.5, 42).count()
- 1
+ 2
"""
assert fraction >= 0.0, "Negative fraction value: %s" % fraction
seed = seed if seed is not None else random.randint(0, sys.maxsize)
@@ -463,8 +463,8 @@ class DataFrame(object):
+---+-----+
|key|count|
+---+-----+
- | 0| 3|
- | 1| 8|
+ | 0| 5|
+ | 1| 9|
+---+-----+
"""