aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author: Vinod K C <vinod.kc@huawei.com> 2015-05-09 10:03:15 +0100
committer: Sean Owen <sowen@cloudera.com> 2015-05-09 10:03:15 +0100
commit: dda6d9f4045fa2d1265abffa9d7dbdc967448417 (patch)
tree: cff2d6a2034b32f4dc71ba2f2927fb90a1cd1dda /python
parent: 29926238418223b0888d418d163feebf0217b35e (diff)
download: spark-dda6d9f4045fa2d1265abffa9d7dbdc967448417.tar.gz
spark-dda6d9f4045fa2d1265abffa9d7dbdc967448417.tar.bz2
spark-dda6d9f4045fa2d1265abffa9d7dbdc967448417.zip
[SPARK-7438] [SPARK CORE] Fixed validation of relativeSD in countApproxDistinct
Author: Vinod K C <vinod.kc@huawei.com>

Closes #5974 from vinodkc/fix_countApproxDistinct_Validation and squashes the following commits:

3a3d59c [Vinod K C] Reverted removal of validation relativeSD<0.000017
799976e [Vinod K C] Removed testcase to assert IAE when relativeSD>3.7
8ddbfae [Vinod K C] Remove blank line
b1b00a3 [Vinod K C] Removed relativeSD validation from python API,RDD.scala will do validation
122d378 [Vinod K C] Fixed validation of relativeSD in countApproxDistinct
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/rdd.py 2
-rw-r--r-- python/pyspark/tests.py 1
2 files changed, 0 insertions, 3 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index d254deb527..545c5ad20c 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2239,8 +2239,6 @@ class RDD(object):
"""
if relativeSD < 0.000017:
raise ValueError("relativeSD should be greater than 0.000017")
- if relativeSD > 0.37:
- raise ValueError("relativeSD should be smaller than 0.37")
# the hash space in Java is 2^32
hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF)
return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD)
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index ea63a396da..09de4d159f 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -644,7 +644,6 @@ class RDDTests(ReusedPySparkTestCase):
self.assertTrue(18 < rdd.map(lambda x: (x, -x)).countApproxDistinct() < 22)
self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.00000001))
- self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.5))
def test_histogram(self):
# empty