From dda6d9f4045fa2d1265abffa9d7dbdc967448417 Mon Sep 17 00:00:00 2001
From: Vinod K C
Date: Sat, 9 May 2015 10:03:15 +0100
Subject: [SPARK-7438] [SPARK CORE] Fixed validation of relativeSD in countApproxDistinct

Author: Vinod K C

Closes #5974 from vinodkc/fix_countApproxDistinct_Validation and squashes the following commits:

3a3d59c [Vinod K C] Reverted removal of validation relativeSD<0.000017
799976e [Vinod K C] Removed testcase to assert IAE when relativeSD>3.7
8ddbfae [Vinod K C] Remove blank line
b1b00a3 [Vinod K C] Removed relativeSD validation from python API,RDD.scala will do validation
122d378 [Vinod K C] Fixed validation of relativeSD in countApproxDistinct
---
 python/pyspark/rdd.py   | 2 --
 python/pyspark/tests.py | 1 -
 2 files changed, 3 deletions(-)

(limited to 'python')

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index d254deb527..545c5ad20c 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2239,8 +2239,6 @@ class RDD(object):
         """
         if relativeSD < 0.000017:
             raise ValueError("relativeSD should be greater than 0.000017")
-        if relativeSD > 0.37:
-            raise ValueError("relativeSD should be smaller than 0.37")
         # the hash space in Java is 2^32
         hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF)
         return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD)
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index ea63a396da..09de4d159f 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -644,7 +644,6 @@ class RDDTests(ReusedPySparkTestCase):
         self.assertTrue(18 < rdd.map(lambda x: (x, -x)).countApproxDistinct() < 22)
 
         self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.00000001))
-        self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.5))
 
     def test_histogram(self):
         # empty
-- 
cgit v1.2.3