2 files changed, 5 insertions, 0 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 3f81550bbb..ac8ceff055 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -598,6 +598,8 @@ class RDD(object):
         # the key-space into bins such that the bins have roughly the same
         # number of (key, value) pairs falling into them
         rddSize = self.count()
+        if not rddSize:
+            return self  # empty RDD
         maxSampleSize = numPartitions * 20.0  # constant from Spark's RangePartitioner
         fraction = min(maxSampleSize / max(rddSize, 1), 1.0)
         samples = self.sample(False, fraction, 1).map(lambda (k, v): k).collect()
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 5cea1b03ea..b4a9c59790 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -470,6 +470,9 @@ class TestRDDFunctions(PySparkTestCase):
         self.assertEquals(([1, "b"], [5]), rdd.histogram(1))
         self.assertRaises(TypeError, lambda: rdd.histogram(2))
 
+    def test_sort_on_empty_rdd(self):
+        self.assertEqual([], self.sc.parallelize(zip([], [])).sortByKey().collect())
+
     def test_sample(self):
         rdd = self.sc.parallelize(range(0, 100), 4)
         wo = rdd.sample(False, 0.1, 2).collect()