aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/sql.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 1a62031db5..6789d7002b 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -360,6 +360,35 @@ class SchemaRDD(RDD):
else:
return None
+ def coalesce(self, numPartitions, shuffle=False):
+ rdd = self._jschema_rdd.coalesce(numPartitions, shuffle)
+ return SchemaRDD(rdd, self.sql_ctx)
+
+ def distinct(self):
+ rdd = self._jschema_rdd.distinct()
+ return SchemaRDD(rdd, self.sql_ctx)
+
+ def intersection(self, other):
+ if (other.__class__ is SchemaRDD):
+ rdd = self._jschema_rdd.intersection(other._jschema_rdd)
+ return SchemaRDD(rdd, self.sql_ctx)
+ else:
+ raise ValueError("Can only intersect with another SchemaRDD")
+
+ def repartition(self, numPartitions):
+ rdd = self._jschema_rdd.repartition(numPartitions)
+ return SchemaRDD(rdd, self.sql_ctx)
+
+ def subtract(self, other, numPartitions=None):
+ if (other.__class__ is SchemaRDD):
+ if numPartitions is None:
+ rdd = self._jschema_rdd.subtract(other._jschema_rdd)
+ else:
+ rdd = self._jschema_rdd.subtract(other._jschema_rdd, numPartitions)
+ return SchemaRDD(rdd, self.sql_ctx)
+ else:
+ raise ValueError("Can only subtract another SchemaRDD")
+
def _test():
import doctest
from pyspark.context import SparkContext