diff options
author | Reynold Xin <rxin@databricks.com> | 2016-01-04 18:02:38 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-01-04 18:02:38 -0800 |
commit | 77ab49b8575d2ebd678065fa70b0343d532ab9c2 (patch) | |
tree | f9c4a990499d1856494f787f8bfc095d68a69735 /python/pyspark/sql/readwriter.py | |
parent | fdfac22d08fc4fdc640843dd93a29e2ce4aee2ef (diff) | |
download | spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.tar.gz spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.tar.bz2 spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.zip |
[SPARK-12600][SQL] Remove deprecated methods in Spark SQL
Author: Reynold Xin <rxin@databricks.com>
Closes #10559 from rxin/remove-deprecated-sql.
Diffstat (limited to 'python/pyspark/sql/readwriter.py')
-rw-r--r-- | python/pyspark/sql/readwriter.py | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index a2771daabe..0b20022b14 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -130,11 +130,9 @@ class DataFrameReader(object): self.schema(schema) self.options(**options) if path is not None: - if type(path) == list: - return self._df( - self._jreader.load(self._sqlContext._sc._jvm.PythonUtils.toSeq(path))) - else: - return self._df(self._jreader.load(path)) + if type(path) != list: + path = [path] + return self._df(self._jreader.load(self._sqlContext._sc._jvm.PythonUtils.toSeq(path))) else: return self._df(self._jreader.load()) @@ -179,7 +177,17 @@ class DataFrameReader(object): elif type(path) == list: return self._df(self._jreader.json(self._sqlContext._sc._jvm.PythonUtils.toSeq(path))) elif isinstance(path, RDD): - return self._df(self._jreader.json(path._jrdd)) + def func(iterator): + for x in iterator: + if not isinstance(x, basestring): + x = unicode(x) + if isinstance(x, unicode): + x = x.encode("utf-8") + yield x + keyed = path.mapPartitions(func) + keyed._bypass_serializer = True + jrdd = keyed._jrdd.map(self._sqlContext._jvm.BytesToString()) + return self._df(self._jreader.json(jrdd)) else: raise TypeError("path can be only string or RDD") |