aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r--python/pyspark/sql/readwriter.py19
1 files changed, 12 insertions, 7 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index e8f0d7ec77..2e75f0c8a1 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -109,7 +109,7 @@ class DataFrameReader(object):
def load(self, path=None, format=None, schema=None, **options):
"""Loads data from a data source and returns it as a :class`DataFrame`.
- :param path: optional string for file-system backed data sources.
+ :param path: optional string or a list of string for file-system backed data sources.
:param format: optional string for format of the data source. Default to 'parquet'.
:param schema: optional :class:`StructType` for the input schema.
:param options: all other string options
@@ -118,6 +118,7 @@ class DataFrameReader(object):
... opt2=1, opt3='str')
>>> df.dtypes
[('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
+
>>> df = sqlContext.read.format('json').load(['python/test_support/sql/people.json',
... 'python/test_support/sql/people1.json'])
>>> df.dtypes
@@ -130,10 +131,8 @@ class DataFrameReader(object):
self.options(**options)
if path is not None:
if type(path) == list:
- paths = path
- gateway = self._sqlContext._sc._gateway
- jpaths = utils.toJArray(gateway, gateway.jvm.java.lang.String, paths)
- return self._df(self._jreader.load(jpaths))
+ return self._df(
+ self._jreader.load(self._sqlContext._sc._jvm.PythonUtils.toSeq(path)))
else:
return self._df(self._jreader.load(path))
else:
@@ -175,6 +174,8 @@ class DataFrameReader(object):
self.schema(schema)
if isinstance(path, basestring):
return self._df(self._jreader.json(path))
+ elif type(path) == list:
+ return self._df(self._jreader.json(self._sqlContext._sc._jvm.PythonUtils.toSeq(path)))
elif isinstance(path, RDD):
return self._df(self._jreader.json(path._jrdd))
else:
@@ -205,16 +206,20 @@ class DataFrameReader(object):
@ignore_unicode_prefix
@since(1.6)
- def text(self, path):
+ def text(self, paths):
"""Loads a text file and returns a [[DataFrame]] with a single string column named "text".
Each line in the text file is a new row in the resulting DataFrame.
+ :param paths: string, or list of strings, for input path(s).
+
>>> df = sqlContext.read.text('python/test_support/sql/text-test.txt')
>>> df.collect()
[Row(value=u'hello'), Row(value=u'this')]
"""
- return self._df(self._jreader.text(path))
+ if isinstance(paths, basestring):
+ paths = [paths]
+ return self._df(self._jreader.text(self._sqlContext._sc._jvm.PythonUtils.toSeq(paths)))
@since(1.5)
def orc(self, path):