aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorKoert Kuipers <koert@tresata.com>2015-10-17 14:56:24 -0700
committerDavies Liu <davies.liu@gmail.com>2015-10-17 14:56:24 -0700
commit57f83e36d63bbd79663c49a6c1e8f6c3c8fe4789 (patch)
treebb148c00e30b44acca2bcc913cadb81dab76b3a3 /python
parent254937420678a299f06b6f4e2696c623da56cf3a (diff)
downloadspark-57f83e36d63bbd79663c49a6c1e8f6c3c8fe4789.tar.gz
spark-57f83e36d63bbd79663c49a6c1e8f6c3c8fe4789.tar.bz2
spark-57f83e36d63bbd79663c49a6c1e8f6c3c8fe4789.zip
[SPARK-10185] [SQL] Feat sql comma separated paths
Make sure comma-separated paths get processed correctly in ResolvedDataSource for a HadoopFsRelationProvider. Author: Koert Kuipers <koert@tresata.com> Closes #8416 from koertkuipers/feat-sql-comma-separated-paths.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/readwriter.py14
-rw-r--r--python/test_support/sql/people1.json2
2 files changed, 15 insertions, 1 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index f43d8bf646..93832d4c71 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -116,6 +116,10 @@ class DataFrameReader(object):
... opt2=1, opt3='str')
>>> df.dtypes
[('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
+ >>> df = sqlContext.read.format('json').load(['python/test_support/sql/people.json',
+ ... 'python/test_support/sql/people1.json'])
+ >>> df.dtypes
+ [('age', 'bigint'), ('aka', 'string'), ('name', 'string')]
"""
if format is not None:
self.format(format)
@@ -123,7 +127,15 @@ class DataFrameReader(object):
self.schema(schema)
self.options(**options)
if path is not None:
- return self._df(self._jreader.load(path))
+ if type(path) == list:
+ paths = path
+ gateway = self._sqlContext._sc._gateway
+ jpaths = gateway.new_array(gateway.jvm.java.lang.String, len(paths))
+ for i in range(0, len(paths)):
+ jpaths[i] = paths[i]
+ return self._df(self._jreader.load(jpaths))
+ else:
+ return self._df(self._jreader.load(path))
else:
return self._df(self._jreader.load())
diff --git a/python/test_support/sql/people1.json b/python/test_support/sql/people1.json
new file mode 100644
index 0000000000..6d217da77d
--- /dev/null
+++ b/python/test_support/sql/people1.json
@@ -0,0 +1,2 @@
+{"name":"Jonathan", "aka": "John"}
+