diff options
author | Bryan Cutler <cutlerb@gmail.com> | 2016-10-07 00:27:55 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-10-07 00:27:55 -0700 |
commit | bcaa799cb01289f73e9f48526e94653a07628983 (patch) | |
tree | e16db6394580e12540b147536546c2590398e472 | |
parent | 3713bb199142c5e06e2e527c99650f02f41f47b1 (diff) | |
download | spark-bcaa799cb01289f73e9f48526e94653a07628983.tar.gz spark-bcaa799cb01289f73e9f48526e94653a07628983.tar.bz2 spark-bcaa799cb01289f73e9f48526e94653a07628983.zip |
[SPARK-17805][PYSPARK] Fix sqlContext.read.text when passed a list of paths
## What changes were proposed in this pull request?
If given a list of paths, `pyspark.sql.readwriter.text` will attempt to use an undefined variable `path`, because `path` is only assigned when the argument is a single string. This change checks whether the param `paths` is a basestring and, if so, converts it to a list, so that the same variable `paths` can be used in both cases.
## How was this patch tested?
Added a unit test for reading a list of files.
Author: Bryan Cutler <cutlerb@gmail.com>
Closes #15379 from BryanCutler/sql-readtext-paths-SPARK-17805.
-rw-r--r-- | python/pyspark/sql/readwriter.py | 4 | ||||
-rw-r--r-- | python/pyspark/sql/tests.py | 6 |
2 files changed, 8 insertions, 2 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 3ad6f80de9..91c2b17049 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -289,8 +289,8 @@ class DataFrameReader(OptionUtils): [Row(value=u'hello'), Row(value=u'this')] """ if isinstance(paths, basestring): - path = [paths] - return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(path))) + paths = [paths] + return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) @since(2.0) def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None, diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index c2171c277c..a9e455565a 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1702,6 +1702,12 @@ class SQLTests(ReusedPySparkTestCase): "does_not_exist", lambda: spark.catalog.uncacheTable("does_not_exist")) + def test_read_text_file_list(self): + df = self.spark.read.text(['python/test_support/sql/text-test.txt', + 'python/test_support/sql/text-test.txt']) + count = df.count() + self.assertEquals(count, 4) + class HiveSparkSubmitTests(SparkSubmitTests): |