aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/readwriter.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/readwriter.py')
-rw-r--r-- python/pyspark/sql/readwriter.py 27
1 files changed, 25 insertions, 2 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 93832d4c71..97bd90c4db 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -23,6 +23,7 @@ if sys.version >= '3':
from py4j.java_gateway import JavaClass
from pyspark import RDD, since
+from pyspark.rdd import ignore_unicode_prefix
from pyspark.sql.column import _to_seq
from pyspark.sql.types import *
@@ -193,10 +194,22 @@ class DataFrameReader(object):
"""
return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, paths)))
+ @ignore_unicode_prefix
+ @since(1.6)
+ def text(self, path):
+ """Loads a text file, returning a :class:`DataFrame` with one string column named "text".
+
+ Each line in the text file is a new row in the resulting DataFrame.
+
+ :param path: path of the text file to load; it is passed unchanged to the
+ underlying JVM reader.
+
+ >>> df = sqlContext.read.text('python/test_support/sql/text-test.txt')
+ >>> df.collect()
+ [Row(text=u'hello'), Row(text=u'this')]
+ """
+ return self._df(self._jreader.text(path))
+
@since(1.5)
def orc(self, path):
- """
- Loads an ORC file, returning the result as a :class:`DataFrame`.
+ """Loads an ORC file, returning the result as a :class:`DataFrame`.
::Note: Currently ORC support is only available together with
:class:`HiveContext`.
@@ -432,6 +445,16 @@ class DataFrameWriter(object):
self.partitionBy(partitionBy)
self._jwrite.parquet(path)
+ @since(1.6)
+ def text(self, path):
+ """Saves the content of the :class:`DataFrame` in a text file at the specified path.
+
+ The DataFrame must have only one column that is of string type.
+ Each row becomes a new line in the output file.
+
+ :param path: the path the text output is written to; it is passed unchanged
+ to the underlying JVM writer.
+ """
+ self._jwrite.text(path)
+
+ @since(1.5)
def orc(self, path, mode=None, partitionBy=None):
"""Saves the content of the :class:`DataFrame` in ORC format at the specified path.