author     Michael Armbrust <michael@databricks.com>  2014-04-15 20:40:40 -0700
committer  Reynold Xin <rxin@apache.org>              2014-04-15 20:40:40 -0700
commit     273c2fd08deb49e970ec471c857dcf0b2953f922 (patch)
tree       770a343087f84426800fa5278e4853d216859f47 /python/pyspark
parent     63ca581d9c84176549b1ea0a1d8d7c0cca982acc (diff)
[SQL] SPARK-1424 Generalize insertIntoTable functions on SchemaRDDs
This makes it possible to create tables and insert into them using the DSL and SQL for the Scala and Java APIs.

Author: Michael Armbrust <michael@databricks.com>

Closes #354 from marmbrus/insertIntoTable and squashes the following commits:

6c6f227 [Michael Armbrust] Create random temporary files in python parquet unit tests.
f5e6d5c [Michael Armbrust] Merge remote-tracking branch 'origin/master' into insertIntoTable
765c506 [Michael Armbrust] Add to JavaAPI.
77b512c [Michael Armbrust] typos.
5c3ef95 [Michael Armbrust] use names for boolean args.
882afdf [Michael Armbrust] Change createTableAs to saveAsTable. Clean up api annotations.
d07d94b [Michael Armbrust] Add tests, support for creating parquet files and hive tables.
fa3fe81 [Michael Armbrust] Make insertInto available on JavaSchemaRDD as well. Add createTableAs function.
Diffstat (limited to 'python/pyspark')
-rw-r--r--  python/pyspark/sql.py | 14
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 67e6eee3f4..27753d5ba5 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -106,9 +106,12 @@ class SQLContext:
"""
Loads a Parquet file, returning the result as a L{SchemaRDD}.
+ >>> import tempfile, shutil
+ >>> parquetFile = tempfile.mkdtemp()
+ >>> shutil.rmtree(parquetFile)
>>> srdd = sqlCtx.inferSchema(rdd)
- >>> srdd.saveAsParquetFile("/tmp/tmp.parquet")
- >>> srdd2 = sqlCtx.parquetFile("/tmp/tmp.parquet")
+ >>> srdd.saveAsParquetFile(parquetFile)
+ >>> srdd2 = sqlCtx.parquetFile(parquetFile)
>>> srdd.collect() == srdd2.collect()
True
"""
@@ -278,9 +281,12 @@ class SchemaRDD(RDD):
that are written out using this method can be read back in as a SchemaRDD using the
L{SQLContext.parquetFile} method.
+ >>> import tempfile, shutil
+ >>> parquetFile = tempfile.mkdtemp()
+ >>> shutil.rmtree(parquetFile)
>>> srdd = sqlCtx.inferSchema(rdd)
- >>> srdd.saveAsParquetFile("/tmp/test.parquet")
- >>> srdd2 = sqlCtx.parquetFile("/tmp/test.parquet")
+ >>> srdd.saveAsParquetFile(parquetFile)
+ >>> srdd2 = sqlCtx.parquetFile(parquetFile)
>>> srdd2.collect() == srdd.collect()
True
"""