diff options
author | Michael Armbrust <michael@databricks.com> | 2014-04-15 20:40:40 -0700 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-04-15 20:40:40 -0700 |
commit | 273c2fd08deb49e970ec471c857dcf0b2953f922 (patch) | |
tree | 770a343087f84426800fa5278e4853d216859f47 /python/pyspark/sql.py | |
parent | 63ca581d9c84176549b1ea0a1d8d7c0cca982acc (diff) | |
download | spark-273c2fd08deb49e970ec471c857dcf0b2953f922.tar.gz spark-273c2fd08deb49e970ec471c857dcf0b2953f922.tar.bz2 spark-273c2fd08deb49e970ec471c857dcf0b2953f922.zip |
[SQL] SPARK-1424 Generalize insertIntoTable functions on SchemaRDDs
This makes it possible to create tables and insert into them using the DSL and SQL for the scala and java apis.
Author: Michael Armbrust <michael@databricks.com>
Closes #354 from marmbrus/insertIntoTable and squashes the following commits:
6c6f227 [Michael Armbrust] Create random temporary files in python parquet unit tests.
f5e6d5c [Michael Armbrust] Merge remote-tracking branch 'origin/master' into insertIntoTable
765c506 [Michael Armbrust] Add to JavaAPI.
77b512c [Michael Armbrust] typos.
5c3ef95 [Michael Armbrust] use names for boolean args.
882afdf [Michael Armbrust] Change createTableAs to saveAsTable. Clean up api annotations.
d07d94b [Michael Armbrust] Add tests, support for creating parquet files and hive tables.
fa3fe81 [Michael Armbrust] Make insertInto available on JavaSchemaRDD as well. Add createTableAs function.
Diffstat (limited to 'python/pyspark/sql.py')
-rw-r--r-- | python/pyspark/sql.py | 14 |
1 file changed, 10 insertions, 4 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 67e6eee3f4..27753d5ba5 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -106,9 +106,12 @@ class SQLContext: """ Loads a Parquet file, returning the result as a L{SchemaRDD}. + >>> import tempfile, shutil + >>> parquetFile = tempfile.mkdtemp() + >>> shutil.rmtree(parquetFile) >>> srdd = sqlCtx.inferSchema(rdd) - >>> srdd.saveAsParquetFile("/tmp/tmp.parquet") - >>> srdd2 = sqlCtx.parquetFile("/tmp/tmp.parquet") + >>> srdd.saveAsParquetFile(parquetFile) + >>> srdd2 = sqlCtx.parquetFile(parquetFile) >>> srdd.collect() == srdd2.collect() True """ @@ -278,9 +281,12 @@ class SchemaRDD(RDD): that are written out using this method can be read back in as a SchemaRDD using the L{SQLContext.parquetFile} method. + >>> import tempfile, shutil + >>> parquetFile = tempfile.mkdtemp() + >>> shutil.rmtree(parquetFile) >>> srdd = sqlCtx.inferSchema(rdd) - >>> srdd.saveAsParquetFile("/tmp/test.parquet") - >>> srdd2 = sqlCtx.parquetFile("/tmp/test.parquet") + >>> srdd.saveAsParquetFile(parquetFile) + >>> srdd2 = sqlCtx.parquetFile(parquetFile) >>> srdd2.collect() == srdd.collect() True """ |