author    Dongjoon Hyun <dongjoon@apache.org>  2016-05-03 18:05:40 -0700
committer Andrew Or <andrew@databricks.com>    2016-05-03 18:05:40 -0700
commit    0903a185c7ebc57c75301a27d215b08efd347f99
tree      6ded1ba4dce606712f5d946c5c23bd50b40cfa9a /examples
parent    c1839c9911e37488230a68dec9041eb5958b6f1c
[SPARK-15084][PYTHON][SQL] Use builder pattern to create SparkSession in PySpark.
## What changes were proposed in this pull request?

This is a Python port of the corresponding Scala builder pattern code. `sql.py` is modified as a target example case.

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #12860 from dongjoon-hyun/SPARK-15084.
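For readers new to the pattern being ported: below is a minimal sketch of how the builder is used from PySpark. The `master` and `config` calls are illustrative additions for running locally, not part of this patch:

```python
from pyspark.sql import SparkSession

# Each builder method returns the builder, so configuration chains
# fluently before getOrCreate() constructs (or reuses) a session.
spark = (SparkSession.builder
         .appName("PythonSQL")
         .master("local[*]")                           # illustrative: local mode
         .config("spark.sql.shuffle.partitions", "4")  # illustrative setting
         .getOrCreate())

df = spark.createDataFrame([("John", 19)], ["name", "age"])
df.show()
spark.stop()
```

Because `getOrCreate()` returns an existing session when one is already running, callers no longer manage a `SparkContext` by hand, which is the main simplification the patch brings to `sql.py`.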
Diffstat (limited to 'examples')
-rw-r--r--  examples/src/main/python/sql.py | 35
1 file changed, 15 insertions(+), 20 deletions(-)
diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py
index 2c18875932..ea6a22dbfe 100644
--- a/examples/src/main/python/sql.py
+++ b/examples/src/main/python/sql.py
@@ -20,33 +20,28 @@ from __future__ import print_function
 import os
 import sys
-from pyspark import SparkContext
-from pyspark.sql import SQLContext
+from pyspark.sql import SparkSession
 from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType
 if __name__ == "__main__":
-    sc = SparkContext(appName="PythonSQL")
-    sqlContext = SQLContext(sc)
+    spark = SparkSession.builder.appName("PythonSQL").getOrCreate()
-    # RDD is created from a list of rows
-    some_rdd = sc.parallelize([Row(name="John", age=19),
-                               Row(name="Smith", age=23),
-                               Row(name="Sarah", age=18)])
-    # Infer schema from the first row, create a DataFrame and print the schema
-    some_df = sqlContext.createDataFrame(some_rdd)
+    # A list of Rows. Infer schema from the first row, create a DataFrame and print the schema
+    rows = [Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)]
+    some_df = spark.createDataFrame(rows)
     some_df.printSchema()
-    # Another RDD is created from a list of tuples
-    another_rdd = sc.parallelize([("John", 19), ("Smith", 23), ("Sarah", 18)])
+    # A list of tuples
+    tuples = [("John", 19), ("Smith", 23), ("Sarah", 18)]
     # Schema with two fields - person_name and person_age
     schema = StructType([StructField("person_name", StringType(), False),
                          StructField("person_age", IntegerType(), False)])
     # Create a DataFrame by applying the schema to the RDD and print the schema
-    another_df = sqlContext.createDataFrame(another_rdd, schema)
+    another_df = spark.createDataFrame(tuples, schema)
     another_df.printSchema()
     # root
-    # |-- age: integer (nullable = true)
+    # |-- age: long (nullable = true)
     # |-- name: string (nullable = true)
     # A JSON dataset is pointed to by path.
@@ -57,7 +52,7 @@ if __name__ == "__main__":
     else:
         path = sys.argv[1]
     # Create a DataFrame from the file(s) pointed to by path
-    people = sqlContext.jsonFile(path)
+    people = spark.read.json(path)
     # root
     # |-- person_name: string (nullable = false)
     # |-- person_age: integer (nullable = false)
@@ -65,16 +60,16 @@ if __name__ == "__main__":
     # The inferred schema can be visualized using the printSchema() method.
     people.printSchema()
     # root
-    # |-- age: IntegerType
-    # |-- name: StringType
+    # |-- age: long (nullable = true)
+    # |-- name: string (nullable = true)
     # Register this DataFrame as a table.
-    people.registerAsTable("people")
+    people.registerTempTable("people")
     # SQL statements can be run by using the sql methods provided by sqlContext
-    teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+    teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
     for each in teenagers.collect():
         print(each[0])
-    sc.stop()
+    spark.stop()
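Pulling the patched pieces together, here is a condensed end-to-end sketch of the new flow; it mirrors the diff above, with the dataset trimmed to two rows for brevity:

```python
from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.appName("PythonSQL").getOrCreate()

# Plain Python lists of Rows go straight into createDataFrame;
# the explicit sc.parallelize(...) step from the old example is gone.
df = spark.createDataFrame([Row(name="John", age=19), Row(name="Smith", age=23)])
df.registerTempTable("people")

# spark.sql replaces sqlContext.sql for querying registered temp tables.
teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
for row in teenagers.collect():
    print(row[0])

spark.stop()
```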