aboutsummaryrefslogtreecommitdiff
path: root/examples/src
diff options
context:
space:
mode:
authorjyotiska <jyotiska123@gmail.com>2014-10-01 13:52:50 -0700
committerMichael Armbrust <michael@databricks.com>2014-10-01 13:52:50 -0700
commit17333c7a3c26ca6d28e8f3ca097da37d6b655217 (patch)
tree6b116dcf9b5e50f3c37bf3c1d12815d4f6e925e9 /examples/src
parentb81ee0b46d63c2122b88941696654100fd736942 (diff)
downloadspark-17333c7a3c26ca6d28e8f3ca097da37d6b655217.tar.gz
spark-17333c7a3c26ca6d28e8f3ca097da37d6b655217.tar.bz2
spark-17333c7a3c26ca6d28e8f3ca097da37d6b655217.zip
Python SQL Example Code
SQL example code for Python, as shown on [SQL Programming Guide](https://spark.apache.org/docs/1.0.2/sql-programming-guide.html) Author: jyotiska <jyotiska123@gmail.com> Closes #2521 from jyotiska/sql_example and squashes the following commits: 1471dcb [jyotiska] added imports for sql b25e436 [jyotiska] pep 8 compliance 43fd10a [jyotiska] lines broken to maintain 80 char limit b4fdf4e [jyotiska] removed blank lines 83d5ab7 [jyotiska] added inferschema and applyschema to the demo 306667e [jyotiska] replaced blank line with end line c90502a [jyotiska] fixed new line 4939a70 [jyotiska] added new line at end for python style 0b46148 [jyotiska] fixed appname for python sql example 8f67b5b [jyotiska] added python sql example
Diffstat (limited to 'examples/src')
-rw-r--r--examples/src/main/python/sql.py73
1 files changed, 73 insertions, 0 deletions
diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py
new file mode 100644
index 0000000000..eefa022f19
--- /dev/null
+++ b/examples/src/main/python/sql.py
@@ -0,0 +1,73 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+from pyspark import SparkContext
+from pyspark.sql import SQLContext
+from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
+
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="PythonSQL")
+ sqlContext = SQLContext(sc)
+
+ # RDD is created from a list of rows
+ some_rdd = sc.parallelize([Row(name="John", age=19),
+ Row(name="Smith", age=23),
+ Row(name="Sarah", age=18)])
+ # Infer schema from the first row, create a SchemaRDD and print the schema
+ some_schemardd = sqlContext.inferSchema(some_rdd)
+ some_schemardd.printSchema()
+
+ # Another RDD is created from a list of tuples
+ another_rdd = sc.parallelize([("John", 19), ("Smith", 23), ("Sarah", 18)])
+ # Schema with two fields - person_name and person_age
+ schema = StructType([StructField("person_name", StringType(), False),
+ StructField("person_age", IntegerType(), False)])
+ # Create a SchemaRDD by applying the schema to the RDD and print the schema
+ another_schemardd = sqlContext.applySchema(another_rdd, schema)
+ another_schemardd.printSchema()
+ # root
+ # |-- age: integer (nullable = true)
+ # |-- name: string (nullable = true)
+
+ # A JSON dataset is pointed to by path.
+ # The path can be either a single text file or a directory storing text files.
+ path = os.environ['SPARK_HOME'] + "examples/src/main/resources/people.json"
+ # Create a SchemaRDD from the file(s) pointed to by path
+ people = sqlContext.jsonFile(path)
+ # root
+ # |-- person_name: string (nullable = false)
+ # |-- person_age: integer (nullable = false)
+
+ # The inferred schema can be visualized using the printSchema() method.
+ people.printSchema()
+ # root
+ # |-- age: IntegerType
+ # |-- name: StringType
+
+ # Register this SchemaRDD as a table.
+ people.registerAsTable("people")
+
+ # SQL statements can be run by using the sql methods provided by sqlContext
+ teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+
+ for each in teenagers.collect():
+ print each[0]
+
+ sc.stop()