author      Davies Liu <davies@databricks.com>   2015-04-08 13:31:45 -0700
committer   Reynold Xin <rxin@databricks.com>    2015-04-08 13:31:45 -0700
commit      6ada4f6f52cf1d992c7ab0c32318790cf08b0a0d (patch)
tree        495c9bb86bb98de40365538bebcf9144547d8cce /examples
parent      66159c35010af35098dd1ec75475bb5d4d0fd6ca (diff)
[SPARK-6781] [SQL] use sqlContext in python shell
Use `sqlContext` in the PySpark shell, making it consistent with the SQL programming guide. `sqlCtx` is also kept for backward compatibility.

Author: Davies Liu <davies@databricks.com>

Closes #5425 from davies/sqlCtx and squashes the following commits:

af67340 [Davies Liu] sqlCtx -> sqlContext
15a278f [Davies Liu] use sqlContext in python shell
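A quick way to see the effect of the change is a shell session like the following (a hypothetical transcript; the object address is illustrative):

    $ bin/pyspark
    >>> sqlContext             # the new, documented name
    <pyspark.sql.context.SQLContext object at 0x...>
    >>> sqlCtx is sqlContext   # old name kept for backward compatibility
    True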
Diffstat (limited to 'examples')
-rw-r--r--  examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java  | 20
-rw-r--r--  examples/src/main/python/ml/simple_text_classification_pipeline.py      |  2
-rw-r--r--  examples/src/main/python/mllib/dataset_example.py                        |  6
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
index dee794840a..8159ffbe2d 100644
--- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
+++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
@@ -55,7 +55,7 @@ public class JavaSparkSQL {
public static void main(String[] args) throws Exception {
SparkConf sparkConf = new SparkConf().setAppName("JavaSparkSQL");
JavaSparkContext ctx = new JavaSparkContext(sparkConf);
- SQLContext sqlCtx = new SQLContext(ctx);
+ SQLContext sqlContext = new SQLContext(ctx);
System.out.println("=== Data source: RDD ===");
// Load a text file and convert each line to a Java Bean.
@@ -74,11 +74,11 @@ public class JavaSparkSQL {
});
// Apply a schema to an RDD of Java Beans and register it as a table.
- DataFrame schemaPeople = sqlCtx.createDataFrame(people, Person.class);
+ DataFrame schemaPeople = sqlContext.createDataFrame(people, Person.class);
schemaPeople.registerTempTable("people");
// SQL can be run over RDDs that have been registered as tables.
- DataFrame teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
+ DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
// The results of SQL queries are DataFrames and support all the normal RDD operations.
// The columns of a row in the result can be accessed by ordinal.
@@ -99,12 +99,12 @@ public class JavaSparkSQL {
// Read in the parquet file created above.
// Parquet files are self-describing so the schema is preserved.
// The result of loading a parquet file is also a DataFrame.
- DataFrame parquetFile = sqlCtx.parquetFile("people.parquet");
+ DataFrame parquetFile = sqlContext.parquetFile("people.parquet");
//Parquet files can also be registered as tables and then used in SQL statements.
parquetFile.registerTempTable("parquetFile");
DataFrame teenagers2 =
- sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
+ sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
teenagerNames = teenagers2.toJavaRDD().map(new Function<Row, String>() {
@Override
public String call(Row row) {
@@ -120,7 +120,7 @@ public class JavaSparkSQL {
// The path can be either a single text file or a directory storing text files.
String path = "examples/src/main/resources/people.json";
// Create a DataFrame from the file(s) pointed by path
- DataFrame peopleFromJsonFile = sqlCtx.jsonFile(path);
+ DataFrame peopleFromJsonFile = sqlContext.jsonFile(path);
// Because the schema of a JSON dataset is automatically inferred, to write queries,
// it is better to take a look at what is the schema.
@@ -133,8 +133,8 @@ public class JavaSparkSQL {
// Register this DataFrame as a table.
peopleFromJsonFile.registerTempTable("people");
- // SQL statements can be run by using the sql methods provided by sqlCtx.
- DataFrame teenagers3 = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
+ // SQL statements can be run by using the sql methods provided by sqlContext.
+ DataFrame teenagers3 = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
// The results of SQL queries are DataFrame and support all the normal RDD operations.
// The columns of a row in the result can be accessed by ordinal.
@@ -151,7 +151,7 @@ public class JavaSparkSQL {
List<String> jsonData = Arrays.asList(
"{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
JavaRDD<String> anotherPeopleRDD = ctx.parallelize(jsonData);
- DataFrame peopleFromJsonRDD = sqlCtx.jsonRDD(anotherPeopleRDD.rdd());
+ DataFrame peopleFromJsonRDD = sqlContext.jsonRDD(anotherPeopleRDD.rdd());
// Take a look at the schema of this new DataFrame.
peopleFromJsonRDD.printSchema();
@@ -164,7 +164,7 @@ public class JavaSparkSQL {
peopleFromJsonRDD.registerTempTable("people2");
- DataFrame peopleWithCity = sqlCtx.sql("SELECT name, address.city FROM people2");
+ DataFrame peopleWithCity = sqlContext.sql("SELECT name, address.city FROM people2");
List<String> nameAndCity = peopleWithCity.toJavaRDD().map(new Function<Row, String>() {
@Override
public String call(Row row) {
diff --git a/examples/src/main/python/ml/simple_text_classification_pipeline.py b/examples/src/main/python/ml/simple_text_classification_pipeline.py
index d281f4fa44..c73edb7fd6 100644
--- a/examples/src/main/python/ml/simple_text_classification_pipeline.py
+++ b/examples/src/main/python/ml/simple_text_classification_pipeline.py
@@ -33,7 +33,7 @@ pipeline in Python. Run with:
if __name__ == "__main__":
sc = SparkContext(appName="SimpleTextClassificationPipeline")
- sqlCtx = SQLContext(sc)
+ sqlContext = SQLContext(sc)
# Prepare training documents, which are labeled.
LabeledDocument = Row("id", "text", "label")
diff --git a/examples/src/main/python/mllib/dataset_example.py b/examples/src/main/python/mllib/dataset_example.py
index b5a70db2b9..fcbf56cbf0 100644
--- a/examples/src/main/python/mllib/dataset_example.py
+++ b/examples/src/main/python/mllib/dataset_example.py
@@ -44,19 +44,19 @@ if __name__ == "__main__":
print >> sys.stderr, "Usage: dataset_example.py <libsvm file>"
exit(-1)
sc = SparkContext(appName="DatasetExample")
- sqlCtx = SQLContext(sc)
+ sqlContext = SQLContext(sc)
if len(sys.argv) == 2:
input = sys.argv[1]
else:
input = "data/mllib/sample_libsvm_data.txt"
points = MLUtils.loadLibSVMFile(sc, input)
- dataset0 = sqlCtx.inferSchema(points).setName("dataset0").cache()
+ dataset0 = sqlContext.inferSchema(points).setName("dataset0").cache()
summarize(dataset0)
tempdir = tempfile.NamedTemporaryFile(delete=False).name
os.unlink(tempdir)
print "Save dataset as a Parquet file to %s." % tempdir
dataset0.saveAsParquetFile(tempdir)
print "Load it back and summarize it again."
- dataset1 = sqlCtx.parquetFile(tempdir).setName("dataset1").cache()
+ dataset1 = sqlContext.parquetFile(tempdir).setName("dataset1").cache()
summarize(dataset1)
shutil.rmtree(tempdir)
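For reference, the compatibility story described in the commit message amounts to binding both names to the same object. This diff only touches the examples, so the shell wiring itself is not shown here; the snippet below is a minimal sketch of that alias pattern, not the shell code from this change:

    from pyspark import SparkContext
    from pyspark.sql import SQLContext

    sc = SparkContext(appName="SqlContextAliasSketch")
    sqlContext = SQLContext(sc)  # preferred name, consistent with the SQL programming guide
    sqlCtx = sqlContext          # old name kept as a plain alias for backward compatibility

    assert sqlCtx is sqlContext  # both names resolve to the same SQLContext instance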