path: root/sql/hive/src/main
author    Michael Armbrust <michael@databricks.com>    2014-04-04 21:15:33 -0700
committer Reynold Xin <rxin@apache.org>                2014-04-04 21:15:33 -0700
commit  8de038eb366ded2ac74f72517e40545dbbab8cdd (patch)
tree    fe8982fdf853011aa9c326f5bb11d3752131418a /sql/hive/src/main
parent  b50ddfde0342990979979e58348f54c10b500c90 (diff)
[SQL] SPARK-1366 Consistent sql function across different types of SQLContexts
Now users who want to use HiveQL should explicitly say `hiveql` or `hql`.

Author: Michael Armbrust <michael@databricks.com>

Closes #319 from marmbrus/standardizeSqlHql and squashes the following commits:

de68d0e [Michael Armbrust] Fix sampling test.
fbe4a54 [Michael Armbrust] Make `sql` always use the Spark SQL parser; users of a Hive context can now use `hql` or `hiveql` to run queries in HiveQL instead.
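For illustration, a minimal sketch of the resulting API split, assuming a Spark build from this era (`HiveContext`, `SchemaRDD`, `hiveql`, and `hql` come from the patch below; the local setup, the `src` table, and the query text are hypothetical):

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SchemaRDD
    import org.apache.spark.sql.hive.HiveContext

    // Hypothetical local setup for the example.
    val sc = new SparkContext("local", "hql-example")
    val hiveContext = new HiveContext(sc)

    // `sql` now always goes through the Spark SQL parser, on every SQLContext.
    val viaSqlParser: SchemaRDD = hiveContext.sql("SELECT key, value FROM src")

    // HiveQL must be requested explicitly, via `hiveql` or its alias `hql`.
    val viaHiveQl: SchemaRDD = hiveContext.hiveql("SELECT key, value FROM src")
    val sameThing: SchemaRDD = hiveContext.hql("SELECT key, value FROM src")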
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala  17
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala     12
2 files changed, 16 insertions, 13 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index ff8eaacded..f66a667c0a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -67,14 +67,13 @@ class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
class HiveContext(sc: SparkContext) extends SQLContext(sc) {
self =>
- override def parseSql(sql: String): LogicalPlan = HiveQl.parseSql(sql)
- override def executePlan(plan: LogicalPlan): this.QueryExecution =
+ override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
new this.QueryExecution { val logical = plan }
/**
* Executes a query expressed in HiveQL using Spark, returning the result as a SchemaRDD.
*/
- def hql(hqlQuery: String): SchemaRDD = {
+ def hiveql(hqlQuery: String): SchemaRDD = {
val result = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
// We force query optimization to happen right away instead of letting it happen lazily like
// when using the query DSL. This is so DDL commands behave as expected. This is only
@@ -83,6 +82,9 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
result
}
+ /** An alias for `hiveql`. */
+ def hql(hqlQuery: String): SchemaRDD = hiveql(hqlQuery)
+
// Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur.
@transient
protected val outputBuffer = new java.io.OutputStream {
@@ -120,7 +122,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
/* A catalyst metadata catalog that points to the Hive Metastore. */
@transient
- override lazy val catalog = new HiveMetastoreCatalog(this) with OverrideCatalog {
+ override protected[sql] lazy val catalog = new HiveMetastoreCatalog(this) with OverrideCatalog {
override def lookupRelation(
databaseName: Option[String],
tableName: String,
@@ -132,7 +134,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
/* An analyzer that uses the Hive metastore. */
@transient
- override lazy val analyzer = new Analyzer(catalog, HiveFunctionRegistry, caseSensitive = false)
+ override protected[sql] lazy val analyzer =
+ new Analyzer(catalog, HiveFunctionRegistry, caseSensitive = false)
/**
* Runs the specified SQL query using Hive.
@@ -214,14 +217,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
}
@transient
- override val planner = hivePlanner
+ override protected[sql] val planner = hivePlanner
@transient
protected lazy val emptyResult =
sparkContext.parallelize(Seq(new GenericRow(Array[Any]()): Row), 1)
/** Extends QueryExecution with hive specific features. */
- abstract class QueryExecution extends super.QueryExecution {
+ protected[sql] abstract class QueryExecution extends super.QueryExecution {
// TODO: Create mixin for the analyzer instead of overriding things here.
override lazy val optimizedPlan =
optimizer(catalog.PreInsertionCasts(catalog.CreateTables(analyzed)))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 0a6bea0162..2fea970295 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -110,10 +110,10 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
val describedTable = "DESCRIBE (\\w+)".r
- class SqlQueryExecution(sql: String) extends this.QueryExecution {
- lazy val logical = HiveQl.parseSql(sql)
- def hiveExec() = runSqlHive(sql)
- override def toString = sql + "\n" + super.toString
+ protected[hive] class HiveQLQueryExecution(hql: String) extends this.QueryExecution {
+ lazy val logical = HiveQl.parseSql(hql)
+ def hiveExec() = runSqlHive(hql)
+ override def toString = hql + "\n" + super.toString
}
/**
@@ -140,8 +140,8 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
case class TestTable(name: String, commands: (()=>Unit)*)
- implicit class SqlCmd(sql: String) {
- def cmd = () => new SqlQueryExecution(sql).stringResult(): Unit
+ protected[hive] implicit class SqlCmd(sql: String) {
+ def cmd = () => new HiveQLQueryExecution(sql).stringResult(): Unit
}
/**