author     Michael Armbrust <michael@databricks.com>  2014-08-03 12:28:29 -0700
committer  Michael Armbrust <michael@databricks.com>  2014-08-03 12:28:29 -0700
commit     236dfac6769016e433b2f6517cda2d308dea74bc (patch)
tree       101bf2d0e0a964901a2299378cf053a38b4095e5 /examples/src
parent     2998e38a942351974da36cb619e863c6f0316e7a (diff)
[SPARK-2784][SQL] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'
Many users have reported being confused by the distinction between the `sql` and `hql` methods. Specifically, many users think that `sql(...)` cannot be used to read Hive tables. In this PR I introduce a new configuration option, `spark.sql.dialect`, that picks which dialect will be used for parsing. For SQLContext this must be set to `sql`. In `HiveContext` it defaults to `hiveql` but can also be set to `sql`. The `hql` and `hiveql` methods continue to act the same but are now marked as deprecated. **This is a possibly breaking change for some users unless they set the dialect manually, though this is unlikely.** For example, `hiveContext.sql("SELECT 1")` will now throw a parsing exception by default.

Author: Michael Armbrust <michael@databricks.com>

Closes #1746 from marmbrus/sqlLanguageConf and squashes the following commits:

ad375cc [Michael Armbrust] Merge remote-tracking branch 'apache/master' into sqlLanguageConf
20c43f8 [Michael Armbrust] override function instead of just setting the value
7e4ae93 [Michael Armbrust] Deprecate hql() method in favor of a config option, 'spark.sql.dialect'
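For illustration, a minimal Scala sketch of the behavior described above (not part of this commit; the application setup and the use of SQLContext's setConf to change the dialect are assumptions, and the Hive table 'src' from the example below is assumed to exist):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object DialectExample {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("DialectExample"))
    val hiveContext = new HiveContext(sc)

    // In HiveContext the dialect defaults to "hiveql", so sql() now parses HiveQL
    // and reads Hive tables directly (assumes a table named 'src' exists).
    hiveContext.sql("SELECT * FROM src").collect().foreach(println)

    // Switch to the basic SQL parser via the new configuration option.
    hiveContext.setConf("spark.sql.dialect", "sql")
    hiveContext.sql("SELECT 1").collect().foreach(println)

    // hql() continues to behave as before but is now deprecated.
    hiveContext.hql("SELECT * FROM src").collect().foreach(println)
  }
}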
Diffstat (limited to 'examples/src')
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala  12
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 12530c8490..3423fac0ad 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -34,20 +34,20 @@ object HiveFromSpark {
     val hiveContext = new HiveContext(sc)
     import hiveContext._

-    hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
-    hql("LOAD DATA LOCAL INPATH 'src/main/resources/kv1.txt' INTO TABLE src")
+    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    sql("LOAD DATA LOCAL INPATH 'src/main/resources/kv1.txt' INTO TABLE src")

     // Queries are expressed in HiveQL
     println("Result of 'SELECT *': ")
-    hql("SELECT * FROM src").collect.foreach(println)
+    sql("SELECT * FROM src").collect.foreach(println)

     // Aggregation queries are also supported.
-    val count = hql("SELECT COUNT(*) FROM src").collect().head.getLong(0)
+    val count = sql("SELECT COUNT(*) FROM src").collect().head.getLong(0)
     println(s"COUNT(*): $count")

     // The results of SQL queries are themselves RDDs and support all normal RDD functions. The
     // items in the RDD are of type Row, which allows you to access each column by ordinal.
-    val rddFromSql = hql("SELECT key, value FROM src WHERE key < 10 ORDER BY key")
+    val rddFromSql = sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key")

     println("Result of RDD.map:")
     val rddAsStrings = rddFromSql.map {
@@ -60,6 +60,6 @@ object HiveFromSpark {
     // Queries can then join RDD data with data stored in Hive.
     println("Result of SELECT *:")
-    hql("SELECT * FROM records r JOIN src s ON r.key = s.key").collect().foreach(println)
+    sql("SELECT * FROM records r JOIN src s ON r.key = s.key").collect().foreach(println)
   }
 }