-rw-r--r--  dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala                      4
-rw-r--r--  docs/sql-programming-guide.md                                                    6
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala   4
-rw-r--r--  python/pyspark/sql.py                                                            6
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala              7
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala                15
6 files changed, 30 insertions, 12 deletions
diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
index 7257d17d10..a21410f3b9 100644
--- a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.{ListBuffer, Queue}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
case class Person(name: String, age: Int)
@@ -34,7 +34,7 @@ object SparkSqlExample {
case None => new SparkConf().setAppName("Simple Sql App")
}
val sc = new SparkContext(conf)
- val hiveContext = new LocalHiveContext(sc)
+ val hiveContext = new HiveContext(sc)
import hiveContext._
hql("DROP TABLE IF EXISTS src")
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 156e0aebde..a047d32b6e 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -487,9 +487,9 @@ Configuration of Hive is done by placing your `hive-site.xml` file in `conf/`.
When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
adds support for finding tables in the MetaStore and writing queries using HiveQL. Users who do
-not have an existing Hive deployment can also experiment with the `LocalHiveContext`,
-which is similar to `HiveContext`, but creates a local copy of the `metastore` and `warehouse`
-automatically.
+not have an existing Hive deployment can still create a `HiveContext`. When not configured by
+`hive-site.xml`, the context automatically creates `metastore_db` and `warehouse` in the current
+directory.
{% highlight scala %}
// sc is an existing SparkContext.
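For reference, a minimal sketch of the usage the updated paragraph describes, relying only on the `HiveContext` API shown elsewhere in this diff (table name and data path are illustrative):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

val sc = new SparkContext(new SparkConf().setAppName("HiveContextSketch").setMaster("local[2]"))

// With no hive-site.xml on the classpath, metastore_db and warehouse are
// created automatically in the current working directory on first use.
val hiveContext = new HiveContext(sc)
import hiveContext._

hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
hql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
hql("FROM src SELECT key, value").collect().foreach(println)
```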
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 66a23fac39..dc5290fb4f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql.hive
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
object HiveFromSpark {
case class Record(key: Int, value: String)
@@ -31,7 +31,7 @@ object HiveFromSpark {
// A local hive context creates an instance of the Hive Metastore in process, storing
// the warehouse data in the current directory. This location can be overridden by
// specifying a second parameter to the constructor.
- val hiveContext = new LocalHiveContext(sc)
+ val hiveContext = new HiveContext(sc)
import hiveContext._
hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 13f0ed4e35..9388ead5ea 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -15,6 +15,8 @@
# limitations under the License.
#
+import warnings
+
from pyspark.rdd import RDD, PipelinedRDD
from pyspark.serializers import BatchedSerializer, PickleSerializer
@@ -813,6 +815,10 @@ class LocalHiveContext(HiveContext):
130091
"""
+ def __init__(self, sparkContext, sqlContext=None):
+ HiveContext.__init__(self, sparkContext, sqlContext)
+ warnings.warn("LocalHiveContext is deprecated. Use HiveContext instead.", DeprecationWarning)
+
def _get_hive_ctx(self):
return self._jvm.LocalHiveContext(self._jsc.sc())
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b413373345..27b444daba 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -42,9 +42,12 @@ import org.apache.spark.sql.execution.{Command => PhysicalCommand}
import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
/**
- * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
- * created with data stored in ./metadata. Warehouse data is stored in in ./warehouse.
+ * DEPRECATED: Use HiveContext instead.
*/
+@deprecated("""
+ Use HiveContext instead. It will still create a local metastore if one is not specified.
+ However, note that the default directory is ./metastore_db, not ./metastore
+ """, "1.1")
class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
lazy val metastorePath = new File("metastore").getCanonicalPath
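For callers, the deprecation amounts to the one-line change the other files in this commit make; a minimal before/after sketch:

```scala
// Before (now triggers the deprecation warning above):
// val hiveContext = new org.apache.spark.sql.hive.LocalHiveContext(sc)

// After: behaves the same when Hive is unconfigured, except the local Derby
// metastore is created at ./metastore_db instead of ./metastore.
val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
```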
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 9386008d02..c50e8c4b5c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -53,15 +53,24 @@ object TestHive
* hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of
* test cases that rely on TestHive must be serialized.
*/
-class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
+class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
self =>
// By clearing the port we force Spark to pick a new one. This allows us to rerun tests
// without restarting the JVM.
System.clearProperty("spark.hostPort")
- override lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
- override lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+ lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
+ lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+
+ /** Sets up the system initially or after a RESET command */
+ protected def configure() {
+ set("javax.jdo.option.ConnectionURL",
+ s"jdbc:derby:;databaseName=$metastorePath;create=true")
+ set("hive.metastore.warehouse.dir", warehousePath)
+ }
+
+ configure() // Must be called before initializing the catalog below.
/** The location of the compiled hive distribution */
lazy val hiveHome = envVarToFile("HIVE_HOME")
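The `configure()` hook above simply points Hive at throwaway metastore and warehouse locations before the catalog is initialized. A sketch of the same idea outside the Spark tree, for test suites that want an isolated context (class name and temp-path helper are illustrative, not part of this commit):

```scala
import java.io.File
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

// Hypothetical test-only context: metastore and warehouse both live in fresh
// temp directories, so repeated or concurrent suites do not share state.
class IsolatedHiveContext(sc: SparkContext) extends HiveContext(sc) {
  private def tempPath(prefix: String): String = {
    val f = File.createTempFile(prefix, "")
    f.delete() // reuse the unique name as a directory path
    f.getCanonicalPath
  }

  set("javax.jdo.option.ConnectionURL",
    s"jdbc:derby:;databaseName=${tempPath("sparkHiveMetastore")};create=true")
  set("hive.metastore.warehouse.dir", tempPath("sparkHiveWarehouse"))
}
```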