author     Michael Armbrust <michael@databricks.com>  2015-02-17 10:21:17 -0800
committer  Michael Armbrust <michael@databricks.com>  2015-02-17 10:21:17 -0800
commit     c74b07fa94a8da50437d952ae05cf6ac70fbb93e (patch)
tree       63b906b18df550b9f8ccfe13dcff0c1d318cf546 /sql/hive
parent     c76da36c2163276b5c34e59fbb139eeb34ed0faa (diff)
[SPARK-5166][SPARK-5247][SPARK-5258][SQL] API Cleanup / Documentation
Author: Michael Armbrust <michael@databricks.com>

Closes #4642 from marmbrus/docs and squashes the following commits:

d291c34 [Michael Armbrust] python tests
9be66e3 [Michael Armbrust] comments
d56afc2 [Michael Armbrust] fix style
f004747 [Michael Armbrust] fix build
c4a907b [Michael Armbrust] fix tests
42e2b73 [Michael Armbrust] [SQL] Documentation / API Clean-up.
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala  |  4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala                  |  8
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/package.scala                   | 25
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala                             | 10
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/parquet/FakeParquetSerDe.scala            | 56
-rw-r--r--  sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala                      |  9
-rw-r--r--  sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala                      |  9
7 files changed, 59 insertions(+), 62 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
index bfacc51ef5..07b5a84fb6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala
@@ -29,9 +29,9 @@ import org.apache.spark.sql.hive.HiveShim
import org.apache.spark.sql.SQLContext
/**
- * Implementation for "describe [extended] table".
- *
* :: DeveloperApi ::
+ *
+ * Implementation for "describe [extended] table".
*/
@DeveloperApi
case class DescribeHiveTableCommand(
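DescribeHiveTableCommand backs Hive's DESCRIBE statement, so it is normally reached through SQL rather than constructed directly. A minimal sketch, assuming a HiveContext named hiveContext and an existing table src:

// Runs DESCRIBE EXTENDED, which the planner turns into DescribeHiveTableCommand.
// `hiveContext` and the table `src` are assumed to exist already.
val description = hiveContext.sql("DESCRIBE EXTENDED src")
description.collect().foreach(println)  // rows of (col_name, data_type, comment)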
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index 0aa5f7f7b8..6afd8eea05 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -102,6 +102,10 @@ case class AddFile(path: String) extends RunnableCommand {
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class CreateMetastoreDataSource(
tableName: String,
userSpecifiedSchema: Option[StructType],
@@ -141,6 +145,10 @@ case class CreateMetastoreDataSource(
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class CreateMetastoreDataSourceAsSelect(
tableName: String,
provider: String,
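These two commands implement the data source DDL path: plain CREATE TABLE ... USING plans into CreateMetastoreDataSource, and the AS SELECT variant plans into CreateMetastoreDataSourceAsSelect. A hedged sketch of that SQL; the table names and path below are illustrative only:

// Registers a metastore table backed by a data source (CreateMetastoreDataSource).
hiveContext.sql(
  """CREATE TABLE users
    |USING org.apache.spark.sql.parquet
    |OPTIONS (path '/data/users.parquet')""".stripMargin)

// CTAS form, planned into CreateMetastoreDataSourceAsSelect.
hiveContext.sql(
  """CREATE TABLE active_users
    |USING org.apache.spark.sql.parquet
    |AS SELECT * FROM users WHERE active = true""".stripMargin)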
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/package.scala
new file mode 100644
index 0000000000..4989c42e96
--- /dev/null
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+/**
+ * Physical execution operators used for running queries against data stored in Hive. These
+ * are not intended for use by users, but are documented so that it is easier to understand
+ * the output of EXPLAIN queries.
+ */
+package object execution
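Since the new package doc points readers at EXPLAIN, a short sketch of surfacing those operator names (again assuming hiveContext and a table src):

// The physical plan printed here names operators from this execution package,
// e.g. HiveTableScan; that is what the package doc means by "output of EXPLAIN".
hiveContext.sql("EXPLAIN EXTENDED SELECT key, count(*) FROM src GROUP BY key")
  .collect()
  .foreach(println)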
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala
index a6c8ed4f7e..db074361ef 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala
@@ -17,4 +17,14 @@
package org.apache.spark.sql
+/**
+ * Support for running Spark SQL queries using functionality from Apache Hive (does not require an
+ * existing Hive installation). Supported Hive features include:
+ * - Using HiveQL to express queries.
+ * - Reading metadata from the Hive Metastore using HiveSerDes.
+ * - Hive UDFs, UDAFs, and UDTFs.
+ *
+ * Users that would like access to this functionality should create a
+ * [[hive.HiveContext HiveContext]] instead of a [[SQLContext]].
+ */
package object hive
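A minimal sketch of the recommended entry point described in this package doc, assuming an existing SparkContext sc; no Hive installation is required:

import org.apache.spark.sql.hive.HiveContext

val hiveContext = new HiveContext(sc)  // sc: an existing SparkContext

// HiveQL queries, metastore access, and Hive UDFs all go through this context.
hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
hiveContext.sql("SELECT key, value FROM src").collect().foreach(println)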
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/parquet/FakeParquetSerDe.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/parquet/FakeParquetSerDe.scala
deleted file mode 100644
index 2a16c9d1a2..0000000000
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/parquet/FakeParquetSerDe.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive.parquet
-
-import java.util.Properties
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category
-import org.apache.hadoop.hive.serde2.{SerDeStats, SerDe}
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
-import org.apache.hadoop.io.Writable
-
-/**
- * A placeholder that allows Spark SQL users to create metastore tables that are stored as
- * parquet files. It is only intended to pass the checks that the serde is valid and exists
- * when a CREATE TABLE is run. The actual work of decoding will be done by ParquetTableScan
- * when "spark.sql.hive.convertMetastoreParquet" is set to true.
- */
-@deprecated("No code should depend on FakeParquetHiveSerDe as it is only intended as a " +
- "placeholder in the Hive MetaStore", "1.2.0")
-class FakeParquetSerDe extends SerDe {
- override def getObjectInspector: ObjectInspector = new ObjectInspector {
- override def getCategory: Category = Category.PRIMITIVE
-
- override def getTypeName: String = "string"
- }
-
- override def deserialize(p1: Writable): AnyRef = throwError
-
- override def initialize(p1: Configuration, p2: Properties): Unit = {}
-
- override def getSerializedClass: Class[_ <: Writable] = throwError
-
- override def getSerDeStats: SerDeStats = throwError
-
- override def serialize(p1: scala.Any, p2: ObjectInspector): Writable = throwError
-
- private def throwError =
- sys.error(
- "spark.sql.hive.convertMetastoreParquet must be set to true to use FakeParquetSerDe")
-}
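The deleted placeholder pointed users at spark.sql.hive.convertMetastoreParquet, and that setting is still how the native Parquet conversion is toggled. A sketch, assuming a live hiveContext:

// When true, metastore Parquet tables are read with Spark SQL's native scan
// instead of the Hive SerDe path, which is why the fake SerDe became unnecessary.
hiveContext.setConf("spark.sql.hive.convertMetastoreParquet", "true")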
diff --git a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
index 8534c7d706..30646ddbc2 100644
--- a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
+++ b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
@@ -43,7 +43,9 @@ import org.apache.hadoop.mapred.InputFormat
import org.apache.spark.sql.types.{Decimal, DecimalType}
-case class HiveFunctionWrapper(functionClassName: String) extends java.io.Serializable {
+private[hive] case class HiveFunctionWrapper(functionClassName: String)
+ extends java.io.Serializable {
+
// for Serialization
def this() = this(null)
@@ -249,6 +251,9 @@ private[hive] object HiveShim {
def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table) = {}
}
-class ShimFileSinkDesc(var dir: String, var tableInfo: TableDesc, var compressed: Boolean)
+private[hive] class ShimFileSinkDesc(
+ var dir: String,
+ var tableInfo: TableDesc,
+ var compressed: Boolean)
extends FileSinkDesc(dir, tableInfo, compressed) {
}
diff --git a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
index 72104f5b55..f9fcbdae15 100644
--- a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
+++ b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
@@ -56,7 +56,9 @@ import org.apache.spark.sql.types.{Decimal, DecimalType}
*
* @param functionClassName UDF class name
*/
-case class HiveFunctionWrapper(var functionClassName: String) extends java.io.Externalizable {
+private[hive] case class HiveFunctionWrapper(var functionClassName: String)
+ extends java.io.Externalizable {
+
// for Serialization
def this() = this(null)
@@ -423,7 +425,10 @@ private[hive] object HiveShim {
* Bug introduced in hive-0.13. FileSinkDesc is serializable, but its member path is not.
* Fix it through a wrapper.
*/
-class ShimFileSinkDesc(var dir: String, var tableInfo: TableDesc, var compressed: Boolean)
+private[hive] class ShimFileSinkDesc(
+ var dir: String,
+ var tableInfo: TableDesc,
+ var compressed: Boolean)
extends Serializable with Logging {
var compressCodec: String = _
var compressType: String = _
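The wrapper fix noted in the comment above is a general pattern: carry only serializable state and rebuild the non-serializable member after deserialization. A self-contained sketch with hypothetical names, not the Hive classes themselves:

// Hypothetical stand-in for a class (like Hive's Path-bearing FileSinkDesc)
// that cannot be serialized directly.
class HeavyHandle(val location: String)

// Serializable wrapper: only the String travels over the wire; the heavy
// object is rebuilt lazily on the deserialized side.
class HandleWrapper(val location: String) extends Serializable {
  @transient private var handle: HeavyHandle = _

  def get: HeavyHandle = {
    if (handle == null) handle = new HeavyHandle(location)
    handle
  }
}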