author     Michael Armbrust <michael@databricks.com>  2014-04-22 20:02:33 -0700
committer  Patrick Wendell <pwendell@gmail.com>       2014-04-22 20:02:33 -0700
commit     aa77f8a6a604efe0d02bc8412b3f1ba3903b7a57 (patch)
tree       c1d0d64b74f80861acc13349ce5c1af2fa1158d4 /sql
parent     662c860ebcec5565493a7dff4812e6b7a37b1d7d (diff)
SPARK-1562 Fix visibility / annotation of Spark SQL APIs
Author: Michael Armbrust <michael@databricks.com>

Closes #489 from marmbrus/sqlDocFixes and squashes the following commits:

acee4f3 [Michael Armbrust] Fix visibility / annotation of Spark SQL APIs
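The pattern applied throughout the patch is sketched below as a minimal, hypothetical example (the Frobnicate operator and FrobnicateUtil helper do not exist in Spark): unstable public operators get the @DeveloperApi annotation plus the ":: DeveloperApi ::" scaladoc tag, while implementation details get Scala's package-qualified private[sql] visibility, which hides them from user code but keeps them accessible anywhere under org.apache.spark.sql.

package org.apache.spark.sql.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row

/**
 * :: DeveloperApi ::
 * Hypothetical pass-through operator, shown only to illustrate the
 * annotation style this patch applies to the physical operators below.
 */
@DeveloperApi
case class Frobnicate(child: SparkPlan) extends UnaryNode {
  override def output = child.output
  // Pass rows through unchanged; real operators transform the RDD here.
  override def execute(): RDD[Row] = child.execute()
}

// Internal helpers are hidden from the public API but remain visible
// to everything in the org.apache.spark.sql package tree.
private[sql] object FrobnicateUtil {
  def describe(plan: SparkPlan): String = plan.nodeName
}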
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala | 3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala | 7
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala | 3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala | 13
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala | 8
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala | 34
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala | 4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala | 18
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala | 1
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala | 2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala | 9
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 8
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 14
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala | 3
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala | 6
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala | 3
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala | 13
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 18
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala | 1
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala | 5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala | 4
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala | 3
31 files changed, 141 insertions, 57 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
index a390ab6005..3a895e15a4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
/**
* Contains functions that are shared between all SchemaRDD types (i.e., Scala, Java)
*/
-trait SchemaRDDLike {
+private[sql] trait SchemaRDDLike {
@transient val sqlContext: SQLContext
@transient protected[spark] val logicalPlan: LogicalPlan
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala
index 3a4f071eeb..36b3b956da 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala
@@ -19,12 +19,14 @@ package org.apache.spark.sql.execution
import java.util.HashMap
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.sql.catalyst.errors._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical._
/**
+ * :: DeveloperApi ::
* Groups input data by `groupingExpressions` and computes the `aggregateExpressions` for each
* group.
*
@@ -34,6 +36,7 @@ import org.apache.spark.sql.catalyst.plans.physical._
* @param aggregateExpressions expressions that are computed for each group.
* @param child the input data source.
*/
+@DeveloperApi
case class Aggregate(
partial: Boolean,
groupingExpressions: Seq[Expression],
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
index 070557e47c..3b4acb72e8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.{HashPartitioner, RangePartitioner, SparkConf}
import org.apache.spark.rdd.ShuffledRDD
import org.apache.spark.sql.Row
@@ -26,6 +27,10 @@ import org.apache.spark.sql.catalyst.plans.physical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.util.MutablePair
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends UnaryNode {
override def outputPartitioning = newPartitioning
@@ -81,7 +86,7 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una
* [[catalyst.plans.physical.Distribution Distribution]] requirements for each operator by inserting
* [[Exchange]] Operators where required.
*/
-object AddExchange extends Rule[SparkPlan] {
+private[sql] object AddExchange extends Rule[SparkPlan] {
// TODO: Determine the number of partitions.
val numPartitions = 150
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
index cff4887936..da1e08be59 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
@@ -17,9 +17,11 @@
package org.apache.spark.sql.execution
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Generator, JoinedRow, Literal, Projection}
/**
+ * :: DeveloperApi ::
* Applies a [[catalyst.expressions.Generator Generator]] to a stream of input rows, combining the
* output of each into a new stream of rows. This operation is similar to a `flatMap` in functional
* programming with one important additional feature, which allows the input rows to be joined with
@@ -29,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.{Generator, JoinedRow, Literal,
* @param outer when true, each input row will be output at least once, even if the output of the
* given `generator` is empty. `outer` has no effect when `join` is false.
*/
+@DeveloperApi
case class Generate(
generator: Generator,
join: Boolean,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index 5d89697db5..50124dd407 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Logging, Row}
import org.apache.spark.sql.catalyst.trees
@@ -26,6 +27,10 @@ import org.apache.spark.sql.catalyst.plans.{QueryPlan, logical}
import org.apache.spark.sql.catalyst.plans.physical._
import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
self: Product =>
@@ -51,6 +56,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
}
/**
+ * :: DeveloperApi ::
* Allows already planned SparkQueries to be linked into logical query plans.
*
* Note that in general it is not valid to use this class to link multiple copies of the same
@@ -59,6 +65,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
* replace the output attributes with new copies of themselves without breaking any attribute
* linking.
*/
+@DeveloperApi
case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
extends logical.LogicalPlan with MultiInstanceRelation {
@@ -77,15 +84,15 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
}
}
-trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
+private[sql] trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
self: Product =>
}
-trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] {
+private[sql] trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] {
self: Product =>
override def outputPartitioning: Partitioning = child.outputPartitioning
}
-trait BinaryNode extends SparkPlan with trees.BinaryNode[SparkPlan] {
+private[sql] trait BinaryNode extends SparkPlan with trees.BinaryNode[SparkPlan] {
self: Product =>
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala
index c30ae5bcc0..5067c14ddf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala
@@ -27,7 +27,7 @@ import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.MutablePair
import org.apache.spark.util.Utils
-class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
+private[sql] class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
override def newKryo(): Kryo = {
val kryo = new Kryo()
kryo.setRegistrationRequired(false)
@@ -50,7 +50,7 @@ class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
}
}
-object SparkSqlSerializer {
+private[sql] object SparkSqlSerializer {
// TODO (lian) Using KryoSerializer here is workaround, needs further investigation
// Using SparkSqlSerializer here makes BasicQuerySuite to fail because of Kryo serialization
// related error.
@@ -68,7 +68,7 @@ object SparkSqlSerializer {
}
}
-class BigDecimalSerializer extends Serializer[BigDecimal] {
+private[sql] class BigDecimalSerializer extends Serializer[BigDecimal] {
def write(kryo: Kryo, output: Output, bd: math.BigDecimal) {
// TODO: There are probably more efficient representations than strings...
output.writeString(bd.toString())
@@ -83,7 +83,7 @@ class BigDecimalSerializer extends Serializer[BigDecimal] {
* Maps do not have a no arg constructor and so cannot be serialized by default. So, we serialize
* them as `Array[(k,v)]`.
*/
-class MapSerializer extends Serializer[Map[_,_]] {
+private[sql] class MapSerializer extends Serializer[Map[_,_]] {
def write(kryo: Kryo, output: Output, map: Map[_,_]) {
kryo.writeObject(output, map.flatMap(e => Seq(e._1, e._2)).toArray)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index fe8bd5a508..500fde1971 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.physical._
import org.apache.spark.sql.parquet._
-abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
+private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
self: SQLContext#SparkPlanner =>
object HashJoin extends Strategy {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
index eedcc7dda0..e4cf2020a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution
import scala.reflect.runtime.universe.TypeTag
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import org.apache.spark.rdd.{RDD, ShuffledRDD}
import org.apache.spark.sql.catalyst.ScalaReflection
@@ -27,6 +28,10 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{OrderedDistribution, UnspecifiedDistribution}
import org.apache.spark.util.MutablePair
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode {
override def output = projectList.map(_.toAttribute)
@@ -36,6 +41,10 @@ case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode {
override def output = child.output
@@ -44,6 +53,10 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode {
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Sample(fraction: Double, withReplacement: Boolean, seed: Int, child: SparkPlan)
extends UnaryNode {
@@ -53,6 +66,10 @@ case class Sample(fraction: Double, withReplacement: Boolean, seed: Int, child:
override def execute() = child.execute().sample(withReplacement, fraction, seed)
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends SparkPlan {
// TODO: attributes output by union should be distinct for nullability purposes
override def output = children.head.output
@@ -62,12 +79,14 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends
}
/**
+ * :: DeveloperApi ::
* Take the first limit elements. Note that the implementation is different depending on whether
* this is a terminal operator or not. If it is terminal and is invoked using executeCollect,
* this operator uses Spark's take method on the Spark driver. If it is not terminal or is
* invoked using execute, we first take the limit on each partition, and then repartition all the
* data to a single partition to compute the global limit.
*/
+@DeveloperApi
case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode {
// TODO: Implement a partition local limit, and use a strategy to generate the proper limit plan:
// partition local limit -> exchange into one partition -> partition local limit again
@@ -91,10 +110,12 @@ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) exte
}
/**
+ * :: DeveloperApi ::
* Take the first limit elements as defined by the sortOrder. This is logically equivalent to
* having a [[Limit]] operator after a [[Sort]] operator. This could have been named TopK, but
* Spark's top operator does the opposite in ordering so we name it TakeOrdered to avoid confusion.
*/
+@DeveloperApi
case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan)
(@transient sc: SparkContext) extends UnaryNode {
override def otherCopyArgs = sc :: Nil
@@ -111,7 +132,10 @@ case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan)
override def execute() = sc.makeRDD(executeCollect(), 1)
}
-
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class Sort(
sortOrder: Seq[SortOrder],
global: Boolean,
@@ -134,6 +158,10 @@ case class Sort(
override def output = child.output
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
object ExistingRdd {
def convertToCatalyst(a: Any): Any = a match {
case s: Seq[Any] => s.map(convertToCatalyst)
@@ -167,6 +195,10 @@ object ExistingRdd {
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class ExistingRdd(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode {
override def execute() = rdd
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala
index 40982f1fff..a0d29100f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.execution
-object DebugQuery {
+private[sql] object DebugQuery {
def apply(plan: SparkPlan): SparkPlan = {
val visited = new collection.mutable.HashSet[Long]()
plan transform {
@@ -28,7 +28,7 @@ object DebugQuery {
}
}
-case class DebugNode(child: SparkPlan) extends UnaryNode {
+private[sql] case class DebugNode(child: SparkPlan) extends UnaryNode {
def references = Set.empty
def output = child.output
def execute() = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
index c89dae9358..31cc26962a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
@@ -21,14 +21,24 @@ import scala.collection.mutable.{ArrayBuffer, BitSet}
import org.apache.spark.SparkContext
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Partitioning}
+@DeveloperApi
sealed abstract class BuildSide
+
+@DeveloperApi
case object BuildLeft extends BuildSide
+
+@DeveloperApi
case object BuildRight extends BuildSide
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class HashJoin(
leftKeys: Seq[Expression],
rightKeys: Seq[Expression],
@@ -130,6 +140,10 @@ case class HashJoin(
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNode {
def output = left.output ++ right.output
@@ -138,6 +152,10 @@ case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNod
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class BroadcastNestedLoopJoin(
streamed: SparkPlan, broadcast: SparkPlan, joinType: JoinType, condition: Option[Expression])
(@transient sc: SparkContext)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
index e4a2dec332..66237f8f13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql
/**
+ * :: DeveloperApi ::
* An execution engine for relational query plans that runs on top Spark and returns RDDs.
*
* Note that the operators in this package are created automatically by a query planner using a
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
index 728e3dd1dc..f37976f731 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
@@ -28,7 +28,7 @@ import parquet.schema.{MessageType, MessageTypeParser}
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.util.Utils
-object ParquetTestData {
+private[sql] object ParquetTestData {
val testSchema =
"""message myrecord {
diff --git a/sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
index 7219c030cb..ab7862f4f9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.spark
+package org.apache.spark.sql.hive
import java.io.IOException
import java.text.NumberFormat
@@ -28,12 +28,13 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc
import org.apache.hadoop.mapred._
import org.apache.hadoop.io.Writable
+import org.apache.spark.{Logging, SerializableWritable, SparkHadoopWriter}
+
/**
* Internal helper class that saves an RDD using a Hive OutputFormat.
* It is based on [[SparkHadoopWriter]].
*/
-protected[spark]
-class SparkHiveHadoopWriter(
+private[hive] class SparkHiveHadoopWriter(
@transient jobConf: JobConf,
fileSinkConf: FileSinkDesc)
extends Logging
@@ -179,7 +180,7 @@ class SparkHiveHadoopWriter(
}
}
-object SparkHiveHadoopWriter {
+private[hive] object SparkHiveHadoopWriter {
def createPathFromString(path: String, conf: JobConf): Path = {
if (path == null) {
throw new IllegalArgumentException("Output path is null")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index ca75cecf7d..6c907887db 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -34,12 +34,13 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.types._
import org.apache.spark.sql.execution.SparkLogicalPlan
+import org.apache.spark.sql.hive.execution.{HiveTableScan, InsertIntoHiveTable}
import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
/* Implicit conversions */
import scala.collection.JavaConversions._
-class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
+private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
import HiveMetastoreTypes._
val client = Hive.get(hive.hiveconf)
@@ -171,7 +172,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
override def unregisterAllTables() = {}
}
-object HiveMetastoreTypes extends RegexParsers {
+private[hive] object HiveMetastoreTypes extends RegexParsers {
protected lazy val primitiveType: Parser[DataType] =
"string" ^^^ StringType |
"float" ^^^ FloatType |
@@ -229,7 +230,8 @@ object HiveMetastoreTypes extends RegexParsers {
}
}
-case class MetastoreRelation(databaseName: String, tableName: String, alias: Option[String])
+private[hive] case class MetastoreRelation
+ (databaseName: String, tableName: String, alias: Option[String])
(val table: TTable, val partitions: Seq[TPartition])
extends BaseRelation {
// TODO: Can we use org.apache.hadoop.hive.ql.metadata.Table as the type of table and
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 4dac25b3f6..1777e96b67 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -36,20 +36,20 @@ import scala.collection.JavaConversions._
* back for Hive to execute natively. Will be replaced with a native command that contains the
* cmd string.
*/
-case object NativePlaceholder extends Command
+private[hive] case object NativePlaceholder extends Command
-case class DfsCommand(cmd: String) extends Command
+private[hive] case class DfsCommand(cmd: String) extends Command
-case class ShellCommand(cmd: String) extends Command
+private[hive] case class ShellCommand(cmd: String) extends Command
-case class SourceCommand(filePath: String) extends Command
+private[hive] case class SourceCommand(filePath: String) extends Command
-case class AddJar(jarPath: String) extends Command
+private[hive] case class AddJar(jarPath: String) extends Command
-case class AddFile(filePath: String) extends Command
+private[hive] case class AddFile(filePath: String) extends Command
/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */
-object HiveQl {
+private[hive] object HiveQl {
protected val nativeCommands = Seq(
"TOK_DESCFUNCTION",
"TOK_DESCTABLE",
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index ac817b21a1..d9a6e0e889 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -23,9 +23,10 @@ import org.apache.spark.sql.catalyst.planning._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution._
+import org.apache.spark.sql.hive.execution._
import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
-trait HiveStrategies {
+private[hive] trait HiveStrategies {
// Possibly being too clever with types here... or not clever enough.
self: SQLContext#SparkPlanner =>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
index 2610100043..610fa9cb84 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
@@ -15,23 +15,27 @@
* limitations under the License.
*/
-package org.apache.spark.sql.hive
+package org.apache.spark.sql.hive.execution
import java.io.{BufferedReader, InputStreamReader}
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution._
+import org.apache.spark.sql.hive.HiveContext
/* Implicit conversions */
import scala.collection.JavaConversions._
/**
+ * :: DeveloperApi ::
* Transforms the input by forking and running the specified script.
*
* @param input the set of expression that should be passed to the script.
* @param script the command that should be executed.
* @param output the attributes that are produced by the script.
*/
+@DeveloperApi
case class ScriptTransformation(
input: Seq[Expression],
script: String,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index b1a26fdabb..74110ee27b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.spark.sql.hive
+package org.apache.spark.sql.hive.test
import java.io.File
import java.util.{Set => JavaSet}
@@ -34,6 +34,7 @@ import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand}
import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.hive._
/* Implicit conversions */
import scala.collection.JavaConversions._
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
index 821fb22112..96faebc5a8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.spark.sql.hive
+package org.apache.spark.sql.hive.execution
import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar}
import org.apache.hadoop.hive.metastore.MetaStoreUtils
@@ -30,23 +30,26 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharOb
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapred._
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.types.{BooleanType, DataType}
import org.apache.spark.sql.execution._
-import org.apache.spark.{SparkHiveHadoopWriter, TaskContext, SparkException}
+import org.apache.spark.sql.hive._
+import org.apache.spark.{TaskContext, SparkException}
/* Implicits */
import scala.collection.JavaConversions._
/**
+ * :: DeveloperApi ::
* The Hive table scan operator. Column and partition pruning are both handled.
*
- * @constructor
* @param attributes Attributes to be fetched from the Hive table.
* @param relation The Hive table be be scanned.
* @param partitionPruningPred An optional partition pruning predicate for partitioned table.
*/
+@DeveloperApi
case class HiveTableScan(
attributes: Seq[Attribute],
relation: MetastoreRelation,
@@ -160,6 +163,10 @@ case class HiveTableScan(
def output = attributes
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
case class InsertIntoHiveTable(
table: MetastoreRelation,
partition: Map[String, Option[String]],
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 55a4363af6..a09270eb7b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.types._
/* Implicit conversions */
import scala.collection.JavaConversions._
-object HiveFunctionRegistry
+private[hive] object HiveFunctionRegistry
extends analysis.FunctionRegistry with HiveFunctionFactory with HiveInspectors {
def lookupFunction(name: String, children: Seq[Expression]): Expression = {
@@ -99,7 +99,7 @@ object HiveFunctionRegistry
}
}
-trait HiveFunctionFactory {
+private[hive] trait HiveFunctionFactory {
def getFunctionInfo(name: String) = FunctionRegistry.getFunctionInfo(name)
def getFunctionClass(name: String) = getFunctionInfo(name).getFunctionClass
def createFunction[UDFType](name: String) =
@@ -130,7 +130,7 @@ trait HiveFunctionFactory {
}
}
-abstract class HiveUdf extends Expression with Logging with HiveFunctionFactory {
+private[hive] abstract class HiveUdf extends Expression with Logging with HiveFunctionFactory {
self: Product =>
type UDFType
@@ -148,7 +148,7 @@ abstract class HiveUdf extends Expression with Logging with HiveFunctionFactory
override def toString = s"$nodeName#${functionInfo.getDisplayName}(${children.mkString(",")})"
}
-case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUdf {
+private[hive] case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUdf {
import org.apache.spark.sql.hive.HiveFunctionRegistry._
type UDFType = UDF
@@ -201,7 +201,7 @@ case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUd
}
}
-case class HiveGenericUdf(name: String, children: Seq[Expression])
+private[hive] case class HiveGenericUdf(name: String, children: Seq[Expression])
extends HiveUdf with HiveInspectors {
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF._
@@ -228,7 +228,7 @@ case class HiveGenericUdf(name: String, children: Seq[Expression])
}
}
-trait HiveInspectors {
+private[hive] trait HiveInspectors {
def unwrapData(data: Any, oi: ObjectInspector): Any = oi match {
case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data)
@@ -329,7 +329,7 @@ trait HiveInspectors {
}
}
-case class HiveGenericUdaf(
+private[hive] case class HiveGenericUdaf(
name: String,
children: Seq[Expression]) extends AggregateExpression
with HiveInspectors
@@ -371,7 +371,7 @@ case class HiveGenericUdaf(
* Operators that require maintaining state in between input rows should instead be implemented as
* user defined aggregations, which have clean semantics even in a partitioned execution.
*/
-case class HiveGenericUdtf(
+private[hive] case class HiveGenericUdtf(
name: String,
aliasNames: Seq[String],
children: Seq[Expression])
@@ -438,7 +438,7 @@ case class HiveGenericUdtf(
override def toString = s"$nodeName#$name(${children.mkString(",")})"
}
-case class HiveUdafFunction(
+private[hive] case class HiveUdafFunction(
functionName: String,
exprs: Seq[Expression],
base: AggregateExpression)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 79ec1f1cde..f9a162ef4e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive
import org.apache.spark.sql.execution.SparkLogicalPlan
import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
import org.apache.spark.sql.hive.execution.HiveComparisonTest
+import org.apache.spark.sql.hive.test.TestHive
class CachedTableSuite extends HiveComparisonTest {
TestHive.loadTestTable("src")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
index ad29e06905..833f350215 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
@@ -17,12 +17,11 @@
package org.apache.spark.sql.hive
-import java.io.File
-
import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.hive.test.TestHive
/* Implicits */
-import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.hive.test.TestHive._
case class TestData(key: Int, value: String)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala
index 8137f99b22..9c5d7c81f7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala
@@ -21,7 +21,7 @@ import org.scalatest.FunSuite
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.test.TestSQLContext
-import org.apache.spark.sql.hive.TestHive
+import org.apache.spark.sql.hive.test.TestHive
// Implicits
import scala.collection.JavaConversions._
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
index 8488f23abd..9b9a823b6e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution
import java.io.File
-import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.hive.test.TestHive._
/**
* A set of test cases based on the big-data-benchmark.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
index ac87f2cb10..23ece7e7cf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.hive.execution
import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.sql.hive.TestHiveContext
+import org.apache.spark.sql.hive.test.TestHiveContext
import org.scalatest.{BeforeAndAfterAll, FunSuite}
class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 6c91f40d0f..ea17e6e93b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{ExplainCommand, NativeComman
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.Sort
import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
-import org.apache.spark.sql.hive.TestHive
+import org.apache.spark.sql.hive.test.TestHive
/**
* Allows the creations of tests that execute the same query against both hive
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index c3cfa3d25a..dfe88b960b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution
import org.scalatest.BeforeAndAfter
-import org.apache.spark.sql.hive.TestHive
+import org.apache.spark.sql.hive.test.TestHive
/**
* Runs the test cases that are included in the hive distribution.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index a09667ac84..d224d2ee60 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.hive.execution
-import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.hive.test.TestHive._
/**
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 8883e5b16d..67594b57d3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -17,8 +17,8 @@
package org.apache.spark.sql.hive.execution
-import org.apache.spark.sql.hive.TestHive
-import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.TestHive._
case class Data(a: Int, B: Int, n: Nested)
case class Nested(a: Int, B: Int)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index d9ccb93e23..25eca39746 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.hive.execution
-import org.apache.spark.sql.hive.{HiveTableScan, TestHive}
+import org.apache.spark.sql.hive.test.TestHive
/* Implicit conversions */
import scala.collection.JavaConversions._
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
index 843c681e0d..91ad59d7f8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
@@ -22,11 +22,10 @@ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Row}
import org.apache.spark.sql.catalyst.types.{DataType, StringType, IntegerType}
import org.apache.spark.sql.{parquet, SchemaRDD}
-import org.apache.spark.sql.hive.TestHive
import org.apache.spark.util.Utils
// Implicits
-import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.hive.test.TestHive._
class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach {