path: root/sql/hive
author     Cheng Lian <lian.cs.zju@gmail.com>    2014-03-23 15:21:40 -0700
committer  Patrick Wendell <pwendell@gmail.com>  2014-03-23 15:21:40 -0700
commit     8265dc7739caccc59bc2456b2df055ca96337fe4 (patch)
tree       5c68d011bf66a81471c118c85034687fab48ab55 /sql/hive
parent     57a4379c031e5d5901ba580422207d6aa2f19749 (diff)
Fixed coding style issues in Spark SQL
This PR addresses various coding style issues in Spark SQL, including but not limited to those mentioned by @mateiz in PR #146. As this PR affects lots of source files and may cause potential conflicts, it would be better to merge this as soon as possible *after* PR #205 (In-memory columnar representation for Spark SQL) is merged.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #208 from liancheng/fixCodingStyle and squashes the following commits:

fc2b528 [Cheng Lian] Merge branch 'master' into fixCodingStyle
b531273 [Cheng Lian] Fixed coding style issues in sql/hive
0b56f77 [Cheng Lian] Fixed coding style issues in sql/core
fae7b02 [Cheng Lian] Addressed styling issues mentioned by @marmbrus
9265366 [Cheng Lian] Fixed coding style issues in sql/core
3dcbbbd [Cheng Lian] Fixed relative package imports for package catalyst
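Most of the diff below is mechanical: relative `catalyst.*` imports become fully qualified `org.apache.spark.sql.catalyst.*` imports, import groups are alphabetized, and imports that bring implicit conversions into scope (such as `scala.collection.JavaConversions._`) move to the end of the import block under a `/* Implicit conversions */` marker. The following self-contained sketch, with a hypothetical object name and standard-library imports only, illustrates the general shape; the exact grouping varies slightly from file to file in the patch.

import java.io.File
import java.util.{ArrayList => JArrayList}

import scala.collection.mutable

/* Implicit conversions */
import scala.collection.JavaConversions._

object ImportStyleSketch {
  def main(args: Array[String]): Unit = {
    // Build a Java collection, then use it through the implicit view that
    // JavaConversions provides from java.util.List to mutable.Buffer.
    val javaList: JArrayList[String] = new JArrayList[String]()
    javaList.add(new File(".").getAbsolutePath)
    val names: mutable.Buffer[String] = javaList
    println(names.mkString(", "))
  }
}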
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala                  |  7
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala                       | 25
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala              | 22
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala                            | 25
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala                    | 11
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala              |  5
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala                       | 26
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala                          | 21
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala                     | 28
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala                          | 22
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala     |  3
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala      | 12
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala  |  5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala       |  6
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala          |  3
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala     |  4
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala            |  5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala               | 16
18 files changed, 120 insertions, 126 deletions
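Beyond import ordering, several hunks below reflow long declarations: the opening brace moves onto the line that closes a multi-line parameter list (see makeRDDForTable and createHadoopRdd in TableReader.scala, and runQuery in HiveParquetSuite.scala), and class headers that would exceed the line-length limit wrap their extends clause (HiveComparisonTest). A minimal, self-contained sketch of that layout, using hypothetical names rather than code from the patch:

import java.io.File

class DeclarationStyleSketch
  extends Serializable with Cloneable {

  // Long signatures: one parameter per line, double indentation, and the
  // return type plus opening brace on the last parameter line instead of a
  // standalone brace on its own line.
  def listFiles(
      dir: File,
      recursive: Boolean = false,
      filter: Option[String => Boolean] = None): Seq[File] = {

    val entries = Option(dir.listFiles).map(_.toSeq).getOrElse(Nil)
    val matched = entries.filter(f => filter.forall(p => p(f.getName)))
    if (recursive) {
      val subDirs = entries.filter(_.isDirectory)
      matched ++ subDirs.flatMap(d => listFiles(d, recursive = true, filter = filter))
    } else {
      matched
    }
  }
}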
diff --git a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
index 08d390e887..0b38731919 100644
--- a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
+++ b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
@@ -22,15 +22,14 @@ import java.text.NumberFormat
import java.util.Date
import org.apache.hadoop.fs.Path
+import org.apache.hadoop.hive.ql.exec.{FileSinkOperator, Utilities}
+import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc
import org.apache.hadoop.io.Writable
import org.apache.spark.Logging
import org.apache.spark.SerializableWritable
-import org.apache.hadoop.hive.ql.exec.{Utilities, FileSinkOperator}
-import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-
/**
* Internal helper class that saves an RDD using a Hive OutputFormat.
* It is based on [[SparkHadoopWriter]].
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 4aad876cc0..491b3a6271 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -18,25 +18,26 @@
package org.apache.spark.sql
package hive
-import java.io.{PrintStream, InputStreamReader, BufferedReader, File}
-import java.util.{ArrayList => JArrayList}
import scala.language.implicitConversions
-import org.apache.spark.SparkContext
+import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
+import java.util.{ArrayList => JArrayList}
+
import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.ql.processors.{CommandProcessorResponse, CommandProcessorFactory}
-import org.apache.hadoop.hive.ql.processors.CommandProcessor
import org.apache.hadoop.hive.ql.Driver
-import org.apache.spark.rdd.RDD
-
-import catalyst.analysis.{Analyzer, OverrideCatalog}
-import catalyst.expressions.GenericRow
-import catalyst.plans.logical.{BaseRelation, LogicalPlan, NativeCommand, ExplainCommand}
-import catalyst.types._
+import org.apache.hadoop.hive.ql.processors._
+import org.apache.hadoop.hive.ql.session.SessionState
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, OverrideCatalog}
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{NativeCommand, ExplainCommand}
+import org.apache.spark.sql.catalyst.types._
import org.apache.spark.sql.execution._
+/* Implicit conversions */
import scala.collection.JavaConversions._
/**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index e4d50722ce..a5db283765 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -27,12 +27,12 @@ import org.apache.hadoop.hive.ql.plan.TableDesc
import org.apache.hadoop.hive.ql.session.SessionState
import org.apache.hadoop.hive.serde2.Deserializer
-import catalyst.analysis.Catalog
-import catalyst.expressions._
-import catalyst.plans.logical
-import catalyst.plans.logical._
-import catalyst.rules._
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis.Catalog
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+import org.apache.spark.sql.catalyst.types._
import scala.collection.JavaConversions._
@@ -45,7 +45,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
db: Option[String],
tableName: String,
alias: Option[String]): LogicalPlan = {
- val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase())
+ val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase)
val table = client.getTable(databaseName, tableName)
val partitions: Seq[Partition] =
if (table.isPartitioned) {
@@ -91,7 +91,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
object CreateTables extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case InsertIntoCreatedTable(db, tableName, child) =>
- val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase())
+ val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase)
createTable(databaseName, tableName, child.output)
@@ -123,8 +123,8 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
} else {
// Only do the casting when child output data types differ from table output data types.
val castedChildOutput = child.output.zip(table.output).map {
- case (input, table) if input.dataType != table.dataType =>
- Alias(Cast(input, table.dataType), input.name)()
+ case (input, output) if input.dataType != output.dataType =>
+ Alias(Cast(input, output.dataType), input.name)()
case (input, _) => input
}
@@ -135,7 +135,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
/**
* UNIMPLEMENTED: It needs to be decided how we will persist in-memory tables to the metastore.
- * For now, if this functionallity is desired mix in the in-memory [[OverrideCatalog]].
+ * For now, if this functionality is desired mix in the in-memory [[OverrideCatalog]].
*/
override def registerTable(
databaseName: Option[String], tableName: String, plan: LogicalPlan): Unit = ???
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 4f33a293c3..8e76a7348e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -18,18 +18,19 @@
package org.apache.spark.sql
package hive
-import scala.collection.JavaConversions._
-
import org.apache.hadoop.hive.ql.lib.Node
import org.apache.hadoop.hive.ql.parse._
import org.apache.hadoop.hive.ql.plan.PlanUtils
-import catalyst.analysis._
-import catalyst.expressions._
-import catalyst.plans._
-import catalyst.plans.logical
-import catalyst.plans.logical._
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.types._
+
+/* Implicit conversions */
+import scala.collection.JavaConversions._
/**
* Used when we need to start parsing the AST before deciding that we are going to pass the command
@@ -48,7 +49,7 @@ case class AddJar(jarPath: String) extends Command
case class AddFile(filePath: String) extends Command
-/** Provides a mapping from HiveQL statments to catalyst logical plans and expression trees. */
+/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */
object HiveQl {
protected val nativeCommands = Seq(
"TOK_DESCFUNCTION",
@@ -150,13 +151,13 @@ object HiveQl {
}
/**
- * Returns a scala.Seq equivilent to [s] or Nil if [s] is null.
+ * Returns a scala.Seq equivalent to [s] or Nil if [s] is null.
*/
private def nilIfEmpty[A](s: java.util.List[A]): Seq[A] =
Option(s).map(_.toSeq).getOrElse(Nil)
/**
- * Returns this ASTNode with the text changed to `newText``.
+ * Returns this ASTNode with the text changed to `newText`.
*/
def withText(newText: String): ASTNode = {
n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
@@ -667,7 +668,7 @@ object HiveQl {
case Token(allJoinTokens(joinToken),
relation1 ::
relation2 :: other) =>
- assert(other.size <= 1, s"Unhandled join child ${other}")
+ assert(other.size <= 1, s"Unhandled join child $other")
val joinType = joinToken match {
case "TOK_JOIN" => Inner
case "TOK_RIGHTOUTERJOIN" => RightOuter
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index 92d84208ab..c71141c419 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -18,13 +18,12 @@
package org.apache.spark.sql
package hive
-import catalyst.expressions._
-import catalyst.planning._
-import catalyst.plans._
-import catalyst.plans.logical.{BaseRelation, LogicalPlan}
-
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.planning._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan}
import org.apache.spark.sql.execution._
-import org.apache.spark.sql.parquet.{ParquetRelation, InsertIntoParquetTable, ParquetTableScan}
+import org.apache.spark.sql.parquet.{InsertIntoParquetTable, ParquetRelation, ParquetTableScan}
trait HiveStrategies {
// Possibly being too clever with types here... or not clever enough.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
index f20e9d4de4..dc4181ec99 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
@@ -18,11 +18,12 @@
package org.apache.spark.sql
package hive
-import java.io.{InputStreamReader, BufferedReader}
+import java.io.{BufferedReader, InputStreamReader}
-import catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution._
+/* Implicit conversions */
import scala.collection.JavaConversions._
/**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 71d751cbc4..99dc85ec19 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -19,19 +19,18 @@ package org.apache.spark.sql
package hive
import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
+import org.apache.hadoop.hive.ql.exec.Utilities
import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
import org.apache.hadoop.hive.ql.plan.TableDesc
import org.apache.hadoop.hive.serde2.Deserializer
-import org.apache.hadoop.hive.ql.exec.Utilities
import org.apache.hadoop.io.Writable
-import org.apache.hadoop.fs.{Path, PathFilter}
-import org.apache.hadoop.mapred.{FileInputFormat, JobConf, InputFormat}
+import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf}
import org.apache.spark.SerializableWritable
import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.rdd.{HadoopRDD, UnionRDD, EmptyRDD, RDD}
-
+import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD}
/**
* A trait for subclasses that handle table scans.
@@ -40,7 +39,6 @@ private[hive] sealed trait TableReader {
def makeRDDForTable(hiveTable: HiveTable): RDD[_]
def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_]
-
}
@@ -57,7 +55,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
private val _minSplitsPerRDD = math.max(
sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinSplits)
-
// TODO: set aws s3 credentials.
private val _broadcastedHiveConf =
@@ -85,8 +82,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
def makeRDDForTable(
hiveTable: HiveTable,
deserializerClass: Class[_ <: Deserializer],
- filterOpt: Option[PathFilter]): RDD[_] =
- {
+ filterOpt: Option[PathFilter]): RDD[_] = {
+
assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
since input formats may differ across partitions. Use makeRDDForTablePartitions() instead.""")
@@ -115,6 +112,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
sys.error(s"Unable to deserialize non-Writable: $value of ${value.getClass.getName}")
}
}
+
deserializedHadoopRDD
}
@@ -136,8 +134,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
*/
def makeRDDForPartitionedTable(
partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
- filterOpt: Option[PathFilter]): RDD[_] =
- {
+ filterOpt: Option[PathFilter]): RDD[_] = {
+
val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
val partDesc = Utilities.getPartitionDesc(partition)
val partPath = partition.getPartitionPath
@@ -178,6 +176,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
}
}
}.toSeq
+
// Even if we don't use any partitions, we still need an empty RDD
if (hivePartitionRDDs.size == 0) {
new EmptyRDD[Object](sc.sparkContext)
@@ -207,8 +206,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
private def createHadoopRdd(
tableDesc: TableDesc,
path: String,
- inputFormatClass: Class[InputFormat[Writable, Writable]])
- : RDD[Writable] = {
+ inputFormatClass: Class[InputFormat[Writable, Writable]]): RDD[Writable] = {
+
val initializeJobConfFunc = HadoopTableReader.initializeLocalJobConfFunc(path, tableDesc) _
val rdd = new HadoopRDD(
@@ -227,7 +226,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
}
private[hive] object HadoopTableReader {
-
/**
* Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to
* instantiate a HadoopRDD.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 17ae4ef63c..a26b0ff231 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -22,23 +22,22 @@ import java.io.File
import java.util.{Set => JavaSet}
import scala.collection.mutable
-import scala.collection.JavaConversions._
import scala.language.implicitConversions
-import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
-import org.apache.hadoop.hive.metastore.MetaStoreUtils
import org.apache.hadoop.hive.ql.exec.FunctionRegistry
-import org.apache.hadoop.hive.ql.io.avro.{AvroContainerOutputFormat, AvroContainerInputFormat}
+import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat}
import org.apache.hadoop.hive.ql.metadata.Table
-import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
import org.apache.hadoop.hive.serde2.RegexSerDe
+import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand}
+import org.apache.spark.sql.catalyst.util._
-import catalyst.analysis._
-import catalyst.plans.logical.{LogicalPlan, NativeCommand}
-import catalyst.util._
+/* Implicit conversions */
+import scala.collection.JavaConversions._
object TestHive
extends TestHiveContext(new SparkContext("local", "TestSQLContext", new SparkConf()))
@@ -52,7 +51,7 @@ object TestHive
*
* TestHive is singleton object version of this class because instantiating multiple copies of the
* hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of
- * testcases that rely on TestHive must be serialized.
+ * test cases that rely on TestHive must be serialized.
*/
class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
self =>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
index d20fd87f34..9aa9e173a8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
@@ -24,24 +24,18 @@ import org.apache.hadoop.hive.ql.Context
import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive}
import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc}
import org.apache.hadoop.hive.serde2.Serializer
-import org.apache.hadoop.hive.serde2.objectinspector._
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption
+import org.apache.hadoop.hive.serde2.objectinspector._
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector
-
-import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapred._
-import catalyst.expressions._
-import catalyst.types.{BooleanType, DataType}
-import org.apache.spark.{TaskContext, SparkException}
-import catalyst.expressions.Cast
import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.types.{BooleanType, DataType}
import org.apache.spark.sql.execution._
-
-import scala.Some
-import scala.collection.immutable.ListMap
+import org.apache.spark.{TaskContext, SparkException}
/* Implicits */
import scala.collection.JavaConversions._
@@ -194,20 +188,26 @@ case class InsertIntoHiveTable(
* TODO: Consolidate all hive OI/data interface code.
*/
protected def wrap(a: (Any, ObjectInspector)): Any = a match {
- case (s: String, oi: JavaHiveVarcharObjectInspector) => new HiveVarchar(s, s.size)
+ case (s: String, oi: JavaHiveVarcharObjectInspector) =>
+ new HiveVarchar(s, s.size)
+
case (bd: BigDecimal, oi: JavaHiveDecimalObjectInspector) =>
new HiveDecimal(bd.underlying())
+
case (row: Row, oi: StandardStructObjectInspector) =>
val struct = oi.create()
- row.zip(oi.getAllStructFieldRefs).foreach {
+ row.zip(oi.getAllStructFieldRefs: Seq[StructField]).foreach {
case (data, field) =>
oi.setStructFieldData(struct, field, wrap(data, field.getFieldObjectInspector))
}
struct
+
case (s: Seq[_], oi: ListObjectInspector) =>
val wrappedSeq = s.map(wrap(_, oi.getListElementObjectInspector))
seqAsJavaList(wrappedSeq)
- case (obj, _) => obj
+
+ case (obj, _) =>
+ obj
}
def saveAsHiveFile(
@@ -324,7 +324,7 @@ case class InsertIntoHiveTable(
case (key, Some(value)) => key -> value
case (key, None) => key -> "" // Should not reach here right now.
}
- val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols(), partitionSpec)
+ val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec)
db.validatePartitionNameCharacters(partVals)
// inheritTableSpecs is set to true. It should be set to false for a IMPORT query
// which is currently considered as a Hive native command.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 5e775d6a04..72ccd4f4a4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -18,22 +18,24 @@
package org.apache.spark.sql
package hive
-import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import org.apache.hadoop.hive.common.`type`.HiveDecimal
-import org.apache.hadoop.hive.serde2.{io => hiveIo}
-import org.apache.hadoop.hive.serde2.objectinspector.primitive._
-import org.apache.hadoop.hive.serde2.objectinspector._
+import org.apache.hadoop.hive.ql.exec.UDF
import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry}
import org.apache.hadoop.hive.ql.udf.generic._
-import org.apache.hadoop.hive.ql.exec.UDF
+import org.apache.hadoop.hive.serde2.objectinspector._
+import org.apache.hadoop.hive.serde2.objectinspector.primitive._
+import org.apache.hadoop.hive.serde2.{io => hiveIo}
import org.apache.hadoop.{io => hadoopIo}
-import catalyst.analysis
-import catalyst.expressions._
-import catalyst.types
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.types
+import org.apache.spark.sql.catalyst.types._
+
+/* Implicit conversions */
+import scala.collection.JavaConversions._
object HiveFunctionRegistry
extends analysis.FunctionRegistry with HiveFunctionFactory with HiveInspectors {
@@ -148,7 +150,7 @@ abstract class HiveUdf
}
case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUdf {
- import HiveFunctionRegistry._
+ import org.apache.spark.sql.hive.HiveFunctionRegistry._
type UDFType = UDF
@transient
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
index a12ab23946..02ee2a0ebc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
@@ -20,7 +20,6 @@ package sql
package hive
package execution
-
import org.scalatest.{FunSuite, BeforeAndAfterAll}
class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll {
@@ -35,4 +34,4 @@ class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll {
}
}
}
-}
\ No newline at end of file
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 8a5b97b7a0..e8fcc27235 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -20,12 +20,11 @@ package hive
package execution
import java.io._
-import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
-import catalyst.plans.logical.{ExplainCommand, NativeCommand}
-import catalyst.plans._
-import catalyst.util._
+import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
+import org.apache.spark.sql.catalyst.plans.logical.{ExplainCommand, NativeCommand}
+import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.Sort
/**
@@ -38,7 +37,8 @@ import org.apache.spark.sql.execution.Sort
* See the documentation of public vals in this class for information on how test execution can be
* configured using system properties.
*/
-abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
+abstract class HiveComparisonTest
+ extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
/**
* When set, any cache files that result in test failures will be deleted. Used when the test
@@ -376,4 +376,4 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
}
}
}
-}
\ No newline at end of file
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index d010023f78..16bcded8a4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -19,11 +19,6 @@ package org.apache.spark.sql
package hive
package execution
-
-import java.io._
-
-import util._
-
/**
* Runs the test cases that are included in the hive distribution.
*/
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
index f0a4ec3c02..2d2f13333a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
@@ -19,9 +19,9 @@ package org.apache.spark.sql
package hive
package execution
-import java.io._
+import java.io.File
-import catalyst.util._
+import org.apache.spark.sql.catalyst.util._
/**
* A framework for running the query tests that are listed as a set of text files.
@@ -67,4 +67,4 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
ignore(testCaseName) {}
}
}
-}
\ No newline at end of file
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 28a5d260b3..b804634db1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql
package hive
package execution
-
/**
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
*/
@@ -141,4 +140,4 @@ class HiveQuerySuite extends HiveComparisonTest {
sql("SELECT * FROM src TABLESAMPLE(0.1 PERCENT) s")
}
-}
\ No newline at end of file
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 0dd79faa15..996bd4efec 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -23,8 +23,6 @@ package execution
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
*/
class HiveResolutionSuite extends HiveComparisonTest {
- import TestHive._
-
createQueryTest("table.attr",
"SELECT src.key FROM src ORDER BY key LIMIT 1")
@@ -62,4 +60,4 @@ class HiveResolutionSuite extends HiveComparisonTest {
createQueryTest("tableName.attr from aliased subquery",
"SELECT src.key FROM (SELECT * FROM src ORDER BY key LIMIT 1) a") */
-}
\ No newline at end of file
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index 8542f42aa9..bb65c91e2a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -19,10 +19,11 @@ package org.apache.spark.sql
package hive
package execution
-import scala.collection.JavaConversions._
-
import org.apache.spark.sql.hive.TestHive
+/* Implicit conversions */
+import scala.collection.JavaConversions._
+
/**
* A set of test cases that validate partition and column pruning.
*/
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
index ee90061c7c..05ad85b622 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
@@ -19,21 +19,23 @@ package org.apache.spark.sql.parquet
import java.io.File
-import org.scalatest.{BeforeAndAfterEach, BeforeAndAfterAll, FunSuite}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.util.getTempFilePath
import org.apache.spark.sql.hive.TestHive
-
class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach {
-
val filename = getTempFilePath("parquettest").getCanonicalFile.toURI.toString
// runs a SQL and optionally resolves one Parquet table
- def runQuery(querystr: String, tableName: Option[String] = None, filename: Option[String] = None): Array[Row] = {
+ def runQuery(
+ querystr: String,
+ tableName: Option[String] = None,
+ filename: Option[String] = None): Array[Row] = {
+
// call to resolve references in order to get CREATE TABLE AS to work
val query = TestHive
.parseSql(querystr)
@@ -90,7 +92,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft
override def beforeAll() {
// write test data
- ParquetTestData.writeFile
+ ParquetTestData.writeFile()
// Override initial Parquet test table
TestHive.catalog.registerTable(Some[String]("parquet"), "testsource", ParquetTestData.testData)
}
@@ -151,7 +153,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft
(rddOne, rddTwo).zipped.foreach {
(a,b) => (a,b).zipped.toArray.zipWithIndex.foreach {
case ((value_1:Array[Byte], value_2:Array[Byte]), index) =>
- assert(new String(value_1) === new String(value_2), s"table $tableName row ${counter} field ${fieldNames(index)} don't match")
+ assert(new String(value_1) === new String(value_2), s"table $tableName row $counter field ${fieldNames(index)} don't match")
case ((value_1, value_2), index) =>
assert(value_1 === value_2, s"table $tableName row $counter field ${fieldNames(index)} don't match")
}