From 8265dc7739caccc59bc2456b2df055ca96337fe4 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Sun, 23 Mar 2014 15:21:40 -0700
Subject: Fixed coding style issues in Spark SQL

This PR addresses various coding style issues in Spark SQL, including but not
limited to those mentioned by @mateiz in PR #146. As this PR affects lots of
source files and may cause potential conflicts, it would be better to merge
this as soon as possible *after* PR #205 (In-memory columnar representation
for Spark SQL) is merged.

Author: Cheng Lian

Closes #208 from liancheng/fixCodingStyle and squashes the following commits:

fc2b528 [Cheng Lian] Merge branch 'master' into fixCodingStyle
b531273 [Cheng Lian] Fixed coding style issues in sql/hive
0b56f77 [Cheng Lian] Fixed coding style issues in sql/core
fae7b02 [Cheng Lian] Addressed styling issues mentioned by @marmbrus
9265366 [Cheng Lian] Fixed coding style issues in sql/core
3dcbbbd [Cheng Lian] Fixed relative package imports for package catalyst
---
 .../apache/hadoop/mapred/SparkHadoopWriter.scala   |  7 +++---
 .../org/apache/spark/sql/hive/HiveContext.scala    | 25 +++++++++----------
 .../spark/sql/hive/HiveMetastoreCatalog.scala      | 22 ++++++++---------
 .../scala/org/apache/spark/sql/hive/HiveQl.scala   | 25 +++++++++----------
 .../org/apache/spark/sql/hive/HiveStrategies.scala | 11 ++++-----
 .../spark/sql/hive/ScriptTransformation.scala      |  5 ++--
 .../org/apache/spark/sql/hive/TableReader.scala    | 26 ++++++++++----------
 .../scala/org/apache/spark/sql/hive/TestHive.scala | 21 ++++++++--------
 .../org/apache/spark/sql/hive/hiveOperators.scala  | 28 +++++++++++-----------
 .../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 22 +++++++++--------
 .../sql/hive/execution/ConcurrentHiveSuite.scala   |  3 +--
 .../sql/hive/execution/HiveComparisonTest.scala    | 12 +++++-----
 .../hive/execution/HiveCompatibilitySuite.scala    |  5 ----
 .../sql/hive/execution/HiveQueryFileTest.scala     |  6 ++---
 .../spark/sql/hive/execution/HiveQuerySuite.scala  |  3 +--
 .../sql/hive/execution/HiveResolutionSuite.scala   |  4 +---
 .../spark/sql/hive/execution/PruningSuite.scala    |  5 ++--
 .../spark/sql/parquet/HiveParquetSuite.scala       | 16 +++++++------
 18 files changed, 120 insertions(+), 126 deletions(-)

(limited to 'sql/hive/src')

diff --git a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
index 08d390e887..0b38731919 100644
--- a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
+++ b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
@@ -22,15 +22,14 @@ import java.text.NumberFormat
 import java.util.Date
 
 import org.apache.hadoop.fs.Path
+import org.apache.hadoop.hive.ql.exec.{FileSinkOperator, Utilities}
+import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc
 import org.apache.hadoop.io.Writable
 
 import org.apache.spark.Logging
 import org.apache.spark.SerializableWritable
 
-import org.apache.hadoop.hive.ql.exec.{Utilities, FileSinkOperator}
-import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-
 /**
  * Internal helper class that saves an RDD using a Hive OutputFormat.
  * It is based on [[SparkHadoopWriter]].
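The import reshuffling in this first hunk, and in the files that follow, applies one layout across sql/hive: java and scala imports first, then third-party packages such as org.apache.hadoop and org.apache.hadoop.hive, then org.apache.spark, alphabetized within each group; formerly relative catalyst imports become fully qualified org.apache.spark.sql.catalyst imports, and implicit-conversion imports are set apart under a `/* Implicit conversions */` comment. A minimal sketch of the resulting header, using a hypothetical file and assuming the Hadoop, Hive, and Spark SQL artifacts are on the classpath:

```scala
// Hypothetical file illustrating the import grouping the patch converges on;
// only the grouping rules, not the file itself, come from the patch.
package org.apache.spark.sql.hive

import java.io.File
import java.util.{ArrayList => JArrayList}

import scala.language.implicitConversions

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.io.Writable

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.types._

/* Implicit conversions */
import scala.collection.JavaConversions._

// Dummy object so the sketch forms a complete compilation unit.
object ImportLayoutSketch
```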
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 4aad876cc0..491b3a6271 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -18,25 +18,26 @@ package org.apache.spark.sql package hive -import java.io.{PrintStream, InputStreamReader, BufferedReader, File} -import java.util.{ArrayList => JArrayList} import scala.language.implicitConversions -import org.apache.spark.SparkContext +import java.io.{BufferedReader, File, InputStreamReader, PrintStream} +import java.util.{ArrayList => JArrayList} + import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hadoop.hive.ql.processors.{CommandProcessorResponse, CommandProcessorFactory} -import org.apache.hadoop.hive.ql.processors.CommandProcessor import org.apache.hadoop.hive.ql.Driver -import org.apache.spark.rdd.RDD - -import catalyst.analysis.{Analyzer, OverrideCatalog} -import catalyst.expressions.GenericRow -import catalyst.plans.logical.{BaseRelation, LogicalPlan, NativeCommand, ExplainCommand} -import catalyst.types._ +import org.apache.hadoop.hive.ql.processors._ +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.analysis.{Analyzer, OverrideCatalog} +import org.apache.spark.sql.catalyst.expressions.GenericRow +import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{NativeCommand, ExplainCommand} +import org.apache.spark.sql.catalyst.types._ import org.apache.spark.sql.execution._ +/* Implicit conversions */ import scala.collection.JavaConversions._ /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index e4d50722ce..a5db283765 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -27,12 +27,12 @@ import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.Deserializer -import catalyst.analysis.Catalog -import catalyst.expressions._ -import catalyst.plans.logical -import catalyst.plans.logical._ -import catalyst.rules._ -import catalyst.types._ +import org.apache.spark.sql.catalyst.analysis.Catalog +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.types._ import scala.collection.JavaConversions._ @@ -45,7 +45,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { db: Option[String], tableName: String, alias: Option[String]): LogicalPlan = { - val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase()) + val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase) val table = client.getTable(databaseName, tableName) val partitions: Seq[Partition] = if (table.isPartitioned) { @@ -91,7 +91,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { object CreateTables extends Rule[LogicalPlan] { def apply(plan: 
LogicalPlan): LogicalPlan = plan transform { case InsertIntoCreatedTable(db, tableName, child) => - val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase()) + val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase) createTable(databaseName, tableName, child.output) @@ -123,8 +123,8 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { } else { // Only do the casting when child output data types differ from table output data types. val castedChildOutput = child.output.zip(table.output).map { - case (input, table) if input.dataType != table.dataType => - Alias(Cast(input, table.dataType), input.name)() + case (input, output) if input.dataType != output.dataType => + Alias(Cast(input, output.dataType), input.name)() case (input, _) => input } @@ -135,7 +135,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { /** * UNIMPLEMENTED: It needs to be decided how we will persist in-memory tables to the metastore. - * For now, if this functionallity is desired mix in the in-memory [[OverrideCatalog]]. + * For now, if this functionality is desired mix in the in-memory [[OverrideCatalog]]. */ override def registerTable( databaseName: Option[String], tableName: String, plan: LogicalPlan): Unit = ??? diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 4f33a293c3..8e76a7348e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -18,18 +18,19 @@ package org.apache.spark.sql package hive -import scala.collection.JavaConversions._ - import org.apache.hadoop.hive.ql.lib.Node import org.apache.hadoop.hive.ql.parse._ import org.apache.hadoop.hive.ql.plan.PlanUtils -import catalyst.analysis._ -import catalyst.expressions._ -import catalyst.plans._ -import catalyst.plans.logical -import catalyst.plans.logical._ -import catalyst.types._ +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.types._ + +/* Implicit conversions */ +import scala.collection.JavaConversions._ /** * Used when we need to start parsing the AST before deciding that we are going to pass the command @@ -48,7 +49,7 @@ case class AddJar(jarPath: String) extends Command case class AddFile(filePath: String) extends Command -/** Provides a mapping from HiveQL statments to catalyst logical plans and expression trees. */ +/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */ object HiveQl { protected val nativeCommands = Seq( "TOK_DESCFUNCTION", @@ -150,13 +151,13 @@ object HiveQl { } /** - * Returns a scala.Seq equivilent to [s] or Nil if [s] is null. + * Returns a scala.Seq equivalent to [s] or Nil if [s] is null. */ private def nilIfEmpty[A](s: java.util.List[A]): Seq[A] = Option(s).map(_.toSeq).getOrElse(Nil) /** - * Returns this ASTNode with the text changed to `newText``. + * Returns this ASTNode with the text changed to `newText`. 
*/ def withText(newText: String): ASTNode = { n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText) @@ -667,7 +668,7 @@ object HiveQl { case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) => - assert(other.size <= 1, s"Unhandled join child ${other}") + assert(other.size <= 1, s"Unhandled join child $other") val joinType = joinToken match { case "TOK_JOIN" => Inner case "TOK_RIGHTOUTERJOIN" => RightOuter diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 92d84208ab..c71141c419 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -18,13 +18,12 @@ package org.apache.spark.sql package hive -import catalyst.expressions._ -import catalyst.planning._ -import catalyst.plans._ -import catalyst.plans.logical.{BaseRelation, LogicalPlan} - +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.planning._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan} import org.apache.spark.sql.execution._ -import org.apache.spark.sql.parquet.{ParquetRelation, InsertIntoParquetTable, ParquetTableScan} +import org.apache.spark.sql.parquet.{InsertIntoParquetTable, ParquetRelation, ParquetTableScan} trait HiveStrategies { // Possibly being too clever with types here... or not clever enough. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala index f20e9d4de4..dc4181ec99 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql package hive -import java.io.{InputStreamReader, BufferedReader} +import java.io.{BufferedReader, InputStreamReader} -import catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution._ +/* Implicit conversions */ import scala.collection.JavaConversions._ /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 71d751cbc4..99dc85ec19 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -19,19 +19,18 @@ package org.apache.spark.sql package hive import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._ +import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable} import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.hadoop.hive.serde2.Deserializer -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.io.Writable -import org.apache.hadoop.fs.{Path, PathFilter} -import org.apache.hadoop.mapred.{FileInputFormat, JobConf, InputFormat} +import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf} import org.apache.spark.SerializableWritable import org.apache.spark.broadcast.Broadcast -import org.apache.spark.rdd.{HadoopRDD, UnionRDD, EmptyRDD, RDD} - +import 
org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} /** * A trait for subclasses that handle table scans. @@ -40,7 +39,6 @@ private[hive] sealed trait TableReader { def makeRDDForTable(hiveTable: HiveTable): RDD[_] def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_] - } @@ -57,7 +55,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon private val _minSplitsPerRDD = math.max( sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinSplits) - // TODO: set aws s3 credentials. private val _broadcastedHiveConf = @@ -85,8 +82,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon def makeRDDForTable( hiveTable: HiveTable, deserializerClass: Class[_ <: Deserializer], - filterOpt: Option[PathFilter]): RDD[_] = - { + filterOpt: Option[PathFilter]): RDD[_] = { + assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table, since input formats may differ across partitions. Use makeRDDForTablePartitions() instead.""") @@ -115,6 +112,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon sys.error(s"Unable to deserialize non-Writable: $value of ${value.getClass.getName}") } } + deserializedHadoopRDD } @@ -136,8 +134,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon */ def makeRDDForPartitionedTable( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]], - filterOpt: Option[PathFilter]): RDD[_] = - { + filterOpt: Option[PathFilter]): RDD[_] = { + val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) => val partDesc = Utilities.getPartitionDesc(partition) val partPath = partition.getPartitionPath @@ -178,6 +176,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon } } }.toSeq + // Even if we don't use any partitions, we still need an empty RDD if (hivePartitionRDDs.size == 0) { new EmptyRDD[Object](sc.sparkContext) @@ -207,8 +206,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon private def createHadoopRdd( tableDesc: TableDesc, path: String, - inputFormatClass: Class[InputFormat[Writable, Writable]]) - : RDD[Writable] = { + inputFormatClass: Class[InputFormat[Writable, Writable]]): RDD[Writable] = { + val initializeJobConfFunc = HadoopTableReader.initializeLocalJobConfFunc(path, tableDesc) _ val rdd = new HadoopRDD( @@ -227,7 +226,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon } private[hive] object HadoopTableReader { - /** * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to * instantiate a HadoopRDD. 
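Besides the import cleanup, the TableReader hunks above settle how multi-argument signatures are wrapped: one parameter per line, with the return type and the opening brace kept on the signature's last line rather than dropping the brace onto a line of its own. A small sketch of that style with hypothetical names (not code from the patch):

```scala
object SignatureStyleSketch {
  // One parameter per line; "): ReturnType = {" closes the signature
  // instead of placing the opening brace on its own line.
  def makeRecordsForPath(
      inputPath: String,
      minSplits: Int,
      filterOpt: Option[String]): Seq[String] = {

    // The body starts after a blank line, mirroring the reformatted methods above.
    Seq(inputPath, filterOpt.getOrElse(""), minSplits.toString)
  }
}
```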
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala index 17ae4ef63c..a26b0ff231 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala @@ -22,23 +22,22 @@ import java.io.File import java.util.{Set => JavaSet} import scala.collection.mutable -import scala.collection.JavaConversions._ import scala.language.implicitConversions -import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor} -import org.apache.hadoop.hive.metastore.MetaStoreUtils import org.apache.hadoop.hive.ql.exec.FunctionRegistry -import org.apache.hadoop.hive.ql.io.avro.{AvroContainerOutputFormat, AvroContainerInputFormat} +import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat} import org.apache.hadoop.hive.ql.metadata.Table -import org.apache.hadoop.hive.serde2.avro.AvroSerDe -import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.hive.serde2.RegexSerDe +import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.apache.hadoop.hive.serde2.avro.AvroSerDe -import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand} +import org.apache.spark.sql.catalyst.util._ -import catalyst.analysis._ -import catalyst.plans.logical.{LogicalPlan, NativeCommand} -import catalyst.util._ +/* Implicit conversions */ +import scala.collection.JavaConversions._ object TestHive extends TestHiveContext(new SparkContext("local", "TestSQLContext", new SparkConf())) @@ -52,7 +51,7 @@ object TestHive * * TestHive is singleton object version of this class because instantiating multiple copies of the * hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of - * testcases that rely on TestHive must be serialized. + * test cases that rely on TestHive must be serialized. 
*/ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) { self => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala index d20fd87f34..9aa9e173a8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala @@ -24,24 +24,18 @@ import org.apache.hadoop.hive.ql.Context import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive} import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc} import org.apache.hadoop.hive.serde2.Serializer -import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption +import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector - -import org.apache.hadoop.fs.Path import org.apache.hadoop.io.Writable import org.apache.hadoop.mapred._ -import catalyst.expressions._ -import catalyst.types.{BooleanType, DataType} -import org.apache.spark.{TaskContext, SparkException} -import catalyst.expressions.Cast import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.types.{BooleanType, DataType} import org.apache.spark.sql.execution._ - -import scala.Some -import scala.collection.immutable.ListMap +import org.apache.spark.{TaskContext, SparkException} /* Implicits */ import scala.collection.JavaConversions._ @@ -194,20 +188,26 @@ case class InsertIntoHiveTable( * TODO: Consolidate all hive OI/data interface code. */ protected def wrap(a: (Any, ObjectInspector)): Any = a match { - case (s: String, oi: JavaHiveVarcharObjectInspector) => new HiveVarchar(s, s.size) + case (s: String, oi: JavaHiveVarcharObjectInspector) => + new HiveVarchar(s, s.size) + case (bd: BigDecimal, oi: JavaHiveDecimalObjectInspector) => new HiveDecimal(bd.underlying()) + case (row: Row, oi: StandardStructObjectInspector) => val struct = oi.create() - row.zip(oi.getAllStructFieldRefs).foreach { + row.zip(oi.getAllStructFieldRefs: Seq[StructField]).foreach { case (data, field) => oi.setStructFieldData(struct, field, wrap(data, field.getFieldObjectInspector)) } struct + case (s: Seq[_], oi: ListObjectInspector) => val wrappedSeq = s.map(wrap(_, oi.getListElementObjectInspector)) seqAsJavaList(wrappedSeq) - case (obj, _) => obj + + case (obj, _) => + obj } def saveAsHiveFile( @@ -324,7 +324,7 @@ case class InsertIntoHiveTable( case (key, Some(value)) => key -> value case (key, None) => key -> "" // Should not reach here right now. } - val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols(), partitionSpec) + val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec) db.validatePartitionNameCharacters(partVals) // inheritTableSpecs is set to true. It should be set to false for a IMPORT query // which is currently considered as a Hive native command. 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 5e775d6a04..72ccd4f4a4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -18,22 +18,24 @@ package org.apache.spark.sql package hive -import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.serde2.{io => hiveIo} -import org.apache.hadoop.hive.serde2.objectinspector.primitive._ -import org.apache.hadoop.hive.serde2.objectinspector._ +import org.apache.hadoop.hive.ql.exec.UDF import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} import org.apache.hadoop.hive.ql.udf.generic._ -import org.apache.hadoop.hive.ql.exec.UDF +import org.apache.hadoop.hive.serde2.objectinspector._ +import org.apache.hadoop.hive.serde2.objectinspector.primitive._ +import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.{io => hadoopIo} -import catalyst.analysis -import catalyst.expressions._ -import catalyst.types -import catalyst.types._ +import org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.types +import org.apache.spark.sql.catalyst.types._ + +/* Implicit conversions */ +import scala.collection.JavaConversions._ object HiveFunctionRegistry extends analysis.FunctionRegistry with HiveFunctionFactory with HiveInspectors { @@ -148,7 +150,7 @@ abstract class HiveUdf } case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUdf { - import HiveFunctionRegistry._ + import org.apache.spark.sql.hive.HiveFunctionRegistry._ type UDFType = UDF @transient diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala index a12ab23946..02ee2a0ebc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala @@ -20,7 +20,6 @@ package sql package hive package execution - import org.scalatest.{FunSuite, BeforeAndAfterAll} class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll { @@ -35,4 +34,4 @@ class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll { } } } -} \ No newline at end of file +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 8a5b97b7a0..e8fcc27235 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -20,12 +20,11 @@ package hive package execution import java.io._ -import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen} -import catalyst.plans.logical.{ExplainCommand, NativeCommand} -import catalyst.plans._ -import catalyst.util._ +import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen} +import org.apache.spark.sql.catalyst.plans.logical.{ExplainCommand, NativeCommand} +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.Sort /** @@ -38,7 +37,8 @@ import org.apache.spark.sql.execution.Sort * See the documentation of public 
vals in this class for information on how test execution can be * configured using system properties. */ -abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging { +abstract class HiveComparisonTest + extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging { /** * When set, any cache files that result in test failures will be deleted. Used when the test @@ -376,4 +376,4 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G } } } -} \ No newline at end of file +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index d010023f78..16bcded8a4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -19,11 +19,6 @@ package org.apache.spark.sql package hive package execution - -import java.io._ - -import util._ - /** * Runs the test cases that are included in the hive distribution. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala index f0a4ec3c02..2d2f13333a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql package hive package execution -import java.io._ +import java.io.File -import catalyst.util._ +import org.apache.spark.sql.catalyst.util._ /** * A framework for running the query tests that are listed as a set of text files. @@ -67,4 +67,4 @@ abstract class HiveQueryFileTest extends HiveComparisonTest { ignore(testCaseName) {} } } -} \ No newline at end of file +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 28a5d260b3..b804634db1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql package hive package execution - /** * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution. */ @@ -141,4 +140,4 @@ class HiveQuerySuite extends HiveComparisonTest { sql("SELECT * FROM src TABLESAMPLE(0.1 PERCENT) s") } -} \ No newline at end of file +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index 0dd79faa15..996bd4efec 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -23,8 +23,6 @@ package execution * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution. 
*/ class HiveResolutionSuite extends HiveComparisonTest { - import TestHive._ - createQueryTest("table.attr", "SELECT src.key FROM src ORDER BY key LIMIT 1") @@ -62,4 +60,4 @@ class HiveResolutionSuite extends HiveComparisonTest { createQueryTest("tableName.attr from aliased subquery", "SELECT src.key FROM (SELECT * FROM src ORDER BY key LIMIT 1) a") */ -} \ No newline at end of file +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala index 8542f42aa9..bb65c91e2a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala @@ -19,10 +19,11 @@ package org.apache.spark.sql package hive package execution -import scala.collection.JavaConversions._ - import org.apache.spark.sql.hive.TestHive +/* Implicit conversions */ +import scala.collection.JavaConversions._ + /** * A set of test cases that validate partition and column pruning. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala index ee90061c7c..05ad85b622 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala @@ -19,21 +19,23 @@ package org.apache.spark.sql.parquet import java.io.File -import org.scalatest.{BeforeAndAfterEach, BeforeAndAfterAll, FunSuite} +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.util.getTempFilePath import org.apache.spark.sql.hive.TestHive - class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach { - val filename = getTempFilePath("parquettest").getCanonicalFile.toURI.toString // runs a SQL and optionally resolves one Parquet table - def runQuery(querystr: String, tableName: Option[String] = None, filename: Option[String] = None): Array[Row] = { + def runQuery( + querystr: String, + tableName: Option[String] = None, + filename: Option[String] = None): Array[Row] = { + // call to resolve references in order to get CREATE TABLE AS to work val query = TestHive .parseSql(querystr) @@ -90,7 +92,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft override def beforeAll() { // write test data - ParquetTestData.writeFile + ParquetTestData.writeFile() // Override initial Parquet test table TestHive.catalog.registerTable(Some[String]("parquet"), "testsource", ParquetTestData.testData) } @@ -151,7 +153,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft (rddOne, rddTwo).zipped.foreach { (a,b) => (a,b).zipped.toArray.zipWithIndex.foreach { case ((value_1:Array[Byte], value_2:Array[Byte]), index) => - assert(new String(value_1) === new String(value_2), s"table $tableName row ${counter} field ${fieldNames(index)} don't match") + assert(new String(value_1) === new String(value_2), s"table $tableName row $counter field ${fieldNames(index)} don't match") case ((value_1, value_2), index) => assert(value_1 === value_2, s"table $tableName row $counter field ${fieldNames(index)} 
don't match") } -- cgit v1.2.3