author     Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com>   2014-10-13 13:45:34 -0700
committer  Michael Armbrust <michael@databricks.com>                2014-10-13 13:45:34 -0700
commit     e10d71e7e58bf2ec0f1942cb2f0602396ab866b4 (patch)
tree       d37f2ea8a19ac25dd3d21c390e7c40f3f5c2d796
parent     73da9c26b0e2e8bf0ab055906211727a7097c963 (diff)
[SPARK-3559][SQL] Remove unnecessary columns from List of needed Column Ids in Hive Conf
Author: Venkata Ramana G <ramana.gollamudi@huawei.com>
Author: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com>

Closes #2713 from gvramana/remove_unnecessary_columns and squashes the following commits:

b7ba768 [Venkata Ramana Gollamudi] Added comment and checkstyle fix
6a93459 [Venkata Ramana Gollamudi] cloned hiveconf for each TableScanOperator so that only required columns are added
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala             |  6
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala | 10
2 files changed, 12 insertions(+), 4 deletions(-)
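The core of the change: HiveTableScan previously wrote its needed-column metadata straight into the shared, session-wide HiveConf, so the column list registered by one table scan leaked into later scans. A minimal before/after sketch of that behavior, assuming the serdeConstants.LIST_COLUMNS key that appears in the diff below (column names here are illustrative):

    import org.apache.hadoop.hive.conf.HiveConf
    import org.apache.hadoop.hive.serde.serdeConstants

    val shared = new HiveConf()

    // Before the patch: every scan mutated the shared conf, so one scan's
    // column list was still visible when the next scan ran.
    shared.set(serdeConstants.LIST_COLUMNS, "key,value")

    // After the patch: each scan clones the conf and mutates only the clone.
    val scanConf = new HiveConf(shared)
    scanConf.set(serdeConstants.LIST_COLUMNS, "key")
    assert(shared.get(serdeConstants.LIST_COLUMNS) == "key,value") // untouched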
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 84fafcde63..0de29d5cff 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, PathFilter}
+import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
import org.apache.hadoop.hive.ql.exec.Utilities
import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
@@ -52,7 +53,8 @@ private[hive]
class HadoopTableReader(
@transient attributes: Seq[Attribute],
@transient relation: MetastoreRelation,
- @transient sc: HiveContext)
+ @transient sc: HiveContext,
+ @transient hiveExtraConf: HiveConf)
extends TableReader {
// Choose the minimum number of splits. If mapred.map.tasks is set, then use that unless
// it is smaller than what Spark suggests.
@@ -63,7 +65,7 @@ class HadoopTableReader(
// TODO: set aws s3 credentials.
private val _broadcastedHiveConf =
- sc.sparkContext.broadcast(new SerializableWritable(sc.hiveconf))
+ sc.sparkContext.broadcast(new SerializableWritable(hiveExtraConf))
def broadcastedHiveConf = _broadcastedHiveConf
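Note the reader now broadcasts the scan-local conf rather than sc.hiveconf. A sketch of the broadcast pattern involved, assuming a live SparkContext named sc (HiveConf extends Hadoop's Configuration, which implements Writable, so Spark's SerializableWritable wrapper can carry it to executors):

    import org.apache.hadoop.hive.conf.HiveConf
    import org.apache.spark.{SparkContext, SerializableWritable}
    import org.apache.spark.broadcast.Broadcast

    // Ship one copy of the conf to each executor instead of serializing it
    // into every task closure.
    def broadcastHiveConf(
        sc: SparkContext,
        conf: HiveConf): Broadcast[SerializableWritable[HiveConf]] =
      sc.broadcast(new SerializableWritable(conf))

Because each HiveTableScan constructs its own HadoopTableReader, each scan now broadcasts its own cloned conf.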
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 577ca928b4..a32147584f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -64,8 +64,14 @@ case class HiveTableScan(
BindReferences.bindReference(pred, relation.partitionKeys)
}
+ // Create a local copy of hiveconf, so that scan-specific modifications do not
+ // impact other queries
@transient
- private[this] val hadoopReader = new HadoopTableReader(attributes, relation, context)
+ private[this] val hiveExtraConf = new HiveConf(context.hiveconf)
+
+ @transient
+ private[this] val hadoopReader =
+ new HadoopTableReader(attributes, relation, context, hiveExtraConf)
private[this] def castFromString(value: String, dataType: DataType) = {
Cast(Literal(value), dataType).eval(null)
@@ -97,7 +103,7 @@ case class HiveTableScan(
hiveConf.set(serdeConstants.LIST_COLUMNS, relation.attributes.map(_.name).mkString(","))
}
- addColumnMetadataToConf(context.hiveconf)
+ addColumnMetadataToConf(hiveExtraConf)
/**
* Prunes partitions not involved in the query plan.
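Taken together, the patch condenses to the following hypothetical helper (the real addColumnMetadataToConf also registers column ids and types, not shown here): the clone is what gets mutated and broadcast, and the session-wide conf stays pristine.

    import org.apache.hadoop.hive.conf.HiveConf
    import org.apache.hadoop.hive.serde.serdeConstants

    // Hypothetical condensation of the patch: build a scan-local conf that
    // carries only the columns this table scan actually needs.
    def scanLocalConf(session: HiveConf, neededColumns: Seq[String]): HiveConf = {
      val local = new HiveConf(session) // copy constructor; session untouched
      local.set(serdeConstants.LIST_COLUMNS, neededColumns.mkString(","))
      local
    }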