Diffstat (limited to 'sql')
 -rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala                         | 10
 -rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala  |  6
 -rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala                |  2
 3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 07e6e2eccd..279495aa64 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -79,13 +79,13 @@ private[sql] trait SQLConf {
private[spark] def dialect: String = getConf(DIALECT, "sql")
/** When true tables cached using the in-memory columnar caching will be compressed. */
- private[spark] def useCompression: Boolean = getConf(COMPRESS_CACHED, "false").toBoolean
+ private[spark] def useCompression: Boolean = getConf(COMPRESS_CACHED, "true").toBoolean
/** The compression codec for writing to a Parquet file */
- private[spark] def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION, "snappy")
+ private[spark] def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION, "gzip")
/** The number of rows that will be grouped into a single batch for in-memory columnar caching. */
- private[spark] def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE, "1000").toInt
+ private[spark] def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE, "10000").toInt
/** Number of partitions to use for shuffle operators. */
private[spark] def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS, "200").toInt
@@ -106,10 +106,10 @@ private[sql] trait SQLConf {
* a broadcast value during the physical executions of join operations. Setting this to -1
* effectively disables auto conversion.
*
- * Hive setting: hive.auto.convert.join.noconditionaltask.size, whose default value is also 10000.
+ * Hive setting: hive.auto.convert.join.noconditionaltask.size, whose default value is 10000.
*/
private[spark] def autoBroadcastJoinThreshold: Int =
- getConf(AUTO_BROADCASTJOIN_THRESHOLD, "10000").toInt
+ getConf(AUTO_BROADCASTJOIN_THRESHOLD, (10 * 1024 * 1024).toString).toInt
/**
* The default size in bytes to assign to a logical operator's estimation statistics. By default,
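These new defaults only take effect when the corresponding keys are unset; each of them can still be overridden per context through setConf. Below is a minimal sketch of reverting to the previous behaviour, assuming the usual key strings behind COMPRESS_CACHED, PARQUET_COMPRESSION, COLUMN_BATCH_SIZE and AUTO_BROADCASTJOIN_THRESHOLD (the key strings themselves do not appear in this diff, so they are an assumption here):

// Sketch only: the config key strings below are assumed, not taken from this diff.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

val sc = new SparkContext(new SparkConf().setAppName("sqlconf-demo").setMaster("local[2]"))
val sqlContext = new SQLContext(sc)

// Revert to the old defaults changed by this patch.
sqlContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", "false") // was "false", now "true"
sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")         // was "snappy", now "gzip"
sqlContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", "1000")   // was 1000, now 10000
sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold", "10000")         // was 10000 bytes, now 10 * 1024 * 1024 = 10485760 bytes (10 MB)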
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index 9664c565a0..d00860a8bb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -123,7 +123,7 @@ case class ParquetTableScan(
// Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata
conf.set(
SQLConf.PARQUET_CACHE_METADATA,
- sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "false"))
+ sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true"))
val baseRDD =
new org.apache.spark.rdd.NewHadoopRDD(
@@ -394,7 +394,7 @@ private[parquet] class FilteringParquetRowInputFormat
if (footers eq null) {
val conf = ContextUtil.getConfiguration(jobContext)
- val cacheMetadata = conf.getBoolean(SQLConf.PARQUET_CACHE_METADATA, false)
+ val cacheMetadata = conf.getBoolean(SQLConf.PARQUET_CACHE_METADATA, true)
val statuses = listStatus(jobContext)
fileStatuses = statuses.map(file => file.getPath -> file).toMap
if (statuses.isEmpty) {
@@ -493,7 +493,7 @@ private[parquet] class FilteringParquetRowInputFormat
import parquet.filter2.compat.FilterCompat.Filter;
import parquet.filter2.compat.RowGroupFilter;
- val cacheMetadata = configuration.getBoolean(SQLConf.PARQUET_CACHE_METADATA, false)
+ val cacheMetadata = configuration.getBoolean(SQLConf.PARQUET_CACHE_METADATA, true)
val splits = mutable.ArrayBuffer.empty[ParquetInputSplit]
val filter: Filter = ParquetInputFormat.getFilter(configuration)
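The metadata-caching flag crosses from the SQL layer into the Hadoop Configuration as a plain string, so the default ParquetTableScan uses when writing it and the default FilteringParquetRowInputFormat uses when reading it back must agree, which is what these hunks align. A small sketch of that round trip, assuming a key string of "spark.sql.parquet.cacheMetadata" for SQLConf.PARQUET_CACHE_METADATA (only the constant name appears in the diff):

// Sketch of the string -> boolean round trip through a Hadoop Configuration.
// The key string is an assumption; the diff only references SQLConf.PARQUET_CACHE_METADATA.
import org.apache.hadoop.conf.Configuration

val cacheMetadataKey = "spark.sql.parquet.cacheMetadata"

// Case 1: the scan has set the value explicitly as a string; getBoolean parses it.
val confWithValue = new Configuration()
confWithValue.set(cacheMetadataKey, "true")
assert(confWithValue.getBoolean(cacheMetadataKey, false))

// Case 2: nothing was set upstream; only the getBoolean fallback applies,
// so it must match the SQL-side default ("true" after this patch).
val confWithoutValue = new Configuration()
assert(confWithoutValue.getBoolean(cacheMetadataKey, true))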
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index f025169ad5..e88afaaf00 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -90,7 +90,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
* SerDe.
*/
private[spark] def convertMetastoreParquet: Boolean =
- getConf("spark.sql.hive.convertMetastoreParquet", "false") == "true"
+ getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true"
override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
new this.QueryExecution { val logical = plan }
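With this change, metastore Parquet tables are read through Spark SQL's native Parquet support by default rather than the Hive SerDe. The flag is compared as a string (== "true"), so it can be flipped back per context. A minimal sketch, assuming a Hive-enabled build; the key string is the one shown in the hunk above:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

val sc = new SparkContext(new SparkConf().setAppName("hive-parquet-demo").setMaster("local[2]"))
val hiveContext = new HiveContext(sc)

// Restore the pre-patch behaviour: read metastore Parquet tables through the Hive SerDe.
// The value must be the exact string "false", since the getter compares against "true".
hiveContext.setConf("spark.sql.hive.convertMetastoreParquet", "false")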