diff options
author | Michael Armbrust <michael@databricks.com> | 2014-11-03 14:08:27 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-11-03 14:08:27 -0800 |
commit | 25bef7e6951301e93004567fc0cef96bf8d1a224 (patch) | |
tree | 73941695b30cb7cdf96c9805935697162c578b14 /sql/hive | |
parent | e83f13e8d37ca33f4e183e977d077221b90c6025 (diff) | |
download | spark-25bef7e6951301e93004567fc0cef96bf8d1a224.tar.gz spark-25bef7e6951301e93004567fc0cef96bf8d1a224.tar.bz2 spark-25bef7e6951301e93004567fc0cef96bf8d1a224.zip |
[SQL] More aggressive defaults
- Turns on compression for in-memory cached data by default
- Changes the default parquet compression format back to gzip (we have seen more OOMs with production workloads due to the way Snappy allocates memory)
- Ups the batch size to 10,000 rows
- Increases the broadcast threshold to 10mb.
- Uses our parquet implementation instead of the hive one by default.
- Cache parquet metadata by default.
Author: Michael Armbrust <michael@databricks.com>
Closes #3064 from marmbrus/fasterDefaults and squashes the following commits:
97ee9f8 [Michael Armbrust] parquet codec docs
e641694 [Michael Armbrust] Remote also
a12866a [Michael Armbrust] Cache metadata.
2d73acc [Michael Armbrust] Update docs defaults.
d63d2d5 [Michael Armbrust] document parquet option
da373f9 [Michael Armbrust] More aggressive defaults
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index f025169ad5..e88afaaf00 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -90,7 +90,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * SerDe. */ private[spark] def convertMetastoreParquet: Boolean = - getConf("spark.sql.hive.convertMetastoreParquet", "false") == "true" + getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true" override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution { val logical = plan } |