From 64e826f91eabb1a22d3d163d71fbb7b6d2185f25 Mon Sep 17 00:00:00 2001
From: Yadong Qi <qiyadong2010@gmail.com>
Date: Tue, 6 Sep 2016 10:57:21 +0800
Subject: [SPARK-17358][SQL] Cached tables (parquet/orc) should be shared between
 beelines

## What changes were proposed in this pull request?
Cached tables (parquet/orc) couldn't be shared between beelines, because the `sameResult` method used by `CacheManager` always returns false (the `sparkSession` instances are different) when comparing two `HadoopFsRelation` instances created in different beelines. So we make `sparkSession` a curried parameter, i.e. we move it into a second parameter list of `HadoopFsRelation`.

## How was this patch tested?
Beeline1
```
1: jdbc:hive2://localhost:10000> CACHE TABLE src_pqt;
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (5.143 seconds)
1: jdbc:hive2://localhost:10000> EXPLAIN SELECT * FROM src_pqt;
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
|                                                                                                                                                                                                            plan                                                                                                                                                                                                            |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| == Physical Plan ==
InMemoryTableScan [key#49, value#50]
   +- InMemoryRelation [key#49, value#50], true, 10000, StorageLevel(disk, memory, deserialized, 1 replicas), `src_pqt`
         +- *FileScan parquet default.src_pqt[key#0,value#1] Batched: true, Format: ParquetFormat, InputPaths: hdfs://199.0.0.1:9000/qiyadong/src_pqt, PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:string>  |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
```

Beeline2
```
0: jdbc:hive2://localhost:10000> EXPLAIN SELECT * FROM src_pqt;
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
|                                                                                                                                                                                                            plan                                                                                                                                                                                                            |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| == Physical Plan ==
InMemoryTableScan [key#68, value#69]
   +- InMemoryRelation [key#68, value#69], true, 10000, StorageLevel(disk, memory, deserialized, 1 replicas), `src_pqt`
         +- *FileScan parquet default.src_pqt[key#0,value#1] Batched: true, Format: ParquetFormat, InputPaths: hdfs://199.0.0.1:9000/qiyadong/src_pqt, PartitionFilters: [], PushedFilters: [], ReadSchema: struct<key:int,value:string>  |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
```

Author: Yadong Qi <qiyadong2010@gmail.com>

Closes #14913 from watermen/SPARK-17358.
---
 .../main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala    | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'sql/hive/src')

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index d31a8d643a..c48d4ed608 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -249,13 +249,12 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
         }
 
         val relation = HadoopFsRelation(
-          sparkSession = sparkSession,
           location = fileCatalog,
           partitionSchema = partitionSchema,
           dataSchema = inferredSchema,
           bucketSpec = bucketSpec,
           fileFormat = defaultSource,
-          options = options)
+          options = options)(sparkSession = sparkSession)
 
         val created = LogicalRelation(relation, catalogTable = Some(metastoreRelation.catalogTable))
         cachedDataSourceTables.put(tableIdentifier, created)
-- 
cgit v1.2.3