aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/compatibility
diff options
context:
space:
mode:
author: gatorsmile <gatorsmile@gmail.com> 2016-11-07 01:16:37 -0800
committer: Reynold Xin <rxin@databricks.com> 2016-11-07 01:16:37 -0800
commit: 57626a55703a189e03148398f67c36cd0e557044 (patch)
tree: 0f4ae99e29245041cae4d41f58aa208dba6beea4 /sql/hive/compatibility
parent: 9db06c442cf85e41d51c7b167817f4e7971bf0da (diff)
download: spark-57626a55703a189e03148398f67c36cd0e557044.tar.gz
spark-57626a55703a189e03148398f67c36cd0e557044.tar.bz2
spark-57626a55703a189e03148398f67c36cd0e557044.zip
[SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
### What changes were proposed in this pull request?

The Hive built-in `hash` function has not been used in Spark since Spark 2.0. The public interface does not allow users to unregister Spark's built-in functions, so users will never end up using Hive's built-in `hash` function.

The only exception is `TestHiveFunctionRegistry`, which allows users to unregister the built-in functions; this is how Hive's hash function can be loaded in the test cases. If we disable it, 10+ test cases will fail because their results differ from the Hive golden answer files.

This PR removes `hash` from the list of `hiveFunctions` in `HiveSessionCatalog`. It also removes `TestHiveFunctionRegistry`. This removal makes it easier for us to remove `TestHiveSessionState` in the future.

### How was this patch tested?

N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #14498 from gatorsmile/removeHash.
Diffstat (limited to 'sql/hive/compatibility')
-rw-r--r-- sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala 41
1 files changed, 20 insertions, 21 deletions
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f5d10de8cd..5cd4935e22 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
// Enable in-memory partition pruning for testing purposes
TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
- // Use Hive hash expression instead of the native one
- TestHive.sessionState.functionRegistry.unregisterFunction("hash")
// Ensures that the plans generation use metastore relation and not OrcRelation
// Was done because SqlBuilder does not work with plans having logical relation
TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
@@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
- TestHive.sessionState.functionRegistry.restore()
// For debugging dump some statistics about how much time was spent in various optimizer rules
logWarning(RuleExecutor.dumpTimeSpent())
@@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"auto_join6",
"auto_join7",
"auto_join8",
- "auto_join9"
+ "auto_join9",
+
+ // These tests are based on the Hive's hash function, which is different from Spark
+ "auto_join19",
+ "auto_join22",
+ "auto_join25",
+ "auto_join26",
+ "auto_join27",
+ "auto_join28",
+ "auto_join30",
+ "auto_join31",
+ "auto_join_nulls",
+ "auto_join_reordering_values",
+ "correlationoptimizer1",
+ "correlationoptimizer2",
+ "correlationoptimizer3",
+ "correlationoptimizer4",
+ "multiMapJoin1",
+ "orc_dictionary_threshold",
+ "udf_hash"
)
/**
@@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"annotate_stats_part",
"annotate_stats_table",
"annotate_stats_union",
- "auto_join19",
- "auto_join22",
- "auto_join25",
- "auto_join26",
- "auto_join27",
- "auto_join28",
- "auto_join30",
- "auto_join31",
- "auto_join_nulls",
- "auto_join_reordering_values",
"binary_constant",
"binarysortable_1",
"cast1",
@@ -623,15 +629,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"compute_stats_long",
"compute_stats_string",
"convert_enum_to_string",
- "correlationoptimizer1",
"correlationoptimizer10",
"correlationoptimizer11",
"correlationoptimizer13",
"correlationoptimizer14",
"correlationoptimizer15",
- "correlationoptimizer2",
- "correlationoptimizer3",
- "correlationoptimizer4",
"correlationoptimizer6",
"correlationoptimizer7",
"correlationoptimizer8",
@@ -871,7 +873,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"merge2",
"merge4",
"mergejoins",
- "multiMapJoin1",
"multiMapJoin2",
"multi_insert_gby",
"multi_insert_gby3",
@@ -893,7 +894,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"nullinput2",
"nullscript",
"optional_outer",
- "orc_dictionary_threshold",
"order",
"order2",
"outer_join_ppr",
@@ -1026,7 +1026,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_from_unixtime",
"udf_greaterthan",
"udf_greaterthanorequal",
- "udf_hash",
"udf_hex",
"udf_if",
"udf_index",