diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-11-07 01:16:37 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-11-07 01:16:37 -0800 |
commit | 57626a55703a189e03148398f67c36cd0e557044 (patch) | |
tree | 0f4ae99e29245041cae4d41f58aa208dba6beea4 /sql/hive/compatibility | |
parent | 9db06c442cf85e41d51c7b167817f4e7971bf0da (diff) | |
download | spark-57626a55703a189e03148398f67c36cd0e557044.tar.gz spark-57626a55703a189e03148398f67c36cd0e557044.tar.bz2 spark-57626a55703a189e03148398f67c36cd0e557044.zip |
[SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
### What changes were proposed in this pull request?
The Hive built-in `hash` function has not been used in Spark since Spark 2.0. The public interface does not allow users to unregister Spark's built-in functions. Thus, users will never use Hive's built-in `hash` function.
The only exception here is `TestHiveFunctionRegistry`, which allows users to unregister the built-in functions. Thus, we can load Hive's hash function in the test cases. If we disable it, 10+ test cases will fail because the results are different from the Hive golden answer files.
This PR removes `hash` from the list of `hiveFunctions` in `HiveSessionCatalog`. It also removes `TestHiveFunctionRegistry`. This removal makes it easier for us to remove `TestHiveSessionState` in the future.
### How was this patch tested?
N/A
Author: gatorsmile <gatorsmile@gmail.com>
Closes #14498 from gatorsmile/removeHash.
Diffstat (limited to 'sql/hive/compatibility')
-rw-r--r-- | sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index f5d10de8cd..5cd4935e22 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5) // Enable in-memory partition pruning for testing purposes TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) - // Use Hive hash expression instead of the native one - TestHive.sessionState.functionRegistry.unregisterFunction("hash") // Ensures that the plans generation use metastore relation and not OrcRelation // Was done because SqlBuilder does not work with plans having logical relation TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false) @@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) - TestHive.sessionState.functionRegistry.restore() // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) @@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "auto_join6", "auto_join7", "auto_join8", - "auto_join9" + "auto_join9", + + // These tests are based on the Hive's hash function, which is different from Spark + "auto_join19", + "auto_join22", + "auto_join25", + "auto_join26", + "auto_join27", + "auto_join28", + "auto_join30", + "auto_join31", + 
"auto_join_nulls", + "auto_join_reordering_values", + "correlationoptimizer1", + "correlationoptimizer2", + "correlationoptimizer3", + "correlationoptimizer4", + "multiMapJoin1", + "orc_dictionary_threshold", + "udf_hash" ) /** @@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "annotate_stats_part", "annotate_stats_table", "annotate_stats_union", - "auto_join19", - "auto_join22", - "auto_join25", - "auto_join26", - "auto_join27", - "auto_join28", - "auto_join30", - "auto_join31", - "auto_join_nulls", - "auto_join_reordering_values", "binary_constant", "binarysortable_1", "cast1", @@ -623,15 +629,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "compute_stats_long", "compute_stats_string", "convert_enum_to_string", - "correlationoptimizer1", "correlationoptimizer10", "correlationoptimizer11", "correlationoptimizer13", "correlationoptimizer14", "correlationoptimizer15", - "correlationoptimizer2", - "correlationoptimizer3", - "correlationoptimizer4", "correlationoptimizer6", "correlationoptimizer7", "correlationoptimizer8", @@ -871,7 +873,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "merge2", "merge4", "mergejoins", - "multiMapJoin1", "multiMapJoin2", "multi_insert_gby", "multi_insert_gby3", @@ -893,7 +894,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "nullinput2", "nullscript", "optional_outer", - "orc_dictionary_threshold", "order", "order2", "outer_join_ppr", @@ -1026,7 +1026,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_from_unixtime", "udf_greaterthan", "udf_greaterthanorequal", - "udf_hash", "udf_hex", "udf_if", "udf_index", |