diff options
Diffstat (limited to 'sql')
3 files changed, 48 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6d569b612d..2f8489de6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -117,6 +117,8 @@ class Analyzer( Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), ResolveHints.RemoveAllHints), + Batch("Simple Sanity Check", Once, + LookupFunctions), Batch("Substitution", fixedPoint, CTESubstitution, WindowsSubstitution, @@ -1039,6 +1041,25 @@ class Analyzer( } /** + * Checks whether a function identifier referenced by an [[UnresolvedFunction]] is defined in the + * function registry. Note that this rule doesn't try to resolve the [[UnresolvedFunction]]. It + * only performs simple existence check according to the function identifier to quickly identify + * undefined functions without triggering relation resolution, which may incur potentially + * expensive partition/schema discovery process in some cases. + * + * @see [[ResolveFunctions]] + * @see https://issues.apache.org/jira/browse/SPARK-19737 + */ + object LookupFunctions extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions { + case f: UnresolvedFunction if !catalog.functionExists(f.name) => + withPosition(f) { + throw new NoSuchFunctionException(f.name.database.getOrElse("default"), f.name.funcName) + } + } + } + + /** * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s. 
*/ object ResolveFunctions extends Rule[LogicalPlan] { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index a755231962..ffc272c6c0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, SimpleCatalystConf, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -1196,4 +1196,25 @@ class SessionCatalogSuite extends PlanTest { catalog.listFunctions("unknown_db", "func*") } } + + test("SPARK-19737: detect undefined functions without triggering relation resolution") { + import org.apache.spark.sql.catalyst.dsl.plans._ + + Seq(true, false) foreach { caseSensitive => + val conf = SimpleCatalystConf(caseSensitive) + val catalog = new SessionCatalog(newBasicCatalog(), new SimpleFunctionRegistry, conf) + val analyzer = new Analyzer(catalog, conf) + + // The analyzer should report the undefined function rather than the undefined table first. 
+ val cause = intercept[AnalysisException] { + analyzer.execute( + UnresolvedRelation(TableIdentifier("undefined_table")).select( + UnresolvedFunction("undefined_fn", Nil, isDistinct = false) + ) + ) + } + + assert(cause.getMessage.contains("Undefined function: 'undefined_fn'")) + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index c9be1b9d10..f1ea86890c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -199,6 +199,11 @@ private[sql] class HiveSessionCatalog( } } + // TODO Removes this method after implementing Spark native "histogram_numeric". + override def functionExists(name: FunctionIdentifier): Boolean = { + super.functionExists(name) || hiveFunctions.contains(name.funcName) + } + /** List of functions we pass over to Hive. Note that over time this list should go to 0. */ // We have a list of Hive built-in functions that we do not support. So, we will check // Hive's function registry and lazily load needed functions into our own function registry. |