-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala21
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala23
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala5
3 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 6d569b612d..2f8489de6b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -117,6 +117,8 @@ class Analyzer(
Batch("Hints", fixedPoint,
new ResolveHints.ResolveBroadcastHints(conf),
ResolveHints.RemoveAllHints),
+ Batch("Simple Sanity Check", Once,
+ LookupFunctions),
Batch("Substitution", fixedPoint,
CTESubstitution,
WindowsSubstitution,
@@ -1039,6 +1041,25 @@ class Analyzer(
}
/**
+ * Checks whether a function identifier referenced by an [[UnresolvedFunction]] is defined in the
+ * function registry. Note that this rule doesn't try to resolve the [[UnresolvedFunction]]. It
+ * only performs a simple existence check based on the function identifier to quickly identify
+ * undefined functions without triggering relation resolution, which may incur a potentially
+ * expensive partition/schema discovery process in some cases.
+ *
+ * @see [[ResolveFunctions]]
+ * @see https://issues.apache.org/jira/browse/SPARK-19737
+ */
+ object LookupFunctions extends Rule[LogicalPlan] {
+ override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions {
+ case f: UnresolvedFunction if !catalog.functionExists(f.name) =>
+ withPosition(f) {
+ throw new NoSuchFunctionException(f.name.database.getOrElse("default"), f.name.funcName)
+ }
+ }
+ }
+
+ /**
* Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s.
*/
object ResolveFunctions extends Rule[LogicalPlan] {
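The new LookupFunctions rule runs in its own Once batch ahead of the Substitution
and Resolution batches, so a misspelled function name now fails analysis before any
relation is resolved. A minimal sketch of the user-visible effect follows; the object,
table, and function names are made up, and it assumes a Spark build that includes this
patch (NoSuchFunctionException extends AnalysisException, so catching the latter is
enough):

    import org.apache.spark.sql.{AnalysisException, SparkSession}

    object LookupFunctionsDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[1]")
          .appName("lookup-functions-demo")
          .getOrCreate()
        try {
          // `some_partitioned_table` need not even exist: the sanity check
          // rejects the unknown function before relation resolution runs,
          // so no partition/schema discovery is attempted.
          spark.sql("SELECT undefined_fn(1) FROM some_partitioned_table").collect()
        } catch {
          case e: AnalysisException =>
            println(e.getMessage) // "Undefined function: 'undefined_fn'. ..."
        } finally {
          spark.stop()
        }
      }
    }
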
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index a755231962..ffc272c6c0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.catalog
import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.{FunctionIdentifier, SimpleCatalystConf, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
@@ -1196,4 +1196,25 @@ class SessionCatalogSuite extends PlanTest {
catalog.listFunctions("unknown_db", "func*")
}
}
+
+ test("SPARK-19737: detect undefined functions without triggering relation resolution") {
+ import org.apache.spark.sql.catalyst.dsl.plans._
+
+ Seq(true, false).foreach { caseSensitive =>
+ val conf = SimpleCatalystConf(caseSensitive)
+ val catalog = new SessionCatalog(newBasicCatalog(), new SimpleFunctionRegistry, conf)
+ val analyzer = new Analyzer(catalog, conf)
+
+ // The analyzer should report the undefined function first, rather than the undefined table.
+ val cause = intercept[AnalysisException] {
+ analyzer.execute(
+ UnresolvedRelation(TableIdentifier("undefined_table")).select(
+ UnresolvedFunction("undefined_fn", Nil, isDistinct = false)
+ )
+ )
+ }
+
+ assert(cause.getMessage.contains("Undefined function: 'undefined_fn'"))
+ }
+ }
}
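As a counterpart to the negative test above, the sketch below exercises the positive
path through the same existence check ("my_len" is a made-up name; the registry method
names are assumed to match this codebase): once a builder is registered, functionExists
returns true and LookupFunctions lets the plan proceed to the resolution batches.

    import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
    import org.apache.spark.sql.catalyst.expressions.{Expression, Length}

    object RegistrySketch extends App {
      val registry = new SimpleFunctionRegistry

      // Register a trivial one-argument builder under a made-up name.
      registry.registerFunction("my_len", (args: Seq[Expression]) => Length(args.head))

      assert(registry.functionExists("my_len"))        // passes the sanity check
      assert(!registry.functionExists("undefined_fn")) // LookupFunctions would reject this
    }
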
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index c9be1b9d10..f1ea86890c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -199,6 +199,11 @@ private[sql] class HiveSessionCatalog(
}
}
+ // TODO: Remove this method after implementing a Spark-native "histogram_numeric".
+ override def functionExists(name: FunctionIdentifier): Boolean = {
+ super.functionExists(name) || hiveFunctions.contains(name.funcName)
+ }
+
/** List of functions we pass over to Hive. Note that over time this list should go to 0. */
// We have a list of Hive built-in functions that we do not support. So, we will check
// Hive's function registry and lazily load needed functions into our own function registry.
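
The override above keeps the new sanity check from rejecting functions that Spark
still delegates to Hive: the native lookup is consulted first, then the pass-through
list. A simplified, self-contained sketch of that fallback pattern (NativeCatalog,
HiveAwareCatalog, and the function sets are hypothetical stand-ins, not Spark
classes):

    object FallbackDemo extends App {
      class NativeCatalog {
        private val registered = Set("abs", "length")
        def functionExists(name: String): Boolean = registered.contains(name)
      }

      class HiveAwareCatalog extends NativeCatalog {
        // Functions not yet implemented natively that are passed over to Hive.
        private val hivePassThrough = Set("histogram_numeric")

        override def functionExists(name: String): Boolean =
          super.functionExists(name) || hivePassThrough.contains(name)
      }

      val catalog = new HiveAwareCatalog
      assert(catalog.functionExists("abs"))               // native function
      assert(catalog.functionExists("histogram_numeric")) // Hive pass-through
      assert(!catalog.functionExists("undefined_fn"))     // truly undefined
    }

Putting the fallback inside functionExists itself means every caller of the check,
including the new LookupFunctions rule, sees the same answer.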