From 890abd1279014d692548c9f3b557483644a0ee32 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Sat, 23 Apr 2016 12:49:36 -0700
Subject: [SPARK-14869][SQL] Don't mask exceptions in ResolveRelations

## What changes were proposed in this pull request?

In order to support running SQL directly on files, we added some code in ResolveRelations to catch the exception thrown by catalog.lookupRelation and ignore it. This unfortunately masks all the exceptions. This patch changes the logic to simply test the table's existence.

## How was this patch tested?

I manually hacked some bugs into Spark and made sure the exceptions were being propagated up.

Author: Reynold Xin

Closes #12634 from rxin/SPARK-14869.
---
 .../org/apache/spark/sql/catalyst/CatalystConf.scala  |  8 +++++---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 18 ++++++++++++------
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala   |  2 +-
 .../spark/sql/catalyst/catalog/InMemoryCatalog.scala  |  2 +-
 4 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'sql/catalyst/src')

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
index 6e798a53ad..179dab11a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -32,6 +32,8 @@ trait CatalystConf {
   def optimizerInSetConversionThreshold: Int
   def maxCaseBranchesForCodegen: Int
 
+  def runSQLonFile: Boolean
+
   /**
    * Returns the [[Resolver]] for the current configuration, which can be used to determine if two
    * identifiers are equal.
@@ -49,6 +51,6 @@ case class SimpleCatalystConf(
     groupByOrdinal: Boolean = true,
     optimizerMaxIterations: Int = 100,
     optimizerInSetConversionThreshold: Int = 10,
-    maxCaseBranchesForCodegen: Int = 20)
-  extends CatalystConf {
-}
+    maxCaseBranchesForCodegen: Int = 20,
+    runSQLonFile: Boolean = true)
+  extends CatalystConf
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 24558d5b8c..50957e8661 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -412,7 +412,7 @@ class Analyzer(
         catalog.lookupRelation(u.tableIdentifier, u.alias)
       } catch {
         case _: NoSuchTableException =>
-          u.failAnalysis(s"Table or View not found: ${u.tableName}")
+          u.failAnalysis(s"Table or view not found: ${u.tableName}")
       }
     }
 
@@ -420,12 +420,18 @@ class Analyzer(
       case i @ InsertIntoTable(u: UnresolvedRelation, _, _, _, _) =>
         i.copy(table = EliminateSubqueryAliases(lookupTableFromCatalog(u)))
       case u: UnresolvedRelation =>
-        try {
+        val table = u.tableIdentifier
+        if (table.database.isDefined && conf.runSQLonFile &&
+            (!catalog.databaseExists(table.database.get) || !catalog.tableExists(table))) {
+          // If the table does not exist, and the database part is specified, and we support
+          // running SQL directly on files, then let's just return the original UnresolvedRelation.
+          // It is possible we are matching a query like "select * from parquet.`/path/to/query`".
+          // The plan will get resolved later.
+          // Note that we are testing (!db_exists || !table_exists) because the catalog throws
+          // an exception from tableExists if the database does not exist.
+          u
+        } else {
           lookupTableFromCatalog(u)
-        } catch {
-          case _: AnalysisException if u.tableIdentifier.database.isDefined =>
-            // delay the exception into CheckAnalysis, then it could be resolved as data source.
-            u
         }
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index a50b9a1e1a..6b737d6b78 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -52,7 +52,7 @@ trait CheckAnalysis extends PredicateHelper {
           case p if p.analyzed => // Skip already analyzed sub-plans
 
           case u: UnresolvedRelation =>
-            u.failAnalysis(s"Table or View not found: ${u.tableIdentifier}")
+            u.failAnalysis(s"Table or view not found: ${u.tableIdentifier}")
 
           case operator: LogicalPlan =>
             operator transformExpressionsUp {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 36f4f29068..b8f0e458fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -62,7 +62,7 @@ class InMemoryCatalog extends ExternalCatalog {
   private def requireTableExists(db: String, table: String): Unit = {
     if (!tableExists(db, table)) {
       throw new AnalysisException(
-        s"Table or View not found: '$table' does not exist in database '$db'")
+        s"Table or view not found: '$table' does not exist in database '$db'")
     }
   }
 
--
cgit v1.2.3
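
For context, a minimal sketch of the SQL-on-files behavior the rewritten ResolveRelations branch preserves, assuming a Spark 2.x `SparkSession` and a hypothetical Parquet file path (neither is part of the patch):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative sketch only: the object name, master setting, and file path
// are assumptions, not part of this patch.
object SqlOnFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("sql-on-files-sketch")
      .master("local[*]")
      .getOrCreate()

    // Here "parquet" is a data source name, not a catalog database. Since that
    // database does not exist in the catalog, ResolveRelations now returns the
    // UnresolvedRelation untouched and a later rule resolves it as a file scan.
    spark.sql("SELECT * FROM parquet.`/tmp/users.parquet`").show()

    // A genuinely missing table still fails cleanly, in CheckAnalysis:
    // spark.sql("SELECT * FROM missing_db.missing_table")
    //   => AnalysisException: Table or view not found: ...

    // Any other exception thrown by catalog.lookupRelation (the bug this patch
    // fixes) now propagates instead of being silently swallowed.

    spark.stop()
  }
}
```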