From 9dd2741ebe5f9b5fa0a3b0e9c594d0e94b6226f9 Mon Sep 17 00:00:00 2001
From: Herman van Hovell
Date: Wed, 3 Feb 2016 12:31:30 -0800
Subject: [SPARK-13157] [SQL] Support any kind of input for SQL commands.

The ```SparkSqlLexer``` currently swallows characters which have not
been defined in the grammar. This causes problems with SQL commands,
such as: ```add jar file:///tmp/a@b/TestUDTF.jar```; in this example
the ```@``` is swallowed. This PR adds an extra lexer rule to handle
such input, and changes ```ASTNode``` to read source text from the
original (unfiltered) token stream.

cc davies liancheng

Author: Herman van Hovell

Closes #11052 from hvanhovell/SPARK-13157.
---
 .../spark/sql/catalyst/parser/SparkSqlLexer.g      |  4 +++
 .../apache/spark/sql/catalyst/parser/ASTNode.scala |  4 +--
 .../spark/sql/catalyst/parser/ASTNodeSuite.scala   | 38 ++++++++++++++++++++++
 .../thriftserver/HiveThriftServer2Suites.scala     |  6 ++--
 4 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ASTNodeSuite.scala

diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
index e930caa291..1d07a27353 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
@@ -483,3 +483,7 @@ COMMENT
     { $channel=HIDDEN; }
   ;
 
+/* Prevent the lexer from swallowing unknown characters. */
+ANY
+    :.
+    ;

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala
index ec9812414e..28f7b10ed6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ASTNode.scala
@@ -58,12 +58,12 @@ case class ASTNode(
   override val origin: Origin = Origin(Some(line), Some(positionInLine))
 
   /** Source text. */
-  lazy val source: String = stream.toString(startIndex, stopIndex)
+  lazy val source: String = stream.toOriginalString(startIndex, stopIndex)
 
   /** Get the source text that remains after this token. */
   lazy val remainder: String = {
     stream.fill()
-    stream.toString(stopIndex + 1, stream.size() - 1).trim()
+    stream.toOriginalString(stopIndex + 1, stream.size() - 1).trim()
   }
 
   def text: String = token.getText

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ASTNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ASTNodeSuite.scala
new file mode 100644
index 0000000000..8b05f9e33d
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ASTNodeSuite.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.SparkFunSuite
+
+class ASTNodeSuite extends SparkFunSuite {
+  test("SPARK-13157 - remainder must return all input chars") {
+    val inputs = Seq(
+      ("add jar", "file:///tmp/ab/TestUDTF.jar"),
+      ("add jar", "file:///tmp/a@b/TestUDTF.jar"),
+      ("add jar", "c:\\windows32\\TestUDTF.jar"),
+      ("add jar", "some \nbad\t\tfile\r\n.\njar"),
+      ("ADD JAR", "@*#&@(!#@$^*!@^@#(*!@#"),
+      ("SET", "foo=bar"),
+      ("SET", "foo*)(@#^*@&!#^=bar")
+    )
+    inputs.foreach {
+      case (command, arguments) =>
+        val node = ParseDriver.parsePlan(s"$command $arguments", null)
+        assert(node.remainder === arguments)
+    }
+  }
+}

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index 9860e40fe8..ba3b26e1b7 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -488,8 +488,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
     }
   }
 
-  // TODO: enable this
-  ignore("SPARK-11595 ADD JAR with input path having URL scheme") {
+  test("SPARK-11595 ADD JAR with input path having URL scheme") {
     withJdbcStatement { statement =>
       val jarPath = "../hive/src/test/resources/TestUDTF.jar"
       val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath"
@@ -547,8 +546,7 @@ class SingleSessionSuite extends HiveThriftJdbcTest {
   override protected def extraConf: Seq[String] =
     "--conf spark.sql.hive.thriftServer.singleSession=true" :: Nil
 
-  // TODO: enable this
-  ignore("test single session") {
+  test("test single session") {
     withMultipleConnectionJdbcStatement(
       { statement =>
         val jarPath = "../hive/src/test/resources/TestUDTF.jar"
-- 
cgit v1.2.3
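A quick way to see the behavior this patch fixes is a sketch along the lines of the new ```ASTNodeSuite```. The snippet below is illustrative only (the ```Spark13157Demo``` object is not part of the patch); it assumes a build containing this change and, like the suite, lives in the ```org.apache.spark.sql.catalyst.parser``` package so that ```ParseDriver``` is in scope. It calls ```ParseDriver.parsePlan(sql, conf)``` with a ```null``` conf, exactly as the added test does:

```scala
package org.apache.spark.sql.catalyst.parser

// Illustrative demo, not part of the patch. Before this change the lexer
// silently dropped '@' (a character with no rule in the grammar), so
// `remainder` returned "file:///tmp/ab/TestUDTF.jar". With the catch-all
// ANY rule and toOriginalString, the argument round-trips unchanged.
object Spark13157Demo extends App {
  val node = ParseDriver.parsePlan("add jar file:///tmp/a@b/TestUDTF.jar", null)
  assert(node.remainder == "file:///tmp/a@b/TestUDTF.jar")
  println(node.remainder) // file:///tmp/a@b/TestUDTF.jar
}
```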