/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

/**
 * String helpers: SQL 'like' pattern conversion, boolean-literal recognition and name filtering.
 */
object StringUtils {

  /**
   * Validate and convert SQL 'like' pattern to a Java regular expression.
   *
   * Underscores (_) are converted to '.' and percent signs (%) are converted to '.*', other
   * characters are quoted literally. Escaping is done according to the rules specified in
   * [[org.apache.spark.sql.catalyst.expressions.Like]] usage documentation. An invalid pattern will
   * throw an [[AnalysisException]].
   *
   * The escape character may only precede '_', '%' or itself; the character that follows it is
   * emitted as a quoted literal. The escape check runs before the wildcard checks, so a wildcard
   * character chosen as `escapeChar` acts as the escape character.
   *
   * @param pattern the SQL pattern to convert
   * @param escapeChar the character used to escape '_', '%' and itself; defaults to backslash,
   *                   which is the behaviour of the original single-argument form
   * @return the equivalent Java regular expression of the pattern
   * @throws AnalysisException if the escape character precedes any other character, or if the
   *                           pattern ends with the escape character
   */
  def escapeLikeRegex(pattern: String, escapeChar: Char = '\\'): String = {
    val in = pattern.iterator
    val out = new StringBuilder()

    def fail(message: String): Nothing = throw new AnalysisException(
      s"the pattern '$pattern' is invalid, $message")

    while (in.hasNext) {
      in.next() match {
        case c if c == escapeChar && in.hasNext =>
          // Consume the escaped character together with the escape character.
          val escaped = in.next()
          if (escaped == '_' || escaped == '%' || escaped == escapeChar) {
            out ++= Pattern.quote(Character.toString(escaped))
          } else {
            fail(s"the escape character is not allowed to precede '$escaped'")
          }
        case c if c == escapeChar =>
          // Reached only when the escape character is the last character of the pattern.
          fail("it is not allowed to end with the escape character")
        case '_' => out ++= "."
        case '%' => out ++= ".*"
        case c => out ++= Pattern.quote(Character.toString(c))
      }
    }
    "(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines
  }

  // Lower-case spellings recognised as boolean true / false.
  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** Returns true if the lower-cased `s` is one of "t", "true", "y", "yes" or "1". */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** Returns true if the lower-cased `s` is one of "f", "false", "n", "no" or "0". */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /**
   * This utility can be used for filtering pattern in the "Like" of "Show Tables / Functions" DDL
   *
   * Leading and trailing white space of the whole pattern is trimmed, then the pattern is split
   * on '|' into alternatives. In each alternative every '*' is replaced by '.*' and the result is
   * matched case-insensitively against the full name. An alternative that is not a valid regular
   * expression is skipped.
   *
   * @param names the names list to be filtered
   * @param pattern the filter pattern, only '*' and '|' are allowed as wildcards, others will
   *                follow regular expression convention
   * @return the names matching at least one alternative, de-duplicated and sorted
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val matched = pattern.trim().split("\\|").toList.flatMap { subPattern =>
      try {
        val regex = Pattern.compile("(?i)" + subPattern.replaceAll("\\*", ".*"))
        names.filter(name => regex.matcher(name).matches())
      } catch {
        // Best effort: an alternative that does not compile contributes no names.
        case _: PatternSyntaxException => Seq.empty[String]
      }
    }
    matched.distinct.sorted
  }
}