aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
authorpetermaxlee <petermaxlee@gmail.com>2016-06-30 09:27:48 +0800
committerWenchen Fan <wenchen@databricks.com>2016-06-30 09:27:48 +0800
commitd3af6731fa270842818ed91d6b4d14708ddae2db (patch)
treece2a17e0864be2cf6bc7cde83a451c80ffba3de1 /sql/catalyst/src/main
parent831a04f5d152d1839c0edfdf65bb728aa5957f16 (diff)
downloadspark-d3af6731fa270842818ed91d6b4d14708ddae2db.tar.gz
spark-d3af6731fa270842818ed91d6b4d14708ddae2db.tar.bz2
spark-d3af6731fa270842818ed91d6b4d14708ddae2db.zip
[SPARK-16274][SQL] Implement xpath_boolean
## What changes were proposed in this pull request? This patch implements the xpath_boolean expression for Spark SQL, an XPath function that returns true or false. The implementation is modelled after Hive's xpath_boolean, except for how the expression handles null inputs. Hive throws a NullPointerException at runtime if either of the inputs is null. This implementation returns null if either of the inputs is null. ## How was this patch tested? Created two new test suites. One for unit tests covering the expression, and the other for end-to-end tests in SQL. Author: petermaxlee <petermaxlee@gmail.com> Closes #13964 from petermaxlee/SPARK-16274.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala58
2 files changed, 60 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 0bde48ce57..3f9227a8ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.expressions.xml._
import org.apache.spark.sql.catalyst.util.StringKeyHashMap
@@ -301,6 +302,7 @@ object FunctionRegistry {
expression[UnBase64]("unbase64"),
expression[Unhex]("unhex"),
expression[Upper]("upper"),
+ expression[XPathBoolean]("xpath_boolean"),
// datetime functions
expression[AddMonths]("add_months"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala
new file mode 100644
index 0000000000..2a5256c7f5
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.xml
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{AbstractDataType, BooleanType, DataType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+
+
+@ExpressionDescription(
+ usage = "_FUNC_(xml, xpath) - Evaluates a boolean xpath expression.",
+ extended = "> SELECT _FUNC_('<a><b>1</b></a>','a/b');\ntrue")
+case class XPathBoolean(xml: Expression, path: Expression) // SQL: xpath_boolean(xml, path)
+ extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
+
+ @transient private lazy val xpathUtil = new UDFXPathUtil // created lazily, on first use
+
+ // If the path is a literal, cache it as a String so that it is not converted from UTF8String
+ // for every row. The cached value is null when the path is not a UTF8String literal.
+ @transient lazy val pathLiteral: String = path match {
+ case Literal(str: UTF8String, _) => str.toString
+ case _ => null
+ }
+
+ override def prettyName: String = "xpath_boolean"
+
+ override def dataType: DataType = BooleanType // the expression yields a boolean
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) // (xml, path)
+
+ override def left: Expression = xml // left operand: the XML input
+ override def right: Expression = path // right operand: the XPath input
+
+ override protected def nullSafeEval(xml: Any, path: Any): Any = {
+ val xmlString = xml.asInstanceOf[UTF8String].toString // converted on every call
+ if (pathLiteral ne null) { // literal path: reuse the cached String
+ xpathUtil.evalBoolean(xmlString, pathLiteral)
+ } else { // non-literal path: convert the path value passed to this call
+ xpathUtil.evalBoolean(xmlString, path.asInstanceOf[UTF8String].toString)
+ }
+ }
+}