diff options
author | petermaxlee <petermaxlee@gmail.com> | 2016-06-30 09:27:48 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-06-30 09:27:48 +0800 |
commit | d3af6731fa270842818ed91d6b4d14708ddae2db (patch) | |
tree | ce2a17e0864be2cf6bc7cde83a451c80ffba3de1 /sql/catalyst/src/main | |
parent | 831a04f5d152d1839c0edfdf65bb728aa5957f16 (diff) | |
download | spark-d3af6731fa270842818ed91d6b4d14708ddae2db.tar.gz spark-d3af6731fa270842818ed91d6b4d14708ddae2db.tar.bz2 spark-d3af6731fa270842818ed91d6b4d14708ddae2db.zip |
[SPARK-16274][SQL] Implement xpath_boolean
## What changes were proposed in this pull request?
This patch implements the xpath_boolean expression for Spark SQL, an XPath function that returns true or false. The implementation is modelled after Hive's xpath_boolean, except for how the expression handles null inputs. Hive throws a NullPointerException at runtime if either of the inputs is null. This implementation returns null if either of the inputs is null.
## How was this patch tested?
Created two new test suites: one for unit tests covering the expression, and the other for end-to-end tests in SQL.
Author: petermaxlee <petermaxlee@gmail.com>
Closes #13964 from petermaxlee/SPARK-16274.
Diffstat (limited to 'sql/catalyst/src/main')
2 files changed, 60 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 0bde48ce57..3f9227a8ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.expressions.xml._ import org.apache.spark.sql.catalyst.util.StringKeyHashMap @@ -301,6 +302,7 @@ object FunctionRegistry { expression[UnBase64]("unbase64"), expression[Unhex]("unhex"), expression[Upper]("upper"), + expression[XPathBoolean]("xpath_boolean"), // datetime functions expression[AddMonths]("add_months"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala new file mode 100644 index 0000000000..2a5256c7f5 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathBoolean.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.expressions.xml

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{AbstractDataType, BooleanType, DataType, StringType}
import org.apache.spark.unsafe.types.UTF8String


/**
 * SQL expression `xpath_boolean(xml, path)`: evaluates the XPath expression `path` against the
 * XML document `xml` and yields a boolean.
 *
 * Both children are declared as strings. Evaluation goes through `nullSafeEval`, so the
 * expression is expected to produce null (rather than throw) when either input is null.
 *
 * @param xml  child expression producing the XML document text
 * @param path child expression producing the XPath expression text
 */
@ExpressionDescription(
  usage = "_FUNC_(xml, xpath) - Evaluates a boolean xpath expression.",
  extended = "> SELECT _FUNC_('<a><b>1</b></a>','a/b');\ntrue")
case class XPathBoolean(xml: Expression, path: Expression)
  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {

  override def left: Expression = xml

  override def right: Expression = path

  override def prettyName: String = "xpath_boolean"

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)

  override def dataType: DataType = BooleanType

  /** Helper that performs the actual XPath evaluation; created lazily and never serialized. */
  @transient private lazy val xpathUtil: UDFXPathUtil = new UDFXPathUtil

  /**
   * The path as a JVM string when `path` is a string literal, otherwise null.
   *
   * Computed once so that a constant path is not converted from UTF8String to String again for
   * every row. Null is used as the "not a constant" marker.
   */
  @transient lazy val pathLiteral: String = path match {
    case Literal(constant: UTF8String, _) => constant.toString
    case _ => null
  }

  /**
   * Evaluates the XPath against the XML for one row.
   *
   * @param xml  the evaluated XML input, a UTF8String
   * @param path the evaluated path input, a UTF8String; only converted when no cached literal
   *             is available
   * @return whatever `UDFXPathUtil.evalBoolean` returns for the two strings
   */
  override protected def nullSafeEval(xml: Any, path: Any): Any = {
    val xmlText = xml.asInstanceOf[UTF8String].toString
    // Prefer the cached constant; fall back to converting this row's path value.
    val xpathText =
      if (pathLiteral eq null) path.asInstanceOf[UTF8String].toString else pathLiteral
    xpathUtil.evalBoolean(xmlText, xpathText)
  }
}