diff options
author | navis.ryu <navis@apache.org> | 2015-10-21 15:07:08 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2015-10-21 15:07:08 -0700 |
commit | f481090a71940f06602a73f5bbd004980dea026f (patch) | |
tree | fe7d29da9769022027aec8371bef572a16e13848 /sql/hive/src | |
parent | dce2f8c9d74bda46f3c7a7ebe7372b04d6b17a61 (diff) | |
download | spark-f481090a71940f06602a73f5bbd004980dea026f.tar.gz spark-f481090a71940f06602a73f5bbd004980dea026f.tar.bz2 spark-f481090a71940f06602a73f5bbd004980dea026f.zip |
[SPARK-10151][SQL] Support invocation of hive macro
Macro in hive (which is GenericUDFMacro) contains real function inside of it but it's not conveyed to tasks, resulting null-pointer exception.
Author: navis.ryu <navis@apache.org>
Closes #8354 from navis/SPARK-10151.
Diffstat (limited to 'sql/hive/src')
27 files changed, 27 insertions, 8 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index c7460d46b6..c328734df3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.{URL, URLClassLoader} import java.sql.Timestamp import java.util.concurrent.TimeUnit +import java.util.regex.Pattern import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap @@ -592,10 +593,14 @@ class HiveContext private[hive]( ) } + private def functionOrMacroDDLPattern(command: String) = Pattern.compile( + ".*(create|drop)\\s+(temporary\\s+)?(function|macro).+", Pattern.DOTALL).matcher(command) + protected[hive] def runSqlHive(sql: String): Seq[String] = { - if (sql.toLowerCase.contains("create temporary function")) { + val command = sql.trim.toLowerCase + if (functionOrMacroDDLPattern(command).matches()) { executionHive.runSqlHive(sql) - } else if (sql.trim.toLowerCase.startsWith("set")) { + } else if (command.startsWith("set")) { metadataHive.runSqlHive(sql) executionHive.runSqlHive(sql) } else { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index cfe2bb05ad..43c238fd49 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -723,6 +723,10 @@ private[hive] trait HiveInspectors { inspectorToDataType(m.getMapValueObjectInspector)) case _: WritableStringObjectInspector => StringType case _: JavaStringObjectInspector => StringType + case _: WritableHiveVarcharObjectInspector => StringType + case _: JavaHiveVarcharObjectInspector => StringType + case _: WritableHiveCharObjectInspector => StringType + case _: JavaHiveCharObjectInspector => StringType case _: WritableIntObjectInspector => IntegerType case _: JavaIntObjectInspector => IntegerType case _: WritableDoubleObjectInspector => DoubleType diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index d4ff5cc0f1..3697761f20 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -117,6 +117,7 @@ private[hive] object HiveQl extends Logging { "TOK_CREATEDATABASE", "TOK_CREATEFUNCTION", "TOK_CREATEINDEX", + "TOK_CREATEMACRO", "TOK_CREATEROLE", "TOK_DESCDATABASE", @@ -125,6 +126,7 @@ private[hive] object HiveQl extends Logging { "TOK_DROPDATABASE", "TOK_DROPFUNCTION", "TOK_DROPINDEX", + "TOK_DROPMACRO", "TOK_DROPROLE", "TOK_DROPTABLE_PROPERTIES", "TOK_DROPVIEW", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 004805f3ae..f0697613cf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -117,9 +117,10 @@ private[hive] object HiveShim { * Detail discussion can be found at https://github.com/apache/spark/pull/3640 * * @param functionClassName UDF class name + * @param instance optional UDF instance which contains additional information (for macro) */ - private[hive] case class HiveFunctionWrapper(var functionClassName: String) - extends java.io.Externalizable { + private[hive] case class HiveFunctionWrapper(var functionClassName: String, + private var instance: AnyRef = null) extends java.io.Externalizable { // for Serialization def this() = this(null) @@ -154,8 +155,6 @@ private[hive] object HiveShim { serializeObjectByKryo(Utilities.runtimeSerializationKryo.get(), function, out) } - private var instance: AnyRef = null - def writeExternal(out: java.io.ObjectOutput) { // output the function name out.writeUTF(functionClassName) @@ -184,7 +183,7 @@ private[hive] object HiveShim { // read the function in bytes val functionInBytesLength = in.readInt() val functionInBytes = new Array[Byte](functionInBytesLength) - in.read(functionInBytes, 0, functionInBytesLength) + in.readFully(functionInBytes) // deserialize the function object via Hive Utilities instance = deserializePlan[AnyRef](new java.io.ByteArrayInputStream(functionInBytes), diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index a2ebf6552f..f57b206999 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -64,7 +64,10 @@ private[hive] class HiveFunctionRegistry(underlying: analysis.FunctionRegistry) // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we // catch the exception and throw AnalysisException instead. try { - if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { + if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { + HiveGenericUDF( + new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF), children) + } else if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { HiveSimpleUDF(new HiveFunctionWrapper(functionClassName), children) } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) { HiveGenericUDF(new HiveFunctionWrapper(functionClassName), children) diff --git a/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada b/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 b/sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 diff --git a/sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c b/sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c diff --git a/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 b/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 b/sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 diff --git a/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 b/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 @@ -0,0 +1 @@ +1 diff --git a/sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d b/sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d diff --git a/sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 b/sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 diff --git a/sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 b/sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 diff --git a/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 b/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 new file mode 100644 index 0000000000..f599e28b8a --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 @@ -0,0 +1 @@ +10 diff --git a/sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a b/sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a diff --git a/sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 b/sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 diff --git a/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec b/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec new file mode 100644 index 0000000000..b49805ff63 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec @@ -0,0 +1 @@ +0.8807970779778823 diff --git a/sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 b/sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 diff --git a/sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 b/sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 diff --git a/sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff b/sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff diff --git a/sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda b/sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda diff --git a/sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 b/sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 diff --git a/sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe b/sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe diff --git a/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c b/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c new file mode 100644 index 0000000000..0cfbf08886 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c @@ -0,0 +1 @@ +2 diff --git a/sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 b/sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 diff --git a/sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e b/sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e |