From f481090a71940f06602a73f5bbd004980dea026f Mon Sep 17 00:00:00 2001 From: "navis.ryu" Date: Wed, 21 Oct 2015 15:07:08 -0700 Subject: [SPARK-10151][SQL] Support invocation of hive macro Macro in hive (which is GenericUDFMacro) contains real function inside of it but it's not conveyed to tasks, resulting null-pointer exception. Author: navis.ryu Closes #8354 from navis/SPARK-10151. --- .../apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 1 + .../src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 9 +++++++-- .../main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 ++++ sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 2 ++ sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala | 9 ++++----- sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala | 5 ++++- .../resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada | 1 + .../resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 | 0 .../resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c | 0 .../resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 | 1 + .../resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 | 0 .../resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 | 1 + .../resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d | 0 .../resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 | 0 .../resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 | 0 .../resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 | 1 + .../resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a | 0 .../resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 | 0 .../resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec | 1 + .../resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 | 0 .../resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 | 0 .../resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff | 0 .../resources/golden/macro-4-86292bbb7f147393c38bca051768dbda | 0 .../resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 | 0 .../resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe | 0 .../resources/golden/macro-7-decde0a59183a393e580941c633d3c5c | 1 + .../resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 | 0 .../resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e | 0 28 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada create mode 100644 sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 create mode 100644 sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c create mode 100644 sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 create mode 100644 sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 create mode 100644 sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 create mode 100644 sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d create mode 100644 sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 create mode 100644 sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 create mode 100644 sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 create mode 100644 sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a create mode 100644 sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 create mode 100644 sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec create mode 100644 sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 create mode 100644 sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 create mode 100644 sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff create mode 100644 sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda create mode 100644 sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 create mode 100644 sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe create mode 100644 sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c create mode 100644 sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 create mode 100644 sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e (limited to 'sql/hive') diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 8f29fa91f7..eed9e436f9 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -684,6 +684,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "load_file_with_space_in_the_name", "loadpart1", "louter_join_ppr", + "macro", "mapjoin_distinct", "mapjoin_filter_on_outerjoin", "mapjoin_mapjoin", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index c7460d46b6..c328734df3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.{URL, URLClassLoader} import java.sql.Timestamp import java.util.concurrent.TimeUnit +import java.util.regex.Pattern import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap @@ -592,10 +593,14 @@ class HiveContext private[hive]( ) } + private def functionOrMacroDDLPattern(command: String) = Pattern.compile( + ".*(create|drop)\\s+(temporary\\s+)?(function|macro).+", Pattern.DOTALL).matcher(command) + protected[hive] def runSqlHive(sql: String): Seq[String] = { - if (sql.toLowerCase.contains("create temporary function")) { + val command = sql.trim.toLowerCase + if (functionOrMacroDDLPattern(command).matches()) { executionHive.runSqlHive(sql) - } else if (sql.trim.toLowerCase.startsWith("set")) { + } else if (command.startsWith("set")) { metadataHive.runSqlHive(sql) executionHive.runSqlHive(sql) } else { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index cfe2bb05ad..43c238fd49 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -723,6 +723,10 @@ private[hive] trait HiveInspectors { inspectorToDataType(m.getMapValueObjectInspector)) case _: WritableStringObjectInspector => StringType case _: JavaStringObjectInspector => StringType + case _: WritableHiveVarcharObjectInspector => StringType + case _: JavaHiveVarcharObjectInspector => StringType + case _: WritableHiveCharObjectInspector => StringType + case _: JavaHiveCharObjectInspector => StringType case _: WritableIntObjectInspector => IntegerType case _: JavaIntObjectInspector => IntegerType case _: WritableDoubleObjectInspector => DoubleType diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index d4ff5cc0f1..3697761f20 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -117,6 +117,7 @@ private[hive] object HiveQl extends Logging { "TOK_CREATEDATABASE", "TOK_CREATEFUNCTION", "TOK_CREATEINDEX", + "TOK_CREATEMACRO", "TOK_CREATEROLE", "TOK_DESCDATABASE", @@ -125,6 +126,7 @@ private[hive] object HiveQl extends Logging { "TOK_DROPDATABASE", "TOK_DROPFUNCTION", "TOK_DROPINDEX", + "TOK_DROPMACRO", "TOK_DROPROLE", "TOK_DROPTABLE_PROPERTIES", "TOK_DROPVIEW", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 004805f3ae..f0697613cf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -117,9 +117,10 @@ private[hive] object HiveShim { * Detail discussion can be found at https://github.com/apache/spark/pull/3640 * * @param functionClassName UDF class name + * @param instance optional UDF instance which contains additional information (for macro) */ - private[hive] case class HiveFunctionWrapper(var functionClassName: String) - extends java.io.Externalizable { + private[hive] case class HiveFunctionWrapper(var functionClassName: String, + private var instance: AnyRef = null) extends java.io.Externalizable { // for Serialization def this() = this(null) @@ -154,8 +155,6 @@ private[hive] object HiveShim { serializeObjectByKryo(Utilities.runtimeSerializationKryo.get(), function, out) } - private var instance: AnyRef = null - def writeExternal(out: java.io.ObjectOutput) { // output the function name out.writeUTF(functionClassName) @@ -184,7 +183,7 @@ private[hive] object HiveShim { // read the function in bytes val functionInBytesLength = in.readInt() val functionInBytes = new Array[Byte](functionInBytesLength) - in.read(functionInBytes, 0, functionInBytesLength) + in.readFully(functionInBytes) // deserialize the function object via Hive Utilities instance = deserializePlan[AnyRef](new java.io.ByteArrayInputStream(functionInBytes), diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index a2ebf6552f..f57b206999 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -64,7 +64,10 @@ private[hive] class HiveFunctionRegistry(underlying: analysis.FunctionRegistry) // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we // catch the exception and throw AnalysisException instead. try { - if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { + if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { + HiveGenericUDF( + new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF), children) + } else if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { HiveSimpleUDF(new HiveFunctionWrapper(functionClassName), children) } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) { HiveGenericUDF(new HiveFunctionWrapper(functionClassName), children) diff --git a/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada b/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-0-50131c0ba7b7a6b65c789a5a8497bada @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 b/sql/hive/src/test/resources/golden/macro-1-5ff5e8795c13303db5d3ea88e1e918b6 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c b/sql/hive/src/test/resources/golden/macro-10-45148a37f6ee9cf498dc7308cbd81a1c new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 b/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-11-f55b8684c77f6eefc2618ba79e5e0587 @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 b/sql/hive/src/test/resources/golden/macro-12-62b999122975c2a5de8e49fee089c041 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 b/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-13-87e53d2b4c84098e662779e8f0a59084 @@ -0,0 +1 @@ +1 diff --git a/sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d b/sql/hive/src/test/resources/golden/macro-14-3a31df84432674ad410f44b137e32c2d new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 b/sql/hive/src/test/resources/golden/macro-15-56966c45104c0d9bc407e79538c2c029 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 b/sql/hive/src/test/resources/golden/macro-16-56966c45104c0d9bc407e79538c2c029 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 b/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 new file mode 100644 index 0000000000..f599e28b8a --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-17-b3864f1d19fdb88b3b74f6d74a0ba548 @@ -0,0 +1 @@ +10 diff --git a/sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a b/sql/hive/src/test/resources/golden/macro-18-bddb2fe17cd4d850c4462b7eb2b9bc2a new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 b/sql/hive/src/test/resources/golden/macro-19-e3c828c372607b8bf7be00a99359b662 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec b/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec new file mode 100644 index 0000000000..b49805ff63 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-2-fde44c7854a9897acb4c2f78f24c8eec @@ -0,0 +1 @@ +0.8807970779778823 diff --git a/sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 b/sql/hive/src/test/resources/golden/macro-20-cb252a243d59809930a4ff371cbfa292 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 b/sql/hive/src/test/resources/golden/macro-21-cb252a243d59809930a4ff371cbfa292 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff b/sql/hive/src/test/resources/golden/macro-3-ddc4cb920b0a68e06551cd34ae4e29ff new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda b/sql/hive/src/test/resources/golden/macro-4-86292bbb7f147393c38bca051768dbda new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 b/sql/hive/src/test/resources/golden/macro-5-ca270bff813e5ab18a6a799016693aa8 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe b/sql/hive/src/test/resources/golden/macro-6-8976be22af3aba0cc4905e014b4e24fe new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c b/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c new file mode 100644 index 0000000000..0cfbf08886 --- /dev/null +++ b/sql/hive/src/test/resources/golden/macro-7-decde0a59183a393e580941c633d3c5c @@ -0,0 +1 @@ +2 diff --git a/sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 b/sql/hive/src/test/resources/golden/macro-8-3d25ffda9ab348f3e39ad967fc0e5020 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e b/sql/hive/src/test/resources/golden/macro-9-db5f5172704da1e6dd5d59c136b83e7e new file mode 100644 index 0000000000..e69de29bb2 -- cgit v1.2.3