[SPARK-3771][SQL] AppendingParquetOutputFormat should use reflection to avoid breaking binary compatibility.

The original problem is [SPARK-3764](https://issues.apache.org/jira/browse/SPARK-3764). `AppendingParquetOutputFormat` calls the binary-incompatible method `context.getTaskAttemptID`. This makes Spark itself binary-incompatible across Hadoop versions, i.e. if Spark is built against hadoop-1, the resulting artifact works only with hadoop-1, and vice versa. Author: Takuya UESHIN <ueshin@happy-camper.st> Closes #2638 from ueshin/issues/SPARK-3771 and squashes the following commits: efd3784 [Takuya UESHIN] Add a comment to explain the reason to use reflection. ec213c1 [Takuya UESHIN] Use reflection to prevent breaking binary-compatibility.
author: Takuya UESHIN <ueshin@happy-camper.st> 2014-10-13 13:43:41 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-10-13 13:43:41 -0700
commit: 73da9c26b0e2e8bf0ab055906211727a7097c963 (patch)
tree: 192007dc030b89c96eb4a03d305845f354701d3b
parent: d3cdf9128ae828dc7f1893439f66a0de68c6e527 (diff)
download: spark-73da9c26b0e2e8bf0ab055906211727a7097c963.tar.gz
spark-73da9c26b0e2e8bf0ab055906211727a7097c963.tar.bz2
spark-73da9c26b0e2e8bf0ab055906211727a7097c963.zip
1 files changed, 9 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index ffb732347d..1f4237d7ed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -331,13 +331,21 @@ private[parquet] class AppendingParquetOutputFormat(offset: Int)
 
   // override to choose output filename so not overwrite existing ones
   override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
-    val taskId: TaskID = context.getTaskAttemptID.getTaskID
+    val taskId: TaskID = getTaskAttemptID(context).getTaskID
     val partition: Int = taskId.getId
     val filename = s"part-r-${partition + offset}.parquet"
     val committer: FileOutputCommitter =
       getOutputCommitter(context).asInstanceOf[FileOutputCommitter]
     new Path(committer.getWorkPath, filename)
   }
+
+  // The TaskAttemptContext is a class in hadoop-1 but is an interface in hadoop-2.
+  // The signatures of the method TaskAttemptContext.getTaskAttemptID for the both versions
+  // are the same, so the method calls are source-compatible but NOT binary-compatible because
+  // the opcode of method call for class is INVOKEVIRTUAL and for interface is INVOKEINTERFACE.
+  private def getTaskAttemptID(context: TaskAttemptContext): TaskAttemptID = {
+    context.getClass.getMethod("getTaskAttemptID").invoke(context).asInstanceOf[TaskAttemptID]
+  }
 }
 
 /**
author	Takuya UESHIN <ueshin@happy-camper.st>	2014-10-13 13:43:41 -0700
committer	Michael Armbrust <michael@databricks.com>	2014-10-13 13:43:41 -0700
commit	73da9c26b0e2e8bf0ab055906211727a7097c963 (patch)
tree	192007dc030b89c96eb4a03d305845f354701d3b
parent	d3cdf9128ae828dc7f1893439f66a0de68c6e527 (diff)
download	spark-73da9c26b0e2e8bf0ab055906211727a7097c963.tar.gz spark-73da9c26b0e2e8bf0ab055906211727a7097c963.tar.bz2 spark-73da9c26b0e2e8bf0ab055906211727a7097c963.zip