diff options
author | Rajesh Balamohan <rbalamohan@apache.org> | 2016-01-20 11:20:26 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-01-20 11:20:26 -0800 |
commit | e75e340a406b765608258b49f7e2f1107d4605fb (patch) | |
tree | cab53c6e5ccab42c43cce2e84ced920afd03801b | |
parent | 9753835cf3acc135e61bf668223046e29306c80d (diff) | |
download | spark-e75e340a406b765608258b49f7e2f1107d4605fb.tar.gz spark-e75e340a406b765608258b49f7e2f1107d4605fb.tar.bz2 spark-e75e340a406b765608258b49f7e2f1107d4605fb.zip |
[SPARK-12925][SQL] Improve HiveInspectors.unwrap for StringObjectInspector.getPrimitiveWritableObject
Text is already encoded in UTF-8, so converting it via "UTF8String.fromString" incurs a decode followed by a re-encode, which turns out to be expensive and redundant. Profiler snapshot details are attached to the JIRA (ref: https://issues.apache.org/jira/secure/attachment/12783331/SPARK-12925_profiler_cpu_samples.png)
Author: Rajesh Balamohan <rbalamohan@apache.org>
Closes #10848 from rajeshbalamohan/SPARK-12925.
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 7a260e72eb..5d84feb483 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -320,7 +320,9 @@ private[hive] trait HiveInspectors { case hvoi: HiveCharObjectInspector => UTF8String.fromString(hvoi.getPrimitiveJavaObject(data).getValue) case x: StringObjectInspector if x.preferWritable() => - UTF8String.fromString(x.getPrimitiveWritableObject(data).toString) + // Text is in UTF-8 already. No need to convert again via fromString + val wObj = x.getPrimitiveWritableObject(data) + UTF8String.fromBytes(wObj.getBytes, 0, wObj.getLength) case x: StringObjectInspector => UTF8String.fromString(x.getPrimitiveJavaObject(data)) case x: IntObjectInspector if x.preferWritable() => x.get(data) |