From e75e340a406b765608258b49f7e2f1107d4605fb Mon Sep 17 00:00:00 2001 From: Rajesh Balamohan Date: Wed, 20 Jan 2016 11:20:26 -0800 Subject: [SPARK-12925][SQL] Improve HiveInspectors.unwrap for StringObjectIns… MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Text is in UTF-8 and converting it via "UTF8String.fromString" incurs decoding and encoding, which turns out to be expensive and redundant. Profiler snapshot details are attached in the JIRA (ref: https://issues.apache.org/jira/secure/attachment/12783331/SPARK-12925_profiler_cpu_samples.png) Author: Rajesh Balamohan Closes #10848 from rajeshbalamohan/SPARK-12925. --- .../src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 7a260e72eb..5d84feb483 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -320,7 +320,9 @@ private[hive] trait HiveInspectors { case hvoi: HiveCharObjectInspector => UTF8String.fromString(hvoi.getPrimitiveJavaObject(data).getValue) case x: StringObjectInspector if x.preferWritable() => - UTF8String.fromString(x.getPrimitiveWritableObject(data).toString) + // Text is in UTF-8 already. No need to convert again via fromString + val wObj = x.getPrimitiveWritableObject(data) + UTF8String.fromBytes(wObj.getBytes, 0, wObj.getLength) case x: StringObjectInspector => UTF8String.fromString(x.getPrimitiveJavaObject(data)) case x: IntObjectInspector if x.preferWritable() => x.get(data) -- cgit v1.2.3