aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRajesh Balamohan <rbalamohan@apache.org>2016-01-20 11:20:26 -0800
committerReynold Xin <rxin@databricks.com>2016-01-20 11:20:26 -0800
commite75e340a406b765608258b49f7e2f1107d4605fb (patch)
treecab53c6e5ccab42c43cce2e84ced920afd03801b
parent9753835cf3acc135e61bf668223046e29306c80d (diff)
downloadspark-e75e340a406b765608258b49f7e2f1107d4605fb.tar.gz
spark-e75e340a406b765608258b49f7e2f1107d4605fb.tar.bz2
spark-e75e340a406b765608258b49f7e2f1107d4605fb.zip
[SPARK-12925][SQL] Improve HiveInspectors.unwrap for StringObjectIns…
Text is already in UTF-8, so converting it via "UTF8String.fromString" incurs decoding and re-encoding, which turns out to be expensive and redundant. Profiler snapshot details are attached in the JIRA (ref: https://issues.apache.org/jira/secure/attachment/12783331/SPARK-12925_profiler_cpu_samples.png). Author: Rajesh Balamohan <rbalamohan@apache.org>. Closes #10848 from rajeshbalamohan/SPARK-12925.
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala4
1 files changed, 3 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 7a260e72eb..5d84feb483 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -320,7 +320,9 @@ private[hive] trait HiveInspectors {
case hvoi: HiveCharObjectInspector =>
UTF8String.fromString(hvoi.getPrimitiveJavaObject(data).getValue)
case x: StringObjectInspector if x.preferWritable() =>
- UTF8String.fromString(x.getPrimitiveWritableObject(data).toString)
+ // Text is in UTF-8 already. No need to convert again via fromString
+ val wObj = x.getPrimitiveWritableObject(data)
+ UTF8String.fromBytes(wObj.getBytes, 0, wObj.getLength)
case x: StringObjectInspector =>
UTF8String.fromString(x.getPrimitiveJavaObject(data))
case x: IntObjectInspector if x.preferWritable() => x.get(data)