aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2016-09-17 16:52:30 +0100
committerSean Owen <sowen@cloudera.com>2016-09-17 16:52:30 +0100
commit86c2d393a56bf1e5114bc5a781253c0460efb8af (patch)
tree781bed6a1c59c58ac3b5d6bcd76b48aeb56e9098 /sql/hive
parentbbe0b1d623741decce98827130cc67eb1fff1240 (diff)
downloadspark-86c2d393a56bf1e5114bc5a781253c0460efb8af.tar.gz
spark-86c2d393a56bf1e5114bc5a781253c0460efb8af.tar.bz2
spark-86c2d393a56bf1e5114bc5a781253c0460efb8af.zip
[SPARK-17480][SQL][FOLLOWUP] Fix more instances which calls List.length/size which is O(n)
## What changes were proposed in this pull request? This PR fixes all the instances which was fixed in the previous PR. To make sure, I manually debugged and also checked the Scala source. `length` in [LinearSeqOptimized.scala#L49-L57](https://github.com/scala/scala/blob/2.11.x/src/library/scala/collection/LinearSeqOptimized.scala#L49-L57) is O(n). Also, `size` calls `length` via [SeqLike.scala#L106](https://github.com/scala/scala/blob/2.11.x/src/library/scala/collection/SeqLike.scala#L106). For debugging, I have created these as below: ```scala ArrayBuffer(1, 2, 3) Array(1, 2, 3) List(1, 2, 3) Seq(1, 2, 3) ``` and then called `size` and `length` for each to debug. ## How was this patch tested? I ran the bash as below on Mac ```bash find . -name *.scala -type f -exec grep -il "while (.*\\.length)" {} \; | grep "src/main" find . -name *.scala -type f -exec grep -il "while (.*\\.size)" {} \; | grep "src/main" ``` and then checked each. Author: hyukjinkwon <gurwls223@gmail.com> Closes #15093 from HyukjinKwon/SPARK-17480-followup.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala6
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala3
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala3
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala6
4 files changed, 12 insertions, 6 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 4e74452f6c..e4b963efea 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -703,7 +703,8 @@ private[hive] trait HiveInspectors {
// 1. create the pojo (most likely) object
val result = x.create()
var i = 0
- while (i < fieldRefs.size) {
+ val size = fieldRefs.size
+ while (i < size) {
// 2. set the property for the pojo
val tpe = structType(i).dataType
x.setStructFieldData(
@@ -720,7 +721,8 @@ private[hive] trait HiveInspectors {
val row = a.asInstanceOf[InternalRow]
val result = new java.util.ArrayList[AnyRef](fieldRefs.size)
var i = 0
- while (i < fieldRefs.size) {
+ val size = fieldRefs.size
+ while (i < size) {
val tpe = structType(i).dataType
result.add(wrap(row.get(i, tpe), fieldRefs.get(i).getFieldObjectInspector, tpe))
i += 1
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index b4808fdbed..ec7e53efc8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -427,7 +427,8 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
iterator.map { value =>
val raw = converter.convert(rawDeser.deserialize(value))
var i = 0
- while (i < fieldRefs.length) {
+ val length = fieldRefs.length
+ while (i < length) {
val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
if (fieldValue == null) {
mutableRow.setNullAt(fieldOrdinals(i))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index 9347aeb8e0..962dd5a52e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -153,7 +153,8 @@ private[hive] case class HiveGenericUDF(
returnInspector // Make sure initialized.
var i = 0
- while (i < children.length) {
+ val length = children.length
+ while (i < length) {
val idx = i
deferredObjects(i).asInstanceOf[DeferredObjectAdapter]
.set(() => children(idx).eval(input))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 286197b50e..03b508e11a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -190,7 +190,8 @@ private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration)
row: InternalRow): Unit = {
val fieldRefs = oi.getAllStructFieldRefs
var i = 0
- while (i < fieldRefs.size) {
+ val size = fieldRefs.size
+ while (i < size) {
oi.setStructFieldData(
struct,
@@ -289,7 +290,8 @@ private[orc] object OrcRelation extends HiveInspectors {
iterator.map { value =>
val raw = deserializer.deserialize(value)
var i = 0
- while (i < fieldRefs.length) {
+ val length = fieldRefs.length
+ while (i < length) {
val fieldValue = oi.getStructFieldData(raw, fieldRefs(i))
if (fieldValue == null) {
mutableRow.setNullAt(fieldOrdinals(i))