From 3b461d9ecd633c4fd659998b99e700d76f58d18a Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Wed, 16 Mar 2016 09:36:34 +0000
Subject: [SPARK-13823][SPARK-13397][SPARK-13395][CORE] More warnings,
 StandardCharset follow up

## What changes were proposed in this pull request?

Follow-up to https://github.com/apache/spark/pull/11657:

- Update the remaining `String.getBytes("UTF-8")` calls to use `StandardCharsets.UTF_8`
- Fix one last new Coverity warning that turned up: an unguarded `wait()` is replaced by simpler, more robust `java.util.concurrent` classes in tests (both patterns are sketched below)
- While cleaning up Coverity warnings, also fix about 15 more build warnings
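For reference, a minimal sketch of the two patterns in Scala. This is illustrative only, not code from this patch; the object, thread, and variable names are made up.

```scala
import java.nio.charset.StandardCharsets
import java.util.concurrent.{CountDownLatch, TimeUnit}

object WarningFixSketch {
  def main(args: Array[String]): Unit = {
    // 1. Charset constant instead of a charset-name string: no checked
    //    UnsupportedEncodingException to handle, and a misspelled charset
    //    name becomes impossible rather than a runtime failure.
    val bytes: Array[Byte] = "val_1".getBytes(StandardCharsets.UTF_8)
    assert(bytes.length == 5)

    // 2. CountDownLatch instead of raw wait()/notify(). An unguarded wait()
    //    can wake up spuriously, or miss a notification sent before the
    //    wait began; the latch carries the completion state for us.
    val latch = new CountDownLatch(1)
    val worker = new Thread(new Runnable {
      override def run(): Unit = {
        // ... do the work the test is waiting on ...
        latch.countDown() // signal completion; replaces notify()
      }
    })
    worker.start()
    // Replaces "synchronized { wait() }" and also bounds the wait time.
    assert(latch.await(10, TimeUnit.SECONDS), "worker did not finish in time")
  }
}
```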
## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #11725 from srowen/SPARK-13823.2.
---
 .../spark/sql/catalyst/expressions/MathFunctionsSuite.scala    |  4 ++--
 .../datasources/parquet/UnsafeRowParquetRecordReader.java      |  5 +++--
 .../org/apache/spark/sql/execution/python/EvaluatePython.scala |  9 ++++++---
 .../apache/spark/sql/execution/columnar/ColumnTypeSuite.scala  |  3 ++-
 .../sql/execution/columnar/InMemoryColumnarQuerySuite.scala    |  3 ++-
 .../spark/sql/execution/datasources/json/JsonSuite.scala       |  4 ++--
 .../datasources/parquet/ParquetAvroCompatibilitySuite.scala    | 10 +++++-----
 .../sql/execution/datasources/parquet/ParquetFilterSuite.scala |  4 +++-
 .../org/apache/spark/sql/execution/joins/OuterJoinSuite.scala  |  1 +
 .../apache/spark/sql/hive/execution/ScriptTransformation.scala |  2 +-
 .../apache/spark/sql/hive/execution/HiveComparisonTest.scala   |  3 ++-
 .../scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala   |  4 +++-
 .../scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala    |  2 +-
 13 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index d6ac4040b7..bd674dadd0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -444,7 +444,7 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(Hex(Literal("helloHex".getBytes(StandardCharsets.UTF_8))), "68656C6C6F486578")
     // scalastyle:off
     // Turn off scala style for non-ascii chars
-    checkEvaluation(Hex(Literal("三重的".getBytes("UTF8"))), "E4B889E9878DE79A84")
+    checkEvaluation(Hex(Literal("三重的".getBytes(StandardCharsets.UTF_8))), "E4B889E9878DE79A84")
     // scalastyle:on
     Seq(LongType, BinaryType, StringType).foreach { dt =>
       checkConsistencyBetweenInterpretedAndCodegen(Hex.apply _, dt)
@@ -460,7 +460,7 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(Unhex(Literal("GG")), null)
     // scalastyle:off
     // Turn off scala style for non-ascii chars
-    checkEvaluation(Unhex(Literal("E4B889E9878DE79A84")), "三重的".getBytes("UTF-8"))
+    checkEvaluation(Unhex(Literal("E4B889E9878DE79A84")), "三重的".getBytes(StandardCharsets.UTF_8))
     checkEvaluation(Unhex(Literal("三重的")), null)
     // scalastyle:on
     checkConsistencyBetweenInterpretedAndCodegen(Unhex, StringType)
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java
index 7d768b165f..7234726633 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java
@@ -846,8 +846,9 @@ public class UnsafeRowParquetRecordReader extends SpecificParquetRecordReaderBase
           " as the dictionary was missing for encoding " + dataEncoding);
       }
       if (vectorizedDecode()) {
-        if (dataEncoding != Encoding.PLAIN_DICTIONARY &&
-            dataEncoding != Encoding.RLE_DICTIONARY) {
+        @SuppressWarnings("deprecation")
+        Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression
+        if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) {
           throw new NotImplementedException("Unsupported encoding: " + dataEncoding);
         }
         this.dataColumn = new VectorizedRleValuesReader();
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
index 8c46516594..da28ec4f53 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.python

 import java.io.OutputStream
+import java.nio.charset.StandardCharsets

 import scala.collection.JavaConverters._

@@ -136,7 +137,7 @@ object EvaluatePython {

     case (c, StringType) => UTF8String.fromString(c.toString)

-    case (c: String, BinaryType) => c.getBytes("utf-8")
+    case (c: String, BinaryType) => c.getBytes(StandardCharsets.UTF_8)
     case (c, BinaryType) if c.getClass.isArray && c.getClass.getComponentType.getName == "byte" => c

     case (c: java.util.List[_], ArrayType(elementType, _)) =>
@@ -185,7 +186,8 @@ object EvaluatePython {
     def pickle(obj: Object, out: OutputStream, pickler: Pickler): Unit = {
       out.write(Opcodes.GLOBAL)
-      out.write((module + "\n" + "_parse_datatype_json_string" + "\n").getBytes("utf-8"))
+      out.write(
+        (module + "\n" + "_parse_datatype_json_string" + "\n").getBytes(StandardCharsets.UTF_8))
       val schema = obj.asInstanceOf[StructType]
       pickler.save(schema.json)
       out.write(Opcodes.TUPLE1)
@@ -209,7 +211,8 @@ object EvaluatePython {
     def pickle(obj: Object, out: OutputStream, pickler: Pickler): Unit = {
       if (obj == this) {
         out.write(Opcodes.GLOBAL)
-        out.write((module + "\n" + "_create_row_inbound_converter" + "\n").getBytes("utf-8"))
+        out.write(
+          (module + "\n" + "_create_row_inbound_converter" + "\n").getBytes(StandardCharsets.UTF_8))
       } else {
         // it will be memorized by Pickler to save some bytes
         pickler.save(this)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
index 9ca8c4d2ed..9d7570fe7a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.columnar

 import java.nio.{ByteBuffer, ByteOrder}
+import java.nio.charset.StandardCharsets

 import org.apache.spark.{Logging, SparkFunSuite}
 import org.apache.spark.sql.Row
@@ -67,7 +68,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
     checkActualSize(LONG, Long.MaxValue, 8)
     checkActualSize(FLOAT, Float.MaxValue, 4)
     checkActualSize(DOUBLE, Double.MaxValue, 8)
"hello".getBytes("utf-8").length) + checkActualSize(STRING, "hello", 4 + "hello".getBytes(StandardCharsets.UTF_8).length) checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4) checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8) checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 6e21d5a061..0940878e38 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.columnar +import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import org.apache.spark.sql.{QueryTest, Row} @@ -160,7 +161,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { sparkContext.parallelize((1 to 10000), 10).map { i => Row( s"str${i}: test cache.", - s"binary${i}: test cache.".getBytes("UTF-8"), + s"binary${i}: test cache.".getBytes(StandardCharsets.UTF_8), null, i % 2 == 0, i.toByte, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 4671b2dca9..4a8c128fa9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.json import java.io.{File, StringWriter} +import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ @@ -27,7 +28,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec -import org.scalactic.Tolerance._ import org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -1292,7 +1292,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { val constantValues = Seq( - "a string in binary".getBytes("UTF-8"), + "a string in binary".getBytes(StandardCharsets.UTF_8), null, true, 1.toByte, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala index 36b929ee1f..f98ea8c5ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.io.File import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets import java.util.{List => JList, Map => JMap} import scala.collection.JavaConverters._ @@ -59,7 +59,7 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with Shared .setLongColumn(i.toLong * 10) .setFloatColumn(i.toFloat + 0.1f) .setDoubleColumn(i.toDouble + 0.2d) - .setBinaryColumn(ByteBuffer.wrap(s"val_$i".getBytes("UTF-8"))) + 
+          .setBinaryColumn(ByteBuffer.wrap(s"val_$i".getBytes(StandardCharsets.UTF_8)))
           .setStringColumn(s"val_$i")
           .build())
       }
@@ -74,7 +74,7 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext {
           i.toLong * 10,
           i.toFloat + 0.1f,
           i.toDouble + 0.2d,
-          s"val_$i".getBytes("UTF-8"),
+          s"val_$i".getBytes(StandardCharsets.UTF_8),
           s"val_$i")
       })
     }
@@ -103,7 +103,7 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext {
           .setMaybeLongColumn(i.toLong * 10)
           .setMaybeFloatColumn(i.toFloat + 0.1f)
           .setMaybeDoubleColumn(i.toDouble + 0.2d)
-          .setMaybeBinaryColumn(ByteBuffer.wrap(s"val_$i".getBytes("UTF-8")))
+          .setMaybeBinaryColumn(ByteBuffer.wrap(s"val_$i".getBytes(StandardCharsets.UTF_8)))
           .setMaybeStringColumn(s"val_$i")
           .build()
       }
@@ -124,7 +124,7 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext {
           i.toLong * 10,
           i.toFloat + 0.1f,
           i.toDouble + 0.2d,
-          s"val_$i".getBytes("UTF-8"),
+          s"val_$i".getBytes(StandardCharsets.UTF_8),
           s"val_$i")
       }
     })
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index a64df435d8..b394ffb366 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.execution.datasources.parquet

+import java.nio.charset.StandardCharsets
+
 import org.apache.parquet.filter2.predicate.{FilterPredicate, Operators}
 import org.apache.parquet.filter2.predicate.FilterApi._
 import org.apache.parquet.filter2.predicate.Operators.{Column => _, _}
@@ -260,7 +262,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContext {
   // See https://issues.apache.org/jira/browse/SPARK-11153
   ignore("filter pushdown - binary") {
     implicit class IntToBinary(int: Int) {
-      def b: Array[Byte] = int.toString.getBytes("UTF-8")
+      def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
     }

     withParquetDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
index 547d06236b..1c8b2ea808 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
@@ -81,6 +81,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSQLContext {
     val buildSide = joinType match {
       case LeftOuter => BuildRight
       case RightOuter => BuildLeft
+      case _ => fail(s"Unsupported join type $joinType")
     }
     extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) =>
       withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
index b6e2f1f6b3..3b53716898 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
@@ -272,7 +272,7 @@ private class ScriptTransformationWriterThread(
           sb.append(ioschema.inputRowFormatMap("TOK_TABLEROWFORMATLINES"))
           sb.toString()
         }
-        outputStream.write(data.getBytes("utf-8"))
+        outputStream.write(data.getBytes(StandardCharsets.UTF_8))
       } else {
         val writable = inputSerde.serialize(
           row.asInstanceOf[GenericInternalRow].values, inputSoi)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 5e452d107d..d21bb573d4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive.execution

 import java.io._
+import java.nio.charset.StandardCharsets

 import scala.util.control.NonFatal

@@ -127,7 +128,7 @@ abstract class HiveComparisonTest
   protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
   protected def getMd5(str: String): String = {
     val digest = java.security.MessageDigest.getInstance("MD5")
-    digest.update(str.replaceAll(System.lineSeparator(), "\n").getBytes("utf-8"))
+    digest.update(str.replaceAll(System.lineSeparator(), "\n").getBytes(StandardCharsets.UTF_8))
     new java.math.BigInteger(1, digest.digest).toString(16)
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
index d76d0c44f5..7b0c7a9f00 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.hive.orc

+import java.nio.charset.StandardCharsets
+
 import scala.collection.JavaConverters._

 import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument}
@@ -190,7 +192,7 @@ class OrcFilterSuite extends QueryTest with OrcTest {

   test("filter pushdown - binary") {
     implicit class IntToBinary(int: Int) {
-      def b: Array[Byte] = int.toString.getBytes("UTF-8")
+      def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
     }

     withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index 57c4ad4248..c395d361a1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -72,7 +72,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
   }

   test("Read/write binary data") {
-    withOrcFile(BinaryData("test".getBytes("utf8")) :: Nil) { file =>
+    withOrcFile(BinaryData("test".getBytes(StandardCharsets.UTF_8)) :: Nil) { file =>
       val bytes = read.orc(file).head().getAs[Array[Byte]](0)
       assert(new String(bytes, StandardCharsets.UTF_8) === "test")
     }