author: Sean Owen <sowen@cloudera.com> 2016-03-13 21:03:49 -0700
committer: Reynold Xin <rxin@databricks.com> 2016-03-13 21:03:49 -0700
commit: 184085284185011d7cc6d054b54d2d38eaf1dd77 (patch)
tree: 7b068f5bcf02ea959ab3a49c49fbc1cdae979a26 /yarn
parent: 473263f9598d1cf880f421aae1b51eb0b6e3cf79 (diff)
[SPARK-13823][CORE][STREAMING][SQL] Always specify Charset in String <-> byte[] conversions (and remaining Coverity items)
## What changes were proposed in this pull request?

- Fixes calls to `new String(byte[])` or `String.getBytes()` that rely on the platform default encoding, to use UTF-8
- Same for `InputStreamReader` and `OutputStreamWriter` constructors
- Standardizes on UTF-8 everywhere
- Standardizes on specifying the encoding with `StandardCharsets.UTF_8`, not the Guava constant or the string "UTF-8" (which means handling `UnsupportedEncodingException`)
- (also addresses the other remaining Coverity scan issues, which are pretty trivial; these are separated into commit https://github.com/srowen/spark/commit/1deecd8d9ca986d8adb1a42d315890ce5349d29c )

## How was this patch tested?

Jenkins tests

Author: Sean Owen <sowen@cloudera.com>

Closes #11657 from srowen/SPARK-13823.
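The pattern repeated throughout the diff below is the same: drop charset-less `getBytes()` / `new String(byte[])` calls and the Guava `Charsets.UTF_8` constant, and pass `java.nio.charset.StandardCharsets.UTF_8` explicitly. A minimal standalone sketch of the before/after (the file name and string here are illustrative, not taken from the patch):

```scala
import java.io.{File, FileOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object Utf8Example {
  def main(args: Array[String]): Unit = {
    val s = "héllo"

    // Before: relies on the JVM's platform default encoding, so the bytes
    // (and any round-tripped string) can differ from machine to machine.
    val platformBytes = s.getBytes()

    // After: the encoding is explicit, so behavior is identical everywhere.
    val utf8Bytes = s.getBytes(StandardCharsets.UTF_8)
    val roundTrip = new String(utf8Bytes, StandardCharsets.UTF_8)

    // Same idea for writers: pass the charset rather than using the
    // single-argument constructor or Guava's Charsets.UTF_8.
    val out = new File("example.txt") // illustrative path
    val writer = new OutputStreamWriter(new FileOutputStream(out), StandardCharsets.UTF_8)
    try writer.write(roundTrip) finally writer.close()
  }
}
```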
Diffstat (limited to 'yarn')
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala                        4
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala           8
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala              12
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala    4
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala       3
5 files changed, 16 insertions, 15 deletions
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 0b5ceb768c..1035056457 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -21,6 +21,7 @@ import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream, I
OutputStreamWriter}
import java.net.{InetAddress, UnknownHostException, URI}
import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets
import java.util.{Properties, UUID}
import java.util.zip.{ZipEntry, ZipOutputStream}
@@ -29,7 +30,6 @@ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map}
import scala.util.{Failure, Success, Try}
import scala.util.control.NonFatal
-import com.google.common.base.Charsets.UTF_8
import com.google.common.base.Objects
import com.google.common.io.Files
import org.apache.hadoop.conf.Configuration
@@ -619,7 +619,7 @@ private[spark] class Client(
val props = new Properties()
sparkConf.getAll.foreach { case (k, v) => props.setProperty(k, v) }
confStream.putNextEntry(new ZipEntry(SPARK_CONF_FILE))
- val writer = new OutputStreamWriter(confStream, UTF_8)
+ val writer = new OutputStreamWriter(confStream, StandardCharsets.UTF_8)
props.store(writer, "Spark configuration.")
writer.flush()
confStream.closeEntry()
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index b12e506033..78b57da482 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.deploy.yarn
import java.io.{File, FileOutputStream, OutputStreamWriter}
+import java.nio.charset.StandardCharsets
import java.util.Properties
import java.util.concurrent.TimeUnit
@@ -25,7 +26,6 @@ import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.language.postfixOps
-import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.commons.lang3.SerializationUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
@@ -75,7 +75,7 @@ abstract class BaseYarnClusterSuite
System.setProperty("SPARK_YARN_MODE", "true")
val logConfFile = new File(logConfDir, "log4j.properties")
- Files.write(LOG4J_CONF, logConfFile, UTF_8)
+ Files.write(LOG4J_CONF, logConfFile, StandardCharsets.UTF_8)
// Disable the disk utilization check to avoid the test hanging when people's disks are
// getting full.
@@ -191,7 +191,7 @@ abstract class BaseYarnClusterSuite
result: File,
expected: String): Unit = {
finalState should be (SparkAppHandle.State.FINISHED)
- val resultString = Files.toString(result, UTF_8)
+ val resultString = Files.toString(result, StandardCharsets.UTF_8)
resultString should be (expected)
}
@@ -231,7 +231,7 @@ abstract class BaseYarnClusterSuite
extraConf.foreach { case (k, v) => props.setProperty(k, v) }
val propsFile = File.createTempFile("spark", ".properties", tempDir)
- val writer = new OutputStreamWriter(new FileOutputStream(propsFile), UTF_8)
+ val writer = new OutputStreamWriter(new FileOutputStream(propsFile), StandardCharsets.UTF_8)
props.store(writer, "Spark properties.")
writer.close()
propsFile.getAbsolutePath()
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index e935163c34..5068c0cd20 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -19,13 +19,13 @@ package org.apache.spark.deploy.yarn
import java.io.File
import java.net.URL
+import java.nio.charset.StandardCharsets
import java.util.{HashMap => JHashMap}
import scala.collection.mutable
import scala.concurrent.duration._
import scala.language.postfixOps
-import com.google.common.base.Charsets.UTF_8
import com.google.common.io.{ByteStreams, Files}
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers
@@ -147,7 +147,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
private def testPySpark(clientMode: Boolean): Unit = {
val primaryPyFile = new File(tempDir, "test.py")
- Files.write(TEST_PYFILE, primaryPyFile, UTF_8)
+ Files.write(TEST_PYFILE, primaryPyFile, StandardCharsets.UTF_8)
// When running tests, let's not assume the user has built the assembly module, which also
// creates the pyspark archive. Instead, let's use PYSPARK_ARCHIVES_PATH to point at the
@@ -171,7 +171,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
subdir
}
val pyModule = new File(moduleDir, "mod1.py")
- Files.write(TEST_PYMODULE, pyModule, UTF_8)
+ Files.write(TEST_PYMODULE, pyModule, StandardCharsets.UTF_8)
val mod2Archive = TestUtils.createJarWithFiles(Map("mod2.py" -> TEST_PYMODULE), moduleDir)
val pyFiles = Seq(pyModule.getAbsolutePath(), mod2Archive.getPath()).mkString(",")
@@ -245,7 +245,7 @@ private object YarnClusterDriver extends Logging with Matchers {
data should be (Set(1, 2, 3, 4))
result = "success"
} finally {
- Files.write(result, status, UTF_8)
+ Files.write(result, status, StandardCharsets.UTF_8)
sc.stop()
}
@@ -319,14 +319,14 @@ private object YarnClasspathTest extends Logging {
val ccl = Thread.currentThread().getContextClassLoader()
val resource = ccl.getResourceAsStream("test.resource")
val bytes = ByteStreams.toByteArray(resource)
- result = new String(bytes, 0, bytes.length, UTF_8)
+ result = new String(bytes, 0, bytes.length, StandardCharsets.UTF_8)
} catch {
case t: Throwable =>
error(s"loading test.resource to $resultPath", t)
// set the exit code if not yet set
exitCode = 2
} finally {
- Files.write(result, new File(resultPath), UTF_8)
+ Files.write(result, new File(resultPath), StandardCharsets.UTF_8)
}
}
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
index c17e8695c2..1538ff75be 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
@@ -18,8 +18,8 @@
package org.apache.spark.deploy.yarn
import java.io.File
+import java.nio.charset.StandardCharsets
-import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
@@ -104,7 +104,7 @@ private object YarnExternalShuffleDriver extends Logging with Matchers {
} finally {
sc.stop()
FileUtils.deleteDirectory(execStateCopy)
- Files.write(result, status, UTF_8)
+ Files.write(result, status, StandardCharsets.UTF_8)
}
}
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
index 9202bd892f..70b8732946 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.deploy.yarn
import java.io.{File, IOException}
import java.lang.reflect.InvocationTargetException
+import java.nio.charset.StandardCharsets
import com.google.common.io.{ByteStreams, Files}
import org.apache.hadoop.conf.Configuration
@@ -59,7 +60,7 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging
val args = Array("arg1", "${arg.2}", "\"arg3\"", "'arg4'", "$arg5", "\\arg6")
try {
val argLine = args.map(a => YarnSparkHadoopUtil.escapeForShell(a)).mkString(" ")
- Files.write(("bash -c \"echo " + argLine + "\"").getBytes(), scriptFile)
+ Files.write(("bash -c \"echo " + argLine + "\"").getBytes(StandardCharsets.UTF_8), scriptFile)
scriptFile.setExecutable(true)
val proc = Runtime.getRuntime().exec(Array(scriptFile.getAbsolutePath()))