diff options
Diffstat (limited to 'sql')
7 files changed, 55 insertions, 19 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala index 5be76890af..4cd52d8288 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala @@ -200,10 +200,10 @@ private[sql] object SHORT extends NativeColumnType(ShortType, 6, 2) { } private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { - override def actualSize(v: String): Int = v.getBytes.length + 4 + override def actualSize(v: String): Int = v.getBytes("utf-8").length + 4 override def append(v: String, buffer: ByteBuffer) { - val stringBytes = v.getBytes() + val stringBytes = v.getBytes("utf-8") buffer.putInt(stringBytes.length).put(stringBytes, 0, stringBytes.length) } @@ -211,7 +211,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { val length = buffer.getInt() val stringBytes = new Array[Byte](length) buffer.get(stringBytes, 0, length) - new String(stringBytes) + new String(stringBytes, "utf-8") } override def setField(row: MutableRow, ordinal: Int, value: String) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala index 1d3608ed2d..325173cf95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala @@ -58,7 +58,7 @@ class ColumnTypeSuite extends FunSuite { checkActualSize(DOUBLE, Double.MaxValue, 8) checkActualSize(FLOAT, Float.MaxValue, 4) checkActualSize(BOOLEAN, true, 1) - checkActualSize(STRING, "hello", 4 + 5) + checkActualSize(STRING, "hello", 4 + "hello".getBytes("utf-8").length) val binary = Array.fill[Byte](4)(0: Byte) checkActualSize(BINARY, binary, 4 + 4) @@ -91,14 +91,16 @@ class ColumnTypeSuite extends FunSuite { testNativeColumnType[StringType.type]( STRING, (buffer: ByteBuffer, string: String) => { - val bytes = string.getBytes() - buffer.putInt(bytes.length).put(string.getBytes) + + val bytes = string.getBytes("utf-8") + buffer.putInt(bytes.length) + buffer.put(bytes) }, (buffer: ByteBuffer) => { val length = buffer.getInt() val bytes = new Array[Byte](length) - buffer.get(bytes, 0, length) - new String(bytes) + buffer.get(bytes) + new String(bytes, "utf-8") }) testColumnType[BinaryType.type, Array[Byte]]( @@ -161,9 +163,13 @@ class ColumnTypeSuite extends FunSuite { buffer.rewind() seq.foreach { expected => + println("buffer = " + buffer + ", expected = " + expected) + val extracted = columnType.extract(buffer) assert( - expected === columnType.extract(buffer), - "Extracted value didn't equal to the original one") + expected === extracted, + "Extracted value didn't equal to the original one. " + + hexDump(expected) + " != " + hexDump(extracted) + + ", buffer = " + dumpBuffer(buffer.duplicate().rewind().asInstanceOf[ByteBuffer])) } } @@ -179,4 +185,28 @@ class ColumnTypeSuite extends FunSuite { } } } + + private def hexDump(value: Any): String = { + if (value.isInstanceOf[String]) { + val sb = new StringBuilder() + for (ch <- value.asInstanceOf[String].toCharArray) { + sb.append(Integer.toHexString(ch & 0xffff)).append(' ') + } + if (! sb.isEmpty) sb.setLength(sb.length - 1) + sb.toString() + } else { + // for now .. + hexDump(value.toString) + } + } + + private def dumpBuffer(buff: ByteBuffer): Any = { + val sb = new StringBuilder() + while (buff.hasRemaining) { + val b = buff.get() + sb.append(Integer.toHexString(b & 0xff)).append(' ') + } + if (! sb.isEmpty) sb.setLength(sb.length - 1) + sb.toString() + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala index 610fa9cb84..8258ee5fef 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala @@ -71,7 +71,7 @@ case class ScriptTransformation( iter .map(outputProjection) // TODO: Use SerDe - .map(_.mkString("", "\t", "\n").getBytes).foreach(outputStream.write) + .map(_.mkString("", "\t", "\n").getBytes("utf-8")).foreach(outputStream.write) outputStream.close() readerThread.join() outputLines.toIterator diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala index 74110ee27b..3ad66a3d7f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala @@ -100,14 +100,15 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) { hiveFilesTemp.delete() hiveFilesTemp.mkdir() - val inRepoTests = if (System.getProperty("user.dir").endsWith("sql/hive")) { - new File("src/test/resources/") + val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) { + new File("src" + File.separator + "test" + File.separator + "resources" + File.separator) } else { - new File("sql/hive/src/test/resources") + new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + + File.separator + "resources") } def getHiveFile(path: String): File = { - val stripped = path.replaceAll("""\.\.\/""", "") + val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar) hiveDevHome .map(new File(_, stripped)) .filter(_.exists) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala index 9b9a823b6e..42a82c1fbf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.hive.test.TestHive._ * https://amplab.cs.berkeley.edu/benchmark/ */ class BigDataBenchmarkSuite extends HiveComparisonTest { - val testDataDirectory = new File("target/big-data-benchmark-testdata") + val testDataDirectory = new File("target" + File.separator + "big-data-benchmark-testdata") val testTables = Seq( TestTable( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index ea17e6e93b..edff38b901 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -78,7 +78,8 @@ abstract class HiveComparisonTest .map(name => new File(targetDir, s"$suiteName.$name")) /** The local directory with cached golden answer will be stored. */ - protected val answerCache = new File("src/test/resources/golden") + protected val answerCache = new File("src" + File.separator + "test" + + File.separator + "resources" + File.separator + "golden") if (!answerCache.exists) { answerCache.mkdir() } @@ -120,7 +121,7 @@ abstract class HiveComparisonTest protected val cacheDigest = java.security.MessageDigest.getInstance("MD5") protected def getMd5(str: String): String = { val digest = java.security.MessageDigest.getInstance("MD5") - digest.update(str.getBytes) + digest.update(str.getBytes("utf-8")) new java.math.BigInteger(1, digest.digest).toString(16) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index dfe88b960b..0bb76f31c3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.execution +import java.io.File + import org.scalatest.BeforeAndAfter import org.apache.spark.sql.hive.test.TestHive @@ -26,7 +28,9 @@ import org.apache.spark.sql.hive.test.TestHive */ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // TODO: bundle in jar files... get from classpath - lazy val hiveQueryDir = TestHive.getHiveFile("ql/src/test/queries/clientpositive") + lazy val hiveQueryDir = TestHive.getHiveFile("ql" + File.separator + "src" + + File.separator + "test" + File.separator + "queries" + File.separator + "clientpositive") + def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) override def beforeAll() { |