author	Mridul Muralidharan <mridulm80@apache.org>	2014-04-24 20:48:33 -0700
committer	Matei Zaharia <matei@databricks.com>	2014-04-24 20:48:33 -0700
commit	968c0187a12f5ae4a696c02c1ff088e998ed7edd (patch)
tree	a08997fe5f5debfaae4b55770cee37d9d53c739c /sql
parent	d5c6ae6cc3305b9aa3185486b5b6ba0a6e5aca90 (diff)
SPARK-1586 Windows build fixes
Unfortunately, this is not exhaustive - particularly hive tests still fail due to path issues.

Author: Mridul Muralidharan <mridulm80@apache.org>

This patch had conflicts when merged, resolved by
Committer: Matei Zaharia <matei@databricks.com>

Closes #505 from mridulm/windows_fixes and squashes the following commits:

ef12283 [Mridul Muralidharan] Move to org.apache.commons.lang3 for StringEscapeUtils. Earlier version was apparently buggy
cdae406 [Mridul Muralidharan] Remove leaked changes from > 2G fix branch
3267f4b [Mridul Muralidharan] Fix build failures
35b277a [Mridul Muralidharan] Fix Scalastyle failures
bc69d14 [Mridul Muralidharan] Change from hardcoded path separator
10c4d78 [Mridul Muralidharan] Use explicit encoding while using getBytes
1337abd [Mridul Muralidharan] fix classpath while running in windows
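The recurring theme in the encoding commits is avoiding the platform default: String.getBytes() and new String(bytes) use the JVM's default charset (often windows-1252 on Windows), so byte lengths and round-trips can differ across machines. A minimal sketch of the pitfall (hypothetical demo, not part of this patch; it uses StandardCharsets.UTF_8, equivalent to the "utf-8" string the patch passes):

    import java.nio.charset.StandardCharsets

    object CharsetDemo {
      def main(args: Array[String]): Unit = {
        val s = "héllo"
        // Length under the JVM default charset varies by platform
        // (5 bytes on windows-1252, 6 on UTF-8).
        println(s.getBytes.length)
        // Length under an explicit charset is stable everywhere.
        println(s.getBytes(StandardCharsets.UTF_8).length) // always 6
      }
    }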
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala | 6
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala | 44
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala | 2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala | 9
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala | 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala | 5
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 6
7 files changed, 55 insertions, 19 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala
index 5be76890af..4cd52d8288 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala
@@ -200,10 +200,10 @@ private[sql] object SHORT extends NativeColumnType(ShortType, 6, 2) {
}
private[sql] object STRING extends NativeColumnType(StringType, 7, 8) {
- override def actualSize(v: String): Int = v.getBytes.length + 4
+ override def actualSize(v: String): Int = v.getBytes("utf-8").length + 4
override def append(v: String, buffer: ByteBuffer) {
- val stringBytes = v.getBytes()
+ val stringBytes = v.getBytes("utf-8")
buffer.putInt(stringBytes.length).put(stringBytes, 0, stringBytes.length)
}
@@ -211,7 +211,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) {
val length = buffer.getInt()
val stringBytes = new Array[Byte](length)
buffer.get(stringBytes, 0, length)
- new String(stringBytes)
+ new String(stringBytes, "utf-8")
}
override def setField(row: MutableRow, ordinal: Int, value: String) {
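With the explicit charset, append and extract in ColumnType become symmetric regardless of the platform default. A hedged round-trip sketch of the same idea (standalone, using a plain ByteBuffer rather than the ColumnType API; names are hypothetical):

    import java.nio.ByteBuffer
    import java.nio.charset.StandardCharsets

    object StringRoundTrip {
      def main(args: Array[String]): Unit = {
        val v = "héllo"
        val bytes = v.getBytes(StandardCharsets.UTF_8)
        val buffer = ByteBuffer.allocate(4 + bytes.length)
        // Write a length prefix followed by the encoded bytes, as append does.
        buffer.putInt(bytes.length).put(bytes)
        buffer.flip()
        // Read it back, as extract does.
        val length = buffer.getInt()
        val out = new Array[Byte](length)
        buffer.get(out)
        // Decoding with the same explicit charset restores the original string.
        assert(new String(out, StandardCharsets.UTF_8) == v)
      }
    }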
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala
index 1d3608ed2d..325173cf95 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala
@@ -58,7 +58,7 @@ class ColumnTypeSuite extends FunSuite {
checkActualSize(DOUBLE, Double.MaxValue, 8)
checkActualSize(FLOAT, Float.MaxValue, 4)
checkActualSize(BOOLEAN, true, 1)
- checkActualSize(STRING, "hello", 4 + 5)
+ checkActualSize(STRING, "hello", 4 + "hello".getBytes("utf-8").length)
val binary = Array.fill[Byte](4)(0: Byte)
checkActualSize(BINARY, binary, 4 + 4)
@@ -91,14 +91,16 @@ class ColumnTypeSuite extends FunSuite {
testNativeColumnType[StringType.type](
STRING,
(buffer: ByteBuffer, string: String) => {
- val bytes = string.getBytes()
- buffer.putInt(bytes.length).put(string.getBytes)
+
+ val bytes = string.getBytes("utf-8")
+ buffer.putInt(bytes.length)
+ buffer.put(bytes)
},
(buffer: ByteBuffer) => {
val length = buffer.getInt()
val bytes = new Array[Byte](length)
- buffer.get(bytes, 0, length)
- new String(bytes)
+ buffer.get(bytes)
+ new String(bytes, "utf-8")
})
testColumnType[BinaryType.type, Array[Byte]](
@@ -161,9 +163,13 @@ class ColumnTypeSuite extends FunSuite {
buffer.rewind()
seq.foreach { expected =>
+ println("buffer = " + buffer + ", expected = " + expected)
+ val extracted = columnType.extract(buffer)
assert(
- expected === columnType.extract(buffer),
- "Extracted value didn't equal to the original one")
+ expected === extracted,
+ "Extracted value didn't equal to the original one. " +
+ hexDump(expected) + " != " + hexDump(extracted) +
+ ", buffer = " + dumpBuffer(buffer.duplicate().rewind().asInstanceOf[ByteBuffer]))
}
}
@@ -179,4 +185,28 @@ class ColumnTypeSuite extends FunSuite {
}
}
}
+
+ private def hexDump(value: Any): String = {
+ if (value.isInstanceOf[String]) {
+ val sb = new StringBuilder()
+ for (ch <- value.asInstanceOf[String].toCharArray) {
+ sb.append(Integer.toHexString(ch & 0xffff)).append(' ')
+ }
+ if (! sb.isEmpty) sb.setLength(sb.length - 1)
+ sb.toString()
+ } else {
+ // for now ..
+ hexDump(value.toString)
+ }
+ }
+
+ private def dumpBuffer(buff: ByteBuffer): Any = {
+ val sb = new StringBuilder()
+ while (buff.hasRemaining) {
+ val b = buff.get()
+ sb.append(Integer.toHexString(b & 0xff)).append(' ')
+ }
+ if (! sb.isEmpty) sb.setLength(sb.length - 1)
+ sb.toString()
+ }
}
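For reference, the suite's new hexDump helper emits one hex group per UTF-16 code unit, which makes charset mismatches visible in assertion failures. A standalone copy of that logic (hypothetical demo object, mirroring the test code above):

    object HexDumpDemo {
      // One hex group per UTF-16 code unit, space separated.
      private def hexDump(s: String): String =
        s.toCharArray.map(ch => Integer.toHexString(ch & 0xffff)).mkString(" ")

      def main(args: Array[String]): Unit = {
        println(hexDump("hé")) // prints: 68 e9
      }
    }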
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
index 610fa9cb84..8258ee5fef 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
@@ -71,7 +71,7 @@ case class ScriptTransformation(
iter
.map(outputProjection)
// TODO: Use SerDe
- .map(_.mkString("", "\t", "\n").getBytes).foreach(outputStream.write)
+ .map(_.mkString("", "\t", "\n").getBytes("utf-8")).foreach(outputStream.write)
outputStream.close()
readerThread.join()
outputLines.toIterator
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 74110ee27b..3ad66a3d7f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -100,14 +100,15 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
hiveFilesTemp.delete()
hiveFilesTemp.mkdir()
- val inRepoTests = if (System.getProperty("user.dir").endsWith("sql/hive")) {
- new File("src/test/resources/")
+ val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) {
+ new File("src" + File.separator + "test" + File.separator + "resources" + File.separator)
} else {
- new File("sql/hive/src/test/resources")
+ new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" +
+ File.separator + "resources")
}
def getHiveFile(path: String): File = {
- val stripped = path.replaceAll("""\.\.\/""", "")
+ val stripped = path.replaceAll("""\.\.\/""", "").replace('/', File.separatorChar)
hiveDevHome
.map(new File(_, stripped))
.filter(_.exists)
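Note that java.io.File itself accepts forward slashes on Windows; the separator matters in TestHive because the code compares and rewrites path strings, and user.dir comes back with native separators. A small sketch of the distinction (hypothetical demo, not from this patch):

    import java.io.File

    object SeparatorDemo {
      def main(args: Array[String]): Unit = {
        // File normalizes '/' on Windows, so constructing paths this way still works:
        val f = new File("sql/hive/src/test/resources")
        // ...but string tests against user.dir must use the native separator:
        val userDir = System.getProperty("user.dir")
        val inHive = userDir.endsWith("sql" + File.separator + "hive")
        println(s"path=${f.getPath} inHive=$inHive")
      }
    }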
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
index 9b9a823b6e..42a82c1fbf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.hive.test.TestHive._
* https://amplab.cs.berkeley.edu/benchmark/
*/
class BigDataBenchmarkSuite extends HiveComparisonTest {
- val testDataDirectory = new File("target/big-data-benchmark-testdata")
+ val testDataDirectory = new File("target" + File.separator + "big-data-benchmark-testdata")
val testTables = Seq(
TestTable(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index ea17e6e93b..edff38b901 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -78,7 +78,8 @@ abstract class HiveComparisonTest
.map(name => new File(targetDir, s"$suiteName.$name"))
/** The local directory with cached golden answer will be stored. */
- protected val answerCache = new File("src/test/resources/golden")
+ protected val answerCache = new File("src" + File.separator + "test" +
+ File.separator + "resources" + File.separator + "golden")
if (!answerCache.exists) {
answerCache.mkdir()
}
@@ -120,7 +121,7 @@ abstract class HiveComparisonTest
protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
protected def getMd5(str: String): String = {
val digest = java.security.MessageDigest.getInstance("MD5")
- digest.update(str.getBytes)
+ digest.update(str.getBytes("utf-8"))
new java.math.BigInteger(1, digest.digest).toString(16)
}
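The digest fix follows the same pattern: the hashed bytes must come from a fixed charset, or the golden-answer cache key would change across platforms. A standalone version of getMd5 (hedged sketch; swaps the "utf-8" string for the equivalent StandardCharsets.UTF_8):

    import java.nio.charset.StandardCharsets
    import java.security.MessageDigest

    object Md5Demo {
      def getMd5(str: String): String = {
        val digest = MessageDigest.getInstance("MD5")
        // Explicit charset keeps the digest identical on every platform.
        digest.update(str.getBytes(StandardCharsets.UTF_8))
        new java.math.BigInteger(1, digest.digest).toString(16)
      }

      def main(args: Array[String]): Unit = {
        println(getMd5("hello")) // 5d41402abc4b2a76b9719d911017c592
      }
    }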
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index dfe88b960b..0bb76f31c3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.hive.execution
+import java.io.File
+
import org.scalatest.BeforeAndAfter
import org.apache.spark.sql.hive.test.TestHive
@@ -26,7 +28,9 @@ import org.apache.spark.sql.hive.test.TestHive
*/
class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// TODO: bundle in jar files... get from classpath
- lazy val hiveQueryDir = TestHive.getHiveFile("ql/src/test/queries/clientpositive")
+ lazy val hiveQueryDir = TestHive.getHiveFile("ql" + File.separator + "src" +
+ File.separator + "test" + File.separator + "queries" + File.separator + "clientpositive")
+
def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
override def beforeAll() {