aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwangyang <wangyang@haizhi.com>2016-05-22 19:30:14 -0700
committerWenchen Fan <wenchen@databricks.com>2016-05-22 19:30:14 -0700
commitfc44b694bf5162b3a044768da4627b9969909829 (patch)
tree93dd5a380ed94e07264c9f77cc81e7bc4cee8977
parent3eff65f82d5fc29b21f330966ac763489d217f4d (diff)
downloadspark-fc44b694bf5162b3a044768da4627b9969909829.tar.gz
spark-fc44b694bf5162b3a044768da4627b9969909829.tar.bz2
spark-fc44b694bf5162b3a044768da4627b9969909829.zip
[SPARK-15379][SQL] check special invalid date
## What changes were proposed in this pull request? When an invalid date string such as "2015-02-29 00:00:00" was cast to a date or timestamp using Spark SQL, the result was not null but a different, valid date (2015-03-01 in this case). With this PR, invalid date strings such as "2015-02-29" and "2015-04-31" return null when cast to a date or timestamp. ## How was this patch tested? Unit tests are added. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: wangyang <wangyang@haizhi.com> Closes #13169 from wangyang1992/invalid_date.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala27
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala19
2 files changed, 42 insertions, 4 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index f84c6592c6..e08328a320 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -58,6 +58,7 @@ object DateTimeUtils {
final val YearZero = -17999
final val toYearZero = to2001 + 7304850
final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
+ final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
@transient lazy val defaultTimeZone = TimeZone.getDefault
@@ -333,8 +334,7 @@ object DateTimeUtils {
digitsMilli += 1
}
- if (!justTime && (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 ||
- segments(1) > 12 || segments(2) < 1 || segments(2) > 31)) {
+ if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) {
return None
}
@@ -414,10 +414,10 @@ object DateTimeUtils {
return None
}
segments(i) = currentSegmentValue
- if (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 ||
- segments(2) < 1 || segments(2) > 31) {
+ if (isInvalidDate(segments(0), segments(1), segments(2))) {
return None
}
+
val c = threadLocalGmtCalendar.get()
c.clear()
c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0)
@@ -426,6 +426,25 @@ object DateTimeUtils {
}
/**
+ * Return true if the date is invalid.
+ */
+ private def isInvalidDate(year: Int, month: Int, day: Int): Boolean = {
+ if (year < 0 || year > 9999 || month < 1 || month > 12 || day < 1 || day > 31) {
+ return true
+ }
+ if (month == 2) {
+ if (isLeapYear(year) && day > 29) {
+ return true
+ } else if (!isLeapYear(year) && day > 28) {
+ return true
+ }
+ } else if (!MonthOf31Days.contains(month) && day > 30) {
+ return true
+ }
+ false
+ }
+
+ /**
* Returns the microseconds since year zero (-17999) from microseconds since epoch.
*/
private def absoluteMicroSecond(microsec: SQLTimestamp): SQLTimestamp = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 6745b4b6c3..28e30c2219 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -353,6 +353,25 @@ class DateTimeUtilsSuite extends SparkFunSuite {
c.getTimeInMillis * 1000 + 123456)
}
+ test("SPARK-15379: special invalid date string") {
+ // Test stringToDate
+ assert(stringToDate(
+ UTF8String.fromString("2015-02-29 00:00:00")).isEmpty)
+ assert(stringToDate(
+ UTF8String.fromString("2015-04-31 00:00:00")).isEmpty)
+ assert(stringToDate(UTF8String.fromString("2015-02-29")).isEmpty)
+ assert(stringToDate(UTF8String.fromString("2015-04-31")).isEmpty)
+
+
+ // Test stringToTimestamp
+ assert(stringToTimestamp(
+ UTF8String.fromString("2015-02-29 00:00:00")).isEmpty)
+ assert(stringToTimestamp(
+ UTF8String.fromString("2015-04-31 00:00:00")).isEmpty)
+ assert(stringToTimestamp(UTF8String.fromString("2015-02-29")).isEmpty)
+ assert(stringToTimestamp(UTF8String.fromString("2015-04-31")).isEmpty)
+ }
+
test("hours") {
val c = Calendar.getInstance()
c.set(2015, 2, 18, 13, 2, 11)