From fc44b694bf5162b3a044768da4627b9969909829 Mon Sep 17 00:00:00 2001 From: wangyang Date: Sun, 22 May 2016 19:30:14 -0700 Subject: [SPARK-15379][SQL] check special invalid date ## What changes were proposed in this pull request? When invalid date string like "2015-02-29 00:00:00" are cast as date or timestamp using spark sql, it used to not return null but another valid date (2015-03-01 in this case). In this pr, invalid date string like "2016-02-29" and "2016-04-31" are returned as null when cast as date or timestamp. ## How was this patch tested? Unit tests are added. (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: wangyang Closes #13169 from wangyang1992/invalid_date. --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 27 ++++++++++++++++++---- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 19 +++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index f84c6592c6..e08328a320 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -58,6 +58,7 @@ object DateTimeUtils { final val YearZero = -17999 final val toYearZero = to2001 + 7304850 final val TimeZoneGMT = TimeZone.getTimeZone("GMT") + final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12) @transient lazy val defaultTimeZone = TimeZone.getDefault @@ -333,8 +334,7 @@ object DateTimeUtils { digitsMilli += 1 } - if (!justTime && (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || - segments(1) > 12 || segments(2) < 1 || segments(2) > 31)) { + if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) { return None } @@ -414,10 +414,10 @@ object DateTimeUtils { return None } segments(i) = currentSegmentValue - if (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 || - segments(2) < 1 || segments(2) > 31) { + if (isInvalidDate(segments(0), segments(1), segments(2))) { return None } + val c = threadLocalGmtCalendar.get() c.clear() c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0) @@ -425,6 +425,25 @@ object DateTimeUtils { Some((c.getTimeInMillis / MILLIS_PER_DAY).toInt) } + /** + * Return true if the date is invalid. + */ + private def isInvalidDate(year: Int, month: Int, day: Int): Boolean = { + if (year < 0 || year > 9999 || month < 1 || month > 12 || day < 1 || day > 31) { + return true + } + if (month == 2) { + if (isLeapYear(year) && day > 29) { + return true + } else if (!isLeapYear(year) && day > 28) { + return true + } + } else if (!MonthOf31Days.contains(month) && day > 30) { + return true + } + false + } + /** * Returns the microseconds since year zero (-17999) from microseconds since epoch. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 6745b4b6c3..28e30c2219 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -353,6 +353,25 @@ class DateTimeUtilsSuite extends SparkFunSuite { c.getTimeInMillis * 1000 + 123456) } + test("SPARK-15379: special invalid date string") { + // Test stringToDate + assert(stringToDate( + UTF8String.fromString("2015-02-29 00:00:00")).isEmpty) + assert(stringToDate( + UTF8String.fromString("2015-04-31 00:00:00")).isEmpty) + assert(stringToDate(UTF8String.fromString("2015-02-29")).isEmpty) + assert(stringToDate(UTF8String.fromString("2015-04-31")).isEmpty) + + + // Test stringToTimestamp + assert(stringToTimestamp( + UTF8String.fromString("2015-02-29 00:00:00")).isEmpty) + assert(stringToTimestamp( + UTF8String.fromString("2015-04-31 00:00:00")).isEmpty) + assert(stringToTimestamp(UTF8String.fromString("2015-02-29")).isEmpty) + assert(stringToTimestamp(UTF8String.fromString("2015-04-31")).isEmpty) + } + test("hours") { val c = Calendar.getInstance() c.set(2015, 2, 18, 13, 2, 11) -- cgit v1.2.3