aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
authorYijie Shen <henry.yijieshen@gmail.com>2015-08-08 11:01:25 -0700
committerReynold Xin <rxin@databricks.com>2015-08-08 11:01:25 -0700
commit23695f1d2d7ef9f3ea92cebcd96b1cf0e8904eb4 (patch)
tree3925d23669ad452b87edd8fd4b2c25572f523f89 /unsafe
parentac507a03c3371cd5404ca195ee0ba0306badfc23 (diff)
downloadspark-23695f1d2d7ef9f3ea92cebcd96b1cf0e8904eb4.tar.gz
spark-23695f1d2d7ef9f3ea92cebcd96b1cf0e8904eb4.tar.bz2
spark-23695f1d2d7ef9f3ea92cebcd96b1cf0e8904eb4.zip
[SPARK-9728][SQL]Support CalendarIntervalType in HiveQL
This PR enables converting interval term in HiveQL to CalendarInterval Literal. JIRA: https://issues.apache.org/jira/browse/SPARK-9728 Author: Yijie Shen <henry.yijieshen@gmail.com> Closes #8034 from yjshen/interval_hiveql and squashes the following commits: 7fe9a5e [Yijie Shen] declare throw exception and add unit test fce7795 [Yijie Shen] convert hiveql interval term into CalendarInterval literal
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java156
-rw-r--r--unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java91
2 files changed, 247 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
index 92a5e4f86f..30e1758076 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -50,6 +50,14 @@ public final class CalendarInterval implements Serializable {
unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") +
unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"));
+ private static Pattern yearMonthPattern =
+ Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
+
+ private static Pattern dayTimePattern =
+ Pattern.compile("^(?:['|\"])?([+|-])?(\\d+) (\\d+):(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
+
+ private static Pattern quoteTrimPattern = Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
+
private static long toLong(String s) {
if (s == null) {
return 0;
@@ -79,6 +87,154 @@ public final class CalendarInterval implements Serializable {
}
}
+ public static long toLongWithRange(String fieldName,
+ String s, long minValue, long maxValue) throws IllegalArgumentException {
+ long result = 0;
+ if (s != null) {
+ result = Long.valueOf(s);
+ if (result < minValue || result > maxValue) {
+ throw new IllegalArgumentException(String.format("%s %d outside range [%d, %d]",
+ fieldName, result, minValue, maxValue));
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Parse YearMonth string in form: [-]YYYY-MM
+ *
+ * adapted from HiveIntervalYearMonth.valueOf
+ */
+ public static CalendarInterval fromYearMonthString(String s) throws IllegalArgumentException {
+ CalendarInterval result = null;
+ if (s == null) {
+ throw new IllegalArgumentException("Interval year-month string was null");
+ }
+ s = s.trim();
+ Matcher m = yearMonthPattern.matcher(s);
+ if (!m.matches()) {
+ throw new IllegalArgumentException(
+ "Interval string does not match year-month format of 'y-m': " + s);
+ } else {
+ try {
+ int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1;
+ int years = (int) toLongWithRange("year", m.group(2), 0, Integer.MAX_VALUE);
+ int months = (int) toLongWithRange("month", m.group(3), 0, 11);
+ result = new CalendarInterval(sign * (years * 12 + months), 0);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ "Error parsing interval year-month string: " + e.getMessage(), e);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn
+ *
+ * adapted from HiveIntervalDayTime.valueOf
+ */
+ public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumentException {
+ CalendarInterval result = null;
+ if (s == null) {
+ throw new IllegalArgumentException("Interval day-time string was null");
+ }
+ s = s.trim();
+ Matcher m = dayTimePattern.matcher(s);
+ if (!m.matches()) {
+ throw new IllegalArgumentException(
+ "Interval string does not match day-time format of 'd h:m:s.n': " + s);
+ } else {
+ try {
+ int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1;
+ long days = toLongWithRange("day", m.group(2), 0, Integer.MAX_VALUE);
+ long hours = toLongWithRange("hour", m.group(3), 0, 23);
+ long minutes = toLongWithRange("minute", m.group(4), 0, 59);
+ long seconds = toLongWithRange("second", m.group(5), 0, 59);
+ // Hive allow nanosecond precision interval
+ long nanos = toLongWithRange("nanosecond", m.group(7), 0L, 999999999L);
+ result = new CalendarInterval(0, sign * (
+ days * MICROS_PER_DAY + hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE +
+ seconds * MICROS_PER_SECOND + nanos / 1000L));
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ "Error parsing interval day-time string: " + e.getMessage(), e);
+ }
+ }
+ return result;
+ }
+
+ public static CalendarInterval fromSingleUnitString(String unit, String s)
+ throws IllegalArgumentException {
+
+ CalendarInterval result = null;
+ if (s == null) {
+ throw new IllegalArgumentException(String.format("Interval %s string was null", unit));
+ }
+ s = s.trim();
+ Matcher m = quoteTrimPattern.matcher(s);
+ if (!m.matches()) {
+ throw new IllegalArgumentException(
+ "Interval string does not match day-time format of 'd h:m:s.n': " + s);
+ } else {
+ try {
+ if (unit.equals("year")) {
+ int year = (int) toLongWithRange("year", m.group(1),
+ Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
+ result = new CalendarInterval(year * 12, 0L);
+
+ } else if (unit.equals("month")) {
+ int month = (int) toLongWithRange("month", m.group(1),
+ Integer.MIN_VALUE, Integer.MAX_VALUE);
+ result = new CalendarInterval(month, 0L);
+
+ } else if (unit.equals("day")) {
+ long day = toLongWithRange("day", m.group(1),
+ Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
+ result = new CalendarInterval(0, day * MICROS_PER_DAY);
+
+ } else if (unit.equals("hour")) {
+ long hour = toLongWithRange("hour", m.group(1),
+ Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
+ result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
+
+ } else if (unit.equals("minute")) {
+ long minute = toLongWithRange("minute", m.group(1),
+ Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
+ result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
+
+ } else if (unit.equals("second")) {
+ long micros = parseSecondNano(m.group(1));
+ result = new CalendarInterval(0, micros);
+ }
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Parse second_nano string in ss.nnnnnnnnn format to microseconds
+ */
+ public static long parseSecondNano(String secondNano) throws IllegalArgumentException {
+ String[] parts = secondNano.split("\\.");
+ if (parts.length == 1) {
+ return toLongWithRange("second", parts[0], Long.MIN_VALUE / MICROS_PER_SECOND,
+ Long.MAX_VALUE / MICROS_PER_SECOND) * MICROS_PER_SECOND;
+
+ } else if (parts.length == 2) {
+ long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0],
+ Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND);
+ long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L);
+ return seconds * MICROS_PER_SECOND + nanos / 1000L;
+
+ } else {
+ throw new IllegalArgumentException(
+ "Interval string does not match second-nano format of ss.nnnnnnnnn");
+ }
+ }
+
public final int months;
public final long microseconds;
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
index 6274b92b47..80d4982c4b 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
@@ -102,6 +102,97 @@ public class CalendarIntervalSuite {
}
@Test
+ public void fromYearMonthStringTest() {
+ String input;
+ CalendarInterval i;
+
+ input = "99-10";
+ i = new CalendarInterval(99 * 12 + 10, 0L);
+ assertEquals(CalendarInterval.fromYearMonthString(input), i);
+
+ input = "-8-10";
+ i = new CalendarInterval(-8 * 12 - 10, 0L);
+ assertEquals(CalendarInterval.fromYearMonthString(input), i);
+
+ try {
+ input = "99-15";
+ CalendarInterval.fromYearMonthString(input);
+ fail("Expected to throw an exception for the invalid input");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage().contains("month 15 outside range"));
+ }
+ }
+
+ @Test
+ public void fromDayTimeStringTest() {
+ String input;
+ CalendarInterval i;
+
+ input = "5 12:40:30.999999999";
+ i = new CalendarInterval(0, 5 * MICROS_PER_DAY + 12 * MICROS_PER_HOUR +
+ 40 * MICROS_PER_MINUTE + 30 * MICROS_PER_SECOND + 999999L);
+ assertEquals(CalendarInterval.fromDayTimeString(input), i);
+
+ input = "10 0:12:0.888";
+ i = new CalendarInterval(0, 10 * MICROS_PER_DAY + 12 * MICROS_PER_MINUTE);
+ assertEquals(CalendarInterval.fromDayTimeString(input), i);
+
+ input = "-3 0:0:0";
+ i = new CalendarInterval(0, -3 * MICROS_PER_DAY);
+ assertEquals(CalendarInterval.fromDayTimeString(input), i);
+
+ try {
+ input = "5 30:12:20";
+ CalendarInterval.fromDayTimeString(input);
+ fail("Expected to throw an exception for the invalid input");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage().contains("hour 30 outside range"));
+ }
+
+ try {
+ input = "5 30-12";
+ CalendarInterval.fromDayTimeString(input);
+ fail("Expected to throw an exception for the invalid input");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage().contains("not match day-time format"));
+ }
+ }
+
+ @Test
+ public void fromSingleUnitStringTest() {
+ String input;
+ CalendarInterval i;
+
+ input = "12";
+ i = new CalendarInterval(12 * 12, 0L);
+ assertEquals(CalendarInterval.fromSingleUnitString("year", input), i);
+
+ input = "100";
+ i = new CalendarInterval(0, 100 * MICROS_PER_DAY);
+ assertEquals(CalendarInterval.fromSingleUnitString("day", input), i);
+
+ input = "1999.38888";
+ i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
+ assertEquals(CalendarInterval.fromSingleUnitString("second", input), i);
+
+ try {
+ input = String.valueOf(Integer.MAX_VALUE);
+ CalendarInterval.fromSingleUnitString("year", input);
+ fail("Expected to throw an exception for the invalid input");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage().contains("outside range"));
+ }
+
+ try {
+ input = String.valueOf(Long.MAX_VALUE / MICROS_PER_HOUR + 1);
+ CalendarInterval.fromSingleUnitString("hour", input);
+ fail("Expected to throw an exception for the invalid input");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage().contains("outside range"));
+ }
+ }
+
+ @Test
public void addTest() {
String input = "interval 3 month 1 hour";
String input2 = "interval 2 month 100 hour";