diff options
author | Wenchen Fan <cloud0fan@outlook.com> | 2015-07-13 00:49:39 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-13 00:49:39 -0700 |
commit | 6b89943834a8d9d5d0ecfd97efcc10056d08532a (patch) | |
tree | 7383eb5ef241c044e01393cccedc8fdf5fb94e48 /unsafe/src | |
parent | 92540d22e45f9300f413f520a1770e9f36cfa833 (diff) | |
download | spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.tar.gz spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.tar.bz2 spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.zip |
[SPARK-8944][SQL] Support casting between IntervalType and StringType
Author: Wenchen Fan <cloud0fan@outlook.com>
Closes #7355 from cloud-fan/fromString and squashes the following commits:
3bbb9d6 [Wenchen Fan] fix code gen
7dab957 [Wenchen Fan] naming fix
0fbbe19 [Wenchen Fan] address comments
ac1f3d1 [Wenchen Fan] Support casting between IntervalType and StringType
Diffstat (limited to 'unsafe/src')
-rw-r--r-- | unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java | 48 | ||||
-rw-r--r-- | unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java | 46 |
2 files changed, 94 insertions, 0 deletions
```diff
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
index 0af982d484..eb7475e9df 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
@@ -18,6 +18,8 @@
 package org.apache.spark.unsafe.types;
 
 import java.io.Serializable;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * The internal representation of interval type.
@@ -30,6 +32,52 @@ public final class Interval implements Serializable {
   public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
   public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
 
+  /**
+   * A function to generate regex which matches interval string's unit part like "3 years".
+   *
+   * First, we can leave out some units in interval string, and we only care about the value of
+   * unit, so here we use non-capturing group to wrap the actual regex.
+   * At the beginning of the actual regex, we should match spaces before the unit part.
+   * Next is the number part, starts with an optional "-" to represent negative value. We use
+   * capturing group to wrap this part as we need the value later.
+   * Finally is the unit name, ends with an optional "s".
+   */
+  private static String unitRegex(String unit) {
+    return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
+  }
+
+  private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") +
+    unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") +
+    unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"));
+
+  private static long toLong(String s) {
+    if (s == null) {
+      return 0;
+    } else {
+      return Long.valueOf(s);
+    }
+  }
+
+  public static Interval fromString(String s) {
+    if (s == null) {
+      return null;
+    }
+    Matcher m = p.matcher(s);
+    if (!m.matches() || s.equals("interval")) {
+      return null;
+    } else {
+      long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
+      long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
+      microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
+      microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
+      microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
+      microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
+      microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
+      microseconds += toLong(m.group(9));
+      return new Interval((int) months, microseconds);
+    }
+  }
+
   public final int months;
   public final long microseconds;
 
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
index 0f4f38b2b0..44a949a371 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
@@ -56,4 +56,50 @@ public class IntervalSuite {
     i = new Interval(34, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
     assertEquals(i.toString(), "interval 2 years 10 months 3 weeks 13 hours 123 microseconds");
   }
+
+  @Test
+  public void fromStringTest() {
+    testSingleUnit("year", 3, 36, 0);
+    testSingleUnit("month", 3, 3, 0);
+    testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK);
+    testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY);
+    testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR);
+    testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE);
+    testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND);
+    testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI);
+    testSingleUnit("microsecond", 3, 0, 3);
+
+    String input;
+
+    input = "interval -5 years 23 month";
+    Interval result = new Interval(-5 * 12 + 23, 0);
+    assertEquals(Interval.fromString(input), result);
+
+    // Error cases
+    input = "interval 3month 1 hour";
+    assertEquals(Interval.fromString(input), null);
+
+    input = "interval 3 moth 1 hour";
+    assertEquals(Interval.fromString(input), null);
+
+    input = "interval";
+    assertEquals(Interval.fromString(input), null);
+
+    input = "int";
+    assertEquals(Interval.fromString(input), null);
+
+    input = "";
+    assertEquals(Interval.fromString(input), null);
+
+    input = null;
+    assertEquals(Interval.fromString(input), null);
+  }
+
+  private void testSingleUnit(String unit, int number, int months, long microseconds) {
+    String input1 = "interval " + number + " " + unit;
+    String input2 = "interval " + number + " " + unit + "s";
+    Interval result = new Interval(months, microseconds);
+    assertEquals(Interval.fromString(input1), result);
+    assertEquals(Interval.fromString(input2), result);
+  }
 }
```