aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
author Wenchen Fan <cloud0fan@outlook.com> 2015-07-13 00:49:39 -0700
committer Reynold Xin <rxin@databricks.com> 2015-07-13 00:49:39 -0700
commit 6b89943834a8d9d5d0ecfd97efcc10056d08532a (patch)
tree 7383eb5ef241c044e01393cccedc8fdf5fb94e48 /unsafe
parent 92540d22e45f9300f413f520a1770e9f36cfa833 (diff)
download spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.tar.gz
spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.tar.bz2
spark-6b89943834a8d9d5d0ecfd97efcc10056d08532a.zip
[SPARK-8944][SQL] Support casting between IntervalType and StringType
Author: Wenchen Fan <cloud0fan@outlook.com>

Closes #7355 from cloud-fan/fromString and squashes the following commits:

3bbb9d6 [Wenchen Fan] fix code gen
7dab957 [Wenchen Fan] naming fix
0fbbe19 [Wenchen Fan] address comments
ac1f3d1 [Wenchen Fan] Support casting between IntervalType and StringType
Diffstat (limited to 'unsafe')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java 48
-rw-r--r-- unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java 46
2 files changed, 94 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
index 0af982d484..eb7475e9df 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/Interval.java
@@ -18,6 +18,8 @@
package org.apache.spark.unsafe.types;
import java.io.Serializable;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* The internal representation of interval type.
@@ -30,6 +32,52 @@ public final class Interval implements Serializable {
public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
+ /**
+ * Generates the regex that matches one unit part of an interval string, such as "3 years".
+ *
+ * Any unit may be omitted from an interval string, so the fragment is optional; only the number
+ * is needed afterwards, so the outer wrapper is a non-capturing group.
+ * Inside it, the fragment starts with the whitespace that separates it from what precedes it.
+ * Next is the number, with an optional leading "-" for negative values; it is wrapped in a
+ * capturing group because its value is read back later.
+ * Last are the separating whitespace and the unit name, which may end with an optional "s".
+ */
+ private static String unitRegex(String unit) {
+ return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?"; // unit is spliced in verbatim, not regex-quoted
+ }
+
+ private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") + // capture groups 1-2
+ unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") + // capture groups 3-6
+ unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond")); // capture groups 7-9
+
+ private static long toLong(String s) { // converts one captured number; s is null when the unit was left out
+ if (s == null) {
+ return 0; // an omitted unit contributes nothing
+ } else {
+ return Long.valueOf(s); // unboxed to long; throws NumberFormatException if the digits exceed long range
+ }
+ }
+
+ public static Interval fromString(String s) { // parses e.g. "interval 3 years 2 months"; null for null or non-matching input
+ if (s == null) {
+ return null;
+ }
+ Matcher m = p.matcher(s);
+ if (!m.matches() || s.equals("interval")) { // bare "interval" matches the pattern because every unit is optional
+ return null;
+ } else {
+ long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); // years and months
+ long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; // weeks
+ microseconds += toLong(m.group(4)) * MICROS_PER_DAY; // days
+ microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; // hours
+ microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; // minutes
+ microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; // seconds
+ microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; // milliseconds
+ microseconds += toLong(m.group(9)); // already in microseconds
+ return new Interval((int) months, microseconds); // NOTE(review): the int cast truncates month totals outside int range - confirm acceptable
+ }
+ }
+
public final int months;
public final long microseconds;
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
index 0f4f38b2b0..44a949a371 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/IntervalSuite.java
@@ -56,4 +56,50 @@ public class IntervalSuite {
i = new Interval(34, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
assertEquals(i.toString(), "interval 2 years 10 months 3 weeks 13 hours 123 microseconds");
}
+
+ @Test
+ public void fromStringTest() {
+ testSingleUnit("year", 3, 36, 0); // args: unit name, number, expected months, expected microseconds
+ testSingleUnit("month", 3, 3, 0);
+ testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK);
+ testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY);
+ testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR);
+ testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE);
+ testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND);
+ testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI);
+ testSingleUnit("microsecond", 3, 0, 3);
+
+ String input;
+
+ input = "interval -5 years 23 month"; // negative number, plural and singular unit names mixed
+ Interval result = new Interval(-5 * 12 + 23, 0); // -37 months
+ assertEquals(Interval.fromString(input), result);
+
+ // Error cases: each input below must yield null
+ input = "interval 3month 1 hour"; // no whitespace between number and unit
+ assertEquals(Interval.fromString(input), null);
+
+ input = "interval 3 moth 1 hour"; // misspelled unit name
+ assertEquals(Interval.fromString(input), null);
+
+ input = "interval"; // keyword with no unit parts
+ assertEquals(Interval.fromString(input), null);
+
+ input = "int"; // truncated keyword
+ assertEquals(Interval.fromString(input), null);
+
+ input = ""; // empty string
+ assertEquals(Interval.fromString(input), null);
+
+ input = null; // null input
+ assertEquals(Interval.fromString(input), null);
+ }
+
+ private void testSingleUnit(String unit, int number, int months, long microseconds) { // checks both spellings of a single-unit interval
+ String input1 = "interval " + number + " " + unit; // singular form, e.g. "interval 3 year"
+ String input2 = "interval " + number + " " + unit + "s"; // plural form, e.g. "interval 3 years"
+ Interval result = new Interval(months, microseconds); // expected value for both inputs
+ assertEquals(Interval.fromString(input1), result);
+ assertEquals(Interval.fromString(input2), result);
+ }
}