aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@databricks.com>2016-06-09 16:37:18 -0700
committerReynold Xin <rxin@databricks.com>2016-06-09 16:37:18 -0700
commitb0768538e56e5bbda7aaabbe2a0197e30ba5f993 (patch)
tree991917742403387109d764a7c761552b0896bae0
parent6cb71f4733a920d916b91c66bb2a508a21883b16 (diff)
downloadspark-b0768538e56e5bbda7aaabbe2a0197e30ba5f993.tar.gz
spark-b0768538e56e5bbda7aaabbe2a0197e30ba5f993.tar.bz2
spark-b0768538e56e5bbda7aaabbe2a0197e30ba5f993.zip
[SPARK-14321][SQL] Reduce date format cost and string-to-date cost in date functions
## What changes were proposed in this pull request? The current implementations of `UnixTime` and `FromUnixTime` do not cache their parser/formatter as much as they could. This PR resolves this issue. This PR is a takeover of https://github.com/apache/spark/pull/13522 and further optimizes the re-use of the parser/formatter. It also improves exception handling (catching the actual exception instead of `Throwable`). All credit for this work should go to rajeshbalamohan. This PR closes https://github.com/apache/spark/pull/13522 ## How was this patch tested? Current tests. Author: Herman van Hovell <hvanhovell@databricks.com> Author: Rajesh Balamohan <rbalamohan@apache.org> Closes #13581 from hvanhovell/SPARK-14321.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala48
1 files changed, 24 insertions, 24 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 69c32f447e..773431dd33 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -399,6 +399,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
override def nullable: Boolean = true
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
+ private lazy val formatter: SimpleDateFormat =
+ Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
override def eval(input: InternalRow): Any = {
val t = left.eval(input)
@@ -411,11 +413,11 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
case TimestampType =>
t.asInstanceOf[Long] / 1000000L
case StringType if right.foldable =>
- if (constFormat != null) {
- Try(new SimpleDateFormat(constFormat.toString).parse(
- t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
- } else {
+ if (constFormat == null || formatter == null) {
null
+ } else {
+ Try(formatter.parse(
+ t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
}
case StringType =>
val f = right.eval(input)
@@ -434,13 +436,10 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
left.dataType match {
case StringType if right.foldable =>
val sdf = classOf[SimpleDateFormat].getName
- val fString = if (constFormat == null) null else constFormat.toString
- val formatter = ctx.freshName("formatter")
- if (fString == null) {
- ev.copy(code = s"""
- boolean ${ev.isNull} = true;
- ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
+ if (formatter == null) {
+ ExprCode("", "true", ctx.defaultValue(dataType))
} else {
+ val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
val eval1 = left.genCode(ctx)
ev.copy(code = s"""
${eval1.code}
@@ -448,10 +447,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
- $sdf $formatter = new $sdf("$fString");
- ${ev.value} =
- $formatter.parse(${eval1.value}.toString()).getTime() / 1000L;
- } catch (java.lang.Throwable e) {
+ ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L;
+ } catch (java.text.ParseException e) {
${ev.isNull} = true;
}
}""")
@@ -463,7 +460,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
try {
${ev.value} =
(new $sdf($format.toString())).parse($string.toString()).getTime() / 1000L;
- } catch (java.lang.Throwable e) {
+ } catch (java.lang.IllegalArgumentException e) {
+ ${ev.isNull} = true;
+ } catch (java.text.ParseException e) {
${ev.isNull} = true;
}
"""
@@ -520,6 +519,8 @@ case class FromUnixTime(sec: Expression, format: Expression)
override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType)
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
+ private lazy val formatter: SimpleDateFormat =
+ Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
override def eval(input: InternalRow): Any = {
val time = left.eval(input)
@@ -527,10 +528,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
null
} else {
if (format.foldable) {
- if (constFormat == null) {
+ if (constFormat == null || formatter == null) {
null
} else {
- Try(UTF8String.fromString(new SimpleDateFormat(constFormat.toString).format(
+ Try(UTF8String.fromString(formatter.format(
new java.util.Date(time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
}
} else {
@@ -549,11 +550,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val sdf = classOf[SimpleDateFormat].getName
if (format.foldable) {
- if (constFormat == null) {
- ev.copy(code = s"""
- boolean ${ev.isNull} = true;
- ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
+ if (formatter == null) {
+ ExprCode("", "true", "(UTF8String) null")
} else {
+ val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
val t = left.genCode(ctx)
ev.copy(code = s"""
${t.code}
@@ -561,9 +561,9 @@ case class FromUnixTime(sec: Expression, format: Expression)
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
- ${ev.value} = UTF8String.fromString(new $sdf("${constFormat.toString}").format(
+ ${ev.value} = UTF8String.fromString($formatterName.format(
new java.util.Date(${t.value} * 1000L)));
- } catch (java.lang.Throwable e) {
+ } catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}
}""")
@@ -574,7 +574,7 @@ case class FromUnixTime(sec: Expression, format: Expression)
try {
${ev.value} = UTF8String.fromString((new $sdf($f.toString())).format(
new java.util.Date($seconds * 1000L)));
- } catch (java.lang.Throwable e) {
+ } catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}""".stripMargin
})