From b0768538e56e5bbda7aaabbe2a0197e30ba5f993 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 9 Jun 2016 16:37:18 -0700 Subject: [SPARK-14321][SQL] Reduce date format cost and string-to-date cost in date functions ## What changes were proposed in this pull request? The current implementations of `UnixTime` and `FromUnixTime` do not cache their parser/formatter as much as they could. This PR resolves this issue. This PR is a takeover of https://github.com/apache/spark/pull/13522 and further optimizes the re-use of the parser/formatter. It also improves the exception handling (catching the actual exception instead of `Throwable`). All credits for this work should go to rajeshbalamohan. This PR closes https://github.com/apache/spark/pull/13522 ## How was this patch tested? Current tests. Author: Herman van Hovell Author: Rajesh Balamohan Closes #13581 from hvanhovell/SPARK-14321. --- .../catalyst/expressions/datetimeExpressions.scala | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 69c32f447e..773431dd33 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -399,6 +399,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { override def nullable: Boolean = true private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] + private lazy val formatter: SimpleDateFormat = + Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null) override def eval(input: InternalRow): Any = { val t = left.eval(input) @@ -411,11 +413,11 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { case TimestampType => 
t.asInstanceOf[Long] / 1000000L case StringType if right.foldable => - if (constFormat != null) { - Try(new SimpleDateFormat(constFormat.toString).parse( - t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null) - } else { + if (constFormat == null || formatter == null) { null + } else { + Try(formatter.parse( + t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null) } case StringType => val f = right.eval(input) @@ -434,13 +436,10 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { left.dataType match { case StringType if right.foldable => val sdf = classOf[SimpleDateFormat].getName - val fString = if (constFormat == null) null else constFormat.toString - val formatter = ctx.freshName("formatter") - if (fString == null) { - ev.copy(code = s""" - boolean ${ev.isNull} = true; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") + if (formatter == null) { + ExprCode("", "true", ctx.defaultValue(dataType)) } else { + val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val eval1 = left.genCode(ctx) ev.copy(code = s""" ${eval1.code} @@ -448,10 +447,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { try { - $sdf $formatter = new $sdf("$fString"); - ${ev.value} = - $formatter.parse(${eval1.value}.toString()).getTime() / 1000L; - } catch (java.lang.Throwable e) { + ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L; + } catch (java.text.ParseException e) { ${ev.isNull} = true; } }""") @@ -463,7 +460,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { try { ${ev.value} = (new $sdf($format.toString())).parse($string.toString()).getTime() / 1000L; - } catch (java.lang.Throwable e) { + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; + } catch (java.text.ParseException e) { ${ev.isNull} = 
true; } """ @@ -520,6 +519,8 @@ case class FromUnixTime(sec: Expression, format: Expression) override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType) private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] + private lazy val formatter: SimpleDateFormat = + Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null) override def eval(input: InternalRow): Any = { val time = left.eval(input) @@ -527,10 +528,10 @@ case class FromUnixTime(sec: Expression, format: Expression) null } else { if (format.foldable) { - if (constFormat == null) { + if (constFormat == null || formatter == null) { null } else { - Try(UTF8String.fromString(new SimpleDateFormat(constFormat.toString).format( + Try(UTF8String.fromString(formatter.format( new java.util.Date(time.asInstanceOf[Long] * 1000L)))).getOrElse(null) } } else { @@ -549,11 +550,10 @@ case class FromUnixTime(sec: Expression, format: Expression) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val sdf = classOf[SimpleDateFormat].getName if (format.foldable) { - if (constFormat == null) { - ev.copy(code = s""" - boolean ${ev.isNull} = true; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") + if (formatter == null) { + ExprCode("", "true", "(UTF8String) null") } else { + val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val t = left.genCode(ctx) ev.copy(code = s""" ${t.code} @@ -561,9 +561,9 @@ case class FromUnixTime(sec: Expression, format: Expression) ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { try { - ${ev.value} = UTF8String.fromString(new $sdf("${constFormat.toString}").format( + ${ev.value} = UTF8String.fromString($formatterName.format( new java.util.Date(${t.value} * 1000L))); - } catch (java.lang.Throwable e) { + } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; } }""") @@ -574,7 +574,7 @@ case class FromUnixTime(sec: Expression, format: 
Expression) try { ${ev.value} = UTF8String.fromString((new $sdf($f.toString())).format( new java.util.Date($seconds * 1000L))); - } catch (java.lang.Throwable e) { + } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; }""".stripMargin }) -- cgit v1.2.3