From 83b682beec884da76708769414108f4316e620f2 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sat, 18 Jul 2015 22:48:05 -0700 Subject: [SPARK-8199][SPARK-8184][SPARK-8183][SPARK-8182][SPARK-8181][SPARK-8180][SPARK-8179][SPARK-8177][SPARK-8178][SPARK-9115][SQL] date functions Jira: https://issues.apache.org/jira/browse/SPARK-8199 https://issues.apache.org/jira/browse/SPARK-8184 https://issues.apache.org/jira/browse/SPARK-8183 https://issues.apache.org/jira/browse/SPARK-8182 https://issues.apache.org/jira/browse/SPARK-8181 https://issues.apache.org/jira/browse/SPARK-8180 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-8177 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-9115 Regarding `day`and `dayofmonth` are both necessary? ~~I am going to add `Quarter` to this PR as well.~~ Done. ~~As soon as the Scala coding is reviewed and discussed, I'll add the python api.~~ Done Author: Tarek Auel Author: Tarek Auel Closes #6981 from tarekauel/SPARK-8199 and squashes the following commits: f7b4c8c [Tarek Auel] [SPARK-8199] fixed bug in tests bb567b6 [Tarek Auel] [SPARK-8199] fixed test 3e095ba [Tarek Auel] [SPARK-8199] style and timezone fix 256c357 [Tarek Auel] [SPARK-8199] code cleanup 5983dcc [Tarek Auel] [SPARK-8199] whitespace fix 6e0c78f [Tarek Auel] [SPARK-8199] removed setTimeZone in tests, according to cloud-fans comment in #7488 4afc09c [Tarek Auel] [SPARK-8199] concise leap year handling ea6c110 [Tarek Auel] [SPARK-8199] fix after merging master 70238e0 [Tarek Auel] Merge branch 'master' into SPARK-8199 3c6ae2e [Tarek Auel] [SPARK-8199] removed binary search fb98ba0 [Tarek Auel] [SPARK-8199] python docstring fix cdfae27 [Tarek Auel] [SPARK-8199] cleanup & python docstring fix 746b80a [Tarek Auel] [SPARK-8199] build fix 0ad6db8 [Tarek Auel] [SPARK-8199] minor fix 523542d [Tarek Auel] [SPARK-8199] address comments 2259299 [Tarek Auel] [SPARK-8199] day_of_month alias d01b977 [Tarek Auel] [SPARK-8199] python underscore 56c4a92 [Tarek Auel] [SPARK-8199] update python docu e223bc0 [Tarek Auel] [SPARK-8199] refactoring d6aa14e [Tarek Auel] [SPARK-8199] fixed Hive compatibility b382267 [Tarek Auel] [SPARK-8199] fixed bug in day calculation; removed set TimeZone in HiveCompatibilitySuite for test purposes; removed Hive tests for second and minute, because we can cast '2015-03-18' to a timestamp and extract a minute/second from it 1b2e540 [Tarek Auel] [SPARK-8119] style fix 0852655 [Tarek Auel] [SPARK-8119] changed from ExpectsInputTypes to implicit casts ec87c69 [Tarek Auel] [SPARK-8119] bug fixing and refactoring 1358cdc [Tarek Auel] Merge remote-tracking branch 'origin/master' into SPARK-8199 740af0e [Tarek Auel] implement date function using a calculation based on days 4fb66da [Tarek Auel] WIP: date functions on calculation only 1a436c9 [Tarek Auel] wip f775f39 [Tarek Auel] fixed return type ad17e96 [Tarek Auel] improved implementation c42b444 [Tarek Auel] Removed merge conflict file ccb723c [Tarek Auel] [SPARK-8199] style and fixed merge issues 10e4ad1 [Tarek Auel] Merge branch 'master' into date-functions-fast 7d9f0eb [Tarek Auel] [SPARK-8199] git renaming issue f3e7a9f [Tarek Auel] [SPARK-8199] revert change in DataFrameFunctionsSuite 6f5d95c [Tarek Auel] [SPARK-8199] fixed year interval d9f8ac3 [Tarek Auel] [SPARK-8199] implement fast track 7bc9d93 [Tarek Auel] Merge branch 'master' into SPARK-8199 5a105d9 [Tarek Auel] [SPARK-8199] rebase after #6985 got merged eb6760d [Tarek Auel] Merge branch 'master' into SPARK-8199 f120415 [Tarek Auel] improved runtime a8edebd [Tarek Auel] use Calendar instead of SimpleDateFormat 5fe74e1 [Tarek Auel] fixed python style 3bfac90 [Tarek Auel] fixed style 356df78 [Tarek Auel] rely on cast mechanism of Spark. Simplified implementation 02efc5d [Tarek Auel] removed doubled code a5ea120 [Tarek Auel] added python api; changed test to be more meaningful b680db6 [Tarek Auel] added codegeneration to all functions c739788 [Tarek Auel] added support for quarter SPARK-8178 849fb41 [Tarek Auel] fixed stupid test 638596f [Tarek Auel] improved codegen 4d8049b [Tarek Auel] fixed tests and added type check 5ebb235 [Tarek Auel] resolved naming conflict d0e2f99 [Tarek Auel] date functions --- python/pyspark/sql/functions.py | 150 +++++++++++++ .../sql/catalyst/analysis/FunctionRegistry.scala | 14 +- .../catalyst/expressions/datetimeFunctions.scala | 206 +++++++++++++++++ .../spark/sql/catalyst/util/DateTimeUtils.scala | 195 +++++++++++++++- .../catalyst/expressions/DateFunctionsSuite.scala | 249 +++++++++++++++++++++ .../sql/catalyst/util/DateTimeUtilsSuite.scala | 91 ++++++-- .../scala/org/apache/spark/sql/functions.scala | 176 +++++++++++++++ .../apache/spark/sql/DateExpressionsSuite.scala | 170 ++++++++++++++ .../hive/execution/HiveCompatibilitySuite.scala | 9 +- 9 files changed, 1234 insertions(+), 26 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e0816b3e65..0aca378892 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -652,6 +652,156 @@ def ntile(n): return Column(sc._jvm.functions.ntile(int(n))) +@ignore_unicode_prefix +@since(1.5) +def date_format(dateCol, format): + """ + Converts a date/timestamp/string to a value of string in the format specified by the date + format given by the second argument. + + A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + pattern letters of the Java class `java.text.SimpleDateFormat` can be used. + + NOTE: Use when ever possible specialized functions like `year`. These benefit from a + specialized implementation. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(date_format('a', 'MM/dd/yyy').alias('date')).collect() + [Row(date=u'04/08/2015')] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.date_format(dateCol, format)) + + +@since(1.5) +def year(col): + """ + Extract the year of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(year('a').alias('year')).collect() + [Row(year=2015)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.year(col)) + + +@since(1.5) +def quarter(col): + """ + Extract the quarter of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(quarter('a').alias('quarter')).collect() + [Row(quarter=2)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.quarter(col)) + + +@since(1.5) +def month(col): + """ + Extract the month of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(month('a').alias('month')).collect() + [Row(month=4)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.month(col)) + + +@since(1.5) +def day(col): + """ + Extract the day of the month of a given date as integer. + + >>> sqlContext.createDataFrame([('2015-04-08',)], ['a']).select(day('a').alias('day')).collect() + [Row(day=8)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.day(col)) + + +@since(1.5) +def day_of_month(col): + """ + Extract the day of the month of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(day_of_month('a').alias('day')).collect() + [Row(day=8)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.day_of_month(col)) + + +@since(1.5) +def day_in_year(col): + """ + Extract the day of the year of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(day_in_year('a').alias('day')).collect() + [Row(day=98)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.day_in_year(col)) + + +@since(1.5) +def hour(col): + """ + Extract the hours of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a']) + >>> df.select(hour('a').alias('hour')).collect() + [Row(hour=13)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.hour(col)) + + +@since(1.5) +def minute(col): + """ + Extract the minutes of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a']) + >>> df.select(minute('a').alias('minute')).collect() + [Row(minute=8)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.minute(col)) + + +@since(1.5) +def second(col): + """ + Extract the seconds of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a']) + >>> df.select(second('a').alias('second')).collect() + [Row(second=15)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.second(col)) + + +@since(1.5) +def week_of_year(col): + """ + Extract the week number of a given date as integer. + + >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) + >>> df.select(week_of_year('a').alias('week')).collect() + [Row(week=15)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.week_of_year(col)) + + class UserDefinedFunction(object): """ User defined function in Python diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d1cda6bc27..159f7eca7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -181,7 +181,19 @@ object FunctionRegistry { // datetime functions expression[CurrentDate]("current_date"), - expression[CurrentTimestamp]("current_timestamp") + expression[CurrentTimestamp]("current_timestamp"), + expression[DateFormatClass]("date_format"), + expression[Day]("day"), + expression[DayInYear]("day_in_year"), + expression[Day]("day_of_month"), + expression[Hour]("hour"), + expression[Month]("month"), + expression[Minute]("minute"), + expression[Quarter]("quarter"), + expression[Second]("second"), + expression[WeekOfYear]("week_of_year"), + expression[Year]("year") + ) val builtin: FunctionRegistry = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala index 4bed140cff..f9cbbb8c6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala @@ -17,10 +17,16 @@ package org.apache.spark.sql.catalyst.expressions +import java.sql.Date +import java.text.SimpleDateFormat +import java.util.{Calendar, TimeZone} + +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * Returns the current date at the start of query evaluation. @@ -55,3 +61,203 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback { System.currentTimeMillis() * 1000L } } + +case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getHours(timestamp.asInstanceOf[Long]) + } + + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getHours($c)""" + ) + } +} + +case class Minute(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getMinutes(timestamp.asInstanceOf[Long]) + } + + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getMinutes($c)""" + ) + } +} + +case class Second(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getSeconds(timestamp.asInstanceOf[Long]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getSeconds($c)""" + ) + } +} + +case class DayInYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override def prettyName: String = "day_in_year" + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getDayInYear(date.asInstanceOf[Int]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getDayInYear($c)""" + ) + } +} + + +case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getYear(date.asInstanceOf[Int]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getYear($c)""" + ) + } +} + +case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getQuarter(date.asInstanceOf[Int]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getQuarter($c)""" + ) + } +} + +case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getMonth(date.asInstanceOf[Int]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getMonth($c)""" + ) + } +} + +case class Day(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getDayOfMonth(date.asInstanceOf[Int]) + } + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (c) => + s"""$dtu.getDayOfMonth($c)""" + ) + } +} + +case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override def prettyName: String = "week_of_year" + + override protected def nullSafeEval(date: Any): Any = { + val c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c.setFirstDayOfWeek(Calendar.MONDAY) + c.setMinimalDaysInFirstWeek(4) + c.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) + c.get(Calendar.WEEK_OF_YEAR) + } + + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = + nullSafeCodeGen(ctx, ev, (time) => { + val cal = classOf[Calendar].getName + val c = ctx.freshName("cal") + s""" + $cal $c = $cal.getInstance(java.util.TimeZone.getTimeZone("UTC")); + $c.setFirstDayOfWeek($cal.MONDAY); + $c.setMinimalDaysInFirstWeek(4); + $c.setTimeInMillis($time * 1000L * 3600L * 24L); + ${ev.primitive} = $c.get($cal.WEEK_OF_YEAR); + """ + }) +} + +case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression + with ImplicitCastInputTypes { + + override def dataType: DataType = StringType + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType) + + override def prettyName: String = "date_format" + + override protected def nullSafeEval(timestamp: Any, format: Any): Any = { + val sdf = new SimpleDateFormat(format.toString) + UTF8String.fromString(sdf.format(new Date(timestamp.asInstanceOf[Long] / 1000))) + } + + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val sdf = classOf[SimpleDateFormat].getName + defineCodeGen(ctx, ev, (timestamp, format) => { + s"""UTF8String.fromString((new $sdf($format.toString())) + .format(new java.sql.Date($timestamp / 1000)))""" + }) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 45e45aef1a..a0da73a995 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} -import java.util.{Calendar, TimeZone} +import java.util.{TimeZone, Calendar} import org.apache.spark.unsafe.types.UTF8String @@ -39,6 +39,15 @@ object DateTimeUtils { final val MICROS_PER_SECOND = 1000L * 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L + // number of days in 400 years + final val daysIn400Years: Int = 146097 + // number of days between 1.1.1970 and 1.1.2001 + final val to2001 = -11323 + + // this is year -17999, calculation: 50 * daysIn400Year + final val toYearZero = to2001 + 7304850 + + @transient lazy val defaultTimeZone = TimeZone.getDefault // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] { @@ -380,4 +389,188 @@ object DateTimeUtils { c.set(Calendar.MILLISECOND, 0) Some((c.getTimeInMillis / MILLIS_PER_DAY).toInt) } + + /** + * Returns the hour value of a given timestamp value. The timestamp is expressed in microseconds. + */ + def getHours(timestamp: Long): Int = { + val localTs = (timestamp / 1000) + defaultTimeZone.getOffset(timestamp / 1000) + ((localTs / 1000 / 3600) % 24).toInt + } + + /** + * Returns the minute value of a given timestamp value. The timestamp is expressed in + * microseconds. + */ + def getMinutes(timestamp: Long): Int = { + val localTs = (timestamp / 1000) + defaultTimeZone.getOffset(timestamp / 1000) + ((localTs / 1000 / 60) % 60).toInt + } + + /** + * Returns the second value of a given timestamp value. The timestamp is expressed in + * microseconds. + */ + def getSeconds(timestamp: Long): Int = { + ((timestamp / 1000 / 1000) % 60).toInt + } + + private[this] def isLeapYear(year: Int): Boolean = { + (year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0) + } + + /** + * Return the number of days since the start of 400 year period. + * The second year of a 400 year period (year 1) starts on day 365. + */ + private[this] def yearBoundary(year: Int): Int = { + year * 365 + ((year / 4 ) - (year / 100) + (year / 400)) + } + + /** + * Calculates the number of years for the given number of days. This depends + * on a 400 year period. + * @param days days since the beginning of the 400 year period + * @return (number of year, days in year) + */ + private[this] def numYears(days: Int): (Int, Int) = { + val year = days / 365 + val boundary = yearBoundary(year) + if (days > boundary) (year, days - boundary) else (year - 1, days - yearBoundary(year - 1)) + } + + /** + * Calculates the year and and the number of the day in the year for the given + * number of days. The given days is the number of days since 1.1.1970. + * + * The calculation uses the fact that the period 1.1.2001 until 31.12.2400 is + * equals to the period 1.1.1601 until 31.12.2000. + */ + private[this] def getYearAndDayInYear(daysSince1970: Int): (Int, Int) = { + // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) + val daysNormalized = daysSince1970 + toYearZero + val numOfQuarterCenturies = daysNormalized / daysIn400Years + val daysInThis400 = daysNormalized % daysIn400Years + 1 + val (years, dayInYear) = numYears(daysInThis400) + val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years + (year, dayInYear) + } + + /** + * Returns the 'day in year' value for the given date. The date is expressed in days + * since 1.1.1970. + */ + def getDayInYear(date: Int): Int = { + getYearAndDayInYear(date)._2 + } + + /** + * Returns the year value for the given date. The date is expressed in days + * since 1.1.1970. + */ + def getYear(date: Int): Int = { + getYearAndDayInYear(date)._1 + } + + /** + * Returns the quarter for the given date. The date is expressed in days + * since 1.1.1970. + */ + def getQuarter(date: Int): Int = { + var (year, dayInYear) = getYearAndDayInYear(date) + if (isLeapYear(year)) { + dayInYear = dayInYear - 1 + } + if (dayInYear <= 90) { + 1 + } else if (dayInYear <= 181) { + 2 + } else if (dayInYear <= 273) { + 3 + } else { + 4 + } + } + + /** + * Returns the month value for the given date. The date is expressed in days + * since 1.1.1970. January is month 1. + */ + def getMonth(date: Int): Int = { + var (year, dayInYear) = getYearAndDayInYear(date) + if (isLeapYear(year)) { + if (dayInYear == 60) { + return 2 + } else if (dayInYear > 60) { + dayInYear = dayInYear - 1 + } + } + + if (dayInYear <= 31) { + 1 + } else if (dayInYear <= 59) { + 2 + } else if (dayInYear <= 90) { + 3 + } else if (dayInYear <= 120) { + 4 + } else if (dayInYear <= 151) { + 5 + } else if (dayInYear <= 181) { + 6 + } else if (dayInYear <= 212) { + 7 + } else if (dayInYear <= 243) { + 8 + } else if (dayInYear <= 273) { + 9 + } else if (dayInYear <= 304) { + 10 + } else if (dayInYear <= 334) { + 11 + } else { + 12 + } + } + + /** + * Returns the 'day of month' value for the given date. The date is expressed in days + * since 1.1.1970. + */ + def getDayOfMonth(date: Int): Int = { + var (year, dayInYear) = getYearAndDayInYear(date) + if (isLeapYear(year)) { + if (dayInYear == 60) { + return 29 + } else if (dayInYear > 60) { + dayInYear = dayInYear - 1 + } + } + + if (dayInYear <= 31) { + dayInYear + } else if (dayInYear <= 59) { + dayInYear - 31 + } else if (dayInYear <= 90) { + dayInYear - 59 + } else if (dayInYear <= 120) { + dayInYear - 90 + } else if (dayInYear <= 151) { + dayInYear - 120 + } else if (dayInYear <= 181) { + dayInYear - 151 + } else if (dayInYear <= 212) { + dayInYear - 181 + } else if (dayInYear <= 243) { + dayInYear - 212 + } else if (dayInYear <= 273) { + dayInYear - 243 + } else if (dayInYear <= 304) { + dayInYear - 273 + } else if (dayInYear <= 334) { + dayInYear - 304 + } else { + dayInYear - 334 + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala new file mode 100644 index 0000000000..49d0b0acea --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.sql.{Timestamp, Date} +import java.text.SimpleDateFormat +import java.util.{TimeZone, Calendar} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types.{StringType, TimestampType, DateType} + +class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdfDate = new SimpleDateFormat("yyyy-MM-dd") + val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) + val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) + + test("Day in Year") { + val sdfDay = new SimpleDateFormat("D") + (2002 to 2004).foreach { y => + (0 to 11).foreach { m => + (0 to 5).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.DATE, i) + checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + sdfDay.format(c.getTime).toInt) + } + } + } + + (1998 to 2002).foreach { y => + (0 to 11).foreach { m => + (0 to 5).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.DATE, 1) + checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + sdfDay.format(c.getTime).toInt) + } + } + } + + (1969 to 1970).foreach { y => + (0 to 11).foreach { m => + (0 to 5).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.DATE, 1) + checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + sdfDay.format(c.getTime).toInt) + } + } + } + + (2402 to 2404).foreach { y => + (0 to 11).foreach { m => + (0 to 5).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.DATE, 1) + checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + sdfDay.format(c.getTime).toInt) + } + } + } + + (2398 to 2402).foreach { y => + (0 to 11).foreach { m => + (0 to 5).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.DATE, 1) + checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + sdfDay.format(c.getTime).toInt) + } + } + } + } + + test("Year") { + checkEvaluation(Year(Literal.create(null, DateType)), null) + checkEvaluation(Year(Cast(Literal(d), DateType)), 2015) + checkEvaluation(Year(Cast(Literal(sdfDate.format(d)), DateType)), 2015) + checkEvaluation(Year(Cast(Literal(ts), DateType)), 2013) + + val c = Calendar.getInstance() + (2000 to 2010).foreach { y => + (0 to 11 by 11).foreach { m => + c.set(y, m, 28) + (0 to 5 * 24).foreach { i => + c.add(Calendar.HOUR_OF_DAY, 1) + checkEvaluation(Year(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + c.get(Calendar.YEAR)) + } + } + } + } + + test("Quarter") { + checkEvaluation(Quarter(Literal.create(null, DateType)), null) + checkEvaluation(Quarter(Cast(Literal(d), DateType)), 2) + checkEvaluation(Quarter(Cast(Literal(sdfDate.format(d)), DateType)), 2) + checkEvaluation(Quarter(Cast(Literal(ts), DateType)), 4) + + val c = Calendar.getInstance() + (2003 to 2004).foreach { y => + (0 to 11 by 3).foreach { m => + c.set(y, m, 28, 0, 0, 0) + (0 to 5 * 24).foreach { i => + c.add(Calendar.HOUR_OF_DAY, 1) + checkEvaluation(Quarter(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + c.get(Calendar.MONTH) / 3 + 1) + } + } + } + } + + test("Month") { + checkEvaluation(Month(Literal.create(null, DateType)), null) + checkEvaluation(Month(Cast(Literal(d), DateType)), 4) + checkEvaluation(Month(Cast(Literal(sdfDate.format(d)), DateType)), 4) + checkEvaluation(Month(Cast(Literal(ts), DateType)), 11) + + (2003 to 2004).foreach { y => + (0 to 11).foreach { m => + (0 to 5 * 24).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.HOUR_OF_DAY, i) + checkEvaluation(Month(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + c.get(Calendar.MONTH) + 1) + } + } + } + + (1999 to 2000).foreach { y => + (0 to 11).foreach { m => + (0 to 5 * 24).foreach { i => + val c = Calendar.getInstance() + c.set(y, m, 28, 0, 0, 0) + c.add(Calendar.HOUR_OF_DAY, i) + checkEvaluation(Month(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + c.get(Calendar.MONTH) + 1) + } + } + } + } + + test("Day") { + checkEvaluation(Day(Cast(Literal("2000-02-29"), DateType)), 29) + checkEvaluation(Day(Literal.create(null, DateType)), null) + checkEvaluation(Day(Cast(Literal(d), DateType)), 8) + checkEvaluation(Day(Cast(Literal(sdfDate.format(d)), DateType)), 8) + checkEvaluation(Day(Cast(Literal(ts), DateType)), 8) + + (1999 to 2000).foreach { y => + val c = Calendar.getInstance() + c.set(y, 0, 1, 0, 0, 0) + (0 to 365).foreach { d => + c.add(Calendar.DATE, 1) + checkEvaluation(Day(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + c.get(Calendar.DAY_OF_MONTH)) + } + } + } + + test("Seconds") { + checkEvaluation(Second(Literal.create(null, DateType)), null) + checkEvaluation(Second(Cast(Literal(d), TimestampType)), 0) + checkEvaluation(Second(Cast(Literal(sdf.format(d)), TimestampType)), 15) + checkEvaluation(Second(Literal(ts)), 15) + + val c = Calendar.getInstance() + (0 to 60 by 5).foreach { s => + c.set(2015, 18, 3, 3, 5, s) + checkEvaluation(Second(Cast(Literal(new Timestamp(c.getTimeInMillis)), TimestampType)), + c.get(Calendar.SECOND)) + } + } + + test("WeekOfYear") { + checkEvaluation(WeekOfYear(Literal.create(null, DateType)), null) + checkEvaluation(WeekOfYear(Cast(Literal(d), DateType)), 15) + checkEvaluation(WeekOfYear(Cast(Literal(sdfDate.format(d)), DateType)), 15) + checkEvaluation(WeekOfYear(Cast(Literal(ts), DateType)), 45) + checkEvaluation(WeekOfYear(Cast(Literal("2011-05-06"), DateType)), 18) + } + + test("DateFormat") { + checkEvaluation(DateFormatClass(Literal.create(null, TimestampType), Literal("y")), null) + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType), + Literal.create(null, StringType)), null) + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType), + Literal("y")), "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y")), "2013") + } + + test("Hour") { + checkEvaluation(Hour(Literal.create(null, DateType)), null) + checkEvaluation(Hour(Cast(Literal(d), TimestampType)), 0) + checkEvaluation(Hour(Cast(Literal(sdf.format(d)), TimestampType)), 13) + checkEvaluation(Hour(Literal(ts)), 13) + + val c = Calendar.getInstance() + (0 to 24).foreach { h => + (0 to 60 by 15).foreach { m => + (0 to 60 by 15).foreach { s => + c.set(2015, 18, 3, h, m, s) + checkEvaluation(Hour(Cast(Literal(new Timestamp(c.getTimeInMillis)), TimestampType)), + c.get(Calendar.HOUR_OF_DAY)) + } + } + } + } + + test("Minute") { + checkEvaluation(Minute(Literal.create(null, DateType)), null) + checkEvaluation(Minute(Cast(Literal(d), TimestampType)), 0) + checkEvaluation(Minute(Cast(Literal(sdf.format(d)), TimestampType)), 10) + checkEvaluation(Minute(Literal(ts)), 10) + + val c = Calendar.getInstance() + (0 to 60 by 5).foreach { m => + (0 to 60 by 15).foreach { s => + c.set(2015, 18, 3, 3, m, s) + checkEvaluation(Minute(Cast(Literal(new Timestamp(c.getTimeInMillis)), TimestampType)), + c.get(Calendar.MINUTE)) + } + } + } + +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 04c5f09792..fab9eb9cd4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -26,6 +26,11 @@ import org.apache.spark.unsafe.types.UTF8String class DateTimeUtilsSuite extends SparkFunSuite { + private[this] def getInUTCDays(timestamp: Long): Int = { + val tz = TimeZone.getDefault + ((timestamp + tz.getOffset(timestamp)) / DateTimeUtils.MILLIS_PER_DAY).toInt + } + test("timestamp and us") { val now = new Timestamp(System.currentTimeMillis()) now.setNanos(1000) @@ -277,28 +282,6 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(DateTimeUtils.stringToTimestamp( UTF8String.fromString("2011-05-06 07:08:09.1000")).get === c.getTimeInMillis * 1000) - val defaultTimeZone = TimeZone.getDefault - TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) - - c = Calendar.getInstance() - c.set(2015, 2, 8, 2, 0, 0) - c.set(Calendar.MILLISECOND, 0) - assert(DateTimeUtils.stringToTimestamp( - UTF8String.fromString("2015-3-8 2:0:0")).get === c.getTimeInMillis * 1000) - c.add(Calendar.MINUTE, 30) - assert(DateTimeUtils.stringToTimestamp( - UTF8String.fromString("2015-3-8 3:30:0")).get === c.getTimeInMillis * 1000) - assert(DateTimeUtils.stringToTimestamp( - UTF8String.fromString("2015-3-8 2:30:0")).get === c.getTimeInMillis * 1000) - - c = Calendar.getInstance() - c.set(2015, 10, 1, 1, 59, 0) - c.set(Calendar.MILLISECOND, 0) - c.add(Calendar.MINUTE, 31) - assert(DateTimeUtils.stringToTimestamp( - UTF8String.fromString("2015-11-1 2:30:0")).get === c.getTimeInMillis * 1000) - TimeZone.setDefault(defaultTimeZone) - assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("238")).isEmpty) assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18 123142")).isEmpty) assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T123123")).isEmpty) @@ -314,4 +297,68 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(DateTimeUtils.stringToTimestamp( UTF8String.fromString("2015-03-18T12:03.17-1:0:0")).isEmpty) } + + test("hours") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 13, 2, 11) + assert(DateTimeUtils.getHours(c.getTimeInMillis * 1000) === 13) + c.set(2015, 12, 8, 2, 7, 9) + assert(DateTimeUtils.getHours(c.getTimeInMillis * 1000) === 2) + } + + test("minutes") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 13, 2, 11) + assert(DateTimeUtils.getMinutes(c.getTimeInMillis * 1000) === 2) + c.set(2015, 2, 8, 2, 7, 9) + assert(DateTimeUtils.getMinutes(c.getTimeInMillis * 1000) === 7) + } + + test("seconds") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 13, 2, 11) + assert(DateTimeUtils.getSeconds(c.getTimeInMillis * 1000) === 11) + c.set(2015, 2, 8, 2, 7, 9) + assert(DateTimeUtils.getSeconds(c.getTimeInMillis * 1000) === 9) + } + + test("get day in year") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getDayInYear(getInUTCDays(c.getTimeInMillis)) === 77) + c.set(2012, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78) + } + + test("get year") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getYear(getInUTCDays(c.getTimeInMillis)) === 2015) + c.set(2012, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getYear(getInUTCDays(c.getTimeInMillis)) === 2012) + } + + test("get quarter") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getQuarter(getInUTCDays(c.getTimeInMillis)) === 1) + c.set(2012, 11, 18, 0, 0, 0) + assert(DateTimeUtils.getQuarter(getInUTCDays(c.getTimeInMillis)) === 4) + } + + test("get month") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getMonth(getInUTCDays(c.getTimeInMillis)) === 3) + c.set(2012, 11, 18, 0, 0, 0) + assert(DateTimeUtils.getMonth(getInUTCDays(c.getTimeInMillis)) === 12) + } + + test("get day of month") { + val c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + assert(DateTimeUtils.getDayOfMonth(getInUTCDays(c.getTimeInMillis)) === 18) + c.set(2012, 11, 24, 0, 0, 0) + assert(DateTimeUtils.getDayOfMonth(getInUTCDays(c.getTimeInMillis)) === 24) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c180407389..cadb25d597 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1748,6 +1748,182 @@ object functions { */ def length(columnName: String): Column = length(Column(columnName)) + ////////////////////////////////////////////////////////////////////////////////////////////// + // DateTime functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateExpr: Column, format: String): Column = + DateFormatClass(dateExpr.expr, Literal(format)) + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateColumnName: String, format: String): Column = + date_format(Column(dateColumnName), format) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(e: Column): Column = Year(e.expr) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(columnName: String): Column = year(Column(columnName)) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(e: Column): Column = Quarter(e.expr) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(columnName: String): Column = quarter(Column(columnName)) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(e: Column): Column = Month(e.expr) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(columnName: String): Column = month(Column(columnName)) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day(e: Column): Column = Day(e.expr) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day(columnName: String): Column = day(Column(columnName)) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_of_month(e: Column): Column = Day(e.expr) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_of_month(columnName: String): Column = day_of_month(Column(columnName)) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_in_year(e: Column): Column = DayInYear(e.expr) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_in_year(columnName: String): Column = day_in_year(Column(columnName)) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(e: Column): Column = Hour(e.expr) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(columnName: String): Column = hour(Column(columnName)) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(e: Column): Column = Minute(e.expr) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(columnName: String): Column = minute(Column(columnName)) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(e: Column): Column = Second(e.expr) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(columnName: String): Column = second(Column(columnName)) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def week_of_year(e: Column): Column = WeekOfYear(e.expr) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def week_of_year(columnName: String): Column = week_of_year(Column(columnName)) + /** * Formats the number X to a format like '#,###,###.##', rounded to d decimal places, * and returns the result as a string. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala new file mode 100644 index 0000000000..d24e3ee1dd --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.sql.{Timestamp, Date} +import java.text.SimpleDateFormat + +import org.apache.spark.sql.functions._ + +class DateExpressionsSuite extends QueryTest { + private lazy val ctx = org.apache.spark.sql.test.TestSQLContext + + import ctx.implicits._ + + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdfDate = new SimpleDateFormat("yyyy-MM-dd") + val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) + val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime) + + + test("date format") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(date_format("a", "y"), date_format("b", "y"), date_format("c", "y")), + Row("2015", "2015", "2013")) + + checkAnswer( + df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"), + Row("2015", "2015", "2013")) + } + + test("year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(year("a"), year("b"), year("c")), + Row(2015, 2015, 2013)) + + checkAnswer( + df.selectExpr("year(a)", "year(b)", "year(c)"), + Row(2015, 2015, 2013)) + } + + test("quarter") { + val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) + + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(quarter("a"), quarter("b"), quarter("c")), + Row(2, 2, 4)) + + checkAnswer( + df.selectExpr("quarter(a)", "quarter(b)", "quarter(c)"), + Row(2, 2, 4)) + } + + test("month") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(month("a"), month("b"), month("c")), + Row(4, 4, 4)) + + checkAnswer( + df.selectExpr("month(a)", "month(b)", "month(c)"), + Row(4, 4, 4)) + } + + test("day") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day("a"), day("b"), day("c")), + Row(8, 8, 8)) + + checkAnswer( + df.selectExpr("day(a)", "day(b)", "day(c)"), + Row(8, 8, 8)) + } + + test("day of month") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day_of_month("a"), day_of_month("b"), day_of_month("c")), + Row(8, 8, 8)) + + checkAnswer( + df.selectExpr("day_of_month(a)", "day_of_month(b)", "day_of_month(c)"), + Row(8, 8, 8)) + } + + test("day in year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day_in_year("a"), day_in_year("b"), day_in_year("c")), + Row(98, 98, 98)) + + checkAnswer( + df.selectExpr("day_in_year(a)", "day_in_year(b)", "day_in_year(c)"), + Row(98, 98, 98)) + } + + test("hour") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(hour("a"), hour("b"), hour("c")), + Row(0, 13, 13)) + + checkAnswer( + df.selectExpr("hour(a)", "hour(b)", "hour(c)"), + Row(0, 13, 13)) + } + + test("minute") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(minute("a"), minute("b"), minute("c")), + Row(0, 10, 10)) + + checkAnswer( + df.selectExpr("minute(a)", "minute(b)", "minute(c)"), + Row(0, 10, 10)) + } + + test("second") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(second("a"), second("b"), second("c")), + Row(0, 15, 15)) + + checkAnswer( + df.selectExpr("second(a)", "second(b)", "second(c)"), + Row(0, 15, 15)) + } + + test("week of year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(week_of_year("a"), week_of_year("b"), week_of_year("c")), + Row(15, 15, 15)) + + checkAnswer( + df.selectExpr("week_of_year(a)", "week_of_year(b)", "week_of_year(c)"), + Row(15, 15, 15)) + } + +} diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 299cc599ff..2689d904d6 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -115,6 +115,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // This test is totally fine except that it includes wrong queries and expects errors, but error // message format in Hive and Spark SQL differ. Should workaround this later. "udf_to_unix_timestamp", + // we can cast dates likes '2015-03-18' to a timestamp and extract the seconds. + // Hive returns null for second('2015-03-18') + "udf_second", + // we can cast dates likes '2015-03-18' to a timestamp and extract the minutes. + // Hive returns null for minute('2015-03-18') + "udf_minute", + // Cant run without local map/reduce. "index_auto_update", @@ -896,7 +903,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_lpad", "udf_ltrim", "udf_map", - "udf_minute", "udf_modulo", "udf_month", "udf_named_struct", @@ -923,7 +929,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_round_3", "udf_rpad", "udf_rtrim", - "udf_second", "udf_sign", "udf_sin", "udf_smallint", -- cgit v1.2.3