diff options
author | Reynold Xin <rxin@databricks.com> | 2015-07-19 01:17:22 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-19 01:17:22 -0700 |
commit | 3427937ea2a4ed19142bd3d66707864879417d61 (patch) | |
tree | 3713bf77e9894e73c2b436f9f79ccf29eabb9713 | |
parent | a53d13f7aa5d44c706e5510f57399a32c7558b80 (diff) | |
download | spark-3427937ea2a4ed19142bd3d66707864879417d61.tar.gz spark-3427937ea2a4ed19142bd3d66707864879417d61.tar.bz2 spark-3427937ea2a4ed19142bd3d66707864879417d61.zip |
[SQL] Make date/time functions more consistent with other database systems.
This pull request fixes some of the problems in #6981.
- Added the date functions to `__all__` in Python so they are exported
- Renamed day_of_month -> dayofmonth
- Renamed day_in_year -> dayofyear
- Renamed week_of_year -> weekofyear
- Removed "day" from the Scala/Python API since it is ambiguous; it remains available only as an alias in SQL.
Author: Reynold Xin <rxin@databricks.com>
This patch had conflicts when merged, resolved by
Committer: Reynold Xin <rxin@databricks.com>
Closes #7506 from rxin/datetime and squashes the following commits:
0cb24d9 [Reynold Xin] Export all functions in Python.
e44a4a0 [Reynold Xin] Removed day function from Scala and Python.
9c08fdc [Reynold Xin] [SQL] Make date/time functions more consistent with other database systems.
-rw-r--r-- | python/pyspark/sql/functions.py | 35 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 8 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala | 13 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala (renamed from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala) | 26 | ||||
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 338 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameDateSuite.scala | 56 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala (renamed from sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala) | 61 |
8 files changed, 239 insertions, 302 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 0aca378892..fd5a3ba8ad 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -55,6 +55,11 @@ __all__ = [ __all__ += ['lag', 'lead', 'ntile'] +__all__ += [ + 'date_format', + 'year', 'quarter', 'month', 'hour', 'minute', 'second', + 'dayofmonth', 'dayofyear', 'weekofyear'] + def _create_function(name, doc=""): """ Create a function for aggregator by name""" @@ -713,41 +718,29 @@ def month(col): @since(1.5) -def day(col): - """ - Extract the day of the month of a given date as integer. - - >>> sqlContext.createDataFrame([('2015-04-08',)], ['a']).select(day('a').alias('day')).collect() - [Row(day=8)] - """ - sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.day(col)) - - -@since(1.5) -def day_of_month(col): +def dayofmonth(col): """ Extract the day of the month of a given date as integer. >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) - >>> df.select(day_of_month('a').alias('day')).collect() + >>> df.select(dayofmonth('a').alias('day')).collect() [Row(day=8)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.day_of_month(col)) + return Column(sc._jvm.functions.dayofmonth(col)) @since(1.5) -def day_in_year(col): +def dayofyear(col): """ Extract the day of the year of a given date as integer. >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) - >>> df.select(day_in_year('a').alias('day')).collect() + >>> df.select(dayofyear('a').alias('day')).collect() [Row(day=98)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.day_in_year(col)) + return Column(sc._jvm.functions.dayofyear(col)) @since(1.5) @@ -790,16 +783,16 @@ def second(col): @since(1.5) -def week_of_year(col): +def weekofyear(col): """ Extract the week number of a given date as integer. 
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a']) - >>> df.select(week_of_year('a').alias('week')).collect() + >>> df.select(weekofyear('a').alias('week')).collect() [Row(week=15)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.week_of_year(col)) + return Column(sc._jvm.functions.weekofyear(col)) class UserDefinedFunction(object): diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 159f7eca7a..4b256adcc6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -183,15 +183,15 @@ object FunctionRegistry { expression[CurrentDate]("current_date"), expression[CurrentTimestamp]("current_timestamp"), expression[DateFormatClass]("date_format"), - expression[Day]("day"), - expression[DayInYear]("day_in_year"), - expression[Day]("day_of_month"), + expression[DayOfMonth]("day"), + expression[DayOfYear]("dayofyear"), + expression[DayOfMonth]("dayofmonth"), expression[Hour]("hour"), expression[Month]("month"), expression[Minute]("minute"), expression[Quarter]("quarter"), expression[Second]("second"), - expression[WeekOfYear]("week_of_year"), + expression[WeekOfYear]("weekofyear"), expression[Year]("year") ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala index f9cbbb8c6b..8024455092 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala @@ -116,14 +116,12 @@ case class Second(child: Expression) extends UnaryExpression with ImplicitCastIn } } 
-case class DayInYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { +case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) override def dataType: DataType = IntegerType - override def prettyName: String = "day_in_year" - override protected def nullSafeEval(date: Any): Any = { DateTimeUtils.getDayInYear(date.asInstanceOf[Int]) } @@ -149,7 +147,7 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, (c) => + defineCodeGen(ctx, ev, c => s"""$dtu.getYear($c)""" ) } @@ -191,7 +189,7 @@ case class Month(child: Expression) extends UnaryExpression with ImplicitCastInp } } -case class Day(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { +case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) @@ -215,8 +213,6 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa override def dataType: DataType = IntegerType - override def prettyName: String = "week_of_year" - override protected def nullSafeEval(date: Any): Any = { val c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) c.setFirstDayOfWeek(Calendar.MONDAY) @@ -225,7 +221,7 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa c.get(Calendar.WEEK_OF_YEAR) } - override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { nullSafeCodeGen(ctx, ev, (time) => { val cal = classOf[Calendar].getName val c = ctx.freshName("cal") @@ -237,6 +233,7 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with 
ImplicitCa ${ev.primitive} = $c.get($cal.WEEK_OF_YEAR); """ }) + } } case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index a0da73a995..07412e73b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -31,14 +31,14 @@ import org.apache.spark.unsafe.types.UTF8String * precision. */ object DateTimeUtils { - final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L - // see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian final val JULIAN_DAY_OF_EPOCH = 2440587 // and .5 final val SECONDS_PER_DAY = 60 * 60 * 24L final val MICROS_PER_SECOND = 1000L * 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L + final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L + // number of days in 400 years final val daysIn400Years: Int = 146097 // number of days between 1.1.1970 and 1.1.2001 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a0991ec998..f01589c58e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,19 +19,19 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Timestamp, Date} import java.text.SimpleDateFormat -import java.util.{TimeZone, Calendar} +import java.util.Calendar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{StringType, TimestampType, DateType} -class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { 
+class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val sdfDate = new SimpleDateFormat("yyyy-MM-dd") val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) - test("Day in Year") { + test("DayOfYear") { val sdfDay = new SimpleDateFormat("D") (2002 to 2004).foreach { y => (0 to 11).foreach { m => @@ -39,7 +39,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { val c = Calendar.getInstance() c.set(y, m, 28, 0, 0, 0) c.add(Calendar.DATE, i) - checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), sdfDay.format(c.getTime).toInt) } } @@ -51,7 +51,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { val c = Calendar.getInstance() c.set(y, m, 28, 0, 0, 0) c.add(Calendar.DATE, i) - checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), sdfDay.format(c.getTime).toInt) } } @@ -63,7 +63,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { val c = Calendar.getInstance() c.set(y, m, 28, 0, 0, 0) c.add(Calendar.DATE, i) - checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), sdfDay.format(c.getTime).toInt) } } @@ -163,19 +163,19 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } - test("Day") { - checkEvaluation(Day(Cast(Literal("2000-02-29"), DateType)), 29) - checkEvaluation(Day(Literal.create(null, DateType)), null) - checkEvaluation(Day(Cast(Literal(d), DateType)), 8) - checkEvaluation(Day(Cast(Literal(sdfDate.format(d)), DateType)), 8) - checkEvaluation(Day(Cast(Literal(ts), DateType)), 8) + 
test("Day / DayOfMonth") { + checkEvaluation(DayOfMonth(Cast(Literal("2000-02-29"), DateType)), 29) + checkEvaluation(DayOfMonth(Literal.create(null, DateType)), null) + checkEvaluation(DayOfMonth(Cast(Literal(d), DateType)), 8) + checkEvaluation(DayOfMonth(Cast(Literal(sdfDate.format(d)), DateType)), 8) + checkEvaluation(DayOfMonth(Cast(Literal(ts), DateType)), 8) (1999 to 2000).foreach { y => val c = Calendar.getInstance() c.set(y, 0, 1, 0, 0, 0) (0 to 365).foreach { d => c.add(Calendar.DATE, 1) - checkEvaluation(Day(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), + checkEvaluation(DayOfMonth(Cast(Literal(new Date(c.getTimeInMillis)), DateType)), c.get(Calendar.DAY_OF_MONTH)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index cadb25d597..f67c89437b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1748,182 +1748,6 @@ object functions { */ def length(columnName: String): Column = length(Column(columnName)) - ////////////////////////////////////////////////////////////////////////////////////////////// - // DateTime functions - ////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Converts a date/timestamp/string to a value of string in the format specified by the date - * format given by the second argument. - * - * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All - * pattern letters of [[java.text.SimpleDateFormat]] can be used. - * - * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a - * specialized implementation. 
- * - * @group datetime_funcs - * @since 1.5.0 - */ - def date_format(dateExpr: Column, format: String): Column = - DateFormatClass(dateExpr.expr, Literal(format)) - - /** - * Converts a date/timestamp/string to a value of string in the format specified by the date - * format given by the second argument. - * - * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All - * pattern letters of [[java.text.SimpleDateFormat]] can be used. - * - * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a - * specialized implementation. - * - * @group datetime_funcs - * @since 1.5.0 - */ - def date_format(dateColumnName: String, format: String): Column = - date_format(Column(dateColumnName), format) - - /** - * Extracts the year as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def year(e: Column): Column = Year(e.expr) - - /** - * Extracts the year as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def year(columnName: String): Column = year(Column(columnName)) - - /** - * Extracts the quarter as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def quarter(e: Column): Column = Quarter(e.expr) - - /** - * Extracts the quarter as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def quarter(columnName: String): Column = quarter(Column(columnName)) - - /** - * Extracts the month as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def month(e: Column): Column = Month(e.expr) - - /** - * Extracts the month as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def month(columnName: String): Column = month(Column(columnName)) - - /** - * Extracts the day of the month as an integer from a given date/timestamp/string. 
- * @group datetime_funcs - * @since 1.5.0 - */ - def day(e: Column): Column = Day(e.expr) - - /** - * Extracts the day of the month as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def day(columnName: String): Column = day(Column(columnName)) - - /** - * Extracts the day of the month as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def day_of_month(e: Column): Column = Day(e.expr) - - /** - * Extracts the day of the month as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def day_of_month(columnName: String): Column = day_of_month(Column(columnName)) - - /** - * Extracts the day of the year as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def day_in_year(e: Column): Column = DayInYear(e.expr) - - /** - * Extracts the day of the year as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def day_in_year(columnName: String): Column = day_in_year(Column(columnName)) - - /** - * Extracts the hours as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def hour(e: Column): Column = Hour(e.expr) - - /** - * Extracts the hours as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def hour(columnName: String): Column = hour(Column(columnName)) - - /** - * Extracts the minutes as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def minute(e: Column): Column = Minute(e.expr) - - /** - * Extracts the minutes as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def minute(columnName: String): Column = minute(Column(columnName)) - - /** - * Extracts the seconds as an integer from a given date/timestamp/string. 
- * @group datetime_funcs - * @since 1.5.0 - */ - def second(e: Column): Column = Second(e.expr) - - /** - * Extracts the seconds as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def second(columnName: String): Column = second(Column(columnName)) - - /** - * Extracts the week number as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def week_of_year(e: Column): Column = WeekOfYear(e.expr) - - /** - * Extracts the week number as an integer from a given date/timestamp/string. - * @group datetime_funcs - * @since 1.5.0 - */ - def week_of_year(columnName: String): Column = week_of_year(Column(columnName)) - /** * Formats the number X to a format like '#,###,###.##', rounded to d decimal places, * and returns the result as a string. @@ -2410,6 +2234,168 @@ object functions { } ////////////////////////////////////////////////////////////////////////////////////////////// + // DateTime functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateExpr: Column, format: String): Column = + DateFormatClass(dateExpr.expr, Literal(format)) + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. 
All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateColumnName: String, format: String): Column = + date_format(Column(dateColumnName), format) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(e: Column): Column = Year(e.expr) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(columnName: String): Column = year(Column(columnName)) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(e: Column): Column = Quarter(e.expr) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(columnName: String): Column = quarter(Column(columnName)) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(e: Column): Column = Month(e.expr) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(columnName: String): Column = month(Column(columnName)) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def dayofmonth(e: Column): Column = DayOfMonth(e.expr) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def dayofmonth(columnName: String): Column = dayofmonth(Column(columnName)) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. 
+ * @group datetime_funcs + * @since 1.5.0 + */ + def dayofyear(e: Column): Column = DayOfYear(e.expr) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def dayofyear(columnName: String): Column = dayofyear(Column(columnName)) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(e: Column): Column = Hour(e.expr) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(columnName: String): Column = hour(Column(columnName)) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(e: Column): Column = Minute(e.expr) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(columnName: String): Column = minute(Column(columnName)) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(e: Column): Column = Second(e.expr) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(columnName: String): Column = second(Column(columnName)) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def weekofyear(e: Column): Column = WeekOfYear(e.expr) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. 
+ * @group datetime_funcs + * @since 1.5.0 + */ + def weekofyear(columnName: String): Column = weekofyear(Column(columnName)) + + ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// // scalastyle:off diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameDateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameDateSuite.scala deleted file mode 100644 index a4719a38de..0000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameDateSuite.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import java.sql.{Date, Timestamp} - -class DataFrameDateTimeSuite extends QueryTest { - - private lazy val ctx = org.apache.spark.sql.test.TestSQLContext - import ctx.implicits._ - - test("timestamp comparison with date strings") { - val df = Seq( - (1, Timestamp.valueOf("2015-01-01 00:00:00")), - (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t") - - checkAnswer( - df.select("t").filter($"t" <= "2014-06-01"), - Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil) - - - checkAnswer( - df.select("t").filter($"t" >= "2014-06-01"), - Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil) - } - - test("date comparison with date strings") { - val df = Seq( - (1, Date.valueOf("2015-01-01")), - (2, Date.valueOf("2014-01-01"))).toDF("i", "t") - - checkAnswer( - df.select("t").filter($"t" <= "2014-06-01"), - Row(Date.valueOf("2014-01-01")) :: Nil) - - - checkAnswer( - df.select("t").filter($"t" >= "2015"), - Row(Date.valueOf("2015-01-01")) :: Nil) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index d24e3ee1dd..9e80ae8692 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -22,7 +22,7 @@ import java.text.SimpleDateFormat import org.apache.spark.sql.functions._ -class DateExpressionsSuite extends QueryTest { +class DateFunctionsSuite extends QueryTest { private lazy val ctx = org.apache.spark.sql.test.TestSQLContext import ctx.implicits._ @@ -32,6 +32,35 @@ class DateExpressionsSuite extends QueryTest { val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime) + test("timestamp comparison with date strings") { + val df = Seq( + (1, Timestamp.valueOf("2015-01-01 00:00:00")), + (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", 
"t") + + checkAnswer( + df.select("t").filter($"t" <= "2014-06-01"), + Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil) + + + checkAnswer( + df.select("t").filter($"t" >= "2014-06-01"), + Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil) + } + + test("date comparison with date strings") { + val df = Seq( + (1, Date.valueOf("2015-01-01")), + (2, Date.valueOf("2014-01-01"))).toDF("i", "t") + + checkAnswer( + df.select("t").filter($"t" <= "2014-06-01"), + Row(Date.valueOf("2014-01-01")) :: Nil) + + + checkAnswer( + df.select("t").filter($"t" >= "2015"), + Row(Date.valueOf("2015-01-01")) :: Nil) + } test("date format") { val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") @@ -83,39 +112,27 @@ class DateExpressionsSuite extends QueryTest { Row(4, 4, 4)) } - test("day") { - val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(day("a"), day("b"), day("c")), - Row(8, 8, 8)) - - checkAnswer( - df.selectExpr("day(a)", "day(b)", "day(c)"), - Row(8, 8, 8)) - } - - test("day of month") { + test("dayofmonth") { val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") checkAnswer( - df.select(day_of_month("a"), day_of_month("b"), day_of_month("c")), + df.select(dayofmonth("a"), dayofmonth("b"), dayofmonth("c")), Row(8, 8, 8)) checkAnswer( - df.selectExpr("day_of_month(a)", "day_of_month(b)", "day_of_month(c)"), + df.selectExpr("day(a)", "day(b)", "dayofmonth(c)"), Row(8, 8, 8)) } - test("day in year") { + test("dayofyear") { val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") checkAnswer( - df.select(day_in_year("a"), day_in_year("b"), day_in_year("c")), + df.select(dayofyear("a"), dayofyear("b"), dayofyear("c")), Row(98, 98, 98)) checkAnswer( - df.selectExpr("day_in_year(a)", "day_in_year(b)", "day_in_year(c)"), + df.selectExpr("dayofyear(a)", "dayofyear(b)", "dayofyear(c)"), Row(98, 98, 98)) } @@ -155,15 +172,15 @@ class DateExpressionsSuite extends QueryTest { Row(0, 15, 15)) } - test("week of year") { + 
test("weekofyear") { val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") checkAnswer( - df.select(week_of_year("a"), week_of_year("b"), week_of_year("c")), + df.select(weekofyear("a"), weekofyear("b"), weekofyear("c")), Row(15, 15, 15)) checkAnswer( - df.selectExpr("week_of_year(a)", "week_of_year(b)", "week_of_year(c)"), + df.selectExpr("weekofyear(a)", "weekofyear(b)", "weekofyear(c)"), Row(15, 15, 15)) } |