about | summary | refs | log | tree | commit | diff
path: root/sql/catalyst/src
diff options
context:
space:
mode:
author Yin Huai <yhuai@databricks.com> 2016-07-25 20:58:07 -0700
committer Yin Huai <yhuai@databricks.com> 2016-07-25 20:58:07 -0700
commit 815f3eece5f095919a329af8cbd762b9ed71c7a8 (patch)
tree 8078da6a29d07ca784c07d54813b2a0c6d256361 /sql/catalyst/src
parent f99e34e8e58c97ff30c6e054875533350d99fe5b (diff)
download spark-815f3eece5f095919a329af8cbd762b9ed71c7a8.tar.gz
spark-815f3eece5f095919a329af8cbd762b9ed71c7a8.tar.bz2
spark-815f3eece5f095919a329af8cbd762b9ed71c7a8.zip
[SPARK-16633][SPARK-16642][SPARK-16721][SQL] Fixes three issues related to lead and lag functions
## What changes were proposed in this pull request?

This PR contains three changes.

First, this PR changes the behavior of lead/lag back to Spark 1.6's behavior, which is described below:
1. lead/lag respect null input values, which means that if the offset row exists and the input value is null, the result will be null instead of the default value.
2. If the offset row does not exist, the default value will be used.
3. OffsetWindowFunction's nullable setting also considers the nullability of its input (because of the first change).

Second, this PR fixes the evaluation of lead/lag when the input expression is a literal. This fix is a result of the first change. In current master, if a literal is used as the input expression of a lead or lag function, the result will be this literal even if the offset row does not exist.

Third, this PR makes ResolveWindowFrame not fire if a window function is not resolved.

## How was this patch tested?

New tests in SQLWindowFunctionSuite

Author: Yin Huai <yhuai@databricks.com>

Closes #14284 from yhuai/lead-lag.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala 3
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala 45
2 files changed, 26 insertions, 22 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index d1d2c59cae..61162ccdba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1787,7 +1787,8 @@ class Analyzer(
s @ WindowSpecDefinition(_, o, UnspecifiedFrame))
if wf.frame != UnspecifiedFrame =>
WindowExpression(wf, s.copy(frameSpecification = wf.frame))
- case we @ WindowExpression(e, s @ WindowSpecDefinition(_, o, UnspecifiedFrame)) =>
+ case we @ WindowExpression(e, s @ WindowSpecDefinition(_, o, UnspecifiedFrame))
+ if e.resolved =>
val frame = SpecifiedWindowFrame.defaultWindowFrame(o.nonEmpty, acceptWindowFrame = true)
we.copy(windowSpec = s.copy(frameSpecification = frame))
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index e35192ca2d..6806591f68 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -321,8 +321,7 @@ abstract class OffsetWindowFunction
val input: Expression
/**
- * Default result value for the function when the input expression returns NULL. The default will
- * evaluated against the current row instead of the offset row.
+ * Default result value for the function when the 'offset'th row does not exist.
*/
val default: Expression
@@ -348,7 +347,7 @@ abstract class OffsetWindowFunction
*/
override def foldable: Boolean = false
- override def nullable: Boolean = default == null || default.nullable
+ override def nullable: Boolean = default == null || default.nullable || input.nullable
override lazy val frame = {
// This will be triggered by the Analyzer.
@@ -373,20 +372,22 @@ abstract class OffsetWindowFunction
}
/**
- * The Lead function returns the value of 'x' at 'offset' rows after the current row in the window.
- * Offsets start at 0, which is the current row. The offset must be constant integer value. The
- * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger
- * than the window, the default expression is evaluated.
- *
- * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ * The Lead function returns the value of 'x' at the 'offset'th row after the current row in
+ * the window. Offsets start at 0, which is the current row. The offset must be a constant
+ * integer value. The default offset is 1. When the value of 'x' is null at the 'offset'th row,
+ * null is returned. If there is no such offset row, the default expression is evaluated.
*
* @param input expression to evaluate 'offset' rows after the current row.
* @param offset rows to jump ahead in the partition.
- * @param default to use when the input value is null or when the offset is larger than the window.
+ * @param default to use when the offset is larger than the window. The default value is null.
*/
@ExpressionDescription(usage =
- """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 'offset' rows
- after the current row in the window""")
+ """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at the 'offset'th row
+ after the current row in the window.
+ The default value of 'offset' is 1 and the default value of 'default' is null.
+ If the value of 'x' at the 'offset'th row is null, null is returned.
+ If there is no such offset row (e.g. when the offset is 1, the last row of the window
+ does not have any subsequent row), 'default' is returned.""")
case class Lead(input: Expression, offset: Expression, default: Expression)
extends OffsetWindowFunction {
@@ -400,20 +401,22 @@ case class Lead(input: Expression, offset: Expression, default: Expression)
}
/**
- * The Lag function returns the value of 'x' at 'offset' rows before the current row in the window.
- * Offsets start at 0, which is the current row. The offset must be constant integer value. The
- * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller
- * than the window, the default expression is evaluated.
- *
- * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ * The Lag function returns the value of 'x' at the 'offset'th row before the current row in
+ * the window. Offsets start at 0, which is the current row. The offset must be a constant
+ * integer value. The default offset is 1. When the value of 'x' is null at the 'offset'th row,
+ * null is returned. If there is no such offset row, the default expression is evaluated.
*
* @param input expression to evaluate 'offset' rows before the current row.
* @param offset rows to jump back in the partition.
- * @param default to use when the input value is null or when the offset is smaller than the window.
+ * @param default to use when the offset row does not exist.
*/
@ExpressionDescription(usage =
- """_FUNC_(input, offset, default) - LAG returns the value of 'x' at 'offset' rows
- before the current row in the window""")
+ """_FUNC_(input, offset, default) - LAG returns the value of 'x' at the 'offset'th row
+ before the current row in the window.
+ The default value of 'offset' is 1 and the default value of 'default' is null.
+ If the value of 'x' at the 'offset'th row is null, null is returned.
+ If there is no such offset row (e.g. when the offset is 1, the first row of the window
+ does not have any previous row), 'default' is returned.""")
case class Lag(input: Expression, offset: Expression, default: Expression)
extends OffsetWindowFunction {