diff options
author | Yin Huai <yhuai@databricks.com> | 2015-11-10 11:06:29 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-11-10 11:06:29 -0800 |
commit | e0701c75601c43f69ed27fc7c252321703db51f2 (patch) | |
tree | 52d85dfefce3da304fef585c895667f305cd8238 /python/pyspark/sql | |
parent | 6e5fc37883ed81c3ee2338145a48de3036d19399 (diff) | |
download | spark-e0701c75601c43f69ed27fc7c252321703db51f2.tar.gz spark-e0701c75601c43f69ed27fc7c252321703db51f2.tar.bz2 spark-e0701c75601c43f69ed27fc7c252321703db51f2.zip |
[SPARK-9830][SQL] Remove AggregateExpression1 and Aggregate Operator used to evaluate AggregateExpression1s
https://issues.apache.org/jira/browse/SPARK-9830
This PR contains the following main changes.
* Removing `AggregateExpression1`.
* Removing `Aggregate` operator, which is used to evaluate `AggregateExpression1`.
* Removing the planner rule used to plan `Aggregate`.
* Linking `MultipleDistinctRewriter` to the analyzer.
* Renaming `AggregateExpression2` to `AggregateExpression` and `AggregateFunction2` to `AggregateFunction`.
* Updating the places where we create aggregate expressions. The way to create an aggregate expression is `AggregateExpression(aggregateFunction, mode, isDistinct)`.
* Changing `val`s in `DeclarativeAggregate`s that touch the children of the function to `lazy val`s (when we create an aggregate expression in the DataFrame API, the children of an aggregate function can be unresolved).
Author: Yin Huai <yhuai@databricks.com>
Closes #9556 from yhuai/removeAgg1.
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 2 | ||||
-rw-r--r-- | python/pyspark/sql/functions.py | 2 | ||||
-rw-r--r-- | python/pyspark/sql/tests.py | 2 |
3 files changed, 3 insertions, 3 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b97c94dad8..0dd75ba7ca 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -866,7 +866,7 @@ class DataFrame(object):
         This is a variant of :func:`select` that accepts SQL expressions.
 
         >>> df.selectExpr("age * 2", "abs(age)").collect()
-        [Row((age * 2)=4, 'abs(age)=2), Row((age * 2)=10, 'abs(age)=5)]
+        [Row((age * 2)=4, abs(age)=2), Row((age * 2)=10, abs(age)=5)]
         """
         if len(expr) == 1 and isinstance(expr[0], list):
             expr = expr[0]
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 962f676d40..6e1cbde423 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -382,7 +382,7 @@ def expr(str):
     """Parses the expression string into the column that it represents
 
     >>> df.select(expr("length(name)")).collect()
-    [Row('length(name)=5), Row('length(name)=3)]
+    [Row(length(name)=5), Row(length(name)=3)]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.expr(str))
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index e224574bcb..9f5f7cfdf7 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1017,7 +1017,7 @@ class SQLTests(ReusedPySparkTestCase):
         row = Row(a="length string", b=75)
         df = self.sqlCtx.createDataFrame([row])
         result = df.select(functions.expr("length(a)")).collect()[0].asDict()
-        self.assertEqual(13, result["'length(a)"])
+        self.assertEqual(13, result["length(a)"])
 
     def test_replace(self):
         schema = StructType([