aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorWilliam Benton <willb@redhat.com>2014-07-23 16:25:32 -0700
committerMichael Armbrust <michael@databricks.com>2014-07-23 16:25:32 -0700
commite060d3ee2d910a5a802bb29630dca6f66cc0525d (patch)
tree766d3ece33ff5955d8b0ffafb824beee5b7f459e /sql
parent91903e0a50b0efb7217610021a628b3043004d82 (diff)
downloadspark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.gz
spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.tar.bz2
spark-e060d3ee2d910a5a802bb29630dca6f66cc0525d.zip
SPARK-2226: [SQL] transform HAVING clauses with aggregate expressions that aren't in the aggregation list
This change adds an analyzer rule to 1. find expressions in `HAVING` clause filters that depend on unresolved attributes, 2. push these expressions down to the underlying aggregates, and then 3. project them away above the filter. It also enables the `HAVING` queries in the Hive compatibility suite. Author: William Benton <willb@redhat.com> Closes #1497 from willb/spark-2226 and squashes the following commits: 92c9a93 [William Benton] Removed unnecessary import f1d4f34 [William Benton] Cleanups missed in prior commit 0e1624f [William Benton] Incorporated suggestions from @marmbrus; thanks! 541d4ee [William Benton] Cleanups from review 5a12647 [William Benton] Explanatory comments and stylistic cleanups. c7f2b2c [William Benton] Whitelist HAVING queries. 29a26e3 [William Benton] Added rule to handle unresolved attributes in HAVING clauses (SPARK-2226)
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala27
-rw-r--r--sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce550
-rw-r--r--sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d87310
-rw-r--r--sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c10
-rw-r--r--sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e308
-rw-r--r--sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f40
-rw-r--r--sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff199
-rw-r--r--sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc0950
-rw-r--r--sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e125
-rw-r--r--sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a794881030
-rw-r--r--sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff199
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala2
12 files changed, 869 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index c7188469bf..02bdb64f30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
-
/**
* A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing
* when all relations are already filled in and the analyser needs only to resolve attribute
@@ -54,6 +53,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
StarExpansion ::
ResolveFunctions ::
GlobalAggregates ::
+ UnresolvedHavingClauseAttributes ::
typeCoercionRules :_*),
Batch("Check Analysis", Once,
CheckResolution),
@@ -152,6 +152,31 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
}
/**
+ * This rule finds expressions in HAVING clause filters that depend on
+ * unresolved attributes. It pushes these expressions down to the underlying
+ * aggregates and then projects them away above the filter.
+ */
+ object UnresolvedHavingClauseAttributes extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+ case filter @ Filter(havingCondition, aggregate @ Aggregate(_, originalAggExprs, _))
+ if !filter.resolved && aggregate.resolved && containsAggregate(havingCondition) => {
+ val evaluatedCondition = Alias(havingCondition, "havingCondition")()
+ val aggExprsWithHaving = evaluatedCondition +: originalAggExprs
+
+ Project(aggregate.output,
+ Filter(evaluatedCondition.toAttribute,
+ aggregate.copy(aggregateExpressions = aggExprsWithHaving)))
+ }
+
+ }
+
+ protected def containsAggregate(condition: Expression): Boolean =
+ condition
+ .collect { case ae: AggregateExpression => ae }
+ .nonEmpty
+ }
+
+ /**
* When a SELECT clause has only a single expression and that expression is a
* [[catalyst.expressions.Generator Generator]] we convert the
* [[catalyst.plans.logical.Project Project]] to a [[catalyst.plans.logical.Generate Generate]].
diff --git a/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55 b/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-0-57f3f26c0203c29c2a91a7cca557ce55
diff --git a/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873 b/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873
new file mode 100644
index 0000000000..704f1e62f1
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-1-ef81808faeab6d212c3cf32abfc0d873
@@ -0,0 +1,10 @@
+4
+4
+5
+4
+5
+5
+4
+4
+5
+4
diff --git a/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1 b/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-2-a2b4f52cb92f730ddb912b063636d6c1
diff --git a/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e b/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e
new file mode 100644
index 0000000000..b56757a60f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-3-3fa6387b6a4ece110ac340c7b893964e
@@ -0,0 +1,308 @@
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+8 val_8
+9 val_9
+10 val_10
+11 val_11
+12 val_12
+15 val_15
+17 val_17
+18 val_18
+19 val_19
+20 val_20
+24 val_24
+26 val_26
+27 val_27
+28 val_28
+30 val_30
+33 val_33
+34 val_34
+35 val_35
+37 val_37
+41 val_41
+42 val_42
+43 val_43
+44 val_44
+47 val_47
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+69 val_69
+70 val_70
+72 val_72
+74 val_74
+76 val_76
+77 val_77
+78 val_78
+80 val_80
+82 val_82
+83 val_83
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+90 val_90
+92 val_92
+95 val_95
+96 val_96
+97 val_97
+98 val_98
+100 val_100
+103 val_103
+104 val_104
+105 val_105
+111 val_111
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+119 val_119
+120 val_120
+125 val_125
+126 val_126
+128 val_128
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+136 val_136
+137 val_137
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+149 val_149
+150 val_150
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+165 val_165
+166 val_166
+167 val_167
+168 val_168
+169 val_169
+170 val_170
+172 val_172
+174 val_174
+175 val_175
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+189 val_189
+190 val_190
+191 val_191
+192 val_192
+193 val_193
+194 val_194
+195 val_195
+196 val_196
+197 val_197
+199 val_199
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+205 val_205
+207 val_207
+208 val_208
+209 val_209
+213 val_213
+214 val_214
+216 val_216
+217 val_217
+218 val_218
+219 val_219
+221 val_221
+222 val_222
+223 val_223
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+230 val_230
+233 val_233
+235 val_235
+237 val_237
+238 val_238
+239 val_239
+241 val_241
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+256 val_256
+257 val_257
+258 val_258
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+266 val_266
+272 val_272
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+278 val_278
+280 val_280
+281 val_281
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+305 val_305
+306 val_306
+307 val_307
+308 val_308
+309 val_309
+310 val_310
+311 val_311
+315 val_315
+316 val_316
+317 val_317
+318 val_318
+321 val_321
+322 val_322
+323 val_323
+325 val_325
+327 val_327
+331 val_331
+332 val_332
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+341 val_341
+342 val_342
+344 val_344
+345 val_345
+348 val_348
+351 val_351
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+368 val_368
+369 val_369
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+396 val_396
+397 val_397
+399 val_399
+400 val_400
+401 val_401
+402 val_402
+403 val_403
+404 val_404
+406 val_406
+407 val_407
+409 val_409
+411 val_411
+413 val_413
+414 val_414
+417 val_417
+418 val_418
+419 val_419
+421 val_421
+424 val_424
+427 val_427
+429 val_429
+430 val_430
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+439 val_439
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+459 val_459
+460 val_460
+462 val_462
+463 val_463
+466 val_466
+467 val_467
+468 val_468
+469 val_469
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+479 val_479
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
diff --git a/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4 b/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-4-e9918bd385cb35db4ebcbd4e398547f4
diff --git a/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff b/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff
new file mode 100644
index 0000000000..2d7022e386
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-5-4a0c4e521b8a6f6146151c13a2715ff
@@ -0,0 +1,199 @@
+4
+5
+8
+9
+26
+27
+28
+30
+33
+34
+35
+37
+41
+42
+43
+44
+47
+51
+53
+54
+57
+58
+64
+65
+66
+67
+69
+70
+72
+74
+76
+77
+78
+80
+82
+83
+84
+85
+86
+87
+90
+92
+95
+96
+97
+98
+256
+257
+258
+260
+262
+263
+265
+266
+272
+273
+274
+275
+277
+278
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+291
+292
+296
+298
+302
+305
+306
+307
+308
+309
+310
+311
+315
+316
+317
+318
+321
+322
+323
+325
+327
+331
+332
+333
+335
+336
+338
+339
+341
+342
+344
+345
+348
+351
+353
+356
+360
+362
+364
+365
+366
+367
+368
+369
+373
+374
+375
+377
+378
+379
+382
+384
+386
+389
+392
+393
+394
+395
+396
+397
+399
+400
+401
+402
+403
+404
+406
+407
+409
+411
+413
+414
+417
+418
+419
+421
+424
+427
+429
+430
+431
+432
+435
+436
+437
+438
+439
+443
+444
+446
+448
+449
+452
+453
+454
+455
+457
+458
+459
+460
+462
+463
+466
+467
+468
+469
+470
+472
+475
+477
+478
+479
+480
+481
+482
+483
+484
+485
+487
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
diff --git a/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095 b/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-6-9f50df5b5f31c7166b0396ab434dc095
diff --git a/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e b/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e
new file mode 100644
index 0000000000..bd545ccf74
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-7-5ad96cb287df02080da1e2594f08d83e
@@ -0,0 +1,125 @@
+302
+305
+306
+307
+308
+309
+310
+311
+315
+316
+317
+318
+321
+322
+323
+325
+327
+331
+332
+333
+335
+336
+338
+339
+341
+342
+344
+345
+348
+351
+353
+356
+360
+362
+364
+365
+366
+367
+368
+369
+373
+374
+375
+377
+378
+379
+382
+384
+386
+389
+392
+393
+394
+395
+396
+397
+399
+400
+401
+402
+403
+404
+406
+407
+409
+411
+413
+414
+417
+418
+419
+421
+424
+427
+429
+430
+431
+432
+435
+436
+437
+438
+439
+443
+444
+446
+448
+449
+452
+453
+454
+455
+457
+458
+459
+460
+462
+463
+466
+467
+468
+469
+470
+472
+475
+477
+478
+479
+480
+481
+482
+483
+484
+485
+487
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
diff --git a/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103 b/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-8-4aa7197e20b5a64461ca670a79488103
diff --git a/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff b/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff
new file mode 100644
index 0000000000..d77586c12b
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/having-9-a79743372d86d77b0ff53a71adcb1cff
@@ -0,0 +1,199 @@
+4 val_4
+5 val_5
+8 val_8
+9 val_9
+26 val_26
+27 val_27
+28 val_28
+30 val_30
+33 val_33
+34 val_34
+35 val_35
+37 val_37
+41 val_41
+42 val_42
+43 val_43
+44 val_44
+47 val_47
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+69 val_69
+70 val_70
+72 val_72
+74 val_74
+76 val_76
+77 val_77
+78 val_78
+80 val_80
+82 val_82
+83 val_83
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+90 val_90
+92 val_92
+95 val_95
+96 val_96
+97 val_97
+98 val_98
+256 val_256
+257 val_257
+258 val_258
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+266 val_266
+272 val_272
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+278 val_278
+280 val_280
+281 val_281
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+308 val_308
+309 val_309
+310 val_310
+311 val_311
+315 val_315
+316 val_316
+317 val_317
+318 val_318
+321 val_321
+322 val_322
+323 val_323
+325 val_325
+327 val_327
+331 val_331
+332 val_332
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+341 val_341
+342 val_342
+344 val_344
+345 val_345
+348 val_348
+351 val_351
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+368 val_368
+369 val_369
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+396 val_396
+397 val_397
+399 val_399
+400 val_400
+401 val_401
+402 val_402
+403 val_403
+404 val_404
+406 val_406
+407 val_407
+409 val_409
+411 val_411
+413 val_413
+414 val_414
+417 val_417
+418 val_418
+419 val_419
+421 val_421
+424 val_424
+427 val_427
+429 val_429
+430 val_430
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+439 val_439
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+459 val_459
+460 val_460
+462 val_462
+463 val_463
+466 val_466
+467 val_467
+468 val_468
+469 val_469
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+479 val_479
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index bd036faaa6..8b451973a4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -391,6 +391,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"groupby_sort_8",
"groupby_sort_9",
"groupby_sort_test_1",
+ "having",
+ "having1",
"implicit_cast1",
"innerjoin",
"inoutdriver",