1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
|
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Scala Compiler Phase and Plug-In Initialization</title>
<meta name="sip" content="00002"/>
<meta name="author" content="Anders Bach Nielsen"/>
<meta name="version" content="0"/>
<meta name="type" content="standards"/>
<meta name="status" content="submission"/>
<meta name="created" content="2008-08-25"/>
<meta name="updated" content="2008-10-09"/>
<meta name="scala-version" content="2.8.0.final-"/>
<meta name="owner-contact" content="mailto:andersbach.nielsen@epfl.ch"/>
<meta name="discussion" content="http://www.nabble.com/SIP-00002%3A-Compiler-Phase-Initialization-and-Plugins-td19677473.html"/>
<!-- <link rel="auxiliary" href="[URI]" type="[mime type]"/> -->
<!-- <link rel="replaces" href="[URI]" type="application/xhtml+xml"/> -->
<!-- <link rel="depends-on" href="[URI]" type="application/xhtml+xml"/> -->
<!-- Stylesheet -->
<link rel="stylesheet" href="http://lampsvn.epfl.ch/svn-repos/scala/sip/trunk/sip.css" type="text/css"/>
</head>
<body>
<a name="top"></a>
<h1>Scala Compiler Phase and Plug-In Initialization</h1>
<h2>Abstract</h2>
<p>This document describes the design and implementation of a more
uniform way of handling internal compiler phases and external user
supplied phases via plug-ins in the Scala compiler.</p>
<h2>Contents</h2>
<ul>
<li><a href="#motivation">Motivation</a>
<ul>
<li><a href="#goalsandreq">Goals</a></li>
<li><a href="#usecases">Use Cases</a></li>
<li><a href="#detailedusecases">Detailed Use Cases</a>
<ul>
<li><a href="#usecase1">Assembling compiler internal phases for the JVM target ...</a></li>
<li><a href="#usecase2">Assembling compiler internal phases for the MSIL target ...</a></li>
<li><a href="#usecase3">Adding one plug-in supplied phase to the JVM target ...</a></li>
<li><a href="#usecase4">Adding two plug-in supplied phase to the JVM target ...</a></li>
<li><a href="#usecase5">Adding two plug-in supplied phases, where one has a ...</a></li>
<li><a href="#usecase6">Adding plug-in supplied phases with dependencies to ...</a></li>
<li><a href="#usecase7">Two separate phases that should be handled as a single ...</a></li>
<li><a href="#usecase8">A cyclic dependency among phases is a fatal error.</a></li>
<li><a href="#usecase9">Special handling of the <code>parser</code> and <code>terminal</code> phases.</a></li>
<li><a href="#usecase10">Larger compiler example with three plug-in supplied phases.</a></li>
</ul>
</li>
<li><a href="#futuregoals">Future Goals That Extend Beyond This Proposal</a></li>
</ul>
</li>
<li><a href="#description">Description</a>
<ul>
<li><a href="#presentdesign">The Present Design</a></li>
<li><a href="#proposeddesign">The Proposed Design</a>
<ul>
<li><a href="#propdesign1">Internal Phases</a></li>
<li><a href="#propdesign2">External Phases</a></li>
<li><a href="#propdesign3">Phases Set</a></li>
<li><a href="#propdesign4">Phase Assembly</a></li>
<li><a href="#propdesign5">Constraint System</a></li>
<li><a href="#constsolver">Constraint Solving Algorithm</a></li>
</ul>
</li>
<li><a href="#namingscheme">Naming Scheme for Internal and External Phases</a></li>
<li><a href="#implications">Implications of This Proposal</a></li>
</ul>
</li>
<li><a href="#implementation">Implementation</a></li>
</ul>
<a name="motivation"></a>
<h2>Motivation</h2>
<p>This section will cover the motivating goals of creating this
proposal and some use cases emphasizing these goals. There will also
be a section on goals that extend beyond this proposal and will
therefore not be covered by this proposal.</p>
<a name="goalsandreq"></a>
<h3>Goals</h3>
<ul>
<li>Handle inclusion and constraints for internal compiler phases
and external user supplied phases via plug-ins in a uniform
way.</li>
<li>Phase ordering is based on a simple, but sufficient powerful
and solvable constraint system.</li>
<li>Phase ordering of internal compiler phases and external user
supplied phases should retain as much compatibility with the
current scheme as possible.</li>
<li>Phase ordering is fully determined by the set of input phases.
It does not matter, for example, what order the compiler processes
the individual plug-ins that supply the external phases.</li>
<li>Ordering of the internal compiler phases should always succeed
in a valid compiler phase chain. Adding two plug-ins, which by
themselves give a valid compiler phase chain, should also give a
valid compiler phase chain.</li>
</ul>
<a name="usecases"></a> <h3>Use Cases</h3>
<ol>
<li><a href="#usecase1">Assembling compiler internal phases for
the JVM target and no plug-ins are loaded.</a></li>
<li><a href="#usecase2">Assembling compiler internal phases for
the MSIL target and no plug-ins are loaded.</a></li>
<li><a href="#usecase3">Adding one plug-in supplied phase to the
JVM target compiler after type checking.</a></li>
<li><a href="#usecase4">Adding two plug-in supplied phase to the
JVM target compiler after type checking.</a></li>
<li><a href="#usecase5">Adding two plug-in supplied phases, where
one has a dependency on the other, to the JVM target
compiler.</a></li>
<li><a href="#usecase6">Adding plug-in supplied phases with
dependencies to both plug-in supplied and internal phases to the
JVM target compiler.</a></li>
<li><a href="#usecase7">Two separate phases that should be handled
as a single phase.</a></li>
<li><a href="#usecase8">A cyclic dependency among phases is a
fatal error.</a></li>
<li><a href="#usecase9">Special handling of the
<code>parser</code> and <code>terminal</code> phases.</a></li>
<li><a href="#usecase10">Larger compiler example with three
plug-in supplied phases.</a></li>
</ol>
<a name="detailedusecases"></a>
<h3>Detailed Use Cases</h3>
<a name="usecase1"></a>
<h4>[1] Assembling compiler internal phases for the JVM target and
no plug-ins are loaded.</h4>
<p>All phases have to declare a <code>runsAfter</code> list of phase
names that should be run before the phase itself in the final
compiler phase chain. In this use case there are no plug-in supplied
phases, that have to be included. Depending on the command line
options and the JVM target the for this target default phases are
added to the phases set. This is a set, because a phase may only
occur once in the compiler phase chain.</p>
<p>Following these basic <code>runsAfter</code> constraints the
compiler phase chain is build from the phases set. In the example
below a simple set of phase constraints can be converted into a
graph, that in turn can be converted into a list of phases.
</p>
<div style="text-align: center;">
<img src="sip-00002-1.png" alt="" border="0" width="100%" height="" />
</div>
<p>Using the <code>runsAfter</code> list as the basic ordering
constraints there are some more properties that have to be
full filled.
</p>
<ul>
<li>The phases <code>liftcode</code> and <code>flatten</code> are
only included for the JVM target. They have <code>runsAfter</code>
dependencies on the <code>refchecks</code> phase and the
<code>constructors</code> phase, respectively. Because they are
only included in the JVM target the <code>uncurry</code> phase and
the <code>mixin</code> phase, respectively, need to have multiple
dependencies as seen in the example below.
<p>This example shows the desired transformation steps of a small
dependency graph taken from the actual compiler phase graph. When
processing <code>refchecks</code> we notice that it has two
dependencies to it. One dependency comes from
<code>liftcode</code> and the other from <code>uncurry</code>. We
now look if any of these phases have more then one outgoing
dependency. The <code>liftcode</code> phase has one outgoing
dependency, but the <code>uncurry</code> phase has two, so one of
the outgoing dependencies of <code>uncurry</code> has to go to
<code>refchecks</code> and the other dependency has to go to some
place we have not visited yet. So by this reasoning, we can safely
remove the dependency from <code>uncurry</code> to
<code>refchecks</code> (marked red).</p>
<div style="text-align: center;">
<img src="sip-00002-21.png" alt="" border="0" width="85%" height="" />
</div>
<p></p>
</li>
<li>Some internal phase pairs have very strong dependencies, like
the phases <code>explicitouter</code> and <code>erasure</code> and
also the <code>namer</code> and <code>typer</code> phases. See <a
href="#usecase7">use case 7</a> for more information on this
problem.</li>
</ul>
<a name="usecase2"></a>
<h4>[2] Assembling compiler internal phases for the MSIL target and
no plug-ins are loaded.</h4>
<p>Like in <a href="usecase1">use case 1</a> depending on command
line options and that the target being MSIL, the proper phases are
added to the phases set. Again there are no plug-in supplied phases
to be included. There are however some differences to use case 1.
</p>
<ul>
<li>The phases <code>liftcode</code> and <code>flatten</code> are
not included for the MSIL target. This makes the
<code>runsAfter</code> list of the phases <code>uncurry</code> and
<code>mixer</code> special by containing dependencies to phases
that are found in the phases set.
<p>For instance the <code>uncurry</code> phase has to specify that
it wants to run after both <code>refchecks</code> and
<code>liftcode</code>. <i>(For the JVM target this will force the
<code>liftcode</code> phase before <code>uncurry</code>.)</i> For
the MSIL target the phase <code>liftcode</code> is not present as
shown in the example diagram below. One solution that will work is
to silently drop all dependencies to phases, where there was no
phase object in the phases set.</p>
<div style="text-align: center;">
<img src="sip-00002-22.png" alt="" border="0" width="55%" height="" />
</div>
</li>
</ul>
<a name="usecase3"></a>
<h4>[3] Adding one plug-in supplied phase to the JVM target
compiler after type checking.</h4>
<p>Like in <a href="#usecase1">use case 1</a> the internal phases
are added to the phases set. In this use case a plug-in is loaded
that will supply one phase to the phases set with a constraint that
it wants to run after the <code>refchecks</code> phase. The original
plug-in handling code will still handle loading plug-ins, disabling
plug-ins, but will no longer have the responsibility of assembling
the compiler chain. The plug-in is loaded and the phase is added
to the phases set together with the internal phases.</p>
<p>When assembling the compiler phase chain in this use case, there
will be two phases directly after the <code>refchecks</code> phase
and the dependencies can not be removed as described in <a
href="#usecasee1">use case 1</a>. This situation is shown in the
example below.</p>
<div style="text-align: center;">
<img src="sip-00002-2.png" alt="" border="0" width="100%" height="" />
</div>
<p>One of the goals of this proposal is to be as compatible with the
current behavior of the compiler as possible. So if the situation is
as shown above that after including A, we can either take B or R, we
need a deterministic scheme for always constructing the same phase
chain. This is done by taking the plug-in supplied phases first and
then the internal phase after wards.
</p>
<a name="usecase4"></a>
<h4>[4] Adding two plug-in supplied phase to the JVM target
compiler after type checking.</h4>
<p>This use case is very similar to <a href="#usecase3">use case
3</a>, however instead of having one internal and one external phase
after the <code>refchecks</code> phase, we have one internal and two
external phases after the <code>refchecks</code> phase. This
situation is shown in the example below.</p>
<div style="text-align: center;">
<img src="sip-00002-23.png" alt="" border="0" width="100%" height="" />
</div>
<p>To be able to deterministically create the compiler phase chain
we need a rule to handle multiple external phases at the same
level. The simplest rule it to include the external phases
alphabetically order, as shown in the example above.</p>
<a name="usecase5"></a>
<h4>[5] Adding two plug-in supplied phases, where one has a
dependency on the other, to the JVM target compiler.</h4>
<p>This use case is very similar to <a href="#usecase4">use case
4</a>, however instead of the two plug-in supplied phases having a
dependency on the same internal, one of the plug-in supplied phases
has a dependency on the other and the other has a dependency on an
internal phase. This is the example shown in the diagram below.</p>
<div style="text-align: center;">
<img src="sip-00002-3.png" alt="" border="0" width="100%" height="" />
</div>
<p>As described in <a href="#usecase3">use case 3</a>, after
inclusion of phase A the next phase to include is phase T. In the
example above phase R is dependent on phase T, so the next to
include is phase R. This means that including phase T, means to
include T and the ordered sub tree, under phase T.</p>
<p>The example shown below, can be solved applying the same logic as
described above.</p>
<div style="text-align: center;">
<img src="sip-00002-4.png" alt="" border="0" width="100%" height="" />
</div>
<a name="usecase6"></a>
<h4>[6] Adding plug-in supplied phases with dependencies to both
plug-in supplied and internal phases to the JVM target
compiler.</h4>
<p>This use case builds upon <a href="#usecase1">use case 1</a>, <a
href="#usecase3">use case 3</a>, <a href="#usecase4">use case 4</a>
and <a href="#usecase5">use case 5</a> and shows how to handle
plug-in supplied phases with dependencies on other plug-in supplied
and internal phases.</p>
<p>In the example shown below the plug-in supplied phase T has
dependencies on the plug-in supplied phase P and the internal phase
B. The transformation shown below is made by applying the logic from
<a href="#usecase1">use case 1</a> to this example and using the
assumption that the dependencies of the internal phases will not
contain cycles and internal phases are visited last on each
level.</p>
<div style="text-align: center;">
<img src="sip-00002-5.png" alt="" border="0" width="100%" height="" />
</div>
<a name="usecase7"></a>
<h4>[7] Two separate phases that should be handled as a single
phase.</h4>
<p>In <a href="#usecase1">use case 1</a> we specified the assembly
of the compiler phase chain based on runs after
constraints. However, the compiler contains some phase pairs that
have very strong dependencies on each other or it simply does not
make sense to put a phase in between. In this use case we will focus
on the situation that we have two individual phase objects, but one
needs to run right after the other and thereby act as one large
phase.</p>
<p>The solution to this is to introduce a new runs right after
constraint to all phases. To keep the compatibility with current
plug-ins as high as possible, this runs right after constraint is
only mandatory for internal phases. In the example below the
<code>runsRightAfter</code> constraint is marked as a blue arrow and
behaves as a normal runs after constraint.
</p>
<div style="text-align: center;">
<img src="sip-00002-7.png" alt="" border="0" width="100%" height="" />
</div>
<p>In the example shown below, we can see the first four phases of
the Scala compiler. There is a <code>runsRightAfter</code>
constraint between <code>typer</code> and <code>namer</code>. There
is in this example a plug-in supplied phase (plugin1) that want to
run after the <code>namer</code> phase. The semantics of the run
after constraint is that is should run somewhere after a specified
phase, but as soon as possible. With this in mind it is perfectly
valid to change the dependency of <code>plugin1</code> from
<code>namer</code> to <code>typer</code>. </p>
<div style="text-align: center;">
<img src="sip-00002-8.png" alt="" border="0" width="100%" height="" />
</div>
<p>If the phase <code>plugin1</code> had declared to
<code>runRightAfter</code> the <code>namer</code> phase, this would
result in a unsolvable graph and would therefore generate an error,
because only one phase can come right after another phase.
</p>
<a name="usecase8"></a>
<h4>[8] A cyclic dependency among phases is a fatal error.</h4>
<p>Running the compiler without loading plug-ins, it is guaranteed
that there are do cyclic dependencies among the internal phases, so
the only way to introduce cycles in the phase graph is by loading
one or more plug-ins. If a cycle is detected in the phase graph the
compiler should refuse to start and give a proper error message.</p>
<p>In the example shown below a cycle is detected and marked in
pink. A cycle like this would generate a cyclic dependency error and
outputting the names of the involved phases.</p>
<div style="text-align: center;">
<img src="sip-00002-20.png" alt="" border="0" width="35%" />
</div>
<a name="usecase9"></a>
<h4>[9] Special handling of the <code>parser</code> and
<code>terminal</code> phases.</h4>
<p>There are two very special phase objects in the phase graph. The
first phase is the <code>parser</code> phase. This phase will be the
head of the compiler phase chain, there are however no other
restrictions on this phase.</p>
<p>The other special phase is the <code>terminal</code> phase, which
has to be the last phase in the compiler phase chain. For this
reason it is not allowed to run after or run right after the
<code>terminal</code> phase. Any phase that specifies a runs after
or runs right after dependency on the <code>terminal</code> phase
will have that dependency dropped silently.</p>
<a name="usecase10"></a>
<h4>[10] Larger compiler example with three plug-in supplied phases.</h4>
<p>This example is implemented in the <a
href="#implementation">reference implementation</a> of the algorithm
below. In this example the compiler consists of nine internal phases
and two plug-ins are loaded adding a total of three phases to the
compiler.
</p>
<table width="100%" border="1">
<tr>
<td><strong>Phase name</strong></td>
<td><strong>Runs after</strong></td>
<td><strong>Runs right after</strong></td>
</tr>
<tr>
<td colspan="3" align="center"><strong><i>Internal phases</i></strong></td>
</tr>
<tr>
<td>nsc::parser</td>
<td></td>
<td></td>
</tr>
<tr>
<td>nsc::typer</td>
<td>nsc::parser</td>
<td></td>
</tr>
<tr>
<td>nsc::pickler</td>
<td>nsc::typer</td>
<td></td>
</tr>
<tr>
<td>nsc::liftcode</td>
<td>nsc::pickler</td>
<td></td>
</tr>
<tr>
<td>nsc::tailcalls</td>
<td>nsc::pickler<br />nsc::liftcode</td>
<td></td>
</tr>
<tr>
<td>nsc::erasure</td>
<td></td>
<td>nsc::tailcalls</td>
</tr>
<tr>
<td>nsc::cleanup</td>
<td>nsc::erasure</td>
<td></td>
</tr>
<tr>
<td>nsc::jvm</td>
<td>nsc::cleanup</td>
<td></td>
</tr>
<tr>
<td>nsc::terminal</td>
<td>nsc::jvm<br />nsc::msil</td>
<td></td>
</tr>
<tr>
<td colspan="3" align="center"><strong><i>Plug-in supplied phases</i></strong></td>
</tr>
<tr>
<td>plug1::optimization1</td>
<td>nsc::typer</td>
<td></td>
</tr>
<tr>
<td>plug2::optimization1</td>
<td>nsc::liftcode</td>
<td></td>
</tr>
<tr>
<td>plug2::optimization2</td>
<td>plug2::optimization1<br />nsc::cleanup</td>
<td></td>
</tr>
</table>
<p>Using the phases in the table above as the basis for the
dependency graph, the following graph will be constructed. This
graph contains all the information that is also present in the
phases set. As seen in the diagram below there are nodes without
phase objects (marked with dotted lines) and nodes with multiple
dependencies. There could also be cycles at this stage, but there
are non in this example.
</p>
<div style="text-align: center;">
<img src="sip-00002-6.png" alt="" border="0" width="60%" height="" />
</div>
<p>The transformation of the dependency graph into a dependency tree
is the basis of this proposal. The resulting dependency tree is
shown below. This tree is then the basis for generating the compiler
phase list.
</p>
<div style="text-align: center;">
<img src="sip-00002-24.png" alt="" border="0" width="60%" height="" />
</div>
<p>The resulting compiler phase list of traversing the tree above is
shown below. It is ensured that the <code>parser</code> phase is the
first and that the <code>terminal</code> phase is the last in this list.</p>
<ul>
<li>nsc::parser</li>
<li>nsc::typer</li>
<li>plug1::optimization1</li>
<li>nsc::pickler</li>
<li>nsc::liftcode</li>
<li>plug2::optimization1</li>
<li>nsc::tailcalls</li>
<li>nsc::erasure</li>
<li>nsc::cleanup</li>
<li>plug2::optimization2</li>
<li>nsc::jvm</li>
<li>nsc::terminal</li>
</ul>
<a name="futuregoals"></a>
<h3>Future Goals That Extend Beyond This Proposal</h3>
<ul>
<li>Create and add new backends to the compiler that will replace
the JVM or MSIL specific phases using plug-ins.</li>
<li>Creating internal tools by subclassing the class
<code>Global</code>.</li>
<li>The constraint system should not contain support for the
following constraints.
<ul>
<li>A phase should be able to declare that it will replace a
given phase and thereby ignoring other constraints declared by
this phase and taking over the constraints from the replaced
phase.</li>
<li>A phase should be able to declare that if included it will
remove a given number of phases and removing constraints to
these phases.</li>
<li>A phase should be able to declare that it only wants to be
included if a given phase is also present or else it will be
silently dropped.</li>
</ul>
</li>
</ul>
<a href="#top">To the top</a>
<a name="description"></a>
<h2>Description</h2>
<p>This section will cover a short description of the present design
and the full description of the proposed design changes, including
the constraint system and algorithm for solving the constraint
system.
</p>
<a name="presentdesign"></a>
<h3>The Present Design</h3>
<p>Currently phases in the compiler are ordered statically by a hard
coded list structure in <code>class Global</code> (found in
Global.scala), where the method <code>builtInPhaseDescriptors</code>
returns the list of phase objects. The resulting list from invoking
this method is then extended with the phases from the plug-in
supplied by the user. This is done in the
<code>computePhaseDescriptors</code> method in <code>class
Plugins</code> (found in plugins/Plugins.scala). This method is
then again called from the method <code>phaseDescriptors</code> in
<code>class Global</code>. So all together the process of
calculating the phases of the compiler is spread over several files
and quite complicated.</p>
<p>A plug-in can supply a number of phases to the compiler and the
architecture of the plug-in subsystem already supply some of the
functionality that will be added to all internal phases. All phases
are a subclass of <code>class SubComponent</code> and all phases
supplied by plug-ins are subclasses of <code>class
PluginComponent</code> (which is a subclass of <code>class
SubComponent</code>).</p>
<a name="proposeddesign"></a>
<h3>The Proposed Design</h3>
<p>This section will cover the design of the proposed system for
handling inclusion of internal and external phases uniformly. It
will also cover a description of the constraint system and a
algorithm for solving these constraint for generating the compiler
phase chain. This design will focus on satisfying the use cases <a
href="#usecases">described earlier</a>.
</p>
<p>From the use cases above it is possible to identify two kinds of
constraints. The first constraint is the <i>runs after</i>
dependency and the second constraint is the <i>runs right after</i>
dependency. It is necessary to declare that a given phase has to
<i>runs after</i> a list of phase names, however the <i>runs right
after</i> constraint only needs to declare the name one phase.
</p>
<ul>
<li>Lets define the <i>runs after</i> constraint using the
<code>runsAfter</code> variable with the Scala type
<code>List[String]</code>. Using this constraint, phase B would
declare <code>runsAfter=List(A)</code>, meaning that phase B would
like to run somewhere after phase A, but as early as possible.</li>
<li>Lets define the <i>runs right after</i> constraint using the
<code>runsRightAfter</code> variable with the Scala type
<code>Option[String]</code>. Using this constraint, phase B would
declare <code>runsRightAfter=Some(A)</code>, meaning that phase B
wants to run right after phase A and no other phases are allowed to
come between phase A and B. From a larger perspective meaning that
phase A and B can be regarded as on large phase with one input and
one output.</li>
</ul>
<p>A goal of this work is to handle the inclusion and constraint
resolution of internal and plug-in supplied phases in a uniform way,
this also means that the responsibility of phase assembly should not
be handled by the plug-in subsystem any more, but by a separate
instance. For this separate instance we create a new
<code>PhaseAssembly</code> trait that is mixed into the
<code>Global</code> class in the same way as <code>Plug-Ins</code>
is mixed in.
</p>
<p>In the diagram below a schematic overview of the proposed system
is shown. A description of the individual components and the
modification needed are given below.
</p>
<div style="text-align: center;">
<img src="sip-00002-9.png" alt="" border="0" width="55%" />
</div>
<a name="propdesign1"></a>
<h4>1. Internal Phases</h4>
<p>All internal phase objects are declared in class
<code>Global</code> and will be extended to include the
<code>runsAfter</code> and the <code>runsRightAfter</code> variables
with the types described above. This will ensure that the internal
phases will always succeed in creating a compiler phase chain
depending on the compiler target.
</p>
<p>The phase objects that are needed for a given target will be
added to the phases set. See <a href="#propdesign3">3. Phases
Set</a> for more information.
</p>
<a name="propdesign2"></a>
<h4>2. External Phases</h4>
<p>All external phases are supplied through plug-ins. All these
phases inherit from class <code>PluginComponent</code>. Defining the
<code>runsRightAfter=None</code> in class
<code>PluginComponent</code> will make the changes to the existing
plug-ins minimal, but makes it possible for external phases to use
the feature by simple overriding the variable if needed.
</p>
<p>It is still the responsibility of the plug-in subsystem to load
the actual plug-ins from the jar files and also the processing of
all the <code>-Xplugin*</code> compiler options available at the
command line. The phase assembly code currently present in the
plug-in subsystem will be replaced by code that adds all plug-in
supplied phases to the phases set. See <a
href="#propdesign3">3. Phases Set</a> for more information.
</p>
<a name="propdesign3"></a>
<h4>3. Phases Set</h4>
<p>The phases set contains all the phase objects that should be
taken into consideration when performing compiler phase chain
assembly. This phases set would located in the class
<code>Global</code>. A proper Scala declaration of such a phases set
is shown below.
</p>
<pre>protected val phasesSet : HashSet[SubComponent] = new HashSet[SubComponent]()</pre>
<p>Ensuring that all phase objects are unique with respect to the
phase names, the <code>hashCode</code> method in class
<code>SubComponent</code> will be overridden to return the hash code
of the phase name and not the object itself.
</p>
<a name="propdesign4"></a>
<h4>4. Phase Assembly</h4>
<p>From a compiler design perspective this component will include
the data structures and implementation of the constraint solving
algorithm that are needed to implement this proposal. It would be
integrated as a <code>trait PhaseAssembly</code> that is mixed into
<code>class Global</code> with this self type <code>self: Global
=></code>.
</p>
<p>A very high level description of the actions performed by this
component can be split into three distinct parts. A detailed
algorithm that implements this high level description can be found
below in the <a href="#constsolver">section on the constraint
solving algorithm</a>.
</p>
<ol>
<li><strong>Phases Set to Dependency Graph</strong><br />
<p>From the phase objects in the phases set a dependency graph is
created, where the edges in this graph are constructed from the
<code>runsAfter</code> and <code>runsRightAfter</code>
constraints. Each node in the dependency graph has both a phase
name and a phase object, because all constraints in the phase
objects are only given by name.
</p>
</li>
<li><strong>Dependency Graph to Dependency Tree</strong><br />
<p>To be able to create the compiler phase chain the dependency
graph has to be converted into a dependency tree. The dependency
graph contains nodes that have a phase name, but no phase
object. These nodes have to be removed. Also all edges in the
graph that are generated from <code>runsRightAfter</code>
constraints have to be handled special to enforce this constraint
and the graph is checked for cycles. If a cycle is found, the
algorithm will terminal with a fatal error, preventing the
compiler from starting. If no cycles are found in the dependency
graph is simplified with respect to what will become the root in
the dependency tree, by removing unneeded dependency edges. This
will transform the dependency graph into a dependency tree.
</p>
</li>
<li><strong>Dependency Tree to Compiler Phase List</strong><br />
<p>The compiler phase list is constructed by traversing the
dependency tree from the root. The traversal algorithm will be
described below.
</p>
</li>
</ol>
<a name="propdesign5"></a>
<h4>5. Constraint System</h4>
<p>The constraint system is implemented as a graph structure where
phases are modeled as nodes and the constraints are modeled as
directed edges. This section will describe the basic entities of the
constraint system and how they are modeled in the dependency graph.</p>
<table>
<tr>
<td colspan="2">
<p>Each node in the dependency graph contains several items of
information about the phase object and the dependencies to and
from it.</p>
</td>
</tr>
<tr>
<td>
<ul>
<li><strong>phaseobj:</strong> <i>(type:
SubComponent)</i><br /> A reference to the actual phase
object, if there is any. If there is a phase object then
there is also a phase name, but the constraints are only
given by name, so some nodes may have a phase name and no
phase object.
</li>
<li><strong>phasename:</strong> <i>(type: String)</i><br />
The name of the phase object. This is always set.
</li>
<li><strong>after:</strong> <i>(type: HashSet[Edge]())</i>
<br />
This is the set of <code>Edge</code> object that point to
<code>Node</code> objects that have to come before this
<code>Node</code>. So all <code>runsAfter</code> and
<code>runsRightAfter</code> constraints given by a phase
object are present in this <code>after</code> list.
</li>
<li><strong>deps:</strong> <i>(type: HashSet[Edge]())</i>
<br />
This is the set of <code>Edge</code> object that points to
this <code>Node</code> object. So its the opposite of the
<code>after</code> list.
</li>
</ul>
</td>
<td>
<div style="text-align: center;">
<img src="sip-00002-15.png" alt="" border="0" width="235" height="186" />
</div>
</td>
</tr>
<tr>
<td colspan="2">
<p>Each edge in the dependency graph is directed, so it has
references to its to and from <code>Node</code> objects and
knows if it is created from a <code>runsAfter</code>
constraint (soft) or created from a
<code>runsRightAfter</code> constraint (hard). </p>
</td>
</tr>
<tr>
<td>
<ul>
<li><strong>to:</strong> <i>(type: Node)</i>
<br />
This is a reference to the <code>Node</code> object this
<code>Edge</code> object is pointing to. This means that the
<code>frm</code> node has a dependency on the
<code>to</code> node.
</li>
<li><strong>frm:</strong> <i>(type: Node)</i>
<br />
This is a reference to the <code>Node</code> object this
<code>Edge</code> object is pointing from.
</li>
<li><strong>hard:</strong> <i>(type: Boolean)</i>
<br />
In the dependency graph we have soft and hard
dependencies. The soft dependencies are created from the
<code>runsAfter</code> list of constraints. The hard
dependencies are created from the
<code>runsRightAfter</code> constraint, if any.
</li>
</ul>
</td>
<td>
<div style="text-align: center;">
<img src="sip-00002-16.png" alt="" border="0" width="152" height="186" />
</div>
</td>
</tr>
</table>
<p>Below is the interface implementation of the dependency graph in
Scala. There are classes implementing the edges and nodes and the
container for storing the edges and nodes and aux. method for easy
access to the stored information.
</p>
<pre>
class Edge(f: Node, t: Node, h: Boolean) {
var frm = f
var to = t
var hard = h
}
class Node(phs:SubComponent, name:String) {
var phasename: String = name
var phaseobj: SubComponent = phs
var after: HashSet[Edge] = new HashSet[Edge]()
var deps: HashSet[Edge] = new HashSet[Edge]()
}
class DependencyGraph {
val nodes = new HashMap[String,Node]()
val edges = new HashSet[Edge]()
def getNodeByPhase(name : String) : Node
def getNodeByPhase(phs : SubComponent) : Node
def softConnectNodes(frm: Node, to: Node) : Unit
def hardConnectNodes(frm: Node, to: Node) : Unit
}
</pre>
<a name="constsolver"></a>
<h4>Constraint Solving Algorithm</h4>
<p>This algorithm will build a list of phase objects from a set of
phase objects with constraints. The position of each phase object in
the final list will ensure that all its constraints are
satisfied. The algorithm does this by constructing a dependency
graph from the set of phase objects. This dependency graph is then
over a series of steps converted into a dependency tree with a root
node. To construct the final compiler phase list the dependency tree
is traversed from the root building the list of phase objects.</p>
<p>The function below is the top most function of this algorithm
that transforms the set of phase objects into a ordered list of
phase objects so all constraints are satisfied. This function
follows the description given about the <a href="#propdesign4">phase
assembly component</a>. All the auxiliary functions will be covered
below.</p>
<pre> <b>function</b> phasesSetToPhasesList(phasesSet)
<i>// call function to generate dependency graph from phases set</i>
graph <- phasesSetToDepGraph( phaseSet )
<i>// call function to transform the dependency graph to a dependency tree.</i>
<i>// if the dependency graph contains a cycle a fatal error will be generated.</i>
rootnode <- depGraphToDepTree( graph )
<i>// call function to recursively build phase list from the root of </i>
<i>// the dependency tree</i>
phaselist <- depTreeToPhaseList( rootnode, <b>new</b> List() )
<b>return</b> phaselist
</pre>
<p> List of functions called:</p>
<ul>
<li><a href="#func1">The <code>phasesSetToDepGraph</code> function</a></li>
<li><a href="#func2">The <code>depGraphToDepTree</code> function</a></li>
<li><a href="#func3">The <code>depTreeToPhaseList</code> function</a></li>
</ul>
<a name="func1"></a>
<h5>The <code>phasesSetToDepGraph</code> function</h5>
<p>The dependency graph, with the elements described in the <a
href="#propdesign5">section about the constraint system</a>, is
build from the phase objects in the phases set.<br />
It works as follows:
</p>
<ul>
<li>create a new dependency graph</li>
<li>foreach phase in the phasesSet
<ul>
<li>create a new graph node <i>fromnode</i> from the phase object and add the
ndoe to the graph</li>
<li>if the phase object has a runs right after constraint, then
create a node <i>tonode</i> from the phase name of the constraint and create a
hard edge between <i>fromnode</i> and <i>tonode</i>. Eventual
runs after constraints are ignored.</li>
<li>if the phase object has no runs right after constraints a
node <i>tonode</i> is created from each runs after constraint
and connected with a soft edge to the <i>fromnode</i>.</li>
</ul>
</li>
</ul>
<p>A pseudo code version of this algorithm is shown below.</p>
<pre> <b>function</b> phasesSetToDepGraph( phaseSet )
<i>// create new graph structure to hold data</i>
graph <- <b>new</b> DependencyGraph
<i>// iterate over all phase objects in the phasesSet</i>
<b>for</b> phs <b>in</b> phasesSet
<i>// create a node from the phase object</i>
fromnode <- graph.getNodeByName(phs)
<i>// test if the phase object has a runs right after constraint</i>
<b>if</b> phs.runRightAfter <b>not eq</b> ""
<i>// create a node from the phase constraint name and connect with hard edge</i>
tonode <- graph.getNodeByName( phs.runRightAfter )
graph.hardConnectNodes( fromnode, tonode )
<b>else</b>
<i>// the phase object did not have a runs right after constraint</i>
<i>// iterate over all phase constraints in the runs after constraint list</i>
<b>for</b> phsname <b>in</b> phs.runsAfter
<i>// create a node from the phase constraint name and connect with soft edge</i>
tonode <- graph.getNodeByName( phsname )
graph.softConnectNodes( fromnode, tonode )
<b>return</b> graph
</pre>
<a name="func2"></a>
<h5>The <code>depGraphToDepTree</code> function</h5>
<p>The process of converting the dependency graph into a dependency
tree is complex, so to simplify the algorithm the individual steps
are split into functions.<br />
It works as follows:
</p>
<ul>
<li>call method that will remove all nodes from the graph that
have no phase object attached</li>
<li>call method that will find all edges in the graph that are
marked as hard and enforce that the <code>deps</code> list of the
edges <code>to</code> node reference only contains this
edge. Other edges are moved down to the edges <code>frm</code>
node reference</li>
<li>What is to become the root of the dependency tree and also the
first element in the phase list is extracted from the graph.</li>
<li>call method to check if there are cycles in the graph starting
from the root node, if there are cycles a fatal error is generated</li>
<li>call method to simplify graph into a tree by removing edges
that will not break the constraints given</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> depGraphToDepTree( graph )
<i>// Remove all nodes where phaseobj == null</i>
removeDanglingNodes( graph )
<i>// Enforce hardlinks / runsRightAfter and promote nodes down the tree</i>
enforceHardlinks( graph )
<i>// Extract the root node from the graph</i>
root <- graph.getNodeByPhase( ROOTNAME )
<i>// Test for cycles in the graph, if found generate fatal error</i>
testForCycles( root, <b>new</b> Set() )
<i>// Simplify graph by removing edges, starting from the given root node</i>
simplifyGraphFromNode( root, graph )
<b>return</b> root
</pre>
<p> List of functions called:</p>
<ul>
<li><a href="#func4">Helper function <code>removeDanglingNodes</code></a></li>
<li><a href="#func5">Helper function <code>enforceHardlinks</code></a></li>
<li><a href="#func6">Helper function <code>testForCycles</code></a></li>
<li><a href="#func7">Helper function <code>simplifyGraphFromNode</code></a></li>
</ul>
<a name="func3"></a>
<h5>The <code>depTreeToPhaseList</code> function</h5>
<p>The graph has been transformed into a tree and a reference to the
root node is identified. The tree is now traversed from the root
node, by calling itself recursively on each node object. The empty
list is given to the function together with the root node and the
phase objects are added to the list as the function traverses the
tree.<br />
It works as follows:</p>
<ul>
<li>the function is called with a node object and a list</li>
<li>the phase object of the node object, the function was call
with, is added to the list</li>
<li>if this node has no nodes that depend on it, the list of phase
objects is returned</li>
<li>if this node has only one node that depends on it, we call
recursively with that node and the list as arguments, the result
of this is returned</li>
<li>if this node has more then one node that depends upon it,
these dependent nodes are ordered. We now iterate over this
ordered list of nodes and call recursively the function.</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> depTreeToPhaseList( node, chain )
<i>// add the phase object of the node to the chain list</i>
chain <- chain.append( node.phaseobj )
<i>// no more depends, return the chain</i>
<b>if</b> node.deps.length == 0
<b>return</b> chain
<i>// only one depend, so no need to order one element, just call recursively </i>
<b>else if</b> node.deps.length == 1
<b>return</b> depTreeToPhaseList( node.deps.getFirst().frm, chain )
<i>// more then one depend</i>
<b>else</b>
<i>// the depends are ordered and saved as a list</i>
order <- dependencyOrder( node.deps )
<i>// iterate over the list of ordered depend nodes and call recursively </i>
<b>for</b> nd <b>in</b> order
chain <- depTreeToPhaseList( nd, chain )
<i>// return the complete chain</i>
return chain
</pre>
<p> List of functions called:</p>
<ul>
<li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
</ul>
<a name="func4"></a>
<h5>Helper function <code>removeDanglingNodes</code></h5>
<p>This helper function will given a dependency graph find all nodes
that have no reference to a phase object and remove these nodes from
the graph. (See <a href="#usecase2">use caes 2</a>) These nodes that
have no phase object are generated from runs after or runs right
after constraints to phases that are not loaded.<br /> It is done as
follows:</p>
<ul>
<li>create a list L of all nodes in the given graph that have no
phase object</li>
<li>foreach node in L
<ul>
<li>remove the node from the graph</li>
<li>foreach edge in the nodes deps list, remove this edge from
the graph</li>
</ul>
</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> removeDanglingNodes( graph )
<i>// find all nodes with no phase object</i>
dnodes <- filter( lambda node: node.phaseobj == null, graph.nodes )
<i>// iterate over these nodes and remove them one by one</i>
<b>for</b> node <b>in</b> dnodes
graph.nodes.remove( node )
<i>// also remove all edge objects related to this node object</i>
<b>for</b> edge <b>in</b> node.deps
graph.edges.remove( edge )
edge.frm.after.remove( edge )
</pre>
<a name="func5"></a>
<h5>Helper function <code>enforceHardlinks</code></h5>
<p>This helper function will given a dependency graph find all edges
in the graph that are marked as hard. It will then ensure that the
<code>to</code> node the edge is pointing to only has this edge in
its deps list. (See <a href="#usecase7">use case 7</a>).<br />
It works as follows:
</p>
<ul>
<li>loop until no edges are moved
<ul>
<li>create list L of all edges in the given graph that are marked
as hard</li>
<li>foreach edge in L
<ul>
<li>create list HE of edges marked as hard in the deps list of
the node object this edge is pointing to</li>
<li>if there is more then one element in HE it will not be possible
to generate a compiler phase list and a fatal error is
generated</li>
<li>if there is only one element in HE, create a list E of all
the edges not marked as hard in the deps list of
the node object this edge is pointing to</li>
<li>override the deps list of the node object this edge is
pointing to with the HE list</li>
<li>foreach pedge in E
<ul>
<li>attach the pedge to the deps list of the node object this
edge is pointing from</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> enforceHardlinks( graph )
<b>do</b>
<i>// lets assume that no edges are moved until proven otherwise</i>
rerun <- <b>false</b>
<i>// find all edges in the graph that are marked as hard</i>
hardlinks <- <b>filter</b>( <b>lambda</b> e: e.hard, graph.edges )
<i>// iterate over all hard edges found</i>
<b>for</b> edge <b>in</b> hardlinks
<i>// find all hard edges in the deps list of the node this edge points to</i>
sanity <- <b>filter</b>( <b>lambda</b> e: e.hard, edge.to.deps )
<b>if</b> sanity.length > 1
<i>// generate fatal error, there is more then one runs right
// after constraint on the same node</i>
<b>else</b>
<i>// find all non hard edges in the deps list of the node this edge points to</i>
promote <- <b>filter</b>( <b>lambda</b> e: not e.hard, edge.to.deps )
<i>// assign the list of hard edges to the deps list of the node this
// edge points to</i>
edge.to.deps <- sanity
<i>// iterate over all non hard edges and attach them to the deps list of the
// node this edge points from</i>
<b>for</b> pedge <b>in</b> promote
<i>// we have moved an edge - we need to rerun this to check</i>
rerun <- true
<i>// move the edge object down along the hard edge</i>
pedge.to <- edge.frm
edge.frm.deps.attach(pedge)
<i>// if edges where moved, then rerun to make sure the structure is valid</i>
<b>while</b> rerun
</pre>
<p>To better understand this little function, lets look at some
diagrams and how this function works on these examples. The first
example is shown below. Here phase B wants to run right after phase
A and phase C just wants to run somewhere after phase A, so there is
no problem in pushing phase C down to run after phase B. The
function does this be first finding all the hard edges (the blue
edges). It then inspects the edges one by one and in this example we
have only one hard edge. The edge goes from phase B to phase A,
meaning that phase B want to run right after phase A. The algorithm
now looks at the <code>deps</code> list of phase A and finds that
there is only one hard edges (this is very good). The algorithm now
filters out the non-hard edges and finds the edge to phase C. This
edge is now detached from phase A and attached to phase B.
</p>
<div style="text-align: center;">
<img src="sip-00002-17.png" alt="" border="0" width="40%" />
</div>
<p>With the same arguments can we also handle the situation shown
below.</p>
<div style="text-align: center;">
<img src="sip-00002-18.png" alt="" border="0" width="45%" />
</div>
<a name="func6"></a>
<h5>Helper function <code>testForCycles</code></h5>
<p>This helper function will, given a node in the graph and a set
containing node names, determine of there are any cycles in the
graph. This will ensure that <a href="#usecase8">use case 8</a> is
addressed. Please note that this function will call itself
recursively and is called the first time with the root node and an
empty set of node names.
<br />
It is done as follows:
</p>
<ul>
<li>test if the name of the node is already contained in the set,
if it is a cycle is found and a fatal error is generated.</li>
<li>if the name is not in the set, the name of the node is added
to the set</li>
<li>the deps list of the node is ordered and stored in nodes</li>
<li>foreach nd in nodes
<ul>
<li>call the function recursively with the node nd and the set
of node names</li>
</ul>
</li>
<li>remove the name of the node from the set</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> testForCycles( node, names )
<i>// test if the name of the node is already in the set</i>
<b>if</b> names.contains( node.phasename )
<i>// names are unique, so this is a fatal error, there is a cycle
// the same name has been visited twice</i>
<i>// add the name of the node to the set</i>
names.add( node.phasename )
<i>// use the dependencyOrder function to sort the deps of the node</i>
nodes <- dependencyOrder( node.deps )
<i>// iterate over all deps nodes and call recursively</i>
<b>for</b> nd <b>in</b> nodes
testForCycles(nd, names)
<i>// remove the name of the node from the set</i>
names.remove( node.phasename )
</pre>
<p> List of functions called:</p>
<ul>
<li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
</ul>
<p>There are other ways of detecting cycles in graphs, that are
faster and more efficient, please <a
href="http://www.cs.umu.se/kurser/TDBA77/VT06/algorithms/BOOK/BOOK2/NODE68.HTM">read
here</a> for more information.</p>
<a name="func7"></a>
<h5>Helper function <code>simplifyGraphFromNode</code></h5>
<p>This helper function will, given a node in the graph and a
reference to the graph structure, remove unneeded edges and
transforming the graph into a tree. Please note that this
function will call itself recursively and is called the first time
with the root node and the full graph.
<br />
It is done as follows:
</p>
<ul>
<li>create an empty list RM</li>
<li>foreach edge in the deps list of the node, if the node this
edge is pointing from has mode then one dependencies then add this
edge to RM</li>
<li>foreach edge in RM, remove this edge from the graph</li>
<li>the deps list of the node is ordered and stored in nodes</li>
<li>foreach nd in nodes
<ul>
<li>call the function recursively with the node nd and the
updated graph</li>
</ul>
</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> simplifyGraphFromNode( node, graph )
removal <- <b>new</b> List()
<b>for</b> edge <b>in</b> node.deps
<b>if</b> edge.frm.after.length > 1
removal.append( edge )
<b>for</b> edge <b>in</b> removal
node.deps.remove( edge )
edge.frm.after.remove( edge )
graph.edges.remove( edge )
nodes <- dependencyOrder( node.deps )
<b>for</b> nd <b>in</b> nodes
simplifyGraphFromNode( nd, graph )
</pre>
<p> List of functions called:</p>
<ul>
<li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
</ul>
<p>In the example shown below we simplify this graph into a tree
from the node A. This means that we start the traversal from node A
and uses the <code>A.deps</code> list to continue the traversal. If
<code>A.deps</code> contains more then one element, the list is
sorted so we first visit all plug-in supplied phases in alphabetical
order and then the internal phase. In this example this gives the
order <code>[P,R,B]</code>. Before the algorithm traverses to a new
node it checks that node for its number of <code>after</code>
links. If it has more then one after link, it simply deletes the
link to it so it can carry on. In the example below this is the case
of node P and T. When the algorithm visits node P and wants to
traverse on to node T, it checks the number of <code>after</code>
constraints in node T. Node T has two after constraints so it can
safely delete the edge between node P and T, because there must be
another node that we have not included yet, that will include this
node T.</p>
<div style="text-align: center;">
<img src="sip-00002-19.png" alt="" border="0" width="70%" />
</div>
<a name="func8"></a>
<h5>Helper function <code>dependencyOrder</code></h5>
<p>This helper function will given a list of edges sort them
according to a specific scheme are return them as a list. The
sorting scheme is as follows. The returned list is the reverse of
first having the plug-in supplied phases in alphabetical order
followed by the internal phases. <br />
This is done as follows:
</p>
<ul>
<li>nodes for internal and external phases are filtered from the
deps list</li>
<li>the external nodes are ordered by the phase name and
reversed</li>
<li>the external nodes are appended to the internal nodes list and
the resulting list is reversed</li>
</ul>
<p>Below is shown a pseudo code version of the function.</p>
<pre> <b>function</b> dependencyOrder( deps )
external <- <b>filter</b>(<b>lambda</b> e: (! e.frm.phaseobj.internal), deps)
internal <- <b>filter</b>(<b>lambda</b> e: e.frm.phaseobj.internal, deps)
extnodes <- <b>map</b>(<b>lambda</b> e: e.frm, external)
extnodes <- extnodes.sort(<b>lambda</b> (n1,n2): (n1.phasename compareTo n2.phasename) < 0)
extnodes <- extnodes.reverse
nodes <- (<b>map</b>(<b>lambda</b> e: e.frm, internal)).<b>extend</b>( extnodes )
nodes <- nodes.reverse
<b>return</b> nodes
</pre>
<a name="namingscheme"></a>
<h3>Naming Scheme for Internal and External Phases</h3>
<p>Having the ability to load a large number of plug-ins, where each
plug-in can supply several named phases, gives a high possibility of
name clashes. There is no way to enforce unique names in plug-in
supplied phases because they can have inter dependencies. The best
solution is to suggest a naming scheme for phase names.</p>
<p>This naming scheme composes all names from the package/plug-in
name, then a double colon <code>::</code> and then the actual phase
name. So for the internal compiler phase names this would be:</p>
<pre> val phaseName = "nsc::tailcalls"</pre>
<p>For a plug-in with name <code>unit</code> that supplies a phase
with name <code>convert</code> the full phase name would be:</p>
<pre> val phaseName = "unit::convert"</pre>
<p>By using this naming scheme for all phases there is a namespace
for phase names within each plug-in and external phases will never
clash with internal phase names.</p>
<a name="implications"></a>
<h3>Implications of This Proposal</h3>
<p>All plug-ins need to be updated to the design in this
proposal. This means that all current plug-ins will not be able to
load. However the modifications to the existing plug-ins are
minor. Current variables and types:
</p>
<pre> val runsAfter: String</pre>
<p>The proposed changes are:</p>
<pre> val runsAfter: List[String]</pre>
<p>All internal phases in the compiler need to declare there
ordering. This means that instead of having the ordering very
explicit in the list structure, the ordering is encoded into each
phase object. The following signatures will be added to the
<code>SubComponent</code> class.</p>
<pre> val runsAfter: List[String]
val runsRightAfter: Option[String]</pre>
<p>The following items must be added to each phase
object to implement the signatures (however they must be adapted to
the individual phase object).</p>
<pre> val runsAfter = List[String]("phasename")
val runsRightAfter = None</pre>
<p>There is no reason to change the names of the phases as suggested in
the section above, but it will minimize the chance of name clashes
and will also minimize the chance of accidental dependencies.</p>
<a href="#top">To the top</a>
<a name="implementation"></a>
<h2>Implementation</h2>
<p>An example implementation has been created that follows the
algorithm sketched in this proposal and models the compilers
internal structure. This implementation can be found in the file <a
href="sip-00002-10.scala">sip-00002-10.scala</a>. The example
implementation uses the phases from <a href="#usecase10">use case
10</a> as its data.
</p>
<p>Running the example code</p>
<ul>
<li>Compile the code using <code>scalac
sip-00002-10.scala</code></li>
<li>Run the example using <code>scala
depgraph.DepGraphTest</code></li>
<li>This will generate the console output as seen below and <a
href="http://www.graphviz.org/">Graphviz</a> dot files that can be
compiled into png images. These png images are also shown
below.</li>
</ul>
<a name="consoleexample"></a>
<p>Console output</p>
<pre>$ scalac sip-00002-10.scala
$ scala depgraph.DepGraphTest
[dropping depend on node with no phase: nsc::msil]
[removing edge: nsc::tailcalls => nsc::pickler]
[removing edge: plug2::optimization2 => plug2::optimization1]
- nsc::parser
- nsc::typer
- plug1::optimization1
- nsc::pickler
- nsc::liftcode
- plug2::optimization1
- nsc::tailcalls
- nsc::erasure
- nsc::cleanup
- plug2::optimization2
- nsc::jvm
- nsc::terminal
$ </pre>
<p>PNG images of the generated dot files showing the graph
structure. Internal phases are marked as black circles and plug-in
phases are marked as green circles. The hard links (runsRightAfter
constraints) are marked as blue arrows. Click on the pictures to
enlarge.</p>
<a name="examples"></a>
<table>
<tr>
<td valign="top"><strong>1. </strong><br />
<div style="text-align: center;">
<a href="sip-00002-11.png">
<img src="sip-00002-11.png" alt="" border="0" width="100%"/>
</a>
</div>
</td>
<td valign="top"><strong>2. </strong><br />
<div style="text-align: center;">
<a href="sip-00002-12.png">
<img src="sip-00002-12.png" alt="" border="0" width="100%" />
</a>
</div>
</td>
<td valign="top"><strong>3. </strong><br />
<div style="text-align: center;">
<a href="sip-00002-13.png">
<img src="sip-00002-13.png" alt="" border="0" width="100%" />
</a>
</div>
</td>
<td valign="top"><strong>4. </strong><br />
<div style="text-align: center;">
<a href="sip-00002-14.png">
<img src="sip-00002-14.png" alt="" border="0" width="100%" />
</a>
</div>
</td>
</tr>
</table>
<a href="#top">To the top</a>
</body>
</html>
|