summaryrefslogtreecommitdiff
path: root/SIP/compiler-phase-init/sip.xhtml
blob: 10cb4c31898974ed141548dcbd9625de7ee3e195 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>Scala Compiler Phase and Plug-In Initialization</title>
  <meta name="sip" content="00002"/>
  <meta name="author" content="Anders Bach Nielsen"/>
  <meta name="version" content="0"/>
  <meta name="type" content="standards"/>
  <meta name="status" content="submission"/>
  <meta name="created" content="2008-08-25"/>
  <meta name="updated" content="2008-10-09"/>
  <meta name="scala-version" content="2.8.0.final-"/>
  <meta name="owner-contact" content="mailto:andersbach.nielsen@epfl.ch"/>
  <meta name="discussion" content="http://www.nabble.com/SIP-00002%3A-Compiler-Phase-Initialization-and-Plugins-td19677473.html"/>
  <!-- <link rel="auxiliary" href="[URI]" type="[mime type]"/> -->
  <!-- <link rel="replaces" href="[URI]" type="application/xhtml+xml"/> -->
  <!-- <link rel="depends-on" href="[URI]" type="application/xhtml+xml"/> -->
  <!-- Stylesheet -->
  <link rel="stylesheet" href="http://lampsvn.epfl.ch/svn-repos/scala/sip/trunk/sip.css" type="text/css"/>
</head>
<body>
  <a name="top"></a>
  <h1>Scala Compiler Phase and Plug-In Initialization</h1>

  <h2>Abstract</h2>

  <p>This document describes the design and implementation of a more
  uniform way of handling internal compiler phases and external user
  supplied phases via plug-ins in the Scala compiler.</p>

  <h2>Contents</h2>

  <ul>
    <li><a href="#motivation">Motivation</a>
    <ul>
      <li><a href="#goalsandreq">Goals</a></li>
      <li><a href="#usecases">Use Cases</a></li>
      <li><a href="#detailedusecases">Detailed Use Cases</a>
      <ul>
	<li><a href="#usecase1">Assembling compiler internal phases for the JVM target ...</a></li>
	<li><a href="#usecase2">Assembling compiler internal phases for	the MSIL target ...</a></li>
	<li><a href="#usecase3">Adding one plug-in supplied phase to the JVM target ...</a></li>
	<li><a href="#usecase4">Adding two plug-in supplied phase to the JVM target ...</a></li>
	<li><a href="#usecase5">Adding two plug-in supplied phases, where one has a ...</a></li>
	<li><a href="#usecase6">Adding plug-in supplied phases with dependencies to ...</a></li>
	<li><a href="#usecase7">Two separate phases that should be handled as a single ...</a></li>
	<li><a href="#usecase8">A cyclic dependency among phases is a fatal error.</a></li>
	<li><a href="#usecase9">Special handling of the	<code>parser</code> and <code>terminal</code> phases.</a></li>
	<li><a href="#usecase10">Larger compiler example with three plug-in supplied phases.</a></li>
      </ul>
      </li>
      <li><a href="#futuregoals">Future Goals That Extend Beyond This Proposal</a></li>
    </ul>
    </li>
    <li><a href="#description">Description</a>
    <ul>
      <li><a href="#presentdesign">The Present Design</a></li>
      <li><a href="#proposeddesign">The Proposed Design</a>
      <ul>
	<li><a href="#propdesign1">Internal Phases</a></li>
	<li><a href="#propdesign2">External Phases</a></li>
	<li><a href="#propdesign3">Phases Set</a></li>
	<li><a href="#propdesign4">Phase Assembly</a></li>
	<li><a href="#propdesign5">Constraint System</a></li>
	<li><a href="#constsolver">Constraint Solving Algorithm</a></li>
      </ul>
      </li>
      <li><a href="#namingscheme">Naming Scheme for Internal and External Phases</a></li>
      <li><a href="#implications">Implications of This Proposal</a></li>
    </ul>
    </li>
    <li><a href="#implementation">Implementation</a></li>
  </ul>


  <a name="motivation"></a>
  <h2>Motivation</h2>

  <p>This section will cover the motivating goals of creating this
  proposal and some use cases emphasizing these goals. There will also
  be a section on goals that extend beyond this proposal and will
  therefore not be covered by this proposal.</p>
  
  <a name="goalsandreq"></a>
  <h3>Goals</h3>

  <ul>
    <li>Handle inclusion and constraints for internal compiler phases
    and external user supplied phases via plug-ins in a uniform
    way.</li>

    <li>Phase ordering is based on a simple, but sufficient powerful
    and solvable constraint system.</li>

    <li>Phase ordering of internal compiler phases and external user
    supplied phases should retain as much compatibility with the
    current scheme as possible.</li>

    <li>Phase ordering is fully determined by the set of input phases.
    It does not matter, for example, what order the compiler processes
    the individual plug-ins that supply the external phases.</li>

    <li>Ordering of the internal compiler phases should always succeed
    in a valid compiler phase chain. Adding two plug-ins, which by
    themselves give a valid compiler phase chain, should also give a
    valid compiler phase chain.</li>

  </ul>

  <a name="usecases"></a> <h3>Use Cases</h3>

  <ol>
    <li><a href="#usecase1">Assembling compiler internal phases for
    the JVM target and no plug-ins are loaded.</a></li>

    <li><a href="#usecase2">Assembling compiler internal phases for
    the MSIL target and no plug-ins are loaded.</a></li>

    <li><a href="#usecase3">Adding one plug-in supplied phase to the
    JVM target compiler after type checking.</a></li>

    <li><a href="#usecase4">Adding two plug-in supplied phase to the
    JVM target compiler after type checking.</a></li>

    <li><a href="#usecase5">Adding two plug-in supplied phases, where
    one has a dependency on the other, to the JVM target
    compiler.</a></li>

    <li><a href="#usecase6">Adding plug-in supplied phases with
    dependencies to both plug-in supplied and internal phases to the
    JVM target compiler.</a></li>

    <li><a href="#usecase7">Two separate phases that should be handled
    as a single phase.</a></li>

    <li><a href="#usecase8">A cyclic dependency among phases is a
    fatal error.</a></li>

    <li><a href="#usecase9">Special handling of the
    <code>parser</code> and <code>terminal</code> phases.</a></li>

    <li><a href="#usecase10">Larger compiler example with three
    plug-in supplied phases.</a></li>


  </ol>

  <a name="detailedusecases"></a>
  <h3>Detailed Use Cases</h3>  

  <a name="usecase1"></a> 
  <h4>[1] Assembling compiler internal phases for the JVM target and
  no plug-ins are loaded.</h4>

  <p>All phases have to declare a <code>runsAfter</code> list of phase
  names that should be run before the phase itself in the final
  compiler phase chain. In this use case there are no plug-in supplied
  phases, that have to be included. Depending on the command line
  options and the JVM target the for this target default phases are
  added to the phases set. This is a set, because a phase may only
  occur once in the compiler phase chain.</p>

  <p>Following these basic <code>runsAfter</code> constraints the
  compiler phase chain is build from the phases set. In the example
  below a simple set of phase constraints can be converted into a
  graph, that in turn can be converted into a list of phases.
  </p>
  <div style="text-align: center;">
    <img src="sip-00002-1.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>Using the <code>runsAfter</code> list as the basic ordering
  constraints there are some more properties that have to be
  full filled.
  </p>
  <ul>
    <li>The phases <code>liftcode</code> and <code>flatten</code> are
    only included for the JVM target. They have <code>runsAfter</code>
    dependencies on the <code>refchecks</code> phase and the
    <code>constructors</code> phase, respectively. Because they are
    only included in the JVM target the <code>uncurry</code> phase and
    the <code>mixin</code> phase, respectively, need to have multiple
    dependencies as seen in the example below.

    <p>This example shows the desired transformation steps of a small
    dependency graph taken from the actual compiler phase graph. When
    processing <code>refchecks</code> we notice that it has two
    dependencies to it. One dependency comes from
    <code>liftcode</code> and the other from <code>uncurry</code>. We
    now look if any of these phases have more then one outgoing
    dependency. The <code>liftcode</code> phase has one outgoing
    dependency, but the <code>uncurry</code> phase has two, so one of
    the outgoing dependencies of <code>uncurry</code> has to go to
    <code>refchecks</code> and the other dependency has to go to some
    place we have not visited yet. So by this reasoning, we can safely
    remove the dependency from <code>uncurry</code> to
    <code>refchecks</code> (marked red).</p>

    <div style="text-align: center;">
      <img src="sip-00002-21.png" alt="" border="0" width="85%" height="" />
    </div>
    <p></p>
    </li>

    <li>Some internal phase pairs have very strong dependencies, like
    the phases <code>explicitouter</code> and <code>erasure</code> and
    also the <code>namer</code> and <code>typer</code> phases. See <a
    href="#usecase7">use case 7</a> for more information on this
    problem.</li>
  </ul>


  <a name="usecase2"></a>
  <h4>[2] Assembling compiler internal phases for the MSIL target and
  no plug-ins are loaded.</h4>

  <p>Like in <a href="usecase1">use case 1</a> depending on command
  line options and that the target being MSIL, the proper phases are
  added to the phases set. Again there are no plug-in supplied phases
  to be included. There are however some differences to use case 1.
  </p>
  <ul>
    <li>The phases <code>liftcode</code> and <code>flatten</code> are
    not included for the MSIL target. This makes the
    <code>runsAfter</code> list of the phases <code>uncurry</code> and
    <code>mixer</code> special by containing dependencies to phases
    that are found in the phases set.

    <p>For instance the <code>uncurry</code> phase has to specify that
    it wants to run after both <code>refchecks</code> and
    <code>liftcode</code>. <i>(For the JVM target this will force the
    <code>liftcode</code> phase before <code>uncurry</code>.)</i> For
    the MSIL target the phase <code>liftcode</code> is not present as
    shown in the example diagram below. One solution that will work is
    to silently drop all dependencies to phases, where there was no
    phase object in the phases set.</p>
    
    <div style="text-align: center;">
      <img src="sip-00002-22.png" alt="" border="0" width="55%" height="" />
    </div>    
  </li>
  </ul>
  


  <a name="usecase3"></a>
  <h4>[3] Adding one plug-in supplied phase to the JVM target
  compiler after type checking.</h4>

  <p>Like in <a href="#usecase1">use case 1</a> the internal phases
  are added to the phases set. In this use case a plug-in is loaded
  that will supply one phase to the phases set with a constraint that
  it wants to run after the <code>refchecks</code> phase. The original
  plug-in handling code will still handle loading plug-ins, disabling
  plug-ins, but will no longer have the responsibility of assembling
  the compiler chain. The plug-in is loaded and the phase is added
  to the phases set together with the internal phases.</p>
  <p>When assembling the compiler phase chain in this use case, there
  will be two phases directly after the <code>refchecks</code> phase
  and the dependencies can not be removed as described in <a
  href="#usecasee1">use case 1</a>. This situation is shown in the
  example below.</p>

  <div style="text-align: center;">
    <img src="sip-00002-2.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>One of the goals of this proposal is to be as compatible with the
  current behavior of the compiler as possible. So if the situation is
  as shown above that after including A, we can either take B or R, we
  need a deterministic scheme for always constructing the same phase
  chain. This is done by taking the plug-in supplied phases first and
  then the internal phase after wards. 
  </p>



  <a name="usecase4"></a>
  <h4>[4] Adding two plug-in supplied phase to the JVM target
  compiler after type checking.</h4>

  <p>This use case is very similar to <a href="#usecase3">use case
  3</a>, however instead of having one internal and one external phase
  after the <code>refchecks</code> phase, we have one internal and two
  external phases after the <code>refchecks</code> phase. This
  situation is shown in the example below.</p>

  <div style="text-align: center;">
    <img src="sip-00002-23.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>To be able to deterministically create the compiler phase chain
  we need a rule to handle multiple external phases at the same
  level. The simplest rule it to include the external phases
  alphabetically order, as shown in the example above.</p>


  <a name="usecase5"></a>
  <h4>[5] Adding two plug-in supplied phases, where one has a
  dependency on the other, to the JVM target compiler.</h4>

  <p>This use case is very similar to <a href="#usecase4">use case
  4</a>, however instead of the two plug-in supplied phases having a
  dependency on the same internal, one of the plug-in supplied phases
  has a dependency on the other and the other has a dependency on an
  internal phase. This is the example shown in the diagram below.</p>

  <div style="text-align: center;">
    <img src="sip-00002-3.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>As described in <a href="#usecase3">use case 3</a>, after
  inclusion of phase A the next phase to include is phase T. In the
  example above phase R is dependent on phase T, so the next to
  include is phase R. This means that including phase T, means to
  include T and the ordered sub tree, under phase T.</p>

  <p>The example shown below, can be solved applying the same logic as
  described above.</p>

  <div style="text-align: center;">
    <img src="sip-00002-4.png" alt="" border="0" width="100%" height="" />
  </div>


  <a name="usecase6"></a>
  <h4>[6] Adding plug-in supplied phases with dependencies to both
  plug-in supplied and internal phases to the JVM target
  compiler.</h4>

  <p>This use case builds upon <a href="#usecase1">use case 1</a>, <a
  href="#usecase3">use case 3</a>, <a href="#usecase4">use case 4</a>
  and <a href="#usecase5">use case 5</a> and shows how to handle
  plug-in supplied phases with dependencies on other plug-in supplied
  and internal phases.</p>

  <p>In the example shown below the plug-in supplied phase T has
  dependencies on the plug-in supplied phase P and the internal phase
  B. The transformation shown below is made by applying the logic from
  <a href="#usecase1">use case 1</a> to this example and using the
  assumption that the dependencies of the internal phases will not
  contain cycles and internal phases are visited last on each
  level.</p>

  <div style="text-align: center;">
    <img src="sip-00002-5.png" alt="" border="0" width="100%" height="" />
  </div>
  
  <a name="usecase7"></a> 
  <h4>[7] Two separate phases that should be handled as a single
  phase.</h4>

  <p>In <a href="#usecase1">use case 1</a> we specified the assembly
  of the compiler phase chain based on runs after
  constraints. However, the compiler contains some phase pairs that
  have very strong dependencies on each other or it simply does not
  make sense to put a phase in between. In this use case we will focus
  on the situation that we have two individual phase objects, but one
  needs to run right after the other and thereby act as one large
  phase.</p>

  <p>The solution to this is to introduce a new runs right after
  constraint to all phases. To keep the compatibility with current
  plug-ins as high as possible, this runs right after constraint is
  only mandatory for internal phases. In the example below the
  <code>runsRightAfter</code> constraint is marked as a blue arrow and
  behaves as a normal runs after constraint.
  </p>
  <div style="text-align: center;">
    <img src="sip-00002-7.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>In the example shown below, we can see the first four phases of
  the Scala compiler. There is a <code>runsRightAfter</code>
  constraint between <code>typer</code> and <code>namer</code>. There
  is in this example a plug-in supplied phase (plugin1) that want to
  run after the <code>namer</code> phase. The semantics of the run
  after constraint is that is should run somewhere after a specified
  phase, but as soon as possible. With this in mind it is perfectly
  valid to change the dependency of <code>plugin1</code> from
  <code>namer</code> to <code>typer</code>. </p>
  <div style="text-align: center;">
    <img src="sip-00002-8.png" alt="" border="0" width="100%" height="" />
  </div>

  <p>If the phase <code>plugin1</code> had declared to
  <code>runRightAfter</code> the <code>namer</code> phase, this would
  result in a unsolvable graph and would therefore generate an error,
  because only one phase can come right after another phase.
  </p>

  <a name="usecase8"></a> 
  <h4>[8] A cyclic dependency among phases is a fatal error.</h4>

  <p>Running the compiler without loading plug-ins, it is guaranteed
  that there are do cyclic dependencies among the internal phases, so
  the only way to introduce cycles in the phase graph is by loading
  one or more plug-ins. If a cycle is detected in the phase graph the
  compiler should refuse to start and give a proper error message.</p>

  <p>In the example shown below a cycle is detected and marked in
  pink. A cycle like this would generate a cyclic dependency error and
  outputting the names of the involved phases.</p>
  
  <div style="text-align: center;">
    <img src="sip-00002-20.png" alt="" border="0" width="35%" />
  </div>

  <a name="usecase9"></a>
  <h4>[9] Special handling of the <code>parser</code> and
  <code>terminal</code> phases.</h4>

  <p>There are two very special phase objects in the phase graph. The
  first phase is the <code>parser</code> phase. This phase will be the
  head of the compiler phase chain, there are however no other
  restrictions on this phase.</p>

  <p>The other special phase is the <code>terminal</code> phase, which
  has to be the last phase in the compiler phase chain. For this
  reason it is not allowed to run after or run right after the
  <code>terminal</code> phase. Any phase that specifies a runs after
  or runs right after dependency on the <code>terminal</code> phase
  will have that dependency dropped silently.</p>


  <a name="usecase10"></a> 
  <h4>[10] Larger compiler example with three plug-in supplied phases.</h4>

  <p>This example is implemented in the <a
  href="#implementation">reference implementation</a> of the algorithm
  below. In this example the compiler consists of nine internal phases
  and two plug-ins are loaded adding a total of three phases to the
  compiler.
  </p>

  <table width="100%" border="1">
    <tr>
      <td><strong>Phase name</strong></td>
      <td><strong>Runs after</strong></td>
      <td><strong>Runs right after</strong></td>
    </tr>
    <tr>
      <td colspan="3" align="center"><strong><i>Internal phases</i></strong></td>
    </tr>
    <tr>
      <td>nsc::parser</td>
      <td></td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::typer</td>
      <td>nsc::parser</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::pickler</td>
      <td>nsc::typer</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::liftcode</td>
      <td>nsc::pickler</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::tailcalls</td>
      <td>nsc::pickler<br />nsc::liftcode</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::erasure</td>
      <td></td>
      <td>nsc::tailcalls</td>
    </tr>
    <tr>
      <td>nsc::cleanup</td>
      <td>nsc::erasure</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::jvm</td>
      <td>nsc::cleanup</td>
      <td></td>
    </tr>
    <tr>
      <td>nsc::terminal</td>
      <td>nsc::jvm<br />nsc::msil</td>
      <td></td>
    </tr>
    <tr>
      <td colspan="3" align="center"><strong><i>Plug-in supplied phases</i></strong></td>
    </tr>
    <tr>
      <td>plug1::optimization1</td>
      <td>nsc::typer</td>
      <td></td>
    </tr>
    <tr>
      <td>plug2::optimization1</td>
      <td>nsc::liftcode</td>
      <td></td>
    </tr>
    <tr>
      <td>plug2::optimization2</td>
      <td>plug2::optimization1<br />nsc::cleanup</td>
      <td></td>
    </tr>
  </table>

  <p>Using the phases in the table above as the basis for the
  dependency graph, the following graph will be constructed. This
  graph contains all the information that is also present in the
  phases set. As seen in the diagram below there are nodes without
  phase objects (marked with dotted lines) and nodes with multiple
  dependencies. There could also be cycles at this stage, but there
  are non in this example. 
  </p>

  <div style="text-align: center;">
    <img src="sip-00002-6.png" alt="" border="0" width="60%" height="" />
  </div>

  <p>The transformation of the dependency graph into a dependency tree
  is the basis of this proposal. The resulting dependency tree is
  shown below. This tree is then the basis for generating the compiler
  phase list.
  </p>

  <div style="text-align: center;">
    <img src="sip-00002-24.png" alt="" border="0" width="60%" height="" />
  </div>

  <p>The resulting compiler phase list of traversing the tree above is
  shown below. It is ensured that the <code>parser</code> phase is the
  first and that the <code>terminal</code> phase is the last in this list.</p>

  <ul>
    <li>nsc::parser</li>
    <li>nsc::typer</li>
    <li>plug1::optimization1</li>
    <li>nsc::pickler</li>
    <li>nsc::liftcode</li>
    <li>plug2::optimization1</li>
    <li>nsc::tailcalls</li>
    <li>nsc::erasure</li>
    <li>nsc::cleanup</li>
    <li>plug2::optimization2</li>
    <li>nsc::jvm</li>
    <li>nsc::terminal</li>
  </ul>

  <a name="futuregoals"></a>
  <h3>Future Goals That Extend Beyond This Proposal</h3>    

  <ul>
    <li>Create and add new backends to the compiler that will replace
    the JVM or MSIL specific phases using plug-ins.</li>
    <li>Creating internal tools by subclassing the class
    <code>Global</code>.</li>
    <li>The constraint system should not contain support for the
    following constraints.
    <ul>      
      <li>A phase should be able to declare that it will replace a
      given phase and thereby ignoring other constraints declared by
      this phase and taking over the constraints from the replaced
      phase.</li>
      <li>A phase should be able to declare that if included it will
      remove a given number of phases and removing constraints to
      these phases.</li>
      <li>A phase should be able to declare that it only wants to be
      included if a given phase is also present or else it will be
      silently dropped.</li>
    </ul>
    </li>
  </ul>

  <a href="#top">To the top</a>

  <a name="description"></a>
  <h2>Description</h2>  

  <p>This section will cover a short description of the present design
  and the full description of the proposed design changes, including
  the constraint system and algorithm for solving the constraint
  system.
  </p>

  <a name="presentdesign"></a>
  <h3>The Present Design</h3>

  <p>Currently phases in the compiler are ordered statically by a hard
  coded list structure in <code>class Global</code> (found in
  Global.scala), where the method <code>builtInPhaseDescriptors</code>
  returns the list of phase objects. The resulting list from invoking
  this method is then extended with the phases from the plug-in
  supplied by the user. This is done in the
  <code>computePhaseDescriptors</code> method in <code>class
  Plugins</code> (found in plugins/Plugins.scala). This method is
  then again called from the method <code>phaseDescriptors</code> in
  <code>class Global</code>. So all together the process of
  calculating the phases of the compiler is spread over several files
  and quite complicated.</p>

  <p>A plug-in can supply a number of phases to the compiler and the
  architecture of the plug-in subsystem already supply some of the
  functionality that will be added to all internal phases. All phases
  are a subclass of <code>class SubComponent</code> and all phases
  supplied by plug-ins are subclasses of <code>class
  PluginComponent</code> (which is a subclass of <code>class
  SubComponent</code>).</p>

  <a name="proposeddesign"></a>
  <h3>The Proposed Design</h3>

  <p>This section will cover the design of the proposed system for
  handling inclusion of internal and external phases uniformly. It
  will also cover a description of the constraint system and a
  algorithm for solving these constraint for generating the compiler
  phase chain. This design will focus on satisfying the use cases <a
  href="#usecases">described earlier</a>.
  </p>

  <p>From the use cases above it is possible to identify two kinds of
  constraints. The first constraint is the <i>runs after</i>
  dependency and the second constraint is the <i>runs right after</i>
  dependency. It is necessary to declare that a given phase has to
  <i>runs after</i> a list of phase names, however the <i>runs right
  after</i> constraint only needs to declare the name one phase.
  </p>
  <ul>
    <li>Lets define the <i>runs after</i> constraint using the
    <code>runsAfter</code> variable with the Scala type
    <code>List[String]</code>. Using this constraint, phase B would
    declare <code>runsAfter=List(A)</code>, meaning that phase B would
    like to run somewhere after phase A, but as early as possible.</li>

    <li>Lets define the <i>runs right after</i> constraint using the
    <code>runsRightAfter</code> variable with the Scala type
    <code>Option[String]</code>. Using this constraint, phase B would
    declare <code>runsRightAfter=Some(A)</code>, meaning that phase B
    wants to run right after phase A and no other phases are allowed to
    come between phase A and B. From a larger perspective meaning that
    phase A and B can be regarded as on large phase with one input and
    one output.</li>
  </ul>

  <p>A goal of this work is to handle the inclusion and constraint
  resolution of internal and plug-in supplied phases in a uniform way,
  this also means that the responsibility of phase assembly should not
  be handled by the plug-in subsystem any more, but by a separate
  instance. For this separate instance we create a new
  <code>PhaseAssembly</code> trait that is mixed into the
  <code>Global</code> class in the same way as <code>Plug-Ins</code>
  is mixed in.
  </p>

  <p>In the diagram below a schematic overview of the proposed system
  is shown. A description of the individual components and the
  modification needed are given below.
  </p>
  
  <div style="text-align: center;">
    <img src="sip-00002-9.png" alt="" border="0" width="55%" />
  </div>

  <a name="propdesign1"></a>
  <h4>1. Internal Phases</h4>

  <p>All internal phase objects are declared in class
  <code>Global</code> and will be extended to include the
  <code>runsAfter</code> and the <code>runsRightAfter</code> variables
  with the types described above. This will ensure that the internal
  phases will always succeed in creating a compiler phase chain
  depending on the compiler target. 
  </p>
  <p>The phase objects that are needed for a given target will be
  added to the phases set. See <a href="#propdesign3">3. Phases
  Set</a> for more information.
  </p>


  <a name="propdesign2"></a>
  <h4>2. External Phases</h4>

  <p>All external phases are supplied through plug-ins. All these
  phases inherit from class <code>PluginComponent</code>. Defining the
  <code>runsRightAfter=None</code> in class
  <code>PluginComponent</code> will make the changes to the existing
  plug-ins minimal, but makes it possible for external phases to use
  the feature by simple overriding the variable if needed.
  </p>
  <p>It is still the responsibility of the plug-in subsystem to load
  the actual plug-ins from the jar files and also the processing of
  all the <code>-Xplugin*</code> compiler options available at the
  command line. The phase assembly code currently present in the
  plug-in subsystem will be replaced by code that adds all plug-in
  supplied phases to the phases set. See <a
  href="#propdesign3">3. Phases Set</a> for more information.
  </p>

  <a name="propdesign3"></a>
  <h4>3. Phases Set</h4>
  
  <p>The phases set contains all the phase objects that should be
  taken into consideration when performing compiler phase chain
  assembly. This phases set would located in the class
  <code>Global</code>. A proper Scala declaration of such a phases set
  is shown below.
  </p>
  <pre>protected val phasesSet : HashSet[SubComponent] = new HashSet[SubComponent]()</pre>

  <p>Ensuring that all phase objects are unique with respect to the
  phase names, the <code>hashCode</code> method in class
  <code>SubComponent</code> will be overridden to return the hash code
  of the phase name and not the object itself.
  </p>

  <a name="propdesign4"></a>
  <h4>4. Phase Assembly</h4>

  <p>From a compiler design perspective this component will include
  the data structures and implementation of the constraint solving
  algorithm that are needed to implement this proposal. It would be
  integrated as a <code>trait PhaseAssembly</code> that is mixed into
  <code>class Global</code> with this self type <code>self: Global
  =&gt;</code>.
  </p>  
  
  <p>A very high level description of the actions performed by this
  component can be split into three distinct parts. A detailed
  algorithm that implements this high level description can be found
  below in the <a href="#constsolver">section on the constraint
  solving algorithm</a>.
  </p>

  <ol>
    <li><strong>Phases Set to Dependency Graph</strong><br />
    <p>From the phase objects in the phases set a dependency graph is
    created, where the edges in this graph are constructed from the
    <code>runsAfter</code> and <code>runsRightAfter</code>
    constraints. Each node in the dependency graph has both a phase
    name and a phase object, because all constraints in the phase
    objects are only given by name.
    </p>
    </li>
    <li><strong>Dependency Graph to Dependency Tree</strong><br />
    <p>To be able to create the compiler phase chain the dependency
    graph has to be converted into a dependency tree. The dependency
    graph contains nodes that have a phase name, but no phase
    object. These nodes have to be removed. Also all edges in the
    graph that are generated from <code>runsRightAfter</code>
    constraints have to be handled special to enforce this constraint
    and the graph is checked for cycles. If a cycle is found, the
    algorithm will terminal with a fatal error, preventing the
    compiler from starting. If no cycles are found in the dependency
    graph is simplified with respect to what will become the root in
    the dependency tree, by removing unneeded dependency edges. This
    will transform the dependency graph into a dependency tree.
    </p>
    </li>
    <li><strong>Dependency Tree to Compiler Phase List</strong><br />
    <p>The compiler phase list is constructed by traversing the
    dependency tree from the root. The traversal algorithm will be
    described below.
    </p>
    </li>
  </ol>

  
  <a name="propdesign5"></a>
  <h4>5. Constraint System</h4>

  <p>The constraint system is implemented as a graph structure where
  phases are modeled as nodes and the constraints are modeled as
  directed edges. This section will describe the basic entities of the
  constraint system and how they are modeled in the dependency graph.</p>
  <table>
    <tr>
      <td colspan="2">
	<p>Each node in the dependency graph contains several items of
	information about the phase object and the dependencies to and
	from it.</p>
      </td>
    </tr>
    <tr>
      <td>
	<ul>
	  <li><strong>phaseobj:</strong> <i>(type:
	  SubComponent)</i><br /> A reference to the actual phase
	  object, if there is any. If there is a phase object then
	  there is also a phase name, but the constraints are only
	  given by name, so some nodes may have a phase name and no
	  phase object.
	  </li>
	  <li><strong>phasename:</strong> <i>(type: String)</i><br />
	  The name of the phase object. This is always set.
	  </li>
	  <li><strong>after:</strong> <i>(type: HashSet[Edge]())</i>
	  <br />
	  This is the set of <code>Edge</code> object that point to
	  <code>Node</code> objects that have to come before this
	  <code>Node</code>. So all <code>runsAfter</code> and
	  <code>runsRightAfter</code> constraints given by a phase
	  object are present in this <code>after</code> list.
	  </li>
	  <li><strong>deps:</strong> <i>(type: HashSet[Edge]())</i>
	  <br />
	  This is the set of <code>Edge</code> object that points to
	  this <code>Node</code> object. So its the opposite of the
	  <code>after</code> list.
	  </li>
	</ul>
      </td>
      <td>
	<div style="text-align: center;">
	  <img src="sip-00002-15.png" alt="" border="0" width="235" height="186" />
	</div>
      </td>
    </tr>
    <tr>
      <td colspan="2">
	<p>Each edge in the dependency graph is directed, so it has
	references to its to and from <code>Node</code> objects and
	knows if it is created from a <code>runsAfter</code>
	constraint (soft) or created from a
	<code>runsRightAfter</code> constraint (hard). </p>
      </td>
    </tr>
    <tr>
      <td>
	<ul>
	  <li><strong>to:</strong> <i>(type: Node)</i>
	  <br />
	  This is a reference to the <code>Node</code> object this
	  <code>Edge</code> object is pointing to. This means that the
	  <code>frm</code> node has a dependency on the
	  <code>to</code> node.
	  </li>
	  <li><strong>frm:</strong> <i>(type: Node)</i>
	  <br />
	  This is a reference to the <code>Node</code> object this
	  <code>Edge</code> object is pointing from.
	  </li>
	  <li><strong>hard:</strong> <i>(type: Boolean)</i>
	  <br />
	  In the dependency graph we have soft and hard
	  dependencies. The soft dependencies are created from the
	  <code>runsAfter</code> list of constraints. The hard
	  dependencies are created from the
	  <code>runsRightAfter</code> constraint, if any.
	  </li>
	</ul>
      </td>
      <td>
	<div style="text-align: center;">
	  <img src="sip-00002-16.png" alt="" border="0" width="152" height="186" />
	</div>
      </td>
    </tr>
  </table>

  <p>Below is the interface implementation of the dependency graph in
  Scala. There are classes implementing the edges and nodes and the
  container for storing the edges and nodes and aux. method for easy
  access to the stored information.
  </p>
  <pre>
class Edge(f: Node, t: Node, h: Boolean) {
  var frm = f
  var to = t
  var hard = h
}

class Node(phs:SubComponent, name:String) {
  var phasename: String = name
  var phaseobj: SubComponent = phs
  var after: HashSet[Edge] = new HashSet[Edge]()
  var deps: HashSet[Edge] = new HashSet[Edge]()
}

class DependencyGraph {
  
  val nodes = new HashMap[String,Node]()
  val edges = new HashSet[Edge]()

  def getNodeByPhase(name : String) : Node
  def getNodeByPhase(phs : SubComponent) : Node
  def softConnectNodes(frm: Node, to: Node) : Unit
  def hardConnectNodes(frm: Node, to: Node) : Unit
}
  </pre>


  <a name="constsolver"></a>
  <h4>Constraint Solving Algorithm</h4>

  <p>This algorithm will build a list of phase objects from a set of
  phase objects with constraints. The position of each phase object in
  the final list will ensure that all its constraints are
  satisfied. The algorithm does this by constructing a dependency
  graph from the set of phase objects. This dependency graph is then
  over a series of steps converted into a dependency tree with a root
  node. To construct the final compiler phase list the dependency tree
  is traversed from the root building the list of phase objects.</p>

  <p>The function below is the top most function of this algorithm
  that transforms the set of phase objects into a ordered list of
  phase objects so all constraints are satisfied. This function
  follows the description given about the <a href="#propdesign4">phase
  assembly component</a>. All the auxiliary functions will be covered
  below.</p>

<pre>  <b>function</b> phasesSetToPhasesList(phasesSet)

      <i>// call function to generate dependency graph from phases set</i>    
      graph &lt;- phasesSetToDepGraph( phaseSet )

      <i>// call function to transform the dependency graph to a dependency tree.</i>
      <i>// if the dependency graph contains a cycle a fatal error will be generated.</i>
      rootnode &lt;- depGraphToDepTree( graph )

      <i>// call function to recursively build phase list from the root of </i>
      <i>// the dependency tree</i>
      phaselist &lt;- depTreeToPhaseList( rootnode, <b>new</b> List() )

      <b>return</b> phaselist
</pre>

  <p> List of functions called:</p>
  <ul>
    <li><a href="#func1">The <code>phasesSetToDepGraph</code> function</a></li>
    <li><a href="#func2">The <code>depGraphToDepTree</code> function</a></li>
    <li><a href="#func3">The <code>depTreeToPhaseList</code> function</a></li>
  </ul>

  <a name="func1"></a>
  <h5>The <code>phasesSetToDepGraph</code> function</h5>

  <p>The dependency graph, with the elements described in the <a
  href="#propdesign5">section about the constraint system</a>, is
  build from the phase objects in the phases set.<br />
  It works as follows:
  </p>
  <ul>
    <li>create a new dependency graph</li>
    <li>foreach phase in the phasesSet
    <ul>
      <li>create a new graph node <i>fromnode</i> from the phase object and add the
      ndoe to the graph</li>
      <li>if the phase object has a runs right after constraint, then
      create a node <i>tonode</i> from the phase name of the constraint and create a
      hard edge between <i>fromnode</i> and <i>tonode</i>. Eventual
      runs after constraints are ignored.</li>
      <li>if the phase object has no runs right after constraints a
      node <i>tonode</i> is created from each runs after constraint
      and connected with a soft edge to the <i>fromnode</i>.</li>
    </ul>
    </li>
  </ul>
  
  <p>A pseudo code version of this algorithm is shown below.</p>

<pre>  <b>function</b> phasesSetToDepGraph( phaseSet )
      <i>// create new graph structure to hold data</i>    
      graph &lt;- <b>new</b> DependencyGraph
      <i>// iterate over all phase objects in the phasesSet</i>
      <b>for</b> phs <b>in</b> phasesSet
          <i>// create a node from the phase object</i>
          fromnode &lt;- graph.getNodeByName(phs)
	  <i>// test if the phase object has a runs right after constraint</i>
	  <b>if</b> phs.runRightAfter <b>not eq</b> ""
	      <i>// create a node from the phase constraint name and connect with hard edge</i>
	      tonode &lt;- graph.getNodeByName( phs.runRightAfter )
	      graph.hardConnectNodes( fromnode, tonode )
	  <b>else</b>
              <i>// the phase object did not have a runs right after constraint</i>
              <i>// iterate over all phase constraints in the runs after constraint list</i>
	      <b>for</b> phsname <b>in</b> phs.runsAfter
                  <i>// create a node from the phase constraint name and connect with soft edge</i>
	          tonode &lt;- graph.getNodeByName( phsname )
		  graph.softConnectNodes( fromnode, tonode )

      <b>return</b> graph
</pre>



  <a name="func2"></a>
  <h5>The <code>depGraphToDepTree</code> function</h5>

  <p>The process of converting the dependency graph into a dependency
  tree is complex, so to simplify the algorithm the individual steps
  are split into functions.<br />
  It works as follows:
  </p>

  <ul>
    <li>call method that will remove all nodes from the graph that
    have no phase object attached</li>
    <li>call method that will find all edges in the graph that are
    marked as hard and enforce that the <code>deps</code> list of the
    edges <code>to</code> node reference only contains this
    edge. Other edges are moved down to the edges <code>frm</code>
    node reference</li>
    <li>What is to become the root of the dependency tree and also the
    first element in the phase list is extracted from the graph.</li>
    <li>call method to check if there are cycles in the graph starting
    from the root node, if there are cycles a fatal error is generated</li>
    <li>call method to simplify graph into a tree by removing edges
    that will not break the constraints given</li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> depGraphToDepTree( graph ) 
    
      <i>// Remove all nodes where phaseobj == null</i>
      removeDanglingNodes( graph ) 

      <i>// Enforce hardlinks / runsRightAfter and promote nodes down the tree</i>
      enforceHardlinks( graph )

      <i>// Extract the root node from the graph</i>
      root &lt;- graph.getNodeByPhase( ROOTNAME )

      <i>// Test for cycles in the graph, if found generate fatal error</i>
      testForCycles( root, <b>new</b> Set() )

      <i>// Simplify graph by removing edges, starting from the given root node</i>
      simplifyGraphFromNode( root, graph )

      <b>return</b> root
  </pre>

  <p> List of functions called:</p>
  <ul>
    <li><a href="#func4">Helper function <code>removeDanglingNodes</code></a></li>
    <li><a href="#func5">Helper function <code>enforceHardlinks</code></a></li>
    <li><a href="#func6">Helper function <code>testForCycles</code></a></li>
    <li><a href="#func7">Helper function <code>simplifyGraphFromNode</code></a></li>
  </ul>

  <a name="func3"></a>
  <h5>The <code>depTreeToPhaseList</code> function</h5>

  <p>The graph has been transformed into a tree and a reference to the
  root node is identified. The tree is now traversed from the root
  node, by calling itself recursively on each node object. The empty
  list is given to the function together with the root node and the
  phase objects are added to the list as the function traverses the
  tree.<br /> 
  It works as follows:</p>

  <ul>
    <li>the function is called with a node object and a list</li>
    <li>the phase object of the node object, the function was call
    with, is added to the list</li>
    <li>if this node has no nodes that depend on it, the list of phase
    objects is returned</li>
    <li>if this node has only one node that depends on it, we call
    recursively with that node and the list as arguments, the result
    of this is returned</li>
    <li>if this node has more then one node that depends upon it,
    these dependent nodes are ordered. We now iterate over this
    ordered list of nodes and call recursively the function.</li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> depTreeToPhaseList( node, chain )
      <i>// add the phase object of the node to the chain list</i>
      chain &lt;- chain.append( node.phaseobj )
      <i>// no more depends, return the chain</i>
      <b>if</b> node.deps.length == 0
          <b>return</b> chain
      <i>// only one depend, so no need to order one element, just call recursively </i>
      <b>else if</b> node.deps.length == 1
          <b>return</b> depTreeToPhaseList( node.deps.getFirst().frm,  chain )
      <i>// more then one depend</i>
      <b>else</b>
          <i>// the depends are ordered and saved as a list</i>
          order &lt;- dependencyOrder( node.deps )
          <i>// iterate over the list of ordered depend nodes and call recursively </i>
          <b>for</b> nd <b>in</b> order
              chain &lt;- depTreeToPhaseList( nd, chain )
          <i>// return the complete chain</i>
          return chain
  </pre>

  <p> List of functions called:</p>
  <ul>
    <li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
  </ul>

  <a name="func4"></a>
  <h5>Helper function <code>removeDanglingNodes</code></h5>
  
  <p>This helper function will given a dependency graph find all nodes
  that have no reference to a phase object and remove these nodes from
  the graph. (See <a href="#usecase2">use caes 2</a>) These nodes that
  have no phase object are generated from runs after or runs right
  after constraints to phases that are not loaded.<br /> It is done as
  follows:</p>

  <ul>
    <li>create a list L of all nodes in the given graph that have no
    phase object</li>
    <li>foreach node in L
    <ul>
      <li>remove the node from the graph</li>
      <li>foreach edge in the nodes deps list, remove this edge from
      the graph</li>
    </ul>
    </li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> removeDanglingNodes( graph )
      <i>// find all nodes with no phase object</i>
      dnodes &lt;- filter( lambda node: node.phaseobj == null, graph.nodes )
      <i>// iterate over these nodes and remove them one by one</i>
      <b>for</b> node <b>in</b> dnodes
          graph.nodes.remove( node )
          <i>// also remove all edge objects related to this node object</i>
          <b>for</b> edge <b>in</b> node.deps
              graph.edges.remove( edge )
              edge.frm.after.remove( edge )
  </pre>


  <a name="func5"></a>
  <h5>Helper function <code>enforceHardlinks</code></h5>

  <p>This helper function will given a dependency graph find all edges
  in the graph that are marked as hard. It will then ensure that the
  <code>to</code> node the edge is pointing to only has this edge in
  its deps list. (See <a href="#usecase7">use case 7</a>).<br />
  It works as follows:
  </p>

  <ul>
    <li>loop until no edges are moved
    <ul>
      <li>create list L of all edges in the given graph that are marked
      as hard</li>
      <li>foreach edge in L
      <ul>
	<li>create list HE of edges marked as hard in the deps list of
	the node object this edge is pointing to</li>
	<li>if there is more then one element in HE it will not be possible
	to generate a compiler phase list and a fatal error is
	generated</li>
	<li>if there is only one element in HE, create a list E of all
	the edges not marked as hard in the deps list of
	the node object this edge is pointing to</li>
	<li>override the deps list of the node object this edge is
	pointing to with the HE list</li>
	<li>foreach pedge in E
	<ul>
	  <li>attach the pedge to the deps list of the node object this
	  edge is pointing from</li>
	</ul>
	</li>
      </ul>
      </li>
    </ul>
    </li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> enforceHardlinks( graph )
      <b>do</b>
          <i>// lets assume that no edges are moved until proven otherwise</i>
          rerun &lt;- <b>false</b>
          <i>// find all edges in the graph that are marked as hard</i>
          hardlinks &lt;- <b>filter</b>( <b>lambda</b> e: e.hard, graph.edges )
          <i>// iterate over all hard edges found</i>
          <b>for</b> edge <b>in</b> hardlinks
              <i>// find all hard edges in the deps list of the node this edge points to</i>
              sanity &lt;- <b>filter</b>( <b>lambda</b> e: e.hard, edge.to.deps )
     	      <b>if</b> sanity.length > 1
     	      <i>// generate fatal error, there is more then one runs right 
              // after constraint on the same node</i>
     	      <b>else</b>
                  <i>// find all non hard edges in the deps list of the node this edge points to</i>
     	          promote &lt;- <b>filter</b>( <b>lambda</b> e: not e.hard, edge.to.deps )
                  <i>// assign the list of hard edges to the deps list of the node this 
                  // edge points to</i>
     	          edge.to.deps &lt;- sanity
                  <i>// iterate over all non hard edges and attach them to the deps list of the 
                  // node this edge points from</i>
     	          <b>for</b> pedge <b>in</b> promote
		      <i>// we have moved an edge - we need to rerun this to check</i>
                      rerun &lt;- true
                      <i>// move the edge object down along the hard edge</i>
                      pedge.to &lt;- edge.frm
                      edge.frm.deps.attach(pedge)
      <i>// if edges where moved, then rerun to make sure the structure is valid</i>
      <b>while</b> rerun
  </pre>

  <p>To better understand this little function, lets look at some
  diagrams and how this function works on these examples. The first
  example is shown below. Here phase B wants to run right after phase
  A and phase C just wants to run somewhere after phase A, so there is
  no problem in pushing phase C down to run after phase B. The
  function does this be first finding all the hard edges (the blue
  edges). It then inspects the edges one by one and in this example we
  have only one hard edge. The edge goes from phase B to phase A,
  meaning that phase B want to run right after phase A. The algorithm
  now looks at the <code>deps</code> list of phase A and finds that
  there is only one hard edges (this is very good). The algorithm now
  filters out the non-hard edges and finds the edge to phase C. This
  edge is now detached from phase A and attached to phase B. 
  </p>
  
  <div style="text-align: center;">
    <img src="sip-00002-17.png" alt="" border="0" width="40%" />
  </div>
  
  <p>With the same arguments can we also handle the situation shown
  below.</p>
  
  <div style="text-align: center;">
    <img src="sip-00002-18.png" alt="" border="0" width="45%" />
  </div>


  <a name="func6"></a>
  <h5>Helper function <code>testForCycles</code></h5>
  
  <p>This helper function will, given a node in the graph and a set
  containing node names, determine of there are any cycles in the
  graph. This will ensure that <a href="#usecase8">use case 8</a> is
  addressed. Please note that this function will call itself
  recursively and is called the first time with the root node and an
  empty set of node names.
  <br /> 
  It is done as follows:
  </p>

  <ul>
    <li>test if the name of the node is already contained in the set,
    if it is a cycle is found and a fatal error is generated.</li>
    <li>if the name is not in the set, the name of the node is added
    to the set</li>
    <li>the deps list of the node is ordered and stored in nodes</li>
    <li>foreach nd in nodes
    <ul>
      <li>call the function recursively with the node nd and the set
      of node names</li>
    </ul>
    </li>
    <li>remove the name of the node from the set</li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> testForCycles( node, names )
      <i>// test if the name of the node is already in the set</i>
      <b>if</b> names.contains( node.phasename )
          <i>// names are unique, so this is a fatal error, there is a cycle
          // the same name has been visited twice</i>

      <i>// add the name of the node to the set</i>
      names.add( node.phasename )
      <i>// use the dependencyOrder function to sort the deps of the node</i>
      nodes &lt;- dependencyOrder( node.deps )
      <i>// iterate over all deps nodes and call recursively</i>
      <b>for</b> nd <b>in</b> nodes
          testForCycles(nd, names)
      
      <i>// remove the name of the node from the set</i>
      names.remove( node.phasename )
  </pre>

  <p> List of functions called:</p>
  <ul>
    <li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
  </ul>

  <p>There are other ways of detecting cycles in graphs, that are
  faster and more efficient, please <a
  href="http://www.cs.umu.se/kurser/TDBA77/VT06/algorithms/BOOK/BOOK2/NODE68.HTM">read
  here</a> for more information.</p>

  <a name="func7"></a>
  <h5>Helper function <code>simplifyGraphFromNode</code></h5>

  <p>This helper function will, given a node in the graph and a
  reference to the graph structure, remove unneeded edges and
  transforming the graph into a tree. Please note that this
  function will call itself recursively and is called the first time
  with the root node and the full graph.
  <br /> 
  It is done as follows:
  </p>

  <ul>
    <li>create an empty list RM</li>
    <li>foreach edge in the deps list of the node, if the node this
    edge is pointing from has mode then one dependencies then add this
    edge to RM</li>
    <li>foreach edge in RM, remove this edge from the graph</li>
    <li>the deps list of the node is ordered and stored in nodes</li>
    <li>foreach nd in nodes
    <ul>
      <li>call the function recursively with the node nd and the
      updated graph</li>
    </ul>
    </li>    
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> simplifyGraphFromNode( node, graph )
      removal &lt;- <b>new</b> List()
      <b>for</b> edge <b>in</b> node.deps
          <b>if</b> edge.frm.after.length > 1
              removal.append( edge )
    
      <b>for</b> edge <b>in</b> removal
          node.deps.remove( edge )
	  edge.frm.after.remove( edge )
	  graph.edges.remove( edge )

      nodes &lt;- dependencyOrder( node.deps )
      <b>for</b> nd <b>in</b> nodes
          simplifyGraphFromNode( nd, graph )
  </pre>

  <p> List of functions called:</p>
  <ul>
    <li><a href="#func8">Helper function <code>dependencyOrder</code></a></li>
  </ul>


  <p>In the example shown below we simplify this graph into a tree
  from the node A. This means that we start the traversal from node A
  and uses the <code>A.deps</code> list to continue the traversal. If
  <code>A.deps</code> contains more then one element, the list is
  sorted so we first visit all plug-in supplied phases in alphabetical
  order and then the internal phase. In this example this gives the
  order <code>[P,R,B]</code>. Before the algorithm traverses to a new
  node it checks that node for its number of <code>after</code>
  links. If it has more then one after link, it simply deletes the
  link to it so it can carry on. In the example below this is the case
  of node P and T. When the algorithm visits node P and wants to
  traverse on to node T, it checks the number of <code>after</code>
  constraints in node T. Node T has two after constraints so it can
  safely delete the edge between node P and T, because there must be
  another node that we have not included yet, that will include this
  node T.</p>

  <div style="text-align: center;">
    <img src="sip-00002-19.png" alt="" border="0" width="70%" />
  </div>


  <a name="func8"></a>
  <h5>Helper function <code>dependencyOrder</code></h5>

  <p>This helper function will given a list of edges sort them
  according to a specific scheme are return them as a list. The
  sorting scheme is as follows. The returned list is the reverse of
  first having the plug-in supplied phases in alphabetical order
  followed by the internal phases. <br />
  This is done as follows:
  </p>
  <ul>
    <li>nodes for internal and external phases are filtered from the
    deps list</li>
    <li>the external nodes are ordered by the phase name and
    reversed</li>
    <li>the external nodes are appended to the internal nodes list and
    the resulting list is reversed</li>
  </ul>

  <p>Below is shown a pseudo code version of the function.</p>

  <pre>  <b>function</b> dependencyOrder( deps )
      external &lt;- <b>filter</b>(<b>lambda</b> e: (! e.frm.phaseobj.internal), deps)
      internal &lt;- <b>filter</b>(<b>lambda</b> e: e.frm.phaseobj.internal, deps)

      extnodes &lt;- <b>map</b>(<b>lambda</b> e: e.frm, external)
      extnodes &lt;- extnodes.sort(<b>lambda</b> (n1,n2): (n1.phasename compareTo n2.phasename) &lt; 0)
      extnodes &lt;- extnodes.reverse
	
      nodes &lt;- (<b>map</b>(<b>lambda</b> e: e.frm, internal)).<b>extend</b>( extnodes )
      nodes &lt;- nodes.reverse
      <b>return</b> nodes
  </pre>



  <a name="namingscheme"></a>
  <h3>Naming Scheme for Internal and External Phases</h3>

  <p>Having the ability to load a large number of plug-ins, where each
  plug-in can supply several named phases, gives a high possibility of
  name clashes. There is no way to enforce unique names in plug-in
  supplied phases because they can have inter dependencies. The best
  solution is to suggest a naming scheme for phase names.</p>

  <p>This naming scheme composes all names from the package/plug-in
  name, then a double colon <code>::</code> and then the actual phase
  name. So for the internal compiler phase names this would be:</p>

  <pre>    val phaseName = "nsc::tailcalls"</pre>

  <p>For a plug-in with name <code>unit</code> that supplies a phase
  with name <code>convert</code> the full phase name would be:</p>

  <pre>    val phaseName = "unit::convert"</pre>

  <p>By using this naming scheme for all phases there is a namespace
  for phase names within each plug-in and external phases will never
  clash with internal phase names.</p>

  <a name="implications"></a>
  <h3>Implications of This Proposal</h3>

  <p>All plug-ins need to be updated to the design in this
  proposal. This means that all current plug-ins will not be able to
  load. However the modifications to the existing plug-ins are
  minor. Current variables and types:
  </p>
  <pre>  val runsAfter: String</pre>
  <p>The proposed changes are:</p>
  <pre>  val runsAfter: List[String]</pre>

  <p>All internal phases in the compiler need to declare there
  ordering. This means that instead of having the ordering very
  explicit in the list structure, the ordering is encoded into each
  phase object. The following signatures will be added to the
  <code>SubComponent</code> class.</p>

  <pre>  val runsAfter: List[String]
  val runsRightAfter: Option[String]</pre>

  <p>The following items must be added to each phase
  object to implement the signatures (however they must be adapted to
  the individual phase object).</p>
  
  <pre>  val runsAfter = List[String]("phasename")
  val runsRightAfter = None</pre>

  <p>There is no reason to change the names of the phases as suggested in
  the section above, but it will minimize the chance of name clashes
  and will also minimize the chance of accidental dependencies.</p>

  <a href="#top">To the top</a>

  <a name="implementation"></a>
  <h2>Implementation</h2>

  <p>An example implementation has been created that follows the
  algorithm sketched in this proposal and models the compilers
  internal structure. This implementation can be found in the file <a
  href="sip-00002-10.scala">sip-00002-10.scala</a>. The example
  implementation uses the phases from <a href="#usecase10">use case
  10</a> as its data.
  </p>
  
  <p>Running the example code</p>
  
  <ul>
    <li>Compile the code using <code>scalac
    sip-00002-10.scala</code></li>
    <li>Run the example using <code>scala
    depgraph.DepGraphTest</code></li>
    <li>This will generate the console output as seen below and <a
    href="http://www.graphviz.org/">Graphviz</a> dot files that can be
    compiled into png images. These png images are also shown
    below.</li>
  </ul>
  
  <a name="consoleexample"></a>
  <p>Console output</p>

  <pre>$ scalac sip-00002-10.scala 
$ scala depgraph.DepGraphTest
[dropping depend on node with no phase: nsc::msil]
[removing edge: nsc::tailcalls =&gt; nsc::pickler]
[removing edge: plug2::optimization2 =&gt; plug2::optimization1]
 - nsc::parser
 - nsc::typer
 - plug1::optimization1
 - nsc::pickler
 - nsc::liftcode
 - plug2::optimization1
 - nsc::tailcalls
 - nsc::erasure
 - nsc::cleanup
 - plug2::optimization2
 - nsc::jvm
 - nsc::terminal
$ </pre>

  <p>PNG images of the generated dot files showing the graph
  structure. Internal phases are marked as black circles and plug-in
  phases are marked as green circles. The hard links (runsRightAfter
  constraints) are marked as blue arrows. Click on the pictures to
  enlarge.</p>
  <a name="examples"></a>
  <table>
    <tr>
      <td valign="top"><strong>1. </strong><br />
      <div style="text-align: center;">
	<a href="sip-00002-11.png">
	  <img src="sip-00002-11.png" alt="" border="0" width="100%"/>
	</a>
      </div>
      </td>

      <td valign="top"><strong>2. </strong><br />
      <div style="text-align: center;">
	<a href="sip-00002-12.png">
	  <img src="sip-00002-12.png" alt="" border="0" width="100%" />
	</a>
      </div>
      </td>
      <td valign="top"><strong>3. </strong><br />
      <div style="text-align: center;">
	<a href="sip-00002-13.png">
	  <img src="sip-00002-13.png" alt="" border="0" width="100%" />
	</a>
      </div>
      </td>
      <td valign="top"><strong>4. </strong><br />
      <div style="text-align: center;">
	<a href="sip-00002-14.png">
	  <img src="sip-00002-14.png" alt="" border="0" width="100%" />
	</a>
      </div>
      </td>
    </tr>
  </table>

  <a href="#top">To the top</a>

</body>
</html>