index.html
116 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
<!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>
<html lang="en" dir="ltr">
<head>
<title>Canonical XML Version 2.0</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<!-- <script src='../../../dap-dev/ReSpec.js/js/respec.js'
class='remove'></script> -->
<style type="text/css">
/*****************************************************************
* ReSpec CSS
* Robin Berjon (robin at berjon dot com)
* v0.05 - 2009-07-31
*****************************************************************/
/* --- INLINES --- */
em.rfc2119 {
text-transform: lowercase;
font-variant: small-caps;
font-style: normal;
color: #900;
}
h1 acronym, h2 acronym, h3 acronym, h4 acronym, h5 acronym, h6 acronym, a acronym,
h1 abbr, h2 abbr, h3 abbr, h4 abbr, h5 abbr, h6 abbr, a abbr {
border: none;
}
dfn {
font-weight: bold;
}
a.internalDFN {
color: inherit;
border-bottom: 1px solid #99c;
text-decoration: none;
}
a.externalDFN {
color: inherit;
border-bottom: medium dotted #ccc;
text-decoration: none;
}
a.bibref {
text-decoration: none;
}
code {
color: #ff4500;
}
/* --- WEB IDL --- */
pre.idl {
border-top: 1px solid #90b8de;
border-bottom: 1px solid #90b8de;
padding: 1em;
line-height: 120%;
}
pre.idl::before {
content: "WebIDL";
display: block;
width: 150px;
background: #90b8de;
color: #fff;
font-family: initial;
padding: 3px;
font-weight: bold;
margin: -1em 0 1em -1em;
}
.idlType {
color: #ff4500;
font-weight: bold;
text-decoration: none;
}
/*.idlModule*/
/*.idlModuleID*/
/*.idlInterface*/
.idlInterfaceID {
font-weight: bold;
color: #005a9c;
}
.idlSuperclass {
font-style: italic;
color: #005a9c;
}
/*.idlAttribute*/
.idlAttrType, .idlFieldType {
color: #005a9c;
}
.idlAttrName, .idlFieldName {
color: #ff4500;
}
.idlAttrName a, .idlFieldName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;
}
/*.idlMethod*/
.idlMethType {
color: #005a9c;
}
.idlMethName {
color: #ff4500;
}
.idlMethName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;
}
/*.idlParam*/
.idlParamType {
color: #005a9c;
}
.idlParamName {
font-style: italic;
}
.extAttr {
color: #666;
}
/*.idlConst*/
.idlConstType {
color: #005a9c;
}
.idlConstName {
color: #ff4500;
}
.idlConstName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;
}
/*.idlException*/
.idlExceptionID {
font-weight: bold;
color: #c00;
}
.idlTypedefID, .idlTypedefType {
color: #005a9c;
}
.idlRaises, .idlRaises a.idlType, .idlRaises a.idlType code, .excName a, .excName a code {
color: #c00;
font-weight: normal;
}
.excName a {
font-family: monospace;
}
.idlRaises a.idlType, .excName a.idlType {
border-bottom: 1px dotted #c00;
}
.excGetSetTrue, .excGetSetFalse, .prmNullTrue, .prmNullFalse, .prmOptTrue, .prmOptFalse {
width: 45px;
text-align: center;
}
.excGetSetTrue, .prmNullTrue, .prmOptTrue { color: #0c0; }
.excGetSetFalse, .prmNullFalse, .prmOptFalse { color: #c00; }
.idlImplements a {
font-weight: bold;
}
dl.attributes, dl.methods, dl.constants, dl.fields {
margin-left: 2em;
}
.attributes dt, .methods dt, .constants dt, .fields dt {
font-weight: normal;
}
.attributes dt code, .methods dt code, .constants dt code, .fields dt code {
font-weight: bold;
color: #000;
font-family: monospace;
}
.attributes dt code, .fields dt code {
background: #ffffd2;
}
.attributes dt .idlAttrType code, .fields dt .idlFieldType code {
color: #005a9c;
background: transparent;
font-family: inherit;
font-weight: normal;
font-style: italic;
}
.methods dt code {
background: #d9e6f8;
}
.constants dt code {
background: #ddffd2;
}
.attributes dd, .methods dd, .constants dd, .fields dd {
margin-bottom: 1em;
}
table.parameters, table.exceptions {
border-spacing: 0;
border-collapse: collapse;
margin: 0.5em 0;
width: 100%;
}
table.parameters { border-bottom: 1px solid #90b8de; }
table.exceptions { border-bottom: 1px solid #deb890; }
.parameters th, .exceptions th {
color: #fff;
padding: 3px 5px;
text-align: left;
font-family: initial;
font-weight: normal;
text-shadow: #666 1px 1px 0;
}
.parameters th { background: #90b8de; }
.exceptions th { background: #deb890; }
.parameters td, .exceptions td {
padding: 3px 10px;
border-top: 1px solid #ddd;
vertical-align: top;
}
.parameters tr:first-child td, .exceptions tr:first-child td {
border-top: none;
}
.parameters td.prmName, .exceptions td.excName, .exceptions td.excCodeName {
width: 100px;
}
.parameters td.prmType {
width: 120px;
}
table.exceptions table {
border-spacing: 0;
border-collapse: collapse;
width: 100%;
}
/* --- TOC --- */
.toc a {
text-decoration: none;
}
a .secno {
color: #000;
}
/* --- TABLE --- */
table.simple {
border-spacing: 0;
border-collapse: collapse;
border-bottom: 3px solid #005a9c;
}
.simple th {
background: #005a9c;
color: #fff;
padding: 3px 5px;
text-align: left;
}
.simple th[scope="row"] {
background: inherit;
color: inherit;
border-top: 1px solid #ddd;
}
.simple td {
padding: 3px 10px;
border-top: 1px solid #ddd;
}
.simple tr:nth-child(even) {
background: #f0f6ff;
}
/* --- DL --- */
.section dd > p:first-child {
margin-top: 0;
}
.section dd > p:last-child {
margin-bottom: 0;
}
.section dd {
margin-bottom: 1em;
}
.section dl.attrs dd, .section dl.eldef dd {
margin-bottom: 0;
}
/* --- EXAMPLES --- */
pre.example {
border-top: 1px solid #ff4500;
border-bottom: 1px solid #ff4500;
padding: 1em;
margin-top: 1em;
}
pre.example::before {
content: "Example";
display: block;
width: 150px;
background: #ff4500;
color: #fff;
font-family: initial;
padding: 3px;
font-weight: bold;
margin: -1em 0 1em -1em;
}
/* --- EDITORIAL NOTES --- */
.issue {
padding: 1em;
margin: 1em 0em 0em;
border: 1px solid #f00;
background: #ffc;
}
.issue::before {
content: "Issue";
display: block;
width: 150px;
margin: -1.5em 0 0.5em 0;
font-weight: bold;
border: 1px solid #f00;
background: #fff;
padding: 3px 1em;
}
.note {
margin: 1em 0em 0em;
padding: 1em;
border: 2px solid #cff6d9;
background: #e2fff0;
}
.note::before {
content: "Note";
display: block;
width: 150px;
margin: -1.5em 0 0.5em 0;
font-weight: bold;
border: 1px solid #cff6d9;
background: #fff;
padding: 3px 1em;
}
/* --- Best Practices --- */
div.practice {
border: solid #bebebe 1px;
margin: 2em 1em 1em 2em;
}
/* Best-practice label: bold italic text on a pale cyan background,
   positioned relatively and raised 1.5em from its normal position. */
span.practicelab {
  margin: 1.5em 0.5em 1em 1em;
  padding: 0 0.5em;
  font-weight: bold;
  font-style: italic;
  background: #dfffff;
  position: relative;
  top: -1.5em;
}
p.practicedesc {
margin: 1.5em 0.5em 1em 1em;
}
/* On screen media only: shift the practice description up by 2em and use a
   negative bottom margin. The @media block must be closed explicitly here;
   otherwise every rule that follows it in this stylesheet is parsed as part
   of the media query and applies to screen media only. */
@media screen {
  p.practicedesc {
    position: relative;
    top: -2em;
    padding: 0;
    margin: 1.5em 0.5em -1em 1em;
  }
}
/* --- SYNTAX HIGHLIGHTING --- */
pre.sh_sourceCode {
background-color: white;
color: black;
font-style: normal;
font-weight: normal;
}
pre.sh_sourceCode .sh_keyword { color: #005a9c; font-weight: bold; } /* language keywords */
pre.sh_sourceCode .sh_type { color: #666; } /* basic types */
pre.sh_sourceCode .sh_usertype { color: teal; } /* user defined types */
pre.sh_sourceCode .sh_string { color: red; font-family: monospace; } /* strings and chars */
pre.sh_sourceCode .sh_regexp { color: orange; font-family: monospace; } /* regular expressions */
pre.sh_sourceCode .sh_specialchar { color: #ffc0cb; font-family: monospace; } /* e.g., \n, \t, \\ */
pre.sh_sourceCode .sh_comment { color: #A52A2A; font-style: italic; } /* comments */
pre.sh_sourceCode .sh_number { color: purple; } /* literal numbers */
pre.sh_sourceCode .sh_preproc { color: #00008B; font-weight: bold; } /* e.g., #include, import */
pre.sh_sourceCode .sh_symbol { color: blue; } /* e.g., *, + */
pre.sh_sourceCode .sh_function { color: black; font-weight: bold; } /* function calls and declarations */
pre.sh_sourceCode .sh_cbracket { color: red; } /* block brackets (e.g., {, }) */
pre.sh_sourceCode .sh_todo { font-weight: bold; background-color: #00FFFF; } /* TODO and FIXME */
/* Predefined variables and functions (for instance glsl) */
pre.sh_sourceCode .sh_predef_var { color: #00008B; }
pre.sh_sourceCode .sh_predef_func { color: #00008B; font-weight: bold; }
/* for OOP */
pre.sh_sourceCode .sh_classname { color: teal; }
/* line numbers (not yet implemented) */
pre.sh_sourceCode .sh_linenum { display: none; }
/* Internet related */
pre.sh_sourceCode .sh_url { color: blue; text-decoration: underline; font-family: monospace; }
/* for ChangeLog and Log files */
pre.sh_sourceCode .sh_date { color: blue; font-weight: bold; }
pre.sh_sourceCode .sh_time, pre.sh_sourceCode .sh_file { color: #00008B; font-weight: bold; }
pre.sh_sourceCode .sh_ip, pre.sh_sourceCode .sh_name { color: #006400; }
/* for Prolog, Perl... */
pre.sh_sourceCode .sh_variable { color: #006400; }
/* for LaTeX */
pre.sh_sourceCode .sh_italics { color: #006400; font-style: italic; }
pre.sh_sourceCode .sh_bold { color: #006400; font-weight: bold; }
pre.sh_sourceCode .sh_underline { color: #006400; text-decoration: underline; }
pre.sh_sourceCode .sh_fixed { color: green; font-family: monospace; }
pre.sh_sourceCode .sh_argument { color: #006400; }
pre.sh_sourceCode .sh_optionalargument { color: purple; }
pre.sh_sourceCode .sh_math { color: orange; }
pre.sh_sourceCode .sh_bibtex { color: blue; }
/* for diffs */
pre.sh_sourceCode .sh_oldfile { color: orange; }
pre.sh_sourceCode .sh_newfile { color: #006400; }
pre.sh_sourceCode .sh_difflines { color: blue; }
/* for css */
pre.sh_sourceCode .sh_selector { color: purple; }
pre.sh_sourceCode .sh_property { color: blue; }
pre.sh_sourceCode .sh_value { color: #006400; font-style: italic; }
/* other */
pre.sh_sourceCode .sh_section { color: black; font-weight: bold; }
pre.sh_sourceCode .sh_paren { color: red; }
pre.sh_sourceCode .sh_attribute { color: #006400; }
</style><link href="http://www.w3.org/StyleSheets/TR/W3C-WD" rel="stylesheet" type="text/css" charset="utf-8"></head><body style="display: inherit; "><div class="head"><p><a href="http://www.w3.org/"><img width="72" height="48" src="http://www.w3.org/Icons/w3c_home" alt="W3C"></a></p><h1 class="title" id="title">Canonical XML Version 2.0</h1><h2 id="w3c-working-draft-21-april-2011">W3C Working Draft 21 April 2011</h2><dl><dt>This version:</dt><dd><a href="http://www.w3.org/TR/2011/WD-xml-c14n2-20110421/">http://www.w3.org/TR/2011/WD-xml-c14n2-20110421/</a></dd><dt>Latest published version:</dt><dd><a href="http://www.w3.org/TR/xml-c14n2/">http://www.w3.org/TR/xml-c14n2/</a></dd><dt>Latest editor's draft:</dt><dd><a href="http://www.w3.org/2008/xmlsec/Drafts/c14n-20/">http://www.w3.org/2008/xmlsec/Drafts/c14n-20/</a></dd><dt>Previous version:</dt><dd><a href="http://www.w3.org/TR/2010/WD-xml-c14n2-20100831/">http://www.w3.org/TR/2010/WD-xml-c14n2-20100831/</a></dd><dt>Latest recommendation:</dt><dd><a href="http://www.w3.org/TR/xml-c14n2/">http://www.w3.org/TR/xml-c14n2/</a></dd><dt>Editors:</dt><dd><span>John Boyer</span>, IBM (formerly PureEdge Solutions Inc.) ( Version 1.0 )</dd>
<dd><span>Glenn Marcy</span>, IBM ( Version 1.1 )</dd>
<dd><span>Pratik Datta</span>, Oracle</dd>
<dd><span>Frederick Hirsch</span>, Nokia</dd>
</dl><p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 2011 <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>®</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.eu/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p><hr></div>
<div id="abstract" class="introductory section"><h2>Abstract</h2>
<p>
Canonical XML Version 2.0 is a canonicalization algorithm for XML Signature 2.0. It addresses issues around performance,
streaming, hardware implementation, robustness, minimizing attack surface,
determining what is signed and more.
</p>
<p>Any XML document is part of a set of XML documents that
are logically equivalent within an application context,
but which vary in physical representation based on
syntactic changes permitted by XML 1.0 [<cite><a class="bibref" rel="biblioentry" href="#bib-XML10">XML10</a></cite>] and
Namespaces in XML 1.0 [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>]. This specification
describes a method for generating a physical
representation,
the canonical form, of an XML document that accounts for the permissible changes. Except for limitations regarding
a few unusual cases, if two documents have the same canonical form, then the two documents are logically equivalent
within the given application context. Note that two documents may have differing canonical forms yet still be
equivalent in a given context based on application-specific equivalence rules for which no generalized XML
specification could account.
</p>
<p>Canonical XML Version 2.0 is applicable to XML 1.0. It is not defined for XML 1.1. </p>
</div><div id="sotd" class="introductory section"><h2>Status of This Document</h2><p><em>This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the <a href="http://www.w3.org/TR/">W3C technical reports index</a> at http://www.w3.org/TR/.</em></p>
<p>
This is a W3C Last Call Working Draft of "Canonical XML Version 2.0".
</p>
<p>A <a href="Overview-pub-diff.html">diff-marked version</a> of this
specification that highlights changes against the <a href="http://www.w3.org/TR/2010/WD-xml-c14n2-20100831/">previous
version</a> is available. Major changes in this version:
</p><ul>
<li>The number of parameters has been reduced and
simplified.</li>
<li>"Conformance profiles" has been removed as the parameters have
been simplified and support for all is now required.</li>
<li>The options
for <code>prefixRewrite</code> have been reduced to only support
sequential (and no longer digest), an additional
simplification.</li>
<li>An algorithm for finding visibly utilized namespaces has
been added.</li>
<li>Inclusive canonicalization support has been removed and
a note has been added with an explanation.</li>
<li>Attributes and elements are both considered in
<code>QNameAware</code> parameter.</li>
<li>Explicitly disallow redefining xml* prefixes and
explicitly do not support <code>xml:base</code></li>
<li>Remove CURIE discussion</li>
<li>Update references, add XML-PARSER-STAX reference.</li>
<li>Various formatting and editorial updates</li>
</ul>
<p></p>
<p>This document was published by the <a href="http://www.w3.org/2008/xmlsec/">XML Security Working Group</a> as a Last Call Working Draft. This document is intended to become a W3C Recommendation. If you wish to make comments regarding this document, please send them to <a href="mailto:public-xmlsec@w3.org">public-xmlsec@w3.org</a> (<a href="mailto:public-xmlsec-request@w3.org?subject=subscribe">subscribe</a>, <a href="http://lists.w3.org/Archives/Public/public-xmlsec/">archives</a>). The Last Call period ends 31 May 2011. All feedback is welcome.</p><p>Publication as a Working Draft does not imply endorsement by the W3C Membership. This is a draft document and may be updated, replaced or obsoleted by other documents at any time. It is inappropriate to cite this document as other than work in progress.</p><p>This is a Last Call Working Draft and thus the Working Group has determined that this document has satisfied the relevant technical requirements and is sufficiently stable to advance through the Technical Recommendation process.</p><p>This document was produced by a group operating under the <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5 February 2004 W3C Patent Policy</a>. W3C maintains a <a href="http://www.w3.org/2004/01/pp-impl/42458/status" rel="disclosure">public list of any patent disclosures</a> made in connection with the deliverables of the group; that page also includes instructions for disclosing a patent. 
An individual who has actual knowledge of a patent which the individual believes contains <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential">Essential Claim(s)</a> must disclose the information in accordance with <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section 6 of the W3C Patent Policy</a>.</p></div><div id="toc" class="section"><h2 class="introductory">Table of Contents</h2><ul class="toc"><li class="tocline"><a href="#sec-Introduction" class="tocxref"><span class="secno">1. </span>Introduction</a><ul class="toc"><li class="tocline"><a href="#sec-Terminology" class="tocxref"><span class="secno">1.1 </span>Terminology</a></li><li class="tocline"><a href="#sec-Applications" class="tocxref"><span class="secno">1.2 </span>Applications</a></li><li class="tocline"><a href="#sec-Limitations" class="tocxref"><span class="secno">1.3 </span>Limitations</a></li><li class="tocline"><a href="#sec-Requirements" class="tocxref"><span class="secno">1.4 </span>Requirements for 2.0</a><ul class="toc"><li class="tocline"><a href="#sec-Requirements-Performance" class="tocxref"><span class="secno">1.4.1 </span>Performance</a></li><li class="tocline"><a href="#sec-Requirements-Streaming" class="tocxref"><span class="secno">1.4.2 </span>Streaming</a></li><li class="tocline"><a href="#sec-Requirements-Robustness" class="tocxref"><span class="secno">1.4.3 </span>Robustness</a></li><li class="tocline"><a href="#sec-Requirements-Portability" class="tocxref"><span class="secno">1.4.4 </span>Portability</a></li><li class="tocline"><a href="#sec-Requirements-Simplicity" class="tocxref"><span class="secno">1.4.5 </span>Simplicity</a></li></ul></li></ul></li><li class="tocline"><a href="#sec-XML-Canonicalization" class="tocxref"><span class="secno">2. 
</span>Canonical XML 2.0</a><ul class="toc"><li class="tocline"><a href="#sec-Data-Model" class="tocxref"><span class="secno">2.1 </span>Data Model</a></li><li class="tocline"><a href="#sec-Canonicalization-Parameters" class="tocxref"><span class="secno">2.2 </span>Parameters</a></li><li class="tocline"><a href="#sec-Processing-Model" class="tocxref"><span class="secno">2.3 </span>Processing Model</a></li><li class="tocline"><a href="#sec-ExclusiveNeed" class="tocxref"><span class="secno">2.4 </span>The Need for Exclusive XML Canonicalization</a><ul class="toc"><li class="tocline"><a href="#sec-Simple" class="tocxref"><span class="secno">2.4.1 </span> A Simple Example</a></li><li class="tocline"><a href="#sec-Enveloping" class="tocxref"><span class="secno">2.4.2 </span>General Problems with re-Enveloping</a></li></ul></li><li class="tocline"><a href="#sec-Namespace-Processing" class="tocxref"><span class="secno">2.5 </span>Namespace Processing</a><ul class="toc"><li class="tocline"><a href="#sec-Namespace-Concepts" class="tocxref"><span class="secno">2.5.1 </span>Namespace concepts</a></li><li class="tocline"><a href="#sec-Namespace-Algorithm" class="tocxref"><span class="secno">2.5.2 </span>Namespace processing algorithm</a></li><li class="tocline"><a href="#sec-ExcCanonicalization-Example" class="tocxref"><span class="secno">2.5.3 </span>Example of exclusive canonicalization with prefix rewriting</a><ul class="toc"><li class="tocline"><a href="#sec-Example-PrefixRewriteNone" class="tocxref"><span class="secno">2.5.3.1 </span>With <code>PrefixRewrite="none"</code></a></li><li class="tocline"><a href="#sec-Example-PrefixRewriteSeq" class="tocxref"><span class="secno">2.5.3.2 </span>With <code>PrefixRewrite="sequential"</code></a></li></ul></li></ul></li><li class="tocline"><a href="#sec-Attribute-processing" class="tocxref"><span class="secno">2.6 </span>Attribute processing</a></li></ul></li><li class="tocline"><a href="#sec-Use" class="tocxref"><span 
class="secno">3. </span>Use of Canonical XML 2.0 in XML Security</a><ul class="toc"><li class="tocline"><a href="#sec-Use-in-Signature" class="tocxref"><span class="secno">3.1 </span>Use of Canonical XML 2.0 in XML Signature 2.0</a></li><li class="tocline"><a href="#sec-Use-in-Encryption" class="tocxref"><span class="secno">3.2 </span>Use of Canonical XML 2.0 in XML Encryption 1.1</a></li></ul></li><li class="tocline"><a href="#sec-Pseudocode" class="tocxref"><span class="secno">4. </span>Pseudocode</a><ul class="toc"><li class="tocline"><a href="#sec-pseudocode-canonicalize" class="tocxref"><span class="secno">4.1 </span>canonicalize()</a></li><li class="tocline"><a href="#sec-pseudocode-canonicalizeSubtree" class="tocxref"><span class="secno">4.2 </span>canonicalizeSubtree()</a></li><li class="tocline"><a href="#sec-pseudocode-processNode" class="tocxref"><span class="secno">4.3 </span>processNode()</a></li><li class="tocline"><a href="#sec-pseudocode-processDocument" class="tocxref"><span class="secno">4.4 </span>processDocument()</a></li><li class="tocline"><a href="#sec-pseudocode-processElement" class="tocxref"><span class="secno">4.5 </span>processElement()</a></li><li class="tocline"><a href="#sec-pseudocode-processText" class="tocxref"><span class="secno">4.6 </span>processText()</a></li><li class="tocline"><a href="#sec-pseudocode-processPI" class="tocxref"><span class="secno">4.7 </span>processPI()</a></li><li class="tocline"><a href="#sec-pseudocode-processComment" class="tocxref"><span class="secno">4.8 </span>processComment()</a></li><li class="tocline"><a href="#sec-pseudocode-addNamespaces" class="tocxref"><span class="secno">4.9 </span>addNamespaces()</a></li><li class="tocline"><a href="#sec-pseudocode-processNamespaces" class="tocxref"><span class="secno">4.10 </span>processNamespaces()</a></li><li class="tocline"><a href="#sec-pseudocode-addXMLAttributes" class="tocxref"><span class="secno">4.11 </span>addXMLAttributes()</a></li></ul></li><li 
class="tocline"><a href="#sec-Output-Rules" class="tocxref"><span class="secno">5. </span>Output rules</a></li><li class="tocline"><a href="#sec-Processing-for-Streaming" class="tocxref"><span class="secno">6. </span>Processing model for Streaming XML parsers</a></li><li class="tocline"><a href="#references" class="tocxref"><span class="secno">A. </span>References</a><ul class="toc"><li class="tocline"><a href="#normative-references" class="tocxref"><span class="secno">A.1 </span>Normative references</a></li><li class="tocline"><a href="#informative-references" class="tocxref"><span class="secno">A.2 </span>Informative references</a></li></ul></li></ul></div>
<div id="sec-Introduction" class="section">
<!--OddPage--><h2><span class="secno">1. </span>Introduction</h2>
<div id="sec-Terminology" class="section">
<h3><span class="secno">1.1 </span>Terminology</h3>
<p>The key words "<em class="rfc2119" title="must">must</em>", "<em class="rfc2119" title="must not">must not</em>", "<em class="rfc2119" title="required">required</em>", "<em class="rfc2119" title="shall">shall</em>", "<em class="rfc2119" title="shall not">shall not</em>", "<em class="rfc2119" title="should">should</em>", "<em class="rfc2119" title="should not">should not</em>",
"<em class="rfc2119" title="recommended">recommended</em>", "<em class="rfc2119" title="may">may</em>", and "<em class="rfc2119" title="optional">optional</em>" in this document are to be interpreted as described in RFC 2119 [<cite><a class="bibref" rel="biblioentry" href="#bib-RFC2119">RFC2119</a></cite>].
</p>
<p>See [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>] for the definition of QName.</p>
<dl>
<dt>document subset</dt>
<dd>A <em>document subset</em> is a portion of an XML document
that may not include all of the nodes in the document.</dd>
<dt>canonical form</dt>
<dd>The <em>canonical form</em> of an XML document is a physical representation of the document
produced by the method described in this specification.</dd>
<dt>canonical XML</dt>
<dd>The term <em>canonical XML</em> refers to XML that is in canonical form. The XML canonicalization
method is the algorithm defined by this specification that generates the canonical form of a given XML document
or document subset. The term XML canonicalization refers to the process of applying the XML canonicalization
method to an XML document or document subset.
</dd>
<dt>subtree</dt>
<dd>Subtree refers to one XML element node, and all that it contains. In XPath terminology it is an element
node and all its descendant nodes.</dd>
<dt>DOM</dt>
<dd>DOM or Document Object Model is a model of representing an XML document in tree
structure. The W3C DOM standard [<cite><a class="bibref" rel="biblioentry" href="#bib-DOM-LEVEL-2-CORE">DOM-LEVEL-2-CORE</a></cite>] is one such DOM, but this specification does not require this particular
set of DOM APIs; any similar model can be used as long as it has a tree representation of the XML document,
whose root is a document node, and the document node's descendants are element nodes,
attribute nodes, text nodes etc. </dd>
<dt>DOM parser</dt>
<dd>A software module that reads an XML document and constructs a DOM tree. </dd>
<dt>Stream parser</dt>
<dd>A software module that reads an XML document and
constructs a stream of XML events like "beginElement", "text",
"endElement". StAX [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-PARSER-STAX">XML-PARSER-STAX</a></cite>] is an example of a stream parser. </dd>
</dl>
</div>
<div id="sec-Applications" class="section">
<h3><span class="secno">1.2 </span>Applications</h3>
<p>Since the XML 1.0 Recommendation [<cite><a class="bibref" rel="biblioentry" href="#bib-XML10">XML10</a></cite>] and the Namespaces in XML 1.0 Recommendation [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>] define
multiple
syntactic methods for expressing the same information, XML applications tend to take liberties with changes that
have no impact on the information content of the document. XML canonicalization is designed to be useful to
applications that require the ability to test whether the information content of a document or document subset
has been changed. This is done by comparing the canonical form of the original document before application
processing with the canonical form of the document result of the application processing.
</p>
<p>For example, a digital signature over the canonical form of an XML document or document subset would allow
the signature digest calculations to be oblivious to changes in the original document's physical representation,
provided that the changes are defined to be logically equivalent by the XML 1.0 or Namespaces in XML 1.0.
During signature generation, the digest is computed over the canonical form of the document. The document is
then transferred to the relying party, which validates the signature by reading the document and computing
a digest of the canonical form of the received document. The equivalence of the digests computed by the
signing and relying parties (and hence the equivalence of the canonical forms over which they were computed)
ensures that the information content of the document has not been altered since it was signed.
</p>
<p><em>Note:</em> Although not stated as a requirement on implementations, nor formally proved to be the case,
it is the intent of this specification that if the text generated by canonicalizing a document according
to this specification is itself parsed and canonicalized according to this specification, the text generated
by the second canonicalization will be the same as that generated by the first canonicalization.
</p>
</div>
<div id="sec-Limitations" class="section">
<h3><span class="secno">1.3 </span>Limitations</h3>
<p>Two XML documents may have differing information content that is
nonetheless logically equivalent within a given application context. Although
two XML documents are equivalent (aside from limitations given in this section)
if their canonical forms are identical, it is not a goal of this work to establish
a method such that two XML documents are equivalent if <i>and only if</i> their
canonical forms are identical. Such a method is unachievable, in part due to
application-specific rules such as those governing unimportant whitespace and
equivalent data (e.g. <code>&lt;color&gt;black&lt;/color&gt;</code> versus
<code>&lt;color&gt;rgb(0,0,0)&lt;/color&gt;</code>). There are also equivalencies
established by other W3C Recommendations and Working Drafts. Accounting for
these additional equivalence rules is beyond the scope of this work. They can
be applied by the application or become the subject of future
specifications.</p>
<p>The canonical form of an XML document may not be completely operational
within the application context, though the circumstances under which this
occurs are unusual. This problem may be of concern in certain applications
since the canonical form of a document and the canonical form of the
canonical form of the document are equivalent. For example, in a digital
signature application, it cannot be established whether the operational
original document or the non-operational canonical form was signed
because the canonical form can be substituted for the original document
without changing the digest calculation. However, the security risk only
occurs in the unusual circumstances described below, which can all be
resolved or at least detected prior to digital signature generation.</p>
<p>The difficulties arise due to the loss of the following information not
available in the <a href="#sec-Data-Model">data model</a>:</p>
<ol>
<li>base URI, especially in content derived from the replacement text of
external general parsed entity references</li>
<li>notations and external unparsed entity references</li>
<li>attribute types in the document type declaration</li>
</ol>
<p>In the first case, note that a document containing a relative URI [<cite><a class="bibref" rel="biblioentry" href="#bib-URI">URI</a></cite>]
is only operational when accessed from a specific URI
that provides the proper base URI. In addition, if the document contains
external general parsed entity references to content containing relative URIs,
then the relative URIs will not be operational in the canonical form, which
replaces the entity reference with internal content (thereby implicitly
changing the default base URI of that content). Both of these problems can
typically be solved by adding support for the <code>xml:base</code> attribute
[<cite><a class="bibref" rel="biblioentry" href="#bib-XMLBASE">XMLBASE</a></cite>] to the application, then adding appropriate
<code>xml:base</code> attributes to the document element and all top-level
elements in external entities. In addition, applications often have an
opportunity to resolve relative URIs prior to the need for a canonical form.
For example, in a digital signature application, a document is often retrieved
and processed prior to signature generation. The processing <em class="rfc2119" title="should">should</em> create a
new document in which relative URIs have been converted to absolute URIs,
thereby mitigating any security risk for the new document.</p>
<p>In the second case, the loss of external unparsed entity references and the
notations that bind them to applications means that canonical forms cannot
properly distinguish among XML documents that incorporate unparsed data via
this mechanism. This is an unusual case precisely because most XML processors
currently discard the document type declaration, which discards the notation,
the entity's binding to a URI, and the attribute type that binds the attribute
value to an entity name. For documents that must be subjected to more than one
XML processor, the XML design typically indicates a reference to unparsed data
using a URI in the attribute value.</p>
<p>In the third case, the loss of attribute types can affect the canonical
form in different ways depending on the type. Attributes of type ID cease to
be ID attributes. Hence, any XPath expressions that refer to the canonical
form using the <code>id()</code> function cease to operate. The attribute
types ENTITY and ENTITIES are not part of this case; they are covered in the
second case above. Attributes of enumerated type and of type ID, IDREF,
IDREFS, NMTOKEN, NMTOKENS, and NOTATION fail to be appropriately constrained
during future attempts to change the attribute value if the canonical form
replaces the original document during application processing. Applications can
avoid the difficulties of this case by ensuring that an appropriate document
type declaration is prepended prior to using the canonical form in further XML
processing. This is likely to be an easy task since attribute lists are
usually acquired from a standard external DTD subset, and any entity and
notation declarations not also in the external DTD subset are typically
constructed from application configuration information and added to the
internal DTD subset.</p>
</div>
<div id="sec-Requirements" class="section">
<h3><span class="secno">1.4 </span>Requirements for 2.0</h3>
<p> Canonical XML 2.0 solves many of the major
issues that have been identified by implementers
with Canonical XML 1.0 [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-C14N">XML-C14N</a></cite>]
and 1.1 [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-C14N11">XML-C14N11</a></cite>].</p>
<div id="sec-Requirements-Performance" class="section">
<h4><span class="secno">1.4.1 </span>Performance</h4>
<p>A major factor in performance issues noted in XML Signature is often
Canonical XML 1.1 processing. Canonicalization will be slow if the
implementation uses the Canonical XML 1.1 specification as a formula
without any attempt at
optimization. This specification rectifies this problem by
incorporating lessons learned from implementation into the
specification.
Most mature canonicalization implementations
solve the performance problem
by inspecting the signature first, to see if it
can be canonicalized using a simple tree walk
algorithm
whose performance is similar to regular XML
serialization. If not they fall back to the
expensive nodeset-based
algorithm. </p>
<p>The use cases that cannot be addressed by the
simple tree walk algorithm are mostly edge cases.
This specification restricts the input to the
canonicalization algorithm so that
implementations can always
use the simple tree walk algorithm. </p>
<p>C14N 1.x uses an "XPath 1.0 Nodeset" to
describe a document subset.
This is the root
cause of the performance
problem and
can be solved by
not using a nodeset. This version of the specification does not use a
nodeset, visits each node exactly once, and only visits the nodes
that are being canonicalized.
</p>
</div>
<div id="sec-Requirements-Streaming" class="section">
<h4><span class="secno">1.4.2 </span>Streaming</h4>
<p>A streaming implementation is required to be
able to process very large documents without
holding them all in
memory; it should be able to process documents one chunk at a time.
</p>
</div>
<div id="sec-Requirements-Robustness" class="section">
<h4><span class="secno">1.4.3 </span>Robustness</h4>
<p>Whitespace handling was a common cause of
signature breakage. XML libraries allow one to
"pretty print"
an XML document, and most people wrongly assume
that the white space introduced by pretty printing
will be
removed by canonicalization but that is not the
case. This specification adds three techniques to
improve robustness:
</p>
<ol>
<li>Optionally remove leading and trailing
whitespace from text nodes, </li>
<li>Allow for QNames
in content, particularly in the <code>xsi:type</code> attribute,</li>
<li>Optionally rewrite prefixes </li>
</ol>
</div>
<div id="sec-Requirements-Portability" class="section">
<h4><span class="secno">1.4.4 </span>Portability</h4>
<p>It should be possible to canonicalize a subdocument
in such a way that the signature
doesn't break when the subdocument is moved into a completely different
XML document. This is the goal of
Exclusive canonicalization [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-EXC-C14N">XML-EXC-C14N</a></cite>], which mostly satisfies this
requirement except for the case of namespace
prefixes embedded in content. This specification builds on exclusive
canonicalization and solves the problem
of namespaces in content.</p>
</div>
<div id="sec-Requirements-Simplicity" class="section">
<h4><span class="secno">1.4.5 </span>Simplicity</h4>
<p>C14N 1.x algorithms are complex and depend on a
full XPath library. This increases the work required for
scripting languages to use XML Signatures. This
specification addresses this issue by not using
the complex nodeset
model, and therefore not relying completely on XPath.
</p>
</div>
</div>
</div>
<div id="sec-XML-Canonicalization" class="section">
<!--OddPage--><h2><span class="secno">2. </span>Canonical XML 2.0</h2>
<div id="sec-Data-Model" class="section">
<h3><span class="secno">2.1 </span>Data Model</h3>
<p>The input to the canonicalization algorithm
consists of an XML document subset and a set of
options. The XML document
subset can be expressed in two ways, with a DOM model or a
Stream model. </p>
<p>In the DOM model the XML subset is expressed as:
</p><ul>
<li><b>Inclusion List:</b> Either the document Node <code>D</code> or a list of one or more element nodes <code>E<sub>1</sub></code>, <code>E<sub>2</sub></code>, ... <code>E<sub>n</sub></code>.
<br>(If out of this list, one element node <code>E<sub>i</sub></code> is a descendant of another <code>E<sub>j</sub></code>, then that element node <code>E<sub>i</sub></code> is ignored.)</li>
<li><b>Exclusion List (optional):</b> A list of zero or more element nodes <code>E<sub>1</sub></code>, <code>E<sub>2</sub></code>, ... <code>E<sub>m</sub></code> and a list of zero or more attribute
nodes <code>A<sub>1</sub></code>, <code>A<sub>2</sub></code>,
... <code>A<sub>M</sub></code>. <br>These attribute nodes should
not be namespace declarations or attributes in the <code>xml</code>
namespace. </li>
</ul>
The XML subset consists of all the nodes in the Inclusion list and their descendants, minus all the nodes that are in the Exclusion list and their descendants.
<p></p>
<p>The element nodes in the Inclusion list are also referred to as <em>apex nodes</em>.</p>
<p>
Note: This input model is a very limited form of the generic XPath Nodeset that was the input model for Canonical
XML 1.x. It is designed to be simple and allow for a high performance algorithm, while still supporting the most essential use
cases. Specifically:
</p><ul>
<li><p>This model does not
support re-inclusion; i.e. all the exclusions are applied after all the inclusions. It is effectively a simplified form of the
XPath Filter 2 model [<cite><a class="bibref" rel="biblioentry" href="#bib-XMLDSIG-XPATH-FILTER2">XMLDSIG-XPATH-FILTER2</a></cite>] with one intersect followed by one optional subtract operation.
Re-inclusion complicates the canonicalization algorithm, especially in the areas of namespace and xml attribute inheritance.
</p></li>
<li><p>Exclusion is limited to complete subtrees and attribute nodes.
Other kinds of nodes (text, comment, PI) cannot be excluded.
</p></li>
<li><p>Attribute exclusion is also limited, such that namespace declarations and attributes from the xml namespace cannot be excluded.</p></li>
<li><p>Some examples of subsets that were permitted in Canonical XML 1.x, but not in this new version:
</p><ul>
<li> A subset consisting of a single attribute all by itself. </li>
<li> A subset consisting of an attribute without its owner element.</li>
<li> A subset consisting of a text node all by itself.</li>
<li> A subset consisting of a text node without its parent node.</li>
<li> A subset consisting of an element without some of its text node children. </li>
</ul>
<p></p></li></ul>
<p></p>
<p>Note: Canonical XML 2.0, unlike earlier versions, does not support direct
input of an octet stream. The transformation of such a stream into the input
model required by this specification is application-specific and should be
defined in specifications that reference or make use of this one.
</p>
</div>
<div id="sec-Canonicalization-Parameters" class="section">
<h3><span class="secno">2.2 </span>Parameters</h3>
<p>Instead of separate algorithms for each variant of canonicalization, this specification takes the
approach of a single algorithm subject to a variety of parameters that change its behavior to address specific use cases.</p>
<p>The following is a list of the logical parameters supported by this
algorithm. The actual serialization that expresses the parameters in
use may be defined as appropriate to specific applications of this
specification (e.g., the <code>&lt;ds:CanonicalizationMethod&gt;</code> element in [<cite><a class="bibref" rel="biblioentry" href="#bib-XMLDSIG-CORE2">XMLDSIG-CORE2</a></cite>]).</p>
<table border="1">
<thead><tr><td>Name</td><td>Values</td><td>Description</td><td>Default</td></tr></thead>
<tbody>
<tr><td><code>IgnoreComments</code></td><td>true or false</td>
<td>whether to ignore comments during canonicalization</td>
<td>true</td></tr>
<tr><td><code>TrimTextNodes</code></td><td>true or false</td>
<td>whether to trim (i.e. remove leading and trailing whitespaces) all text nodes when canonicalizing.
Adjacent text nodes must be coalesced prior to trimming. If an element has an <code>xml:space="preserve"</code>
attribute, then text node descendants of that element are not trimmed regardless of the value of this parameter.
</td>
<td>true</td></tr>
<tr><td><code>PrefixRewrite</code></td><td>none, sequential</td>
<td>with <code>none</code>, prefixes are left unchanged, with <code>sequential</code>, prefixes are changed to "n0", "n1", "n2" ...
except the special prefixes "xml" and "xmlns" which are left unchanged.
</td>
<td>none</td></tr>
<tr><td><code>QNameAware</code></td><td>an enumeration of qualified element names, element names that contain XPath 1.0 expressions,
qualified attribute names,
and unqualified attribute names (identified by name, and parent qualified name)</td>
<td>set of nodes whose entire content must be processed as QName-valued for the
purposes of canonicalization, including prefix rewriting and recognition of prefix "visible utilization"</td>
<td>empty set</td></tr>
</tbody>
</table>
<p>
All of these parameters <em class="rfc2119" title="must">must</em> be implemented.
</p>
<p>
Note: Before Canonical XML 2.0, there were two separate canonicalization algorithms - Inclusive Canonicalization [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-C14N11">XML-C14N11</a></cite>]
and Exclusive Canonicalization [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-EXC-C14N">XML-EXC-C14N</a></cite>]. The major difference between these two algorithms is the treatment of namespace
declarations and inherited attributes in the <code>xml:</code> namespace.
Earlier draft versions of Canonical XML 2.0 had combined Inclusive and Exclusive
into a single algorithm, with parameters to control how namespaces and inherited <code>xml:</code> attributes were treated.
Effectively one could set these parameters to make Canonical XML 2.0 emulate either C14n 1.0 or C14N 1.1 or Exc C14n 1.0.
But in the current version of Canonical XML 2.0, Inclusive canonicalization has been removed completely.
<br>
<br>
Exclusive canonicalization has been far more popular than inclusive, because of
its "portability" property. I.e. if a subdocument is signed with exclusive canonicalization, and then this subdocument is moved off
to a different XML context, the signature on that subdocument still remains valid. Inclusive canonicalization doesn't have this
portability property, however inclusive canonicalization has an advantage over exclusive canonicalization 1.0, when it comes to qnames in content.
Exclusive canonicalization 1.0 only emits namespace declarations that it considers to be visibly utilized, so if there is a qname embedded in a text node
or an attribute node, it doesn't recognize it. For example in this attribute <code>xsi:type="xsd:string"</code>, the "xsd" prefix is embedded
in the content, and so Exclusive canonicalization 1.0 will not consider the "xsd" prefix to be visibly utilized and hence not emit the
xsd namespace declaration. Not emitting the declaration makes it susceptible to certain wrapping attacks. Exclusive canonicalization 1.0 offers
the "InclusiveNamespace" mechanism to deal with these kinds of prefixes. Any prefixes mentioned in this list will be treated inclusively, i.e. their
namespace declarations will be emitted even if they are not used.
<br>
<br>
Canonical XML 2.0 overcomes the shortcomings of Exclusive Canonicalization 1.0, with the <code>QNameAware</code> parameter. This parameter can be
used to list element or attribute nodes that are expected to have qnames. Canonical XML 2.0 will scan for prefixes in these elements and attributes
and consider them to be visibly utilized too. With the introduction of this parameter, there is really no need for Inclusive canonicalization any
more, so it has been completely removed from Canonical XML 2.0.
<br>
<br>
Note: The algorithm for prefix scanning doesn't cover all kinds of prefix embedding. For example, if a text node's value is a space-separated list of
qnames, this algorithm will not detect the prefixes of these qnames. It will only detect two kinds of embedding, a) when the entire text node or
attribute is a qname, and b) when a text node is an XPath expression containing prefixes.
<br>
<br>
Inclusive canonicalization also preserves the values of <code>xml:</code> attributes in context. I.e. it looks at the ancestors of the
subdocument to be signed, and collects the value of any inheritable xml attributes,
specifically <code>xml:lang</code>, <code>xml:space</code> and <code>xml:base</code>, from these ancestor elements and emits them at the root of
the subdocument. Exclusive canonicalization does not do this as this violates the portability requirement. Likewise, Canonical XML 2.0 ignores
these attributes as well.
</p>
</div>
<div id="sec-Processing-Model" class="section">
<h3><span class="secno">2.3 </span>Processing Model</h3>
<p>The basic canonicalization process consists of traversing the tree
and outputting octets for each node.</p>
<p>
<b>Input:</b> The XML subset consisting of an Inclusion list and an Exclusion list.
</p>
<p><b>Processing</b>
</p><ul>
<li><em>Sort inclusion list by document order:</em> If inclusion list only has the document node <code>D</code> there is nothing to sort. Otherwise remove all element nodes <code>E<sub>i</sub></code> that are descendants of some other element node in the inclusion list. Then sort the remaining element nodes <code>E<sub>1</sub></code>, <code>E<sub>2</sub></code>, ...<code>E<sub>n</sub></code> by document order.</li>
<li><em>Canonicalize each subtree:</em> For each element
node <code>E<sub>i</sub></code> or document node <code>D</code> in
the sorted list, do a depth first traversal to visit all the
descendant nodes in the <code>E<sub>i</sub></code> subtree, and
canonicalize each one of them. While traversing, if the current
node is an element and that element is in the exclusion list, prune
the traversal, i.e. skip over that element and all its
descendants.</li>
</ul>
<p>
During traversal of each subtree, generate the canonicalized text depending on the node type as follows:
</p>
<ul>
<li><b>Root Node-</b> Ignore the byte order mark, the XML declaration, and anything from within the document type declaration. Traverse through the children.
<p>
</p></li>
<li><b>Element Nodes-</b> The canonicalized result is an open angle bracket (<code>&lt;</code>), the element QName,
the result of <a href="#sec-Namespace-Processing">processing the namespaces</a>,
the result of <a href="#sec-Attribute-processing">processing the attributes</a>,
a close angle bracket (<code>&gt;</code>), traverse the child nodes of the element, an open angle bracket (<code>&lt;</code>),
a forward slash (<code>/</code>), the element QName, and a close angle bracket (<code>&gt;</code>).
If parameter <code>PrefixRewrite</code> is <code>sequential</code>, the QNames will be written with the changed prefixes.
<p>
</p></li>
<li><b>Attribute Nodes-</b> a space, the node's QName, an
equals sign, an open quotation mark
(double quote), the modified string value, and a close
quotation mark (double quote).
The string value of the node is modified by replacing all
ampersands (<code>&amp;</code>)
with <code>&amp;amp;</code>, all open angle brackets
(<code>&lt;</code>) with <code>&amp;lt;</code>,
all quotation mark characters with <code>&amp;quot;</code>, and
the whitespace characters
<code>#x9</code>, <code>#xA</code>, and <code>#xD</code>,
with character references.
The character references are written in uppercase
hexadecimal with no leading zeroes
(for example, <code>#xD</code> is represented by the
character reference <code>&amp;#xD;</code>).
<p>
If parameter <code>PrefixRewrite</code> is <code>sequential</code>, and the attribute name has a namespace prefix, the
prefix is changed to the rewritten prefix.
Also with prefix rewriting enabled, the attribute content is treated specially if the attribute is
among those enumerated for the <code>QNameAware</code> parameter. If so, the QName value of the
attribute is rewritten with the new prefix.
</p>
</li>
<li><b>Namespace Nodes-</b> Take the ordered list of
namespace nodes resulting from <a href="#sec-Namespace-Processing">namespace processing</a>,
and process each namespace node <code>N</code> in the
same way as an attribute node.
<p>
</p></li>
<li><b>Text Nodes-</b> the string value, except all
ampersands are replaced by <code>&amp;amp;</code>,
all open angle brackets (<code>&lt;</code>) are replaced by
<code>&amp;lt;</code>, all closing
angle brackets (<code>&gt;</code>) are replaced by
<code>&amp;gt;</code>, and all <code>#xD</code>
characters are replaced by <code>&amp;#xD;</code>.
<br>
If parameter <code>TrimTextNodes</code> is true and there is no <code>xml:space="preserve"</code>
declaration in context, trim the leading and trailing space. E.g. trim <code>&lt;A&gt; &lt;B/&gt;</code>
to <code>&lt;A&gt;&lt;B/&gt;</code>
and trim <code>&lt;A&gt; this is text &lt;/A&gt;</code> to <code>&lt;A&gt;this is text&lt;/A&gt;</code>.
<p> Note: The DOM parser might have split up a long text node into multiple adjacent text nodes,
some of which may be empty. Be aware when trimming whitespace in such cases; the net result
should be equivalent to doing so as if the adjacent text nodes were concatenated.
</p>
<p>
If parameter <code>PrefixRewrite</code> is <code>sequential</code>, and if the parent element node is among those enumerated for the <code>QNameAware</code>
parameter, then the QName value of the text node is rewritten with the new prefix.
</p>
</li>
<li><b>Processing Instruction (PI) Nodes-</b> The opening PI
symbol (<code>&lt;?</code>), the
PI target name of the node, a leading space and the string value if it is not empty, and the
closing PI symbol (<code>?&gt;</code>). If the string value is empty, then the leading space
is not added. Also, a trailing <code>#xA</code> is rendered after the closing PI symbol for
PI children of the root node with a lesser document order than the document element, and a
leading <code>#xA</code> is rendered before the opening PI symbol of PI children of the
root node with a greater document order than the document element.
<p>
</p></li>
<li><b>Comment Nodes-</b> Nothing if generating canonical XML without comments. For canonical
XML with comments, generate the opening comment symbol (<code>&lt;!--</code>), the string value
of the node, and the closing comment symbol (<code>--&gt;</code>). Also, a trailing <code>#xA</code>
is rendered after the closing comment symbol for comment children of the root node with a
lesser document order than the document element, and a leading <code>#xA</code> is rendered
before the opening comment symbol of comment children of the root node with a greater document order
than the document element. (Comment children of the root node represent comments outside of the
top-level document element and outside of the document type declaration).</li>
</ul>
<p>Note: although some XML models such as DOM don't distinguish namespace declarations from attributes, Canonicalization needs to treat them separately. In this document, attribute nodes that are actually namespace declarations are referred to as "namespace nodes"; other attributes are called "attribute nodes".</p>
</div>
<div id="sec-ExclusiveNeed" class="section">
<h3><span class="secno">2.4 </span>The Need for Exclusive XML Canonicalization</h3>
<p>In some cases, particularly for signed XML in protocol applications, there
is a need to canonicalize a subdocument in such a way that it is
substantially independent of its XML context. This is because, in protocol
applications, it is common to envelope XML in various layers of message or
transport elements, to strip off such enveloping, and to construct new
protocol messages, parts of which were extracted from different messages
previously received. If the pieces of XML in question are signed, they need
to be canonicalized in a way such that these operations do not break the
signature but the signature still provides as much security as can be
practically obtained.</p>
<div id="sec-Simple" class="section">
<h4><span class="secno">2.4.1 </span> A Simple Example</h4>
<p>As a simple example of the type of problem that changes in XML context can
cause for signatures, consider the following document:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword">&lt;n1:elem1</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://b.example"</span><span class="sh_keyword">&gt;</span>
content
<span class="sh_keyword">&lt;/n1:elem1&gt;</span></pre>
<p>this is then enveloped in another document:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword">&lt;n0:pdu</span> <span class="sh_type">xmlns:n0</span><span class="sh_symbol">=</span><span class="sh_string">"http://a.example"</span><span class="sh_keyword">&gt;</span>
<span class="sh_keyword">&lt;n1:elem1</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://b.example"</span><span class="sh_keyword">&gt;</span>
content
<span class="sh_keyword">&lt;/n1:elem1&gt;</span>
<span class="sh_keyword">&lt;/n0:pdu&gt;</span></pre>
<p>The first document above is in canonical form. But assume that document is
enveloped as in the second case. The subdocument with <code>elem1</code> as
its apex node can be extracted from this second case with an XPath expression
such as:</p>
<pre class="example sh_xml sh_sourceCode">/descendant::n1:elem1</pre>
<p>The result of performing inclusive canonicalization to the resulting xml subset is
the following (except for line wrapping to fit this document):</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword">&lt;n1:elem1</span> <span class="sh_type">xmlns:n0</span><span class="sh_symbol">=</span><span class="sh_string">"http://a.example"</span>
<span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://b.example"</span><span class="sh_keyword">&gt;</span>
content
<span class="sh_keyword">&lt;/n1:elem1&gt;</span></pre>
<p>Note that the <code>n0</code> namespace has been included by inclusive canonicalization
because it includes namespace context. This change would break a
signature over <code>elem1</code> based on the first version.</p>
</div>
<div id="sec-Enveloping" class="section">
<h4><span class="secno">2.4.2 </span>General Problems with re-Enveloping</h4>
<p>As a more complete example of the changes in canonical form that can occur
when the enveloping context of a document subset is changed, consider the
following document:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n0:local</span> <span class="sh_type">xmlns:n0</span><span class="sh_symbol">=</span><span class="sh_string">"foo:bar"</span> <span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n1:elem2</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.net"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n3:stuff</span> <span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></n1:elem2></span>
<span class="sh_keyword"></n0:local></span></pre>
<p>And the following which has been produced by changing the enveloping of
<code>elem2</code>:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n2:pdu</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.com"</span> <span class="sh_type">xmlns:n2</span><span class="sh_symbol">=</span><span class="sh_string">"http://foo.example"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n1:elem2</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.net"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n3:stuff</span> <span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></n1:elem2></span>
<span class="sh_keyword"></n2:pdu></span></pre>
<p>Assume an xml subset produced from each case by applying the following
XPath expression:</p>
<pre class="example sh_xml sh_sourceCode">/descendant::n1:elem2</pre>
<p>Applying inclusive canonicalization to the xml subset produced from the first document
yields the following serialization:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n1:elem2</span> <span class="sh_type">xmlns:n0</span><span class="sh_symbol">=</span><span class="sh_string">"foo:bar"</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.net"</span>
<span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n3:stuff></n3:stuff></span>
<span class="sh_keyword"></n1:elem2></span></pre>
<p>However, although <code>elem2</code> is represented by the same octet
sequence in both pieces of external XML above, the Canonical XML version of
<code>elem2</code> from the second case would be as follows:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n1:elem2</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.net"</span> <span class="sh_type">xmlns:n2</span><span class="sh_symbol">=</span><span class="sh_string">"http://foo.example"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n3:stuff</span> <span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">></n3:stuff></span>
<span class="sh_keyword"></n1:elem2></span></pre>
<p>Note that the change in context has resulted in lots of changes in the
subdocument as serialized by the inclusive canonicalization. In the first example, <code>n0</code> had
been included from the context and the presence of an identical
<code>n3</code> namespace declaration in the context had elevated that
declaration to the apex of the canonicalized form. In the second example,
<code>n0</code> has gone away but <code>n2</code> has appeared, and
<code>n3</code> is no longer elevated. But not all context
changes have an effect. In the second example, the presence of the <code>n1</code> prefix namespace declaration
has no effect because of the existing declaration at the <code>elem2</code>
node.</p>
<p>On the other hand, using Exclusive canonicalization the physical form of <code>elem2</code> as extracted by the XPath
expression above is as follows:</p>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n1:elem2</span> <span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://example.net"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n3:stuff</span> <span class="sh_type">xmlns:n3</span><span class="sh_symbol">=</span><span class="sh_string">"ftp://example.org"</span><span class="sh_keyword">></n3:stuff></span>
<span class="sh_keyword"></n1:elem2></span></pre>
<p>in both cases.</p>
</div>
</div>
<div id="sec-Namespace-Processing" class="section">
<h3><span class="secno">2.5 </span>Namespace Processing</h3>
<p>As part of the canonicalization process, while traversing the subtree, use the following algorithm to look at all the namespace declarations in an element, and decide which ones to output.</p>
<div id="sec-Namespace-Concepts" class="section">
<h4><span class="secno">2.5.1 </span>Namespace concepts</h4>
<p>The following concepts are used in Namespace processing: </p>
<dl>
<dt>Explicit and Implicit namespace declarations</dt>
<dd><p>In DOM, there is no special node for namespace
declarations, they are just present as regular attribute nodes. An "explicit" namespace declaration is an attribute node whose prefix is "xmlns" and whose localName is the prefix being declared. <br>
</p><p>DOM also allows declaring a namespace "implicitly", i.e. if a new DOM element or attribute is constructed
using the <code>createElementNS</code> and <code>createAttributeNS</code> methods, then DOM adds a namespace declaration
automatically when serializing the document.</p>
</dd>
<dt>Special namespaces</dt>
<dd>The "xml" and "xmlns" prefixes are reserved and have special behavior. See [<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>].
</dd>
<dt>Apex nodes</dt>
<dd>An apex node is an element node in a document subset having no element node ancestor in the document subset.</dd>
<dt>Default namespace</dt>
<dd>The default namespace is declared by <code>xmlns="..."</code>. To make the algorithm simpler this will be treated
as a namespace declaration whose prefix value is "" i.e. an empty string.</dd>
<dt>Visibly utilized</dt>
<dd>This concept is required for exclusive canonicalization. An element <code>E</code> in the document subset visibly utilizes a namespace declaration, i.e. a namespace prefix <code>P</code> and bound value <code>V</code>, if
any of the following conditions are true:
<ul>
<li>The element <code>E</code> itself has a qualified name that uses the prefix <code>P</code>.
(Note if an element does not have a prefix, that means it visibly utilizes the default namespace.)
</li>
<li>OR The element <code>E</code> is among those enumerated for the <code>QNameAware</code> parameter,
and the QName value of the element uses the prefix <code>P</code> (or, lacking a prefix,
it visibly utilizes the default namespace)
</li>
<li>OR The element <code>E</code> is among those enumerated for the <code>QNameAware</code> parameter,
and it is listed as an <code>XPathElement</code>. The value of the element is to be interpreted as
an XPath 1.0 expression, and any prefixes used in this XPath expression are considered to be visibly utilized.
</li>
<li>OR An attribute <code>A</code> of that element has a qualified name that uses the prefix
<code>P</code>, and that attribute is not in the exclusion list. (Note: unlike elements, if an
attribute doesn't have a prefix, that means it is a locally scoped attribute. It does NOT mean that
the attribute visibly utilizes the default namespace.)
</li>
<li>OR An attribute <code>A</code> of that element is among those enumerated for the <code>QNameAware</code> parameter,
and the QName value of the attribute uses the prefix <code>P</code> (or, lacking a prefix,
it visibly utilizes the default namespace)
</li>
</ul>
</dd>
</dl>
</div>
<div id="sec-Namespace-Algorithm" class="section">
<h4><span class="secno">2.5.2 </span>Namespace processing algorithm</h4>
<p>
<b>Step 1:</b> At first determine the namespaces to be output for an element <code>E</code>.
</p><ol>
<li>Find a list of namespace declarations that are in
scope for this element <code>E</code> by looking at both
implicit
and explicit namespace declarations in this element and
its ancestors. Include the default namespace declaration
if present.</li>
<li>If in this list, any of the namespace declarations has
already been output during the canonicalization of one of
the element
<code>E</code>'s ancestors, say
<code>E<sub>j</sub></code>, and has not been redeclared
since then to a different value,
i.e. not been redeclared by an element between
<code>E<sub>j</sub></code> and <code>E</code>, then remove
it from this list.</li>
<li>If this list contains the definition of the
<code>xml</code> prefix, remove it. <br>
Note: Canonical XML 2.0 never emits the declaration for
the <code>xml</code>
or <code>xmlns</code> prefixes. As mentioned in
[<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>] a valid XML document should never have the
declaration for <code>xmlns</code>, so Canonical
XML 2.0 should never encounter this declaration. Also a
valid XML document can optionally declare the
<code>xml</code> prefix, but if present
it must be bound to
<code>http://www.w3.org/XML/1998/namespace</code>. Canonical
XML 2.0 should ignore this declaration.</li>
<li>Return the list of namespace declarations left on the
list.</li>
</ol>
<p></p>
<p>
<b>Step 2:</b> For each of the prefixes check for visible utilization as follows
</p><ol>
<li>If <code>E</code> itself has a qualified name that
uses the prefix <code>P</code>, then <code>P</code>
is visibly utilized. Note if <code>E</code> does not have
a prefix, that means it visibly utilizes the default
namespace.
</li>
<li>If an attribute <code>A</code> of that element
<code>E</code> has a qualified name that uses the prefix
<code>P</code>, and that attribute is not in the exclusion
list, then <code>P</code> is visibly utilized. Note: unlike elements, if an
attribute doesn't have a prefix, that means it is a
locally scoped attribute. It does NOT mean that
the attribute visibly utilizes the default namespace.
</li>
<li>If there is a <code>QNameAware</code> parameter, check
whether <code>E</code> or any of its attributes is enumerated
in it as follows:
<ul>
<li>If there is an <code>Element</code> subchild, whose
<code>Name</code> and <code>NS</code> attributes match
<code>E</code>'s localname and namespace
respectively, then <code>E</code> is expected to have a
single text node child containing a QName. Extract the
prefix from this
QName, and consider this prefix as visibly utilized.
</li>
<li>If there is a <code>QualifiedAttr</code> subchild,
whose <code>Name</code> and <code>NS</code> attributes
match one of <code>E</code>'s qualified attribute's
localname and namespace respectively, then that
attribute is expected to contain a QName. Extract this
prefix from the QName and consider this
prefix as visibly utilized. </li>
<li>If there is an <code>UnqualifiedAttr</code>
subchild, whose <code>Name</code> attribute matches the name
of one of <code>E</code>'s unqualified attributes,
and its <code>ParentName</code> and
<code>ParentNS</code> attributes match <code>E</code>'s
localname and namespace
respectively, then that attribute is expected to contain
a QName. Extract this prefix from the QName and consider
this
prefix as visibly utilized. </li>
<li>If there is an <code>XPathElement</code> subchild,
whose <code>Name</code> and <code>NS</code> attributes
match <code>E</code>'s localname and namespace
respectively, then <code>E</code> is expected to have a
single text node child containing an XPath 1.0
expression. Extract the prefixes from this
XPath by using the following algorithm. All of these
extracted prefixes should be considered as visibly
utilized.
<ul>
<li>Search for single colons <code>:</code> in the
XPath expression, but do not consider single colons
inside quoted strings.
Double colons are used for axes, e.g. in
<code>self::node()</code> , "self:" is not a prefix,
but an axis name.</li>
<li>The prefix will be present just before the single
colon. Go backwards from the colon, skip whitespace,
and extract the prefix by collecting
characters until the first character that does not match <code>NCName</code>,
e.g. in <code>/soap : Body</code>, extract
the "soap".
The <code>NCName</code> production is defined in
[<cite><a class="bibref" rel="biblioentry" href="#bib-XML-NAMES">XML-NAMES</a></cite>]. </li>
</ul>
This can be evaluated using perl style regular
expressions as follows. Note the regular expressions
here are provided as an example only, they are not
normative.
<ol>
<li> First remove all single quoted and double
quoted strings from the XPath, because
prefixes cannot be present there. i.e. do a substitute
of <code>s/"[^"]*"//g</code>
and <code>s/'[^']*'//g</code>. Removing
the quoted string
eliminates false positives in the next step.</li>
<li>In the resultant string search for single colons
and get the word just before the colon, i.e. search for
a match for
<code>m/([\w-_.]+)?\s*:(?!:)/</code>.
Note that prefixes follow the NCName production,
i.e. they consist of alphanumeric characters, hyphens, underscores
or dots,
but cannot start with a digit, hyphen or dot. In an
NCName, the allowed alphanumeric characters are not just
ASCII, but any Unicode alphanumeric characters.
However the regular expression provided here is a very
simplified form of NCName production.
</li>
</ol>
</li>
<li>
If the <code>PrefixRewrite</code> parameter is set to
<code>sequential</code>, each of the prefixes found in
the above steps would need to be replaced
by a new prefix. For efficiency, consider
combining this searching for prefixes step with the
subsequent replacing prefixes step.
</li>
</ul>
</li></ol>
<b>Step 3:</b> If the <code>PrefixRewrite</code> parameter is set to a value other than "none", then compute new prefixes for all the namespace declarations in this list, as follows:
<ul>
<li>For <code>PrefixRewrite="sequential"</code> sort this list of namespace declarations by URI. Then assign a new prefix value "nN" to each prefix, incrementing the value of N for every prefix. The counter should be set to 0 in the beginning of the canonicalization (e.g. if the value of this counter was 5 when the traversal reached this element, and this element had 3 prefixes to be output, then use the prefixes "n5", "n6", "n7" and set the counter to 8 after that). </li>
</ul>
Note: with prefix rewriting the default namespace is also rewritten into a "nN" style prefix.
<p></p>
<p>
Note: with exclusive canonicalization, namespace declarations are output only when they are utilized; this may lead to one declaration being output multiple times,
and it may be rewritten to a different prefix every time, as shown in the example below.
</p>
<p><b>Step 4:</b> Sort this list of namespaces as follows:
<br>In case of <code>PrefixRewrite="none"</code> sort the namespace declarations in lexicographic (ascending) order
of prefixes (the default namespace declaration has no prefix, so it is lexicographically least).
<br>In case of <code>PrefixRewrite="sequential"</code> sort them in ascending order of namespace URI.
</p>
<p><b>Step 5:</b> Output each of these namespace nodes, as specified in the <a href="#sec-Processing-Model">Processing model</a>.</p>
</div>
<div id="sec-ExcCanonicalization-Example" class="section">
<h4><span class="secno">2.5.3 </span>Example of exclusive canonicalization with prefix rewriting</h4>
The following XML snippet will be used to demonstrate the various options of prefix rewriting.
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><wsse:Security</span>
<span class="sh_type">xmlns:wsse</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd"</span>
<span class="sh_type">xmlns:wsu</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><wsse:UserName</span> <span class="sh_type">wsu:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i1"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></wsse:UserName></span>
<span class="sh_keyword"><wsse:Timestamp</span> <span class="sh_type">wsu:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i2"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></wsse:Timestamp></span>
<span class="sh_keyword"></wsse:Security></span></pre>
<div id="sec-Example-PrefixRewriteNone" class="section">
<h5><span class="secno">2.5.3.1 </span>With <code>PrefixRewrite="none"</code></h5>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><wsse:Security</span>
<span class="sh_type">xmlns:wsse</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><wsse:UserName</span>
<span class="sh_type">xmlns:wsu</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd"</span>
<span class="sh_type">wsu:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i1"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></wsse:UserName></span>
<span class="sh_keyword"><wsse:Timestamp</span>
<span class="sh_type">xmlns:wsu</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd"</span>
<span class="sh_type">wsu:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i2"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></wsse:Timestamp></span>
<span class="sh_keyword"></wsse:Security></span></pre>
Note how the "wsu" prefix declaration is present in <code>wsse:Security</code>, but is not utilized.
So exclusive canonicalization will "push the declaration down" into
<code><UserName></code> and <code><Timestamp></code> where it is really used,
i.e. the wsu declaration will be output twice, once in
<code><UserName></code> and another in <code><Timestamp></code>, as shown above.
</div>
<div id="sec-Example-PrefixRewriteSeq" class="section">
<h5><span class="secno">2.5.3.2 </span>With <code>PrefixRewrite="sequential"</code></h5>
<pre class="example sh_xml sh_sourceCode"><span class="sh_keyword"><n0:Security</span>
<span class="sh_type">xmlns:n0</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><n0:UserName</span>
<span class="sh_type">xmlns:n1</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd"</span>
<span class="sh_type">n1:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i1"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></n0:UserName></span>
<span class="sh_keyword"><n0:Timestamp</span>
<span class="sh_type">xmlns:n2</span><span class="sh_symbol">=</span><span class="sh_string">"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd"</span>
<span class="sh_type">n2:Id</span><span class="sh_symbol">=</span><span class="sh_string">"i2"</span><span class="sh_keyword">></span>
...
<span class="sh_keyword"></n0:Timestamp></span>
<span class="sh_keyword"></n0:Security></span></pre>
Now observe what happens with sequential prefix rewriting: the wsu namespace is emitted twice, but each time with a different prefix, "n1" and "n2", as shown above.
</div>
</div>
</div>
<div id="sec-Attribute-processing" class="section">
<h3><span class="secno">2.6 </span>Attribute processing</h3>
<p>Note: namespace declarations are not considered as attributes, they are processed separately as namespace nodes.
</p>
<p> Processing the attributes of an element <code>E</code> consists of the following steps:
</p><ul>
<li>Ignore any attributes that are present in the exclusion list. However note that namespace nodes cannot be excluded. </li>
<li>Sort all the attributes in increasing lexicographic order with
namespace URI as the primary key and local name as the secondary key
(an empty namespace URI is lexicographically least).</li>
<li>If it is a qualified attribute and the <code>PrefixRewrite</code> parameter is <code>sequential</code>, modify the QName
of the attribute name to use the new prefix. i.e. one of <code>n0</code>, <code>n1</code>, <code>n2</code>, ... etc. Do not do this for the <code>xml</code>
prefix, as this is not changed during prefix rewriting.</li>
<li>If the attribute is among those enumerated
by the <code>QNameAware</code> parameter, then change the QName in that attribute value to use the new prefix.
</li>
</ul>
</div>
</div>
<div id="sec-Use" class="section">
<!--OddPage--><h2><span class="secno">3. </span>Use of Canonical XML 2.0 in XML Security</h2>
<div id="sec-Use-in-Signature" class="section">
<h3><span class="secno">3.1 </span>Use of Canonical XML 2.0 in XML Signature 2.0</h3>
<p>Canonical XML 2.0 may be used as a canonicalization
algorithm in XML Digital Signature [<cite><a class="bibref" rel="biblioentry" href="#bib-XMLDSIG-CORE2">XMLDSIG-CORE2</a></cite>], via the <code><ds:CanonicalizationMethod></code>.</p>
<dl>
<dt>Identifier:</dt>
<dd><a href="http://www.w3.org/2010/xml-c14n2">http://www.w3.org/2010/xml-c14n2</a>
</dd>
</dl>
<p>Canonical XML 2.0 supports a set of parameters, as enumerated in <a href="#sec-Canonicalization-Parameters">
Canonicalization Parameters</a>. All parameters are optional and have default values. When used in conjunction with
the <code><ds:CanonicalizationMethod></code> element, each parameter is expressed with a dedicated child element. They can be present in any order.
A schema definition for each parameter follows:
</p>
<pre class="sh_xml sh_sourceCode"> Schema Definition:
<span class="sh_keyword"><schema</span> <span class="sh_type">xmlns:xs</span><span class="sh_symbol">=</span><span class="sh_string">"http://www.w3.org/2001/XMLSchema"</span>
<span class="sh_type">xmlns</span><span class="sh_symbol">=</span><span class="sh_string">"http://www.w3.org/2010/xml-c14n2"</span>
<span class="sh_type">targetNamespace</span><span class="sh_symbol">=</span><span class="sh_string">"http://www.w3.org/2010/xml-c14n2"</span>
<span class="sh_type">version</span><span class="sh_symbol">=</span><span class="sh_string">"0.1"</span> <span class="sh_type">elementFormDefault</span><span class="sh_symbol">=</span><span class="sh_string">"qualified"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"IgnoreComments"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:boolean"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"TrimTextNodes"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:boolean"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"PrefixRewrite"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:simpleType></span>
<span class="sh_keyword"><xs:restriction</span> <span class="sh_type">base</span><span class="sh_symbol">=</span><span class="sh_string">"xs:string"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:enumeration</span> <span class="sh_type">value</span><span class="sh_symbol">=</span><span class="sh_string">"none"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:enumeration</span> <span class="sh_type">value</span><span class="sh_symbol">=</span><span class="sh_string">"sequential"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:enumeration</span> <span class="sh_type">value</span><span class="sh_symbol">=</span><span class="sh_string">"derived"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:restriction></span>
<span class="sh_keyword"></xs:simpleType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"QNameAware"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:complexType></span>
<span class="sh_keyword"><xs:choice</span> <span class="sh_type">maxOccurs</span><span class="sh_symbol">=</span><span class="sh_string">"unbounded"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">ref</span><span class="sh_symbol">=</span><span class="sh_string">"Element"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">ref</span><span class="sh_symbol">=</span><span class="sh_string">"XPathElement"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">ref</span><span class="sh_symbol">=</span><span class="sh_string">"QualifiedAttr"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">ref</span><span class="sh_symbol">=</span><span class="sh_string">"UnqualifiedAttr"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:choice></span>
<span class="sh_keyword"></xs:complexType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"Element"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:complexType></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"Name"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:NCName"</span> <span class="sh_type">use</span><span class="sh_symbol">=</span><span class="sh_string">"required"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"NS"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:anyURI"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:complexType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"QualifiedAttr"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:complexType></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"Name"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:NCName"</span> <span class="sh_type">use</span><span class="sh_symbol">=</span><span class="sh_string">"required"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"NS"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:anyURI"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:complexType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"UnqualifiedAttr"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:complexType></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"Name"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:NCName"</span> <span class="sh_type">use</span><span class="sh_symbol">=</span><span class="sh_string">"required"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"ParentName"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:NCName"</span> <span class="sh_type">use</span><span class="sh_symbol">=</span><span class="sh_string">"required"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"ParentNS"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:anyURI"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:complexType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"><xs:element</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"XPathElement"</span><span class="sh_keyword">></span>
<span class="sh_keyword"><xs:complexType></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"Name"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:NCName"</span> <span class="sh_type">use</span><span class="sh_symbol">=</span><span class="sh_string">"required"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"><xs:attribute</span> <span class="sh_type">name</span><span class="sh_symbol">=</span><span class="sh_string">"NS"</span> <span class="sh_type">type</span><span class="sh_symbol">=</span><span class="sh_string">"xs:anyURI"</span><span class="sh_keyword">/></span>
<span class="sh_keyword"></xs:complexType></span>
<span class="sh_keyword"></xs:element></span>
<span class="sh_keyword"></schema></span></pre>
<p>
XML Signature 2.0 <em class="rfc2119" title="must">must</em> implicitly pass in the <code>dsig2:IncludedXPath</code> and <code>dsig2:ExcludedXPath</code> as QNameAware, even if they are
not explicitly present in the <code>Signature</code> element.
</p>
</div>
<div id="sec-Use-in-Encryption" class="section">
<h3><span class="secno">3.2 </span>Use of Canonical XML 2.0 in XML Encryption 1.1</h3>
Canonical XML 2.0 may also be used in XML Encryption 1.1, with changes as noted in the non-normative section
"Serializing XML" of XML Encryption 1.1 [<cite><a class="bibref" rel="biblioentry" href="#bib-XMLENC-CORE1">XMLENC-CORE1</a></cite>].
</div>
</div>
<div id="sec-Pseudocode" class="section">
<!--OddPage--><h2><span class="secno">4. </span>Pseudocode</h2>
<p>This section presents the entire canonicalization algorithm in pseudo code. It is not normative.</p>
<div id="sec-pseudocode-canonicalize" class="section">
<h3><span class="secno">4.1 </span>canonicalize()</h3>
Top level canonicalize function.
<pre class="code"> canonicalize(list of subtree, list of exclusion elements and attributes, properties)
{
put the exclusion elements and attributes in hash table for easier lookup
sort the multiple subtrees by document order
for each subtree
canonicalizeSubtree(subtree)
}
</pre>
</div>
<div id="sec-pseudocode-canonicalizeSubtree" class="section">
<h3><span class="secno">4.2 </span>canonicalizeSubtree()</h3>
<p>Canonicalize an individual subtree.
</p>
<p>For efficiency the routines below maintain two contexts
</p><ul>
<li><b>namespaceContext:</b> <code>namespaceContext</code> is a hash table of <code>prefix -> (uri, hasBeenOutput, newPrefix)</code>.
<ul>
<li><em>uri</em> is the namespace URI that this prefix maps to.</li>
<li><em>hasBeenOutput</em> is a boolean flag that indicates whether that
namespace declaration has been output.</li>
<li><em>newPrefix</em> is the rewritten value of the prefix.</li>
</ul>
At the beginning of the
canonicalization, initialize this to contain only one entry: the default namespace mapped to an empty URI, with hasBeenOutput = true.
A prefix value of "" can be used to denote the default namespace.
</li>
<li><b>xmlattribContext:</b> <code>xmlattribContext</code> is a hash table of <code>name -> value</code>.</li>
</ul>
<pre class="code"> canonicalizeSubtree(node)
{
initialize namespaceContext to contain the default prefix, mapped
to an empty URI, and hasBeenOutput to true
if (node is the document node or a document root element)
{
// (whole document is being processed, no ancestors to worry about)
call processNode(node, namespaceContext)
}
else
{
starting from the element, walk up the tree to collect a list of
ancestors
for each of these ancestor elements starting with the document
root, but not including the element itself
addNamespaces(ancestorElem, namespaceContext)
initialize xmlattribContext to empty
for each of these ancestor elements starting with the document
root, and also including the element itself
addXMLAttributes(ancestorElem, xmlattribContext)
if there are any attributes in xmlattribContext
temporarily add/replace these XML attributes in node
processNode(node, namespaceContext)
restore the original XML attributes
}
}
</pre>
</div>
<div id="sec-pseudocode-processNode" class="section">
<h3><span class="secno">4.3 </span>processNode()</h3>
Redirect to appropriate node processing function
<pre class="code"> processNode(node, namespaceContext)
{
call the appropriate function - processDocument, processElement,
processTextNode, ... depending on the node type.
}
</pre>
</div>
<div id="sec-pseudocode-processDocument" class="section">
<h3><span class="secno">4.4 </span>processDocument()</h3>
Process the Document Node.
<pre class="code"> processDocument(document, namespaceContext)
{
Loop through all child nodes and call
processNode(child, namespaceContext)
}
</pre>
</div>
<div id="sec-pseudocode-processElement" class="section">
<h3><span class="secno">4.5 </span>processElement()</h3>
Process an Element Node.
<pre class="code"> processElement(element, namespaceContext)
{
if this element exists in the exclusion hash table
return
make a copy of xmlattribContext and namespaceContext
//(by copying, any changes made can be undone when this function returns)
nsToBeOutputList = processNamespaces(element, namespaceContext)
output('&lt;')
if PrefixRewrite is sequential or digest, temporarily modify the
QName to have the new prefix value as determined from the namespaceContext
output(element QName)
for each of the namespaces in the nsToBeOutputList
output this namespace declaration
sort the non-namespace attributes by URI first, then by attribute name.
output each of these attributes with original QName or a modified QName if PrefixRewrite is sequential or digest
output('&gt;')
Loop through all child nodes and call
processNode(child, namespaceContext)
output('&lt;/')
output(element QName)
output('&gt;')
restore xmlattribContext and namespaceContext
}
</pre>
</div>
<div id="sec-pseudocode-processText" class="section">
<h3><span class="secno">4.6 </span>processText()</h3>
Process a Text Node.
<pre class="code"> processText(textNode)
{
if this text node is outside document root
return
in the text replace
all ampersands by &amp;amp;,
all open angle brackets (&lt;) by &amp;lt;,
all closing angle brackets (&gt;) by &amp;gt;,
and all #xD characters by &amp;#xD;.
If TrimTextNodes is true and there is no xml:space="preserve" declaration in scope
trim leading and trailing space
output(text)
}
</pre>
<p>Note: The DOM parser might have split up a long text node into multiple adjacent text nodes,
some of which may be empty. In that case, be careful when trimming the leading and trailing space:
the net result should be the same as if the adjacent text nodes were concatenated into one.</p>
</div>
<div id="sec-pseudocode-processPI" class="section">
<h3><span class="secno">4.7 </span>processPI()</h3>
Process a Processing Instruction (PI) Node.
<pre class="code"> processPI(piNode)
{
if after document element
output('#xA')
output('&lt;?')
output(the PI target name of the node)
output(a leading space)
output(the PI string value)
output('?&gt;')
if before document element
output('#xA')
}
</pre>
</div>
<div id="sec-pseudocode-processComment" class="section">
<h3><span class="secno">4.8 </span>processComment()</h3>
Process a Comment Node.
<pre class="code"> processComment(commentNode)
{
if ignoreComments
return
if after document element
output('#xA')
output('&lt;!--')
output(string value of node)
output('--&gt;')
if before document element
output('#xA')
}
</pre>
</div>
<div id="sec-pseudocode-addNamespaces" class="section">
<h3><span class="secno">4.9 </span>addNamespaces()</h3>
Add namespaces from this element to the namespace context. This function is called for every ancestor element, and also at every element of the subtrees (minus the exclusion elements).
<pre class="code"> addNamespaces(element, namespaceContext)
{
for each of the explicit and implicit namespace declarations in the element
{
if there is already a declaration for this prefix, and this
declaration is different from the existing declaration
overwrite the URI, and set hasBeenOutput to false
if there is no entry for this prefix
add an entry for this prefix with this URI, and set hasBeenOutput to false
}
}
</pre>
</div>
<div id="sec-pseudocode-processNamespaces" class="section">
<h3><span class="secno">4.10 </span>processNamespaces()</h3>
Process the list of namespaces for this element.
<pre class="code"> processNamespaces(element, namespaceContext)
{
addNamespaces(element, namespaceContext)
initialize nsToBeOutputList to empty list
for each prefix in the namespaceContext for which hasBeenOutput is false
{
if ExclusiveMode and this prefix is not in the inclusiveNamespacesList
{
if the prefix is visibly utilized by this element
add the prefix to the nsToBeOutputList and set
hasBeenOutput to true
}
else
add the prefix to the nsToBeOutputList and set hasBeenOutput to true
}
if (PrefixRewrite is none)
{
sort the nsToBeOutputList by the prefix
}
else if (PrefixRewrite is sequential)
{
sort the nsToBeOutputList by URI
assign new prefix values "nN" to each prefix in this
nsToBeOutputList where N represents an incremented counter value,
i.e. n0, n1, n2, ...
// the counter should be set to 0 in the beginning of the canonicalization
// note: prefix numbers are assigned in the order that the
// prefixes are present in nsToBeOutputList
}
else if (PrefixRewrite is digest)
{
sort the nsToBeOutputList by URI
assign new prefix values "nD" to each prefix in this nsToBeOutputList where
D represents the SHA1 digest of the URI represented as a hex string
}
return nsToBeOutputList
}
</pre>
</div>
<div id="sec-pseudocode-addXMLAttributes" class="section">
<h3><span class="secno">4.11 </span>addXMLAttributes()</h3>
Combine/modify the 3 special xml attributes: xml:lang, xml:space and xml:base.
<pre class="code"> addXMLAttributes(element, xmlattribContext)
{
for each of the xml: attributes of this element
{
case xml:lang attribute
if XmlAncestors is inherit then store this attribute value, else do nothing
case xml:space attribute
if XmlAncestors is inherit then store this attribute value, else do nothing
case xml:base attribute
if XmlAncestors is inherit, and there is a previous value of xml:base
then do a "join-URI-References" to combine the new value and the old value
else do nothing
}
}
</pre>
</div>
</div>
<div id="sec-Output-Rules" class="section">
<!--OddPage--><h2><span class="secno">5. </span>Output rules</h2>
<ul>
<li>The document is encoded in UTF-8.</li>
<li>Line breaks are normalized to #xA on input (automatically done by a DOM parser).</li>
<li>Attribute values are normalized.</li>
<li>Character and parsed entity references are replaced.</li>
<li>CDATA sections are replaced with their character content.</li>
<li>The XML declaration and document type declaration are removed.</li>
<li>Empty elements are converted to start-end tag pairs.</li>
<li>Whitespace outside of the document element and within start and end tags is normalized.</li>
<li>Attribute value delimiters are set to quotation marks (double quotes).</li>
<li>Special characters in attribute values and character content are replaced by character references.</li>
<li>Default attributes are added to each element. </li>
</ul>
</div>
<div id="sec-Processing-for-Streaming" class="section">
<!--OddPage--><h2><span class="secno">6. </span>Processing model for Streaming XML parsers</h2>
<p>Unlike DOM parsers, which represent an XML document as a tree of nodes, streaming parsers represent
an XML document as a stream of events like "start-element", "end-element", "text" etc. A document subset can
also be represented as a stream of events. This stream of events is in exactly the same order as a tree walk,
so the above canonicalization algorithm can also be used to canonicalize an event stream.
</p>
</div>
<div id="references" class="appendix section"><!--OddPage--><h2><span class="secno">A. </span>References</h2><p>Dated references below are to the latest known or appropriate edition of the referenced work. The referenced works may be subject to revision, and conformant implementations may follow, and are encouraged to investigate the appropriateness of following, some or all more recent editions or replacements of the works cited. It is in each case implementation-defined which editions are supported.</p><div id="normative-references" class="section"><h3><span class="secno">A.1 </span>Normative references</h3><dl class="bibliography"><dt id="bib-RFC2119">[RFC2119]</dt><dd>S. Bradner. <a href="http://www.ietf.org/rfc/rfc2119.txt"><cite>Key words for use in RFCs to Indicate Requirement Levels.</cite></a> March 1997. Internet RFC 2119. URL: <a href="http://www.ietf.org/rfc/rfc2119.txt">http://www.ietf.org/rfc/rfc2119.txt</a>
</dd><dt id="bib-XML-NAMES">[XML-NAMES]</dt><dd>Richard Tobin; et al. <a href="http://www.w3.org/TR/2009/REC-xml-names-20091208/"><cite>Namespaces in XML 1.0 (Third Edition).</cite></a> 8 December 2009. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2009/REC-xml-names-20091208/">http://www.w3.org/TR/2009/REC-xml-names-20091208/</a>
</dd><dt id="bib-XML10">[XML10]</dt><dd>C. M. Sperberg-McQueen; et al. <a href="http://www.w3.org/TR/2008/REC-xml-20081126/"><cite>Extensible Markup Language (XML) 1.0 (Fifth Edition).</cite></a> 26 November 2008. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2008/REC-xml-20081126/">http://www.w3.org/TR/2008/REC-xml-20081126/</a>
</dd><dt id="bib-XMLDSIG-CORE2">[XMLDSIG-CORE2]</dt><dd>Mark Bartel; John Boyer; Barb Fox et al. <a href="http://www.w3.org/TR/2011/WD-xmldsig-core2-20110421/"><cite>XML Signature Syntax and Processing Version 2.0</cite></a>. 21 April 2011. W3C Last Call Working Draft. URL: <a href="http://www.w3.org/TR/2011/WD-xmldsig-core2-20110421/">http://www.w3.org/TR/2011/WD-xmldsig-core2-20110421/</a>
</dd></dl></div><div id="informative-references" class="section"><h3><span class="secno">A.2 </span>Informative references</h3><dl class="bibliography"><dt id="bib-DOM-LEVEL-2-CORE">[DOM-LEVEL-2-CORE]</dt><dd>Arnaud Le Hors; et al. <a href="http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/"><cite>Document Object Model (DOM) Level 2 Core Specification.</cite></a> 13 November 2000. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/">http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/</a>
</dd><dt id="bib-URI">[URI]</dt><dd>T. Berners-Lee; R. Fielding; L. Masinter. <a href="http://www.ietf.org/rfc/rfc3986.txt"><cite>Uniform Resource Identifiers (URI): generic syntax.</cite></a> January 2005. Internet RFC 3986. URL: <a href="http://www.ietf.org/rfc/rfc3986.txt">http://www.ietf.org/rfc/rfc3986.txt</a>
</dd><dt id="bib-XML-C14N">[XML-C14N]</dt><dd>John Boyer. <a href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"><cite>Canonical XML Version 1.0.</cite></a> 15 March 2001. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">http://www.w3.org/TR/2001/REC-xml-c14n-20010315</a>
</dd><dt id="bib-XML-C14N11">[XML-C14N11]</dt><dd>John Boyer, Glenn Marcy. <a href="http://www.w3.org/TR/2008/REC-xml-c14n11-20080502/"><cite>Canonical XML Version 1.1.</cite></a> 2 May 2008. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2008/REC-xml-c14n11-20080502/">http://www.w3.org/TR/2008/REC-xml-c14n11-20080502/</a>
</dd><dt id="bib-XML-EXC-C14N">[XML-EXC-C14N]</dt><dd>Donald E. Eastlake 3rd; Joseph Reagle; John Boyer. <a href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/"><cite>Exclusive XML Canonicalization Version 1.0.</cite></a> 18 July 2002. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/">http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/</a>
</dd><dt id="bib-XML-PARSER-STAX">[XML-PARSER-STAX]</dt><dd>Christopher Fry; <a href="http://jcp.org/en/jsr/detail?id=173"><cite>JSR 173: Streaming API for XML for Java Specification</cite></a> 8th October 2003. v1.0 URL: <a href="http://jcp.org/en/jsr/detail?id=173">http://jcp.org/en/jsr/detail?id=173</a>
</dd><dt id="bib-XMLBASE">[XMLBASE]</dt><dd>Jonathan Marsh, Richard Tobin. <a href="http://www.w3.org/TR/2009/REC-xmlbase-20090128/"><cite>XML Base (Second Edition).</cite></a> 28 January 2009. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2009/REC-xmlbase-20090128/">http://www.w3.org/TR/2009/REC-xmlbase-20090128/</a>
</dd><dt id="bib-XMLDSIG-XPATH-FILTER2">[XMLDSIG-XPATH-FILTER2]</dt><dd>Merlin Hughes; John Boyer; Joseph Reagle. <a href="http://www.w3.org/TR/2002/REC-xmldsig-filter2-20021108/"><cite>XML-Signature XPath Filter 2.0.</cite></a> 8 November 2002. W3C Recommendation. URL: <a href="http://www.w3.org/TR/2002/REC-xmldsig-filter2-20021108/">http://www.w3.org/TR/2002/REC-xmldsig-filter2-20021108/</a>
</dd><dt id="bib-XMLENC-CORE1">[XMLENC-CORE1]</dt><dd>J. Reagle; D. Eastlake; F. Hirsch; T. Roessler. <a href="http://www.w3.org/TR/2011/CR-xmlenc-core1-20110303/"><cite>XML Encryption Syntax and Processing Version 1.1.</cite></a> 3 March 2011. W3C Candidate Recommendation. (Work in progress.) URL: <a href="http://www.w3.org/TR/2011/CR-xmlenc-core1-20110303/">http://www.w3.org/TR/2011/CR-xmlenc-core1-20110303/</a>
</dd></dl></div></div></body></html>