index.html 96 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
  <title>Protocol for Web Description Resources (POWDER): Grouping of Resources</title>
  <style type="text/css">
  li, dt, dd {margin-top: 1em;}
  ul, ol, dl {margin-top: 1em; margin-bottom: 1em;}
  ol ol {list-style-type: lower-alpha}
  ol ol ol {list-style-type: lower-roman}
  .comment {margin-left: 2em; font-style: italic;}
  .example {padding: 0.5em; background-color: rgb(204, 255, 204); border:thin dotted black; white-space:nowrap; overflow:auto}
    .oq {border-style: dotted; border-width: 1px; background-color:#ccffcc; padding:1em}
  table.vocab {margin: 0 auto; border-collapse:collapse; border:thin solid black; overflow:auto}
  td, th {border:thin solid black; padding:0.5em}
  caption {caption-side:bottom; padding-top:1em; margin:0 auto}
  .semext {padding: 0.5em; background-color:#cccccc; border:thin dotted black;}
  p.caption {font-weight:bold}
 .toc1 {padding:0 0 0.5em 0}
 .toc2 {padding:0 0 0.5em 1em}
 .toc3 {padding:0 0 0.5em 2em}
 .blockquote {margin:0 2em}
  </style>
  <link rel="stylesheet" type="text/css" href="http://www.w3.org/StyleSheets/TR/W3C-REC.css"/>
<!--  <link rel="stylesheet" type="text/css" href="http://www.w3.org/StyleSheets/TR/W3C-WD"/>-->

</head>
<body>
<div class="head">
<a href="http://www.w3.org/"><img height="48" width="72" alt="W3C" src="http://www.w3.org/Icons/w3c_home"/></a>
<h1 id="top">Protocol for Web Description Resources (POWDER): Grouping of Resources</h1>
<h2>W3C Recommendation 1 September 2009</h2>
<dl>
  <dt>This version</dt><dd><a href="http://www.w3.org/TR/2009/REC-powder-grouping-20090901/">http://www.w3.org/TR/2009/REC-powder-grouping-20090901/</a></dd>
  <dt>Latest version</dt><dd><a href="http://www.w3.org/TR/powder-grouping/">http://www.w3.org/TR/powder-grouping/</a></dd>
  <dt>Previous version</dt><dd><a href="http://www.w3.org/TR/2009/PR-powder-grouping-20090604/">http://www.w3.org/TR/2009/PR-powder-grouping-20090604/</a></dd>
</dl>
<dl>
<dt>Editors:</dt>
  <dd>Phil Archer, Institute of Informatics &amp; Telecommunications (IIT), NCSR &quot;Demokritos&quot; (formerly at FOSI)</dd>
  <dd>Andrea Perego, Universit&agrave; degli Studi dell'Insubria</dd>
  <dd>Kevin Smith, Vodafone Group R &amp; D</dd>
</dl>


<p>Please refer to the <a href="http://www.w3.org/2007/powder/powder-errata"><strong>errata</strong></a> for this document, which may include some normative corrections.</p>

<p>See also <a href="http://www.w3.org/2003/03/Translations/byTechnology?technology=powder-grouping"><strong>translations</strong></a>.</p>
<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &copy; 2009 <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>&reg;</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.org/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>
<hr />
</div>
<h2 id="abstract">Abstract</h2>
<p>The Protocol for Web Description Resources (POWDER) facilitates the publication of descriptions of multiple resources such as all those 
available from a Web site. This document describes how sets of IRIs can be defined such that descriptions or other data can be applied 
to the resources obtained by dereferencing IRIs that are elements of the set. IRI sets are defined as XML elements with relatively 
loose operational semantics. This is underpinned by the formal semantics of POWDER which include a semantic extension, defined 
separately. A GRDDL transform is associated with the POWDER namespace that maps the operational to the formal semantics.</p>

<h2 id="status">Status of this document</h2>
<p><em>This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the <a href="http://www.w3.org/TR/" shape="rect">W3C technical reports index</a> at http://www.w3.org/TR/.</em></p>

<p>This document is a W3C Recommendation that was developed by the <a href="http://www.w3.org/2007/powder/" shape="rect">POWDER Working Group</a>.</p>

<p>Please see the Working Group's <a href="http://www.w3.org/2007/powder/Group/features.html">implementation report</a> 
and <a href="http://lists.w3.org/Archives/Public/public-powderwg/2009Apr/0015.html">Disposition of Last Call Comments</a>.
The dispositions of comments received during previous calls are also <a href="http://lists.w3.org/Archives/Public/public-powderwg/2009Feb/0006.html">available</a>.
Changes since the <a href="http://www.w3.org/TR/2009/PR-powder-grouping-20090604/">previous version</a> of this 
document are minor in nature and are fully documented in the <a href="#change">Change log</a>.</p>

<p>Publication of this Recommendation is synchronized with several other documents:</p>
<ul>
  <li><a href="http://www.w3.org/TR/2009/REC-powder-dr-20090901/">POWDER: Description Resources</a> (Recommendation)</li>
  <li><a href="http://www.w3.org/TR/2009/REC-powder-formal-20090901/">POWDER: Formal Semantics</a> (Recommendation)</li>
  <li><a href="http://www.w3.org/TR/2009/NOTE-powder-primer-20090901/">POWDER: Primer</a> (Working Group Note)</li>
  <li><a href="http://www.w3.org/TR/2009/NOTE-powder-test-20090604/">POWDER: Test Suite</a> (Working Group Note)</li>
</ul>

<p>The W3C Membership and other interested parties are invited to review
the document and send comments to <a href="mailto:public-powderwg@w3.org">public-powderwg@w3.org</a> (with <a href="http://lists.w3.org/Archives/Public/public-powderwg/" shape="rect">public archive</a>).</p>

<p>This document has been reviewed by W3C Members, by software developers, and by other W3C groups and interested parties, and is endorsed by the Director as a W3C Recommendation. It is a stable document and may be used as reference material or cited from another document. W3C's role in making the Recommendation is to draw attention to the specification and to promote its widespread deployment. This enhances the functionality and interoperability of the Web.</p>

<p>This document was produced by a group operating under the <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5 February 2004 W3C Patent Policy</a>. W3C maintains a <a rel="disclosure" href="http://www.w3.org/2004/01/pp-impl/40243/status">public list of any patent disclosures</a> made in connection with the deliverables of the group; that page also includes instructions for disclosing a patent. An individual who has actual knowledge of a patent which the individual believes contains <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential">Essential Claim(s)</a> must disclose the information in accordance with <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section 6 of the W3C Patent Policy</a>. </p>

<h2 id="toc">Table of Contents</h2>
<div class="toc1">1 <a href="#intro">Introduction</a></div>
<div class="toc2">1.1 <a href="#design">Design Goals and Constraints</a></div>
<div class="toc2">1.2 <a href="#methOutline">Outline Methodology</a></div>
<div class="toc2">1.3 <a href="#operationalSemantics">Operational Semantics</a></div>
<div class="toc2">1.4 <a href="#formalSemantics">Formal Semantics</a></div>
<div class="toc1">2 <a href="#byIRI">Defining a Resource Set</a></div>
<div class="toc2">2.1 <a href="#byIRIcomp">Constraints on IRI Components</a></div>
<div class="toc3">2.1.1 <a href="#ports">IRI Constraints Referring to Ports: <code>includeports</code> and <code>excludeports</code></a></div>
<div class="toc3">2.1.2 <a href="#query">IRI Constraints Referring to Queries: <code>includequerycontains</code> and <code>excludequerycontains</code></a></div>
<div class="toc3">2.1.3 <a href="#idnCanon">IRI constraints referring to internationalized domain names: <code>includehosts/excludehosts </code></a></div>
<div class="toc3">2.1.4 <a href="#canon">IRI/URI Canonicalization</a></div>
<div class="toc3">2.1.5 <a href="#dataEnc">Data encoding</a></div>
<div class="toc2">2.2 <a href="#wild">Grouping using Wildcards: The <code>includeiripattern</code> and <code>excludeiripattern</code> Constraints</a></div>
<div class="toc2">2.3 <a href="#reMatch">Grouping by Regular Expression: The <code>includeregex</code> and <code>excluderegex</code> Constraints</a></div>
<div class="toc3">2.3.1 <a href="#safe">Safe Use of <code>includeregex</code></a></div>
<div class="toc2">2.4 <a href="#byIP">Grouping by IP Address</a></div>
<div class="toc2">2.5 <a href="#listing">Enumerating Elements of an IRI Set: the <code>includeresources</code> and <code>excluderesources</code> Constraints</a></div>
<div class="toc2">2.6 <a href="#conj-disj">Complex Sets: Negation, Conjunction and Disjunction</a></div>
<div class="toc1">3 <a href="#extension">Extension Mechanism</a></div>
<div class="toc2">3.1 <a href="#custom-iri">Extension Example: Custom IRI Patterns</a></div>
<div class="toc2">3.2 <a href="#siteStruct">Extension Example: Custom Site Structure</a></div>
<div class="toc2">3.3 <a href="#isan">Extension Example: ISAN</a></div>
<div class="toc1">4 <a href="#conformance">Conformance Criteria</a></div>
<div class="toc1">5 <a href="#section-References">References</a></div>
<div class="toc1">6 <a href="#ack">Acknowledgments</a></div>
<div class="toc1">7 <a href="#change">Change Log</a></div>
<div class="toc1">Appendix A <a href="#appA">Summary of POWDER Elements</a></div>

<h2 id="intro">1 Introduction</h2>
<p>The Protocol for Web Description Resources (POWDER) facilitates the  publication of descriptions of multiple resources such as all those available from a Web site. These descriptions are attributable to a named individual, organization or entity that may or may not be the creator of the described resources. This contrasts with more usual metadata that typically apply to a single resource, such as a specific document's title, which is usually provided by its author.</p>
<p>Description Resources (DRs) are described separately [<a href="#dr">DR</a>]. This document sets out how groups (i.e. sets) of resources may be defined, either for use in DRs or in other contexts. Set theory has been used throughout as it provides a well-defined framework that leads to unambiguous definitions. However, it is used solely to provide a formal version of what is written in the natural language text.</p>
<p>POWDER uses a limited set of XML elements to define sets of resources and these have relatively loose semantics. However, a GRDDL [<a href="#grddl">GRDDL</a>] transform is associated with the POWDER root namespace through which formal semantics are accessible as RDF/OWL. This is known as Semantic POWDER or POWDER-S. The details of the GRDDL transform and the formal semantics are defined separately [<a href="#formal">FORMAL</a>] and outlined in <a href="#formalSemantics">Section 1.4</a> below. The <a href="#usecases">use cases</a>, a <a href="#primer">primer</a>, <a href="#testsuite">test suite</a> and schema namespace documents complete the document set.</p>
<p>The POWDER schema namespace is <code>http://www.w3.org/2007/05/powder#</code> for which we use the prefix <code>wdr</code>. The POWDER-S namespace is <code>http://www.w3.org/2007/05/powder-s#</code> for which we use the prefix <code>wdrs</code>. All namespaces and prefixes used in this document are shown in the table below.</p>
<table id="table1" class="vocab">
  <caption style="margin:0">Table 1: Namespaces and prefixes used in this document</caption>
  <thead>
  <tr>
    <th>Prefix</th>
    <th>Namespace</th>
  </tr>
  </thead>
  <tbody>
  <tr>
    <td><code>wdr</code></td>
    <td><code>http://www.w3.org/2007/05/powder#</code></td>
  </tr>
  <tr>
    <td><code>wdrs</code></td>
    <td><code>http://www.w3.org/2007/05/powder-s#</code></td>
  </tr>
  <tr>
    <td><code>rdf</code></td>
    <td><code>http://www.w3.org/1999/02/22-rdf-syntax-ns#</code></td>
  </tr>
  <tr>
    <td><code>rdfs</code></td>
    <td><code>http://www.w3.org/2000/01/rdf-schema#</code></td>
  </tr>
  <tr>
    <td><code>owl</code></td>
    <td><code>http://www.w3.org/2002/07/owl#</code></td>
  </tr>
  <tr>
    <td><code>xsd</code></td>
    <td><code><span id="ns_typo">http://www.w3.org/2001/XMLSchema</span></code></td>
  </tr>
  <tr>
    <td><code>ex</code></td>
    <td>An arbitrary prefix used to denote an 'example vocabulary'</td>
  </tr>
  </tbody>
</table>
<p>In this document, the words MUST, MUST NOT, SHOULD, SHOULD NOT and MAY are to be interpreted as described in RFC&nbsp;2119 [<a href="#ref-rfc2119">RFC2119</a>].</p>
<p id="whitespace">POWDER makes substantial use of XML, the processing rules for which MUST be followed faithfully. The processing rules for 
<a href="http://www.w3.org/TR/REC-xml/#AVNormalize">attribute-value normalization</a> are particularly relevant when considering the white space
separated lists of values that occur in POWDER. A space-separated list is a string of which the items are separated by one or more space characters (in
any order). The string may also be prefixed or suffixed with zero or more of those characters. To obtain the values from a space-separated list user
agents MUST replace any sequence of space characters with a single #x20 character, dropping any leading or trailing #x20 character, and then
chopping the resulting string at each occurrence of a #x20 character, dropping that character in the process.</p>
<p>The (unqualified) terms POWDER, POWDER Document and Description Resource (DR) refer to operational representations and semantics. The term POWDER-S refers to documents and data that express the formal semantics of POWDER. Unqualified XML element names are in the POWDER (<code>wdr</code>) namespace.</p>

<h3 id="design">1.1 Design Goals and Constraints</h3>
<p>In designing a system to define sets of resources we have drawn on earlier work [<a href="#jo">Rabin</a>] carried out in the Web Content Label Incubator Activity [<a href="#wclxg">WCL-XG</a>],  and taken into account the following considerations.</p>
<ol>
  <li>It must be possible to define a set of resources, either by describing the characteristics of the IRIs of resources in the set, or by simply listing its elements.</li>
  <li>It must be possible to determine with certainty whether a given resource is or is not an element of the Resource Set, as long as the resource's IRI is known.</li>
  <li>The ease of creation of accurate and useful Resource Sets is important.</li>
  <li>It should be possible to write concise Resource Set definitions.</li>
  <li>Resource Set definitions must be easy to write, be comprehensible by humans and, as far as is possible, should avoid including or excluding resources unintentionally.</li>
  <li>It must be possible to create software that implements Resource Set definitions primarily using standard and commonly available components and specifically must not require the creation of custom parsing components.</li>
  <li>So far as is possible, use of processing resources should be minimized, especially by early detection of a match or failure to match.</li>
</ol>
<h3 id="methOutline">1.2 Outline Methodology</h3>
<p>Operationally, POWDER does not define resource sets; rather, it facilitates the definition of sets of IRIs (Internationalized Resource Identifiers) [<a href="#ref-iri">IRIS</a>], which can be used to denote resources in terms of their identifiers. We use the notion of IRIs instead of URIs [<a href="#ref-uri">URIS</a>] since IRIs are a superset of URIs. Therefore, an IRI set definition may denote a set of IRIs as well as a set of URIs.</p>
<p>Defining a resource set by specifying the characteristics that the identifiers of resources in the set share is clearly an indirect approach, albeit a very useful one in the real world. In a logical sense, the definition must be <em>interpreted</em> to arrive at the full set.</p>
<p>More formally, an IRI Set definition <var>D</var> denotes a set of IRIs <var>IS</var>&nbsp;=&nbsp;<var>D</var><sup><var>I</var></sup>, where <var>D</var><sup><var>I</var></sup> is the <em>interpretation</em> of <var>D</var>, i.e., the set of IRIs sharing the characteristics denoted by <var>D</var>.</p>
<p>We take this further and allow an IRI set definition to be built up in stages.</p>
<p>An IRI set <var>IS</var> is denoted by an IRI set definition <var>D</var><sub><var>IS</var></sub> in terms of one or more characteristics that the elements of the set have in common. Each characteristic is expressed by an <em>IRI constraint</em> <var>C</var>, and IRI constraints <var>C</var><sub>1</sub>, <var>C</var><sub>2</sub>, &hellip; <var>C</var><sub><var>n</var></sub> give rise to IRI set definitions <var>D</var><sub>1</sub>, <var>D</var><sub>2</sub>, &hellip; <var>D</var><sub><var>n</var></sub>, so that the complete IRI set definition <var>D</var><sub><var>IS</var></sub> comprises <var>D</var><sub>1</sub>, <var>D</var><sub>2</sub>, &hellip; <var>D</var><sub><var>n</var></sub>.</p>
<p>The IRI set <var>IS</var> is the intersection of the IRI sets denoted by the IRI set definitions in <var>D</var><sub><var>IS</var></sub>.</p>
<p>Formally:</p>
<blockquote>
<p><var>IS</var> = <var>D</var><sub><var>IS</var></sub><sup><var>I</var></sup> = <var>D</var><sub>1</sub><sup><var>I</var></sup> &cap; <var>D</var><sub>2</sub><sup><var>I</var></sup> &cap; &hellip; &cap; <var>D</var><sub><var>n</var></sub><sup><var>I</var></sup> = (<var>D</var><sub>1</sub> &and; <var>D</var><sub>2</sub> &and; &hellip; &and; <var>D</var><sub><var>n</var></sub>)<sup><var>I</var></sup>.</p>
</blockquote>
<p>For example, suppose that an IRI set <var>IS</var> is denoted by the following definitions:</p>
<ul>
<li><var>D</var><sub>1</sub>: &ldquo;the top level components of the host component of the IRI exactly match <code>example.org</code>&rdquo;</li>
<li><var>D</var><sub>2</sub>: &ldquo;the path component of the IRI begins with <code>/foo</code>&rdquo;</li>
</ul>
<p>Then, <var>D</var><sub><var>IS</var></sub> will be defined as follows: &ldquo;the top level components of the host component of the IRI exactly match <code>example.org</code>&rdquo; AND &ldquo;the path component of the IRI begins with <code>/foo</code>.&rdquo;</p>
<p>Whether the IRI of a specific resource <var>R</var>, known as the <strong id="candidateResource">candidate resource</strong>, is a member of IRI Set <var>IS</var> or not is determined by comparing its characteristics with those denoted by the set definitions used in <var>D</var><sub><var>IS</var></sub>. It must be an element of the intersection of the sets defined by the interpretation of <var>D</var><sub>1</sub>, <var>D</var><sub>2</sub>, &hellip;, <var>D</var><sub><var>n</var></sub> to be an element of <var>IS</var>. </p>
<p>If an IRI set definition contains no constraints, then its interpretation is by definition the empty set &empty;. Formally:</p>
<blockquote>
<p>Let <var>IS</var> be an IRI Set, and let <var>D</var><sub><var>IS</var></sub> be the set of IRI Set definitions denoting the IRIs in <var>IS</var>: if <var>D</var><sub><var>IS</var></sub> = &empty;, then <var>IS</var> = &empty;.</p>
</blockquote>

<h3 id="operationalSemantics">1.3 Operational Semantics</h3>
<p>The POWDER XML schema [<a href="#wdr">WDR</a>] defines the set of XML elements and attributes to be used for enforcing the operational semantics of an IRI set definition.</p>
<p>More precisely, we define an XML element <code><a href="#iriset">iriset</a></code> to take the place of the IRI set, and its child elements denote the set of IRI constraints <var>C</var><sub>1</sub>, <var>C</var><sub>2</sub>, &hellip;, <var>C</var><sub><var>n</var></sub>. The example reported in the previous section can therefore be written as follows:</p>
<div class="example" id="eg1-1">
<p class="caption">Example 1-1: A simple IRI Set definition</p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includepathstartswith&gt;/foo&lt;/includepathstartswith&gt;
&lt;/iriset&gt;
</pre>
</div>

<h3 id="formalSemantics">1.4 Formal Semantics</h3>
<p>The operational semantics described above are underpinned by formal semantics. A GRDDL [<a href="#grddl">GRDDL</a>] transform 
is associated with the POWDER namespace that allows the XML data to be rendered and processed as RDF/OWL with one important 
proviso &mdash; that a <strong>semantic extension</strong> is understood. Defined fully in the Formal Semantics document 
[<a href="#formal">FORMAL</a>], this allows a candidate resource's IRI to be matched against regular expressions that are values 
of an OWL data type property <code>wdrs:matchesregex</code> <span id="notmatch1">(or <code>wdrs:notmatchesregex</code> in 
the case of patterns that are to be excluded). <span id="ofres">An OWL class takes</span> the place of the IRI set 
and resources whose IRIs match all the property restrictions defined using <code>wdrs:matchesregex</code> and <code>wdrs:notmatchesregex</code> </span>are 
instances of that class. The regular expression syntax used is defined by XML schema as modified by XQuery 1.0 and 
XPath 2.0 Functions and Operators [<a href="#xqxp">XQXP</a>].</p>
<p>As shown in <a href="#eg1-1">Example 1-1</a> above, the POWDER XML elements generally take strings as values. These are converted 
into regular expressions as a first step in the GRDDL transform which renders POWDER documents in an intermediate format known as 
POWDER-BASE. It is POWDER-BASE that is then transformed into POWDER-S. For clarity, this two-stage process is not referred to in 
the main section of this document on defining a resource set which only presents POWDER and POWDER-S examples. POWDER-BASE 
is, however, an important part of the <a href="#extension">extension mechanism</a> of POWDER Resource Grouping. The Formal 
Semantics document gives full details of the transformation of all elements of POWDER documents to POWDER-BASE and POWDER-S.</p>
<p>The result of the GRDDL transformation on <a href="#eg1-1">Example 1-1</a> above is shown below.</p>
<div class="example" id="eg1-2">
<p class="caption">Example 1-2: The POWDER-S encoding of <a href="#eg1-1">Example 1-1</a></p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?(example\.org)(:([0-9]+))?\/&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?(\/foo)&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre>
</div>

<h2 id="byIRI">2 Defining a Resource Set</h2>
<p>A Resource Set is defined in terms of the IRIs of resources that are its members. Determining whether a candidate resource is, or is not, a member of the set, can therefore be done by comparing its IRI with the data in the set definition. Importantly, defining the Resource Set in terms of IRIs allows us to verify whether the candidate resource is in the set without having to fetch and parse it, or perform a DNS lookup, thus maximizing processing efficiency in many environments.</p>
<p id="oldandnew">We define a range of methods to support set definition by IRI, and provide support for methods defined in other Recommendations.</p>
<h3 id="byIRIcomp">2.1 Constraints on IRI Components</h3>
<p>The syntax of an IRI, as defined in RFC&nbsp;3987 <a href="#ref-iri">[IRIS]</a>, provides a generic framework for identification schemes that goes beyond what is demanded by the POWDER use cases [<a href="#usecases">USECASES</a>]. We therefore limit our work to IRIs with the syntax: <code>scheme://iuser@ihost:port/ipath?iquery#ifragment</code>, as shown below:</p>
<div class="example" id="uriComponents" style="width:52em; margin:0 auto">
<pre>
http://jdoe@www.example.com:1234/example1/example2?query=help#fragment
\  /   \  / \             / \  /\                / \        / \      /
 --     --   -------------   --  ----------------   --------   ------ 
  |      |         |          |         |              |        |
scheme iuser    ihost        port     ipath         iquery  ifragment
       info 
</pre>
</div>
<p>The following Regular Expression, elaborated from that offered in RFC&nbsp;3986 [<a href="#jo">Rabin</a>],  provides a means of splitting both URIs and IRIs of this type into their component parts.</p>
<div id="rabinsRegEx">
<pre>
<code>^(([^:/?#]+):)?(//((([^/?#]*)@)?([^/?#:]*)(:([^/?#]*))?))?([^?#]*)(\?([^#]*))?(#(.*))?</code>
</pre></div>
<p id="jjcValid">If the IRI of the candidate resource is valid, this yields the components as shown in <a href="#table2">Table 2</a> (strings that are not valid IRIs will inevitably lead to unpredictable results).</p>
<table class="vocab" id="table2">
  <caption>Table 2: Mapping between regular expression variables and IRI components</caption>
  <thead>
    <tr><th>Component</th><th>RE variable</th></tr>
  </thead>
  <tbody>
    <tr><td><code>scheme</code></td> <td><code>$2</code></td></tr>
    <tr><td><code>iuserinfo</code></td>  <td><code>$6</code></td></tr>
    <tr><td><code>ihost</code></td>  <td><code>$7</code></td></tr>
    <tr><td><code>port</code></td>   <td><code>$9</code></td></tr>
    <tr><td><code>ipath</code></td>  <td><code>$10</code></td></tr>
    <tr><td><code>iquery</code></td>     <td><code>$12</code></td></tr>
    <tr><td><code>ifragment</code></td>  <td><code>$14</code></td></tr>
  </tbody>
</table>
<p>For the scheme, ihost, port, ipath, and iquery IRI components we define corresponding IRI constraints, 
the value of most of which is a white space-separated list of strings, any one of which must match 
the relevant portion of the IRI of the candidate resource. 
<span id="tlr1">The exceptions are the iquery and ihost components, which we discuss further in sections 
<a href="#query">2.1.2</a> and <a href="#idnCanon">2.1.3</a></span>.</p>

<p>The iuserinfo and ifragment components are not used in POWDER IRI set definitions directly as it is felt that these may add a layer of unnecessary complexity with few practical applications. That said, it is important not to discard these components when processing the candidate resource's IRI. Furthermore, IRI sets may be defined using additional vocabularies as set out in <a href="#extension">Section 3</a>. That extension method, or the use of the <code>includeregex</code> and <code>excluderegex</code> properties (see <a href="#reMatch">Section 2.3</a> below), means that user info and fragments can be used in IRI set definitions if required.</p>
<p>Formally, an IRI set definition <var>D</var> is expressed by one or more IRI constraints of the form <var>C</var> = <var>IRI_component_matches</var>(?<var>x</var>, {<var>string</var><sub>1</sub> | <var>string</var><sub>2</sub> | &hellip; | <var>string</var><sub><var>n</var></sub>}), where ?<var>x</var> is a variable denoting the IRI component under consideration, and {<var>string</var><sub>1</sub> | <var>string</var><sub>2</sub> | &hellip; | <var>string</var><sub><var>n</var></sub>} denotes a set consisting either of string <var>string</var><sub>1</sub> OR <var>string</var><sub>2</sub> OR &hellip; OR <var>string</var><sub><var>n</var></sub>.</p>
<p>Any number of IRI constraints <var>C</var><sub>1</sub>, <var>C</var><sub>2</sub>, &hellip;, <var>C</var><sub><var>n</var></sub> can be declared, and, as stated in <a href="#methOutline">Section 1.2</a>, the overall IRI set is the intersection of the sets that can be interpreted from IRI set definitions corresponding to <var>C</var><sub><var>n</var></sub>. With some exceptions, each particular IRI constraint can only appear 0 or 1 times.</p>
<p>Strings are matched according to one of four rules:</p>
<ul>
<li><code>startsWith</code>, meaning that the IRI component starts with any of the strings listed in the value of the relevant IRI constraint;</li>
<li><code>endsWith</code>, meaning that the IRI component ends with any of the strings listed in the value of the relevant IRI constraint;</li>
<li><code>exact</code>, meaning that there is an exact match between the candidate IRI component and at least one of the strings listed in the value of the relevant IRI constraint;</li>
<li><code>contains</code>, meaning that at least one of the strings listed in the value of the relevant IRI constraint appears somewhere in the IRI component.</li>
</ul>
<p>Recognizing the great diversity of potential uses and set definition requirements, multiple IRI constraints are defined relating to the path component. Furthermore, for each constraint there is a &lsquo;negative&rsquo; constraint, that is, a constraint whose value is a list of strings that must not be present in the relevant IRI component.</p>
<table class="vocab" id="table3">
  <caption>Table 3: Basic IRI constraints used to define IRI sets. These and other elements introduced in 
    subsequent sections are summarized in the <a href="#appA">Appendix</a>.</caption>
  <thead>
  <tr>
    <th>IRI constraint</th>
    <th>IRI component</th>
    <th>Matching rule</th>
    <th>Negative constraint</th>
  </tr>
  </thead>
  <tbody>
  <tr>
    <td><code><a href="#includeschemes">includeschemes</a></code></td>
    <td><code>scheme</code></td>
    <td><code>exact</code></td>
    <td><code><a href="#excludeschemes">excludeschemes</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includehosts">includehosts</a></code></td>
    <td><code>ihost</code></td>
    <td><code>endsWith</code></td>
    <td><code><a href="#excludehosts">excludehosts</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includeexactpaths">includeexactpaths</a></code></td>
    <td rowspan="4"><code>ipath</code></td>
    <td><code>exact</code></td>
    <td><code><a href="#excludeexactpaths">excludeexactpaths</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includepathcontains">includepathcontains</a></code> <a href="#dDagger">&dagger;</a></td>
    <td><code>contains</code></td>
    <td><code><a href="#excludepathcontains">excludepathcontains</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includepathstartswith">includepathstartswith</a></code></td>
    <td><code>startsWith</code></td>
    <td><code><a href="#excludepathstartswith">excludepathstartswith</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includepathendswith">includepathendswith</a></code></td>
    <td><code>endsWith</code></td>
    <td><code><a href="#excludepathendswith">excludepathendswith</a></code></td>
  </tr>
  <tr>
    <td><code><a href="#includeports">includeports</a></code></td>
    <td><code>port</code></td>
    <td><code>exact</code></td>
    <td><code><a href="#excludeports">excludeports</a></code></td>
  </tr>
  </tbody>
</table>
<div class="blockquote" style="margin-top:1em"><a name="dDagger" id="dDagger">&dagger;</a> <code>includepathcontains</code> may appear any number of times within an IRI set definition, so that it is easy to create one in which multiple strings must be present in paths. This is in contrast to all other terms in <a href="#table3">Table 3</a> which can only occur 0 or 1 times, since the IRI of a candidate resource can only have one scheme, one host etc.</div>
<p>As a quick example, the set of all resources on <code>example.org</code>, whether fetched using specifically <code>http</code> or <code>https</code>, where the path component of their IRIs starts with <code>foo</code>, and where the path does not end with <code>.png</code> or <code>.jpg</code> is defined thus:</p>
<div class="example" id="eg2-1">
<p class="caption">Example 2-1: An IRI Set definition using four IRI constraints</p>
<pre>
&lt;iriset&gt;
  &lt;includeschemes&gt;http https&lt;/includeschemes&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includepathstartswith&gt;/foo&lt;/includepathstartswith&gt;
  &lt;excludepathendswith&gt;.png .jpg&lt;/excludepathendswith&gt;
&lt;/iriset&gt;
</pre>
</div>
<p>As outlined in <a href="#formalSemantics">Section 1.4</a>, the POWDER GRDDL transform maps the IRI constraints in <a href="#table3">Table 3</a> to regular expressions against which the candidate IRI can be matched. These are shown in <a href="#table4">Table 4</a> below where <var>var</var> means the value of the XML element following processing as set out in the formal semantics document [<a href="#formal">FORMAL</a>]. In brief this turns white space separated lists of strings into alternative values within the regular expression such that:</p>
<pre>
&lt;includehosts&gt;example.org 
              example.com
&lt;/includehosts&gt;</pre>
<p>becomes</p>
<p><code>(example\.org|example\.com)</code>.</p>
<table class="vocab" id="table4">
  <caption>Table 4: Template regular expressions for IRI constraints that take a white space separated list of values. See <a href="#reMatch">Section 2.3</a>
  for details of the meta character escaping used in these regular expressions.</caption>
  <thead>
  <tr>
    <th>IRI Constraint <br />(<code>include</code> / <code>exclude</code>&hellip;)</th>
    <th>Regular Expression</th>
  </tr>
  </thead>
  <tbody>
  <tr>
    <td><code>schemes</code></td>
    <td><code>^</code><strong><var>var</var></strong><code>\:\/\/</code></td>
  </tr>
  <tr>
    <td><code>hosts</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?</code><strong><var>var</var></strong><code>(\:([0-9]+))?\/</code></td>
  </tr>
  <tr>
    <td><code>ports</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)*[^\:\/\?\#\@]+\:<strong><var>var</var></strong>\/</code></td>
  </tr>
  <tr>
    <td><code>exactpaths</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?</code><strong><var>var</var></strong><code>($|\?|\#)</code></td>
  </tr>
  <tr>
    <td><code>pathcontains</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*</code><strong><var>var</var></strong><code>[^\?\#]*[\?\#]?</code></td>
  </tr>
  <tr>
    <td><code>pathstartswith</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?</code><strong><var>var</var></strong></td>
  </tr>
  <tr>
    <td><code>pathendswith</code></td>
    <td><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*</code><strong><var>var</var></strong><code>($|\?|\#)</code></td>
  </tr>
  </tbody>
</table>
<p>These template regular expressions may be useful in processing POWDER documents directly but other methods of determining whether a candidate IRI does or does not match a particular constraint are equally valid.</p>
<p><a href="#eg2-2">Example 2-2</a> below uses the regular expressions from <a href="#table4">Table 4</a> in the POWDER-S version of <a href="#eg2-1">Example 2-1</a>.</p>
<div class="example" id="eg2-2">
<p class="caption">Example 2-2: The IRI Set defined in <a href="#eg2-1">Example 2-1</a> encoded in POWDER-S</p>
<pre>
1  &lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
2    &lt;owl:equivalentClass&gt;
3      &lt;owl:Class&gt;
4        &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
5          &lt;owl:Restriction&gt;
6            &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
7            &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^(http|https)\:\/\/&lt;/owl:hasValue&gt;
8          &lt;/owl:Restriction&gt;
9          &lt;owl:Restriction&gt;
10           &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
11           &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?(example\.org)(\:([0-9]+))?\/&lt;/owl:hasValue&gt;
12         &lt;/owl:Restriction&gt;
13         &lt;owl:Restriction&gt;
14           &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
15           &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?(\/foo)&lt;/owl:hasValue&gt;
16         &lt;/owl:Restriction&gt;
17         &lt;owl:Restriction&gt;
18           &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#notmatchesregex&quot; /&gt;
19           &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*(\.png|\.jpg)($|\?|\#)&lt;/owl:hasValue&gt;
20         &lt;/owl:Restriction&gt;
21       &lt;/owl:intersectionOf&gt;
22     &lt;/owl:Class&gt;
23   &lt;/owl:equivalentClass&gt;
24 &lt;/owl:Class&gt;
</pre>
</div>
<p id="notmatch2">Note the use of <code>notmatchesregex</code> in line 18 to encode the <code>excludepathendswith</code> element.</p>

<h4 id="ports">2.1.1 IRI Constraints Referring to Ports: <code>includeports</code> and <code>excludeports</code></h4>
<p>Although ports are clearly integers, POWDER treats them as a string in the same way as the other constraints in <a href="#table3">Table 3</a>. Port ranges (such as 80-100) are not supported but note that the value of <code>includeports</code> and <code>excludeports</code> is a white space separated list so that multiple ports may be enumerated.</p>

<h4 id="query">2.1.2 IRI Constraints Referring to Queries: <code>includequerycontains</code> and <code>excludequerycontains</code></h4>
<p>Query strings typically contain a series of name-value pairs separated by ampersands thus:</p>
<p><code>?name<sub>1</sub>=value<sub>1</sub>&amp;name<sub>2</sub>=value<sub>2</sub></code></p>
<p>These are usually acted on by the server to generate content in real time and the order of the name-value pairs is unimportant. For practical purposes <code>?name<sub>1</sub>=value<sub>1</sub>&amp;name<sub>2</sub>=value<sub>2</sub></code> is equivalent to <code>?name<sub>2</sub>=value<sub>2</sub>&amp;name<sub>1</sub>=value<sub>1</sub></code>. As a result, a significant amount of processing must be done to determine whether a candidate IRI is an element of an IRI set that includes either the <code><a href="#includequerycontains">includequerycontains</a></code> or <code><a href="#excludequerycontains">excludequerycontains</a></code> IRI Constraints.</p>
<p id="cardinality">To keep such processing manageable, the <code>includequerycontains</code> and <code>excludequerycontains</code> 
IRI Constraints take a <strong>single value</strong> not a white space separated list of values. Including two or more 
name/value pairs in an <code>includequerycontains</code> or <code>excludequerycontains</code> means that <em>all</em> those 
pairs must be present in the query string for the candidate IRI to be a member of the set. 
<a href="#conj-disj">Section 2.6</a> includes a further discussion on creating unions of multiple IRI sets 
where it is necessary to create a set defined in terms of alternative name/value pairs.</p>
<p>By default, the POWDER GRDDL transform assumes that the delimiting character in a query string is the ampersand (<code>&amp;</code>). However, an alternative delimiter can be specified as the value for the <code>delimiter</code> attribute on <code>includequerycontains</code> and <code>excludequerycontains</code> constraints. <a href="#eg2-3">Example 2-3</a> below shows this.</p>
<div class="example" id="eg2-3">
<p class="caption">Example 2-3: An IRI Set definition using <code>includequerycontains</code></p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;socialnetwork.example.com&lt;/includehosts&gt;
  &lt;includequerycontains delimiter=&quot;,&quot;&gt;id=abcdef,group=12345&lt;/includequerycontains&gt;
&lt;/iriset&gt;
</pre></div>
<p>The GRDDL transform splits the value provided for the <code>includequerycontains</code> or <code>excludequerycontains</code> IRI Constraints into its constituent pairs at the delimiting character and the presence of each name-value pair within the candidate IRI is then tested for independently. The template regular expression for such a test is:</p>
<p><code>\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*\?([^\#]*<strong><var>d</var></strong>)?<strong><var>q</var></strong>(<strong><var>d</var></strong>|$)</code></p>
<p>Where <var>d</var> is the delimiting character and <var>q</var> is the name-value pair. The Formal Semantics document [<a href="#formal">FORMAL</a>] sets this out in more detail.</p>
<p>An important consequence of this processing model is that within the query string, only complete name-value pairs or value-less parameters are matched. More precisely, only complete query conjuncts in the query string are matched. As complete query conjuncts we consider any minimal substring of the query string that has <code>?</code> or <strong><var>d</var></strong> before the first character and <strong><var>d</var></strong> or <code>$</code> after the last character, where, as in the template regular expression, <strong><var>d</var></strong> is the query delimiter and <code>$</code> is the end-of-string.</p>
<p>If the value of <code>includequerycontains</code> in <a href="#eg2-3">Example 2-3</a> were changed to simply <code>abcdef</code> (rather than <code>id=abcdef,group=12345</code>) then:</p>
<ul>
  <li><code>http://socialnetwork.example.com/?group=12345,id=abcdef</code> <strong>would not</strong> match, even though the query does contain the string <code>abcdef</code>;</li>
  <li><code>http://socialnetwork.example.com/?abcdef</code> <strong>would</strong> match;</li>
  <li><code>http://socialnetwork.example.com/?abcdef=ijklm</code> <strong>would not</strong> match.</li>
</ul>
<p>Again, a POWDER processor may use alternative methods to determine whether a given name-value pair is present in a candidate IRI but the template regular expression is used in the GRDDL transform to generate the POWDER-S shown in <a href="#eg2-4">Example 2-4</a>. Notice that the pre-processing described here allows POWDER-S to use the same restriction on the <code>wdrs:matchesregex</code> data property as the other elements in <a href="#table3">Table 3</a>.</p>
<div class="example" id="eg2-4">
<p class="caption">Example 2-4: The IRI Set defined in <a href="#eg2-3">Example 2-3</a> encoded in POWDER-S</p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?(socialnetwork\.example\.com)(\:([0-9]+))?\/&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*\?([^\#]*,)?id=abcdef(,|$)&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;\:\/\/(([^\/\?\#]*)\@)?([^\:\/\?\#\@]*)(\:([0-9]+))?\/[^\?\#]*\?([^\#]*,)?group=12345(,|$)&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre></div>


<h4 id="idnCanon">2.1.3 IRI Constraints Referring to Internationalized Domain Names: <code>includehosts</code> and <code>excludehosts</code></h4>
<p>Equivalence of internationalized domain names is defined in terms of ASCII case insensitive 
comparison of the output of the <code>ToASCII</code> operation (section 2 of [<a href="#rfc3490">RFC 3490</a>]).</p>

<p>If a regular expression is constructed to match the ihost component of an IRI, then it MUST 
be constructed to match the ASCII representation of that component.</p>

<p>Specifically, to generate a regular expression from an <code>includehosts</code> or <code>excludehosts</code> constraint, 
the result of applying the <code>ToASCII</code> operation [<a href="#rfc3490">RFC 3490</a>] to each space-separated token, with the 
<code>UseSTD3ASCIIRules</code> flag unset and the <code>AllowUnassigned</code> flag set, MUST 
be used. Behavior if the <code>ToASCII</code> operation fails is undefined.</p>

<p>When internationalized (non-ASCII) domain names appear in the values of the <code>includehosts</code>
and <code>excludehosts</code> constraints, they SHOULD be represented either as the output of a successful 
<code>ToASCII</code> operation, or as a string of Unicode characters that is mapped to itself when the 
<code>ToASCII</code> and <code>ToUnicode</code> operations are successfully applied, in this order.
(Note, in particular, that behavior for these strings is well-defined, since the <code>ToASCII</code>
operation must succeed by definition.)</p>


<h4 id="canon">2.1.4 IRI/URI Canonicalization</h4>

<p>Before determining whether an IRI is or is not a member of a given set, it is important to make sure that the
IRI is represented in the same way as the data against which it is being matched. We set out below a series 
of steps that are designed to help processors make a best effort in this regard, which are consistent with 
RFC&nbsp;3986 [<a href="#ref-uri">URIS</a>], RFC&nbsp;3987 [<a href="#ref-iri">IRIS</a>] and  
URISpace [<a href="#urispace">URISpace</a>].</p>

<h5 id="canon-encoding">2.1.4.1 Character encoding</h5>
<ul>
  <li>If not already so encoded, the IRI character string is converted into a sequence of Unicode [<a href="#unicode">UNICODE</a>] characters.</li>
  <li>Percent encoded triples are converted into the characters they represent (e.g. 
  <code>%c3%a7</code> becomes <code>&ccedil;</code> etc.). Note the hexadecimal digits are case-insensitive. 
  However space characters (%20) and reserved characters as per Section 2.2 of RFC 3986 [<a href="#ref-uri">URIS</a>] 
  must not be converted to literals, as that may invalidate the URI/IRI - the reason a URI would contain 
  (for example) <code>%2F</code> instead of <code>/</code> would be to distinguish between a literal <code>/</code>, 
  such as in 'his/hers', and the <code>/</code> which is used as a path separator.</li>
</ul>
<table class="vocab" id="table5">
  <caption>Table 5: Examples of percent-encoding conversion</caption>
  <thead>
  <tr>
    <th>Input IRI/URI</th>
    <th>Canonical form</th>
  </tr>
  </thead>
  <tbody>
  <tr>
    <td><code>http://example.com/staff/Fran%c3%a7ois</code></td>
    <td><code>http://example.com/staff/Fran&ccedil;ois</code></td>
  </tr>
  <tr>
    <td><code>http://example.com/my%20doc.doc</code></td>
    <td><code>http://example.com/my%20doc.doc</code></td>
  </tr>
  <tr>
    <td colspan="2">In this next example the <code>%2F</code> is a literal slash, not a path separator, and so is left as <code>%2F</code></td>
  </tr>
  <tr>
    <td><code>http://www.example.com/foo/his%2Fhers</code></td>
    <td><code>http://www.example.com/foo/his%2Fhers</code></td>
  </tr>
  </tbody>
</table>
<ul>
  <li>Normalize to Form C, as defined in Character Model for the World Wide Web 1.0: Normalization [<a href="#charmod-norm">CHARMOD-NORM</a>].</li>
</ul>


<h5 id="canon-defaults">2.1.4.2 Default Values and Case Folding</h5>
<ul>
  <li>Where the <code>authority</code> is present, but the <code>scheme</code> is absent, the <code>scheme</code> 
	should default to <code>http</code>.</li>
  <li>If the <span id="comment_km"><code>path</code></span> is absent, a <code>path</code> of <code>/</code> is appended.</li>
  <li>Trailing <code>.</code> characters in the <code>host</code> are removed, i.e. <code>http://www.example.com./</code> becomes <code>http://www.example.com/</code></li>
  <li id="tlr3">If the host string does not completely consist of ASCII characters, apply the 
      <code>ToASCII</code> operation to the <code>host</code> string, with the <code>UseSTD3ASCIIRules</code> flag unset and 
      the <code>AllowUnassigned</code> flag set [<a href="#rfc3490">RFC 3490</a>].
      Note that behavior if the <code>ToASCII</code> operation fails is undefined.</li>
  <li id="tlr2">Change the <code>scheme</code> and <code>host</code> strings to ASCII lowercase.</li>
  <li>If the <code>port</code> is specified, but it is the default port for the scheme, it is removed.</li>
</ul>
<p>The following table gives some examples.</p>
<table class="vocab" id="table6">
  <caption><a name="table-canonicalURIs" id="table-canonicalURIs">Table 6: Examples of canonicalized IRIs using defaults</a></caption>
  <thead>
    <tr><th>Input IRI</th><th>Canonical form</th></tr>
  </thead>
  <tbody>
    <tr><td><code>www.example.com</code></td><td><code>http://www.example.com/</code></td></tr>
    <tr><td><code>http://www.example.com</code></td><td><code>http://www.example.com/</code></td></tr>
    <tr><td><code>HTTPS://WWW.EXAMPLE.COM/FOO</code></td><td><code>https://www.example.com/FOO</code></td></tr>
    <tr><td><code>http://www.example.com./foo</code></td><td><code>http://www.example.com/foo</code></td></tr>
    <tr><td><code>http://www.example.com:80/foo</code></td><td><code>http://www.example.com/foo</code></td></tr>
    <tr><td id="tlr4"><code>http://sigma&#0963;.example.org/</code></td><td><code>http://xn--sigma-kde.example.org/</code></td></tr>
  </tbody>
</table>

<h4 id="dataEnc">2.1.5 Data encoding</h4>
<p>To complement the IRI canonicalization steps described in the previous section, related processing steps 
	must also be carried out on the strings supplied as set defining data. The following steps should therefore 
	be applied to each item in the list separately.</p>
<ul>
  <li>If not already so encoded, the strings are converted into a sequence of Unicode characters.</li>
  <li>With the exception of spaces and the reserved characters defined in Section 2.2 of RFC&nbsp;3986 
	[<a href="#ref-uri">URIS</a>], percent encoded triples are converted into the characters they represent.</li>
  <li>Normalize to Form C, as defined in Character Model for the World Wide Web 1.0: Normalization [<a href="#charmod-norm">CHARMOD-NORM</a>].</li>
  <li>If the data relates to the <code>host</code>, trailing <code>.</code> characters are removed.</li>
  <li id="tlr5">If the data relates to the <code>host</code>, and does not completely consist of ASCII characters, 
	the <code>ToASCII</code> operation is applied as described in <a href="#idnCanon">Section 2.1.3</a>.</li>
  <li id="tlr6">If the data relates to the <code>scheme</code> or <code>host</code>, it is normalized to 
	ASCII lower case.</li>
  <li>Any values given for the IRI constraints <code>includepathstartswith</code>, <code>excludepathstartswith</code>, <code>includeexactpaths</code> or <code>excludeexactpaths</code> must begin with the <code>/</code> character which is pre-pended if absent.</li>
</ul>
<p>If the IRI set definition includes values related to the <code>port</code> then matching of the data against the candidate resource's IRI must be carried out as follows:</p>
<ul>
  <li>If the set definition includes the IRI constraint <code>includeports</code> then, when matching, if the default port for the candidate resource's IRI is present in the list of supplied values, but the candidate resource's IRI does not specify the port, the candidate resource IS an element of the set IF all other conditions are met.</li>
  <li>If the set definition includes the IRI constraint <code>excludeports</code> then, when matching, if the default port for the candidate resource's IRI is present in the list of supplied values, but the candidate resource's IRI does not specify the port, the candidate resource is NOT an element of the IRI Set.</li>
</ul>

<h3 id="wild">2.2 Grouping using Wildcards: The <code>includeiripattern</code> and <code>excludeiripattern</code> IRI constraints</h3>

<p id="wafUpdate">It is anticipated that resource groups will typically be defined in terms of the 
domains and sub domains from which they are available. In order to provide as much flexibility as possible 
in this regard, the <code><a href="#includeiripattern">includeiripattern</a></code> and 
<code><a href="#excludeiripattern">excludeiripattern</a></code> properties allow domains and sub-domains 
to be substituted by a wildcard character (*) according to the following EBNF:</p>

<div><pre>
iri-pattern    ::= (scheme "://")? domain-pattern (":" port)? | "*"
domain-pattern ::= domain | "*." domain</pre>
</div>

<p><code>scheme</code> and <code>port</code> are used as defined in 
RFC&nbsp;3986 [<a href="#ref-uri">URIS</a>]. <code>domain</code> is an internationalized 
domain name as defined in RFC&nbsp;3490 [<a href="#rfc3490">RFC 3490</a>].</p>
<p>It follows that:</p>
<p><code>&lt;includehosts&gt;example.com&lt;/includehosts&gt;</code></p>
<p>and</p>
<p><code>&lt;includeiripattern&gt;example.com&lt;/includeiripattern&gt;</code></p>
<p>are equivalent. However, <code>*.example.com</code>, meaning resources on sub-domains of <code>example.com</code> but not on <code>example.com</code> itself, is not a valid value for <code>includehosts</code>.</p>
<p>In contrast to the IRI constraints shown in <a href="#table3">Table 3</a>, <code>includeiripattern</code> and <code>excludeiripattern</code> take a single pattern, <strong>not</strong> a white space separated list of values. Note that paths and query strings MUST NOT be included in the pattern. If these are required in an IRI set definition, the relevant IRI constraints from <a href="#table3">Table 3</a> can be used.</p>
<p>Any processing method that accurately tests a candidate IRI against the value of an <code>includeiripattern</code> or 
<code>excludeiripattern</code> element is valid but the POWDER GRDDL transform does it in the same way as the other 
IRI constraints, namely by creating a restriction on <span id="nomatch3">the 
<code>wdrs:matchesregex</code> and <code>wdrs:notmatchesregex</code> properties</span> as shown 
in the example below. Full details of the transformation are provided in the Formal Semantics 
document [<a href="#formal">FORMAL</a>] <span id="tlr7">however it is worth noting here once again that 
if a regular expression is constructed to match the <code>host</code> component of an IRI, then 
it MUST be constructed to match the ASCII representation of that component.</span></p>
<div class="example" id="eg2-5">
<p class="caption">Example 2-5: An IRI Set defined using the <code>includeiripattern</code> and 
<code>excludeiripattern</code> constraints</p>
<p>POWDER</p>
<pre>
&lt;iriset&gt;
  &lt;includeiripattern&gt;http://*.example.org&lt;/includeiripattern&gt;
  &lt;excludeiripattern&gt;search.example.com:81&lt;/excludeiripattern&gt;
&lt;/iriset&gt;
</pre>
<p>POWDER-S</p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^http\:\/\/([^\:\/\?\#\@]+\.)<span id="waf1">+</span>example\.org(\:[0-9]+)?&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#notmatchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^[A-Za-z]+\:\/\/([^\:\/\?\#\@]+\.)*search\.example\.com\:81&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre></div>

<h3 id="reMatch">2.3 Grouping by Regular Expression: The <code>includeregex</code> and <code>excluderegex</code> IRI constraints</h3>
<p>It is believed that the IRI constraints discussed above will be easy to use and cover the overwhelming 
majority of POWDER use cases. However, the use of strings with fixed matching rules clearly presents a restriction 
on flexibility. To support fully flexible set definition by IRI, the 
<code><a href="#includeregex">includeregex</a></code> and <code><a href="#excluderegex">excluderegex</a></code> 
properties take a Regular Expression and should be applied to the candidate resource's complete IRI (after following 
the <a href="#canon">canonicalization steps</a> above <span id="tlr8">and ensuring that regular expressions are
constructed using the ASCII representation of IRI components related to the <code>host</code> as 
discussed in <a href="#idnCanon">Section 2.1.3</a></span>). <span id="cardinality2">In common with 
<code>includepathcontains</code> and <code>excludepathcontains</code>, and unlike the other IRI constraints, 
<code>includeregex</code> and <code>excluderegex</code> may occur any number of times in an IRI set. This 
follows from the GRDDL transformation associated with POWDER introduced in 
<a href="#formalSemantics">Section 1.4</a>.</span> The same section notes that the regular expression 
syntax used is that defined by XML schema as modified by XQuery 1.0 and XPath 2.0 Functions and 
Operators [<a href="#xqxp">XQXP</a>].</p>

<p>For POWDER-S, the regular expressions are copied verbatim as values 
for <span id="notmatch4">the <code>wdrs:matchesregex</code> and <code>wdrs:notmatchesregex</code> properties</span>.</p>

<p><strong>N.B.</strong> The value of the <code>includeregex</code> and <code>excluderegex</code> properties MUST be a single Regular Expression, <strong>not</strong> a white space-separated list.</p>
<p>As an example, the set of all the resources hosted either by <code>example.org</code> or <code>example.net</code>, where the path component of their IRIs starts either with <code>foo</code> or <code>bar</code>, can be defined thus:</p>
<div class="example" id="eg2-6">
<p class="caption">Example 2-6: IRI set definition by regular expression (not including character escaping)</p>
<p>POWDER:</p>
<pre>
&lt;iriset&gt;
  &lt;includeregex&gt;^(([^:/?#]+):)//([^:/?#]+.)?example.(org|net)/(foo|bar)&lt;/includeregex&gt;
&lt;/iriset&gt; 
</pre>
<p>POWDER-S:</p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^(([^:/?#]+):)//([^:/?#]+.)?example.(org|net)/(foo|bar)&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre>
</div>
<p>It is important to note that <a href="#eg2-6">Example 2-6</a> does not take account of the need to escape certain characters.</p>
<p>The following characters are used as meta characters in regular expressions and MUST therefore be escaped if used in a pattern given as the value of the <code>includeregex</code> property:</p>
<p style="text-align:center"><code>. \ ? * + { } ( ) [ ]</code></p>
<p id="krs">In addition, the &lt; (less than) and &amp; (ampersand) characters MUST always be escaped to &amp;lt; and &amp;amp; as per the XML specification [<a href="#xml">XML</a>].</p>
<p>As a safeguard against unintended consequences, other characters that always or typically have special meaning within IRI strings and/or XML SHOULD also be escaped, namely:</p>
<p style="text-align:center"><code>! " # % ' , - / : ; = > @ [ ] _ ` ~</code></p>
<p>As a result, <a href="#eg2-6">Example 2-6</a> should properly be written as shown in <a href="#eg2-7">Example 2-7</a> below.</p>
<div class="example" id="eg2-7">
<p class="caption">Example 2-7: Set definition by regular expression, including character escaping</p>
<p>POWDER:</p>
<pre>
&lt;iriset&gt;
  &lt;includeregex&gt;^(([^<strong>\</strong>:<strong>\</strong>/<strong>\</strong>?<strong>\</strong>#]+)<strong>\</strong>:)//([^<strong>\</strong>:<strong>\</strong>/<strong>\</strong>?<strong>\</strong>#]+<strong>\</strong>.)?example<strong>\</strong>.(org|net)/(foo|bar)&lt;/includeregex&gt;
&lt;/iriset&gt; 
</pre>
<p>POWDER-S:</p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^(([^<strong>\</strong>:<strong>\</strong>/<strong>\</strong>?<strong>\</strong>#]+)<strong>\</strong>:)//([^<strong>\</strong>:<strong>\</strong>/<strong>\</strong>?<strong>\</strong>#]+<strong>\</strong>.)?example<strong>\</strong>.(org|net)/(foo|bar)&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre>
</div>
<h4 id="safe">2.3.1 Safe Use of <code>includeregex</code></h4>
<p><a href="#eg2-7">Example 2-7</a> uses a modified version of the regular expression given in <a href="#byIRIcomp">Section 2.1</a>. This is the safest method but is not, perhaps, the most natural way to proceed. If a less rigorous approach is taken it is easy to make mistakes when specifying regular expressions, and incorrect regular expressions in set definitions will have one of two possible (and obvious) consequences:</p>
<ol>
  <li>the corresponding set does not include the intended resources;</li>
  <li>the corresponding set includes resources not intended to be included.</li>
</ol>
<p><a href="#eg2-8">Example 2-8</a> below shows how this can happen.</p>
<div class="example" id="eg2-8">
<p class="caption">Example 2-8: An example of a bad set definition by regular expression</p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includeregex&gt;https&lt;/includeregex&gt;
&lt;/iriset&gt; 
</pre>
</div>
<p>The intention of the regular expression given in <a href="#eg2-8">Example 2-8</a> is probably to say &quot;all resources on 
<code>example.org</code> with a URI beginning with <code>https</code>.&quot; However, as the regular expression is not 
anchored at either end, what this actually means is &quot;all resources on <code>example.org</code> where the URI 
includes <code>https</code>&quot;. Thus this IRI set includes both of:</p>
<ul>
  <li><code>https://www.example.org/page.html</code></li>
  <li><code>http://www.example.org/why_we_use_https.html</code></li>
</ul>
<p>Adding in anchors  at the beginning and end of the regular expression can have equally undesirable consequences.</p>
<div class="example" id="eg2-9">
<p class="caption">Example 2-9: A second example of a bad set definition by regular expression</p>
<pre>&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includeregex&gt;^https$&lt;/includeregex&gt;
&lt;/iriset&gt; 
</pre>
</div>
<p>In <a href="#eg2-9">Example 2-9</a>, the intention is, again probably, to define the set of &quot;all resources on <code>example.org</code> fetched using <code>https</code> only&quot;. However, adding both the <code>^</code> and <code>$</code> anchors at the beginning and end of the regular expression means that the whole IRI must be <code>https</code> from start to finish &mdash; which can never be true so this IRI set is equivalent to the empty set.</p>
<p><a href="#eg2-10">Example 2-10</a> shows one possible way to encode the intended set definition.</p>
<div class="example" id="eg2-10">
<p class="caption">Example 2-10: An example of a correct set definition by regular expression</p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includeregex&gt;^https&lt;/includeregex&gt;
&lt;/iriset&gt; 
</pre>
</div>
<p>Whilst <a href="#eg2-10">Example 2-10</a> 'works', the potential dangers of using regular expressions mean that it is generally better to use component strings where possible. <a href="#eg2-10">Example 2-10</a> is therefore better written as shown in <a href="#eg2-11">Example 2-11</a> below.</p>
<div class="example" id="eg2-11">
<p class="caption">Example 2-11: A re-write of <a href="#eg2-10">Example 2-10</a> without using a regular expression</p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;includeschemes&gt;https&lt;/includeschemes&gt;
&lt;/iriset&gt; 
</pre>
</div>

<h3 id="byIP">2.4 Grouping by IP Address</h3>
<p>It is noteworthy that POWDER does not define any special procedures where the host component of an IRI is expressed as an IP address. These are treated as strings, not as a sequence of digits.  If the intention is to define an IRI set that encompasses a particular group of resources <em>however they are accessed</em> then it may be appropriate to include both the domain name and associated IP address as two space separated values in an <code>includehosts</code> element for example. However, this assumes that there is a one to one relationship between the domain name and the IP address which, of course, is often not the case.</p>
<p>As noted in <a href="#methOutline">Section 1.2</a>, POWDER defines sets of IRIs, not of the resources that they identify. IRI sets must therefore be defined with care. For operational reasons, a user agent MAY perform a DNS or reverse DNS lookup to match domain names and IP addresses but this is very much application-specific.</p>

<h3 id="listing">2.5 Enumerating Elements of an IRI Set: the <code>includeresources</code> and <code>excluderesources</code> Constraints</h3>
<p>It is useful to be able to include or exclude IRIs from sets by simple listing. The <code><a href="#includeresources">includeresources</a></code> and <code><a href="#excluderesources">excluderesources</a></code> constraints support this, both of which take white space separated lists of IRIs. To give a simple example, the set of all resources on <code>example.org</code> <em>except</em> its stylesheet and JavaScript library can be encoded as shown in <a href="#eg2-12">Example 2-12</a> below.</p>
<div class="example" id="eg2-12">
<p class="caption">Example 2-12: IRI Set definition using the <code>excluderesources</code> constraint</p>
<pre>
&lt;iriset&gt;
  &lt;includehosts&gt;example.org&lt;/includehosts&gt;
  &lt;excluderesources&gt;http://www.example.org/stylesheet.css http://www.example.org/jslib.js&lt;/excluderesources&gt;
&lt;/iriset&gt;
</pre>
</div>
<p>The white space separated list of values is processed as set out in the Formal Semantics document [<a href="#formal">FORMAL</a>] to create a pattern <var>var</var> that can be inserted into the simple template regular expression:</p>
<p><code>^<strong><var>var</var></strong>$</code></p>
<p>Thus <a href="#eg2-12">Example 2-12</a> is transformed into the following POWDER-S.</p>
<div class="example" id="eg2-13">
<p class="caption">Example 2-13: The IRI Set defined in <a href="#eg2-12">Example 2-12</a> encoded in POWDER-S</p>
<pre>
&lt;owl:Class rdf:nodeID=&quot;iriset_1&quot;&gt;
  &lt;owl:equivalentClass&gt;
    &lt;owl:Class&gt;
      &lt;owl:intersectionOf rdf:parseType=&quot;Collection&quot;&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#matchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?(example\.org)(:([0-9]+))?\/&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
        &lt;owl:Restriction&gt;
          &lt;owl:onProperty rdf:resource=&quot;http://www.w3.org/2007/05/powder-s#notmatchesregex&quot; /&gt;
          &lt;owl:hasValue rdf:datatype=&quot;http://www.w3.org/2001/XMLSchema-datatypes#string&quot;&gt;^(http\:\/\/www\.example\.org\/stylesheet\.css|http\:\/\/www\.example\.org\/jslib\.js)$&lt;/owl:hasValue&gt;
        &lt;/owl:Restriction&gt;
      &lt;/owl:intersectionOf&gt;
    &lt;/owl:Class&gt;
  &lt;/owl:equivalentClass&gt;
&lt;/owl:Class&gt;
</pre>
</div>
<p>As emphasized throughout this document, each constraint and its value creates a set definition of its own and the full IRI set is the intersection of those sets. Thus an alternative way of looking at <a href="#eg2-12">Example 2-12</a> is to say that a candidate IRI is a member of the IRI set IF it is on <code>example.org</code> AND does not have the IRI <code>http://www.example.org/stylesheet.css</code> AND does not have the IRI <code>http://www.example.org/jslib.js</code>.</p>
<h3 id="conj-disj">2.6 Complex Sets: Negation, Conjunction and Disjunction</h3>
<p>POWDER allows a DR to express any grouping of resources whatsoever, no matter how complex.</p>
<p><em>Atomic negation</em> is achieved by complementing each IRI constraint that includes certain IRI 
components by one that excludes them, and vice versa; furthermore, all <code>includeX</code> and <code>excludeX</code> constraints 
are mutually exclusive. <span id="notmatch5">The analogous <code>matchesregex</code> and <code>notmatchesregex</code> properties are used in POWDER-S</span>. Negation of complex constraints is not supported.</p>
<p><em>Conjunction</em> of atomic propositions (both positive and negative) is inherent in the basic model - an IRI must match all the constraints if it is to be an element of the set. The GRDDL transform uses <code>owl:intersectionOf</code> to render in POWDER-S <code>iriset</code> elements with multiple constraints.</p>
<p>The <em>disjunction</em> of conjunctions of atomic propositions (both positive and negative) is also possible, as a DR may contain multiple <code>iriset</code> elements, and if any of them holds, then the DR holds. The GRDDL transform encodes multiple <code>iriset</code> elements as multiple clauses in POWDER-S.</p>
<p>It follows from the above, that POWDER allows the expression of Disjunctive Normal Form propositions. Since arbitrarily complex propositions can be brought into DNF (<em>DNF Theorem</em>), it follows that POWDER allows the expression of any proposition.</p>
<p><a href="#eg2-14">Example 2-14</a> shows a Description Resource defining the set of IRIs on example.com with a path beginning with /foo and those on example.org where the path starts with /bar.</p>
<div class="example" id="eg2-14">
<p class="caption">Example 2-14: A Description Resource with its scope defined by the union of two IRI sets [<a href="example_2_14.xml">XML</a>]</p>
<pre>
&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;powder xmlns=&quot;http://www.w3.org/2007/05/powder#&quot; 
        xmlns:ex=&quot;http://example.org/vocab#&quot;&gt;

  &lt;attribution&gt;
    &lt;issuedby src=&quot;http://authority.example.org/company.rdf#me&quot; /&gt;
    &lt;issued&gt;2007-12-14T00:00:00&lt;/issued&gt;
  &lt;/attribution&gt;

  &lt;dr&gt;
    &lt;iriset&gt;
      &lt;includehosts&gt;example.com&lt;/includehosts&gt;
      &lt;includepathstartswith&gt;/foo&lt;/includepathstartswith&gt;
    &lt;/iriset&gt;

    &lt;iriset&gt;
      &lt;includehosts&gt;example.org&lt;/includehosts&gt;
      &lt;includepathstartswith&gt;/bar&lt;/includepathstartswith&gt;
    &lt;/iriset&gt;

    &lt;descriptorset&gt;
      &lt;ex:color&gt;red&lt;/ex:color&gt;
      &lt;ex:shape&gt;square&lt;/ex:shape&gt;
      &lt;displaytext&gt;Everything on example.com where the path starts with /foo
       and everything on example.org where the path starts with /bar is red and square&lt;/displaytext&gt;
      &lt;displayicon&gt;http://example.org/icon.png&lt;/displayicon&gt;
    &lt;/descriptorset&gt;
  &lt;/dr&gt;

&lt;/powder&gt;
</pre></div>

<h2 id="extension">3 Extension Mechanism</h2>
<p>In this document we have specified various methods for defining sets of resource identifiers. The elements are clearly designed to be used with information resources available on the Web, identified by IRIs containing host names, directory paths, port numbers, and so on. The POWDER grouping vocabulary can be easily extended by new elements, defined via GRDDL transformation, which build upon the elements defined by POWDER. As examples, in <a href="#custom-iri">Sections 3.1</a> and <a href="#siteStruct">3.2</a> we show how other methods of defining IRI sets that may suit particular situations can be transformed into POWDER-BASE.</p>
<p>Furthermore, there is no fundamental reason to constrain the domain of POWDER descriptions to HTTP IRIs, so there should not be unnecessary constraints on how the protocol works. In other words, the domain of grouping extensions does not need to be HTTP IRIs, but may be any kind of IRIs. As an example, in <a href="#isan">Section 3.3</a> we show such an extension for ISAN numbers.</p>
<p>It should be noted that the treatment of non-HTTP IRIs is one of the basic motivations behind the two-step GRDDL transform from POWDER to POWDER-BASE to POWDER-S, outlined in
<a href="#formalSemantics">Section 1.4</a> and fully specified in the Formal Semantics document [<a href="#formal">FORMAL</a>]. If POWDER were rendered into POWDER-S in a single direct transform, the only XML language from which to derive extensions would be POWDER, which would oblige POWDER extensions to include HTTP-specific IRI restrictions such as <code>includehosts</code>, even if they are meaningless for the domain of the extension.</p>
<p>In the intermediate POWDER-BASE language, on the other hand, all HTTP-specific elements have been rendered as regular expressions, using the <code>includeregex</code> and <code>excluderegex</code> IRI restrictions, as POWDER-BASE only requires that these two restrictions are supported. Developers of non-HTTP extensions and tools are advised to use POWDER-BASE to derive their extension from, instead of POWDER, as this relieves them of the obligation to also implement the HTTP-specific IRI restrictions in their tools.</p>
<p>XML elements suitable for defining sets of URIs or IRIs from schemes other than HTTP may be created 
and<span id="f-norm1"> a GRDDL transform</span> defined that renders such IRI sets in POWDER-BASE. This is a generic extension mechanism since a conformant POWDER Processor, as defined in the Description Resources document [<a href="#dr">DR</a>], MUST be able to process POWDER-BASE. For clarity: POWDER-BASE is not a separate encoding of POWDER &mdash; it is all done in the <code>wdr</code> namespace &mdash; merely a restricted form of POWDER that just has the two possible child elements of <code>iriset</code>.</p>
<p>Developers of POWDER tools MAY directly implement extensions they know about, and MAY include support for 
<span id="f-norm2">transformation technologies such as </span>XSLT so that unknown extensions can be processed.</p>

<h3 id="custom-iri">3.1 Extension Example: Custom IRI Patterns</h3>
<p>As an example of a service-specific extension, consider a service which uses unix shell wildcards instead of regular expressions, so that <code>www.example.org/*</code> means &quot;all the resources on www.example.org fetched using HTTP.&quot; Such a system is easily used within an IRI set, only requiring the definition of a near copy of the POWDER schema [<a href="#wdr">WDR</a>] with a single IRI constraint <code>shell:includepattern</code> as child element of its IRI set element (good practice when defining <code>shell:includepattern</code> would be to also define <code>shell:excludepattern</code>).</p>
<p>A publisher of a document using <code>shell:includepattern</code> SHOULD define <span id="f-norm3">a GRDDL transform 
that will generate</span> a POWDER-BASE document as shown in the example below.</p>
<div class="example" id="eg3-1">
<p class="caption">Example 3-1 An IRI set definition using a custom IRI pattern and the corresponding POWDER-BASE definition.</p>
<p>Custom IRI pattern:</p>
<pre>
&lt;shell:iriset&gt;
  &lt;shell:includepattern&gt;www.example.org/*&lt;/shell:includepattern&gt;
&lt;/shell:iriset&gt;
</pre>
<p>POWDER-BASE:</p>
<pre>
&lt;iriset&gt;
  &lt;includeregex&gt;http\:\/\/www\.example\.org\/.*&lt;/includeregex&gt;
&lt;/iriset&gt;
</pre>
</div>
<p>Note that the custom IRI pattern SHOULD NOT be used in a document with its root element in the POWDER namespace since the only valid child elements of the <code>iriset</code> element within a POWDER document are those defined in this document.</p>

<h3 id="siteStruct">3.2 Extension Example: Custom Site Structure</h3>
<p>Many content providers serve dynamic content stored in a database, so that IRIs express queries to that database. This kind of IRI will have a certain structure but this is typically neither obvious nor easily human-interpreted.</p>
<p>As an example, consider <code>sport.example.com</code>, a sports news site, where IRIs look like the one shown in <a href="#eg3-2">Example 3-2</a>. The adopted scheme is systematic so that <code>sport=2&amp;countryID=16</code> provides a front page with news about Greek basketball and links to various Greek basketball leagues, <code>sport=3&amp;countryID=16</code> a front page about Greek volleyball, etc.</p>
<div class="example" id="eg3-2">
<p class="caption">Example 3-2 Sample IRI from site serving dynamic content. <code>sport=1</code> stands for football and <code>countryID=16</code> stands for Greece.</p>
<pre>
http://sport.example.com/matches.asp?sport=1&amp;countryID=16&amp;champID=2
</pre>
</div>
<p>A POWDER document providing metadata about this Web site would have to use regular expression matching with explicit reference to the numerical values in the <code>country</code> and <code>sport</code> fields of the query. This process is error-prone, and requires extensive changes if the underlying database schema is modified or extended.</p>
<p>As an alternative, the site developer may provide a POWDER-like scheme that abstracts away from the specific database fields to allow reference to sports and countries, as shown in <a href="#eg3-3">Example 3-3</a>. Description Resource authors can then use the properties in this extension to generate POWDER-BASE documents that are valid even if the site schema is modified, as long as the site developer updates the relevant transformations.</p>
<div class="example" id="eg3-3">
<p class="caption">Example 3-3 An IRI set definition using site-specific extensions and the equivalent definition using standard POWDER-BASE vocabulary.</p>
<p>Custom IRI constraint:</p>
  <pre>
&lt;sport:iriset&gt;
  &lt;wdr:includehosts&gt;sport.example.com&lt;/wdr:includehosts&gt;
  &lt;sport:countries&gt;Greece&lt;/sport:countries&gt;
  &lt;sport:sports&gt;Football Basketball&lt;/sport:sports&gt;
&lt;/sport:iriset&gt;
</pre>
<p>Corresponding POWDER-BASE IRI set:</p>
<pre>
&lt;iriset&gt;
  &lt;includeregex&gt;(([^\/\?\#]*)\@)?([^\:\/\?\#\@]+\.)?(sport\.example\.com)(:([0-9]+))?\/&lt;/includeregex&gt;
  &lt;includeregex&gt;countryID=16&lt;/includeregex&gt;
  &lt;includeregex&gt;sport=(1|2)&lt;/includeregex&gt;
&lt;/iriset&gt;
</pre>
</div>

<h3 id="isan">3.3 Extension Example: ISAN</h3>
<p>The International Standard Audiovisual Number [<a href="#isan1">ISAN1</a>] is a globally-unique, centrally managed and permanent numbering system for the identification of audiovisual works and versions. Following ISO 15706 [<a href="#isan3">ISAN3</a>], [<a href="#isan3-2">ISAN3-2</a>], the ISAN numbers are written as 24 hexadecimal digits in the following format [<a href="#isan2">ISAN2</a>].</p>
<table style="margin: 0pt auto; border-collapse: collapse;" class="example">
<tbody>
<tr>
  <td style="border-width: 0pt;"></td>
  <td style="border-width: 0pt;"><code>-----root-----</code></td>
  <td style="border-width: 0pt;"></td>
  <td style="border-width: 0pt;"><code>episode</code></td>
  <td style="border-width: 0pt;"></td>
  <td style="border-width: 0pt; text-align: center;"><code>-version-</code></td>
  <td style="border-width: 0pt;"></td>
</tr>
<tr>
  <td style="border-width: 0pt;"><code>ISAN</code></td>
  <td style="border-width: 0pt;"><code>1881-66C7-3420</code></td>
  <td style="border-width: 0pt;"><code>-</code></td>
  <td style="border-width: 0pt; text-align: center;"><code>0000</code></td>
  <td style="border-width: 0pt;"><code>-7-</code></td>
  <td style="border-width: 0pt;"><code>9F3A-0245</code></td>
  <td style="border-width: 0pt;"><code>-U</code></td>
</tr>
</tbody>
</table>
<p>The root segment of an ISAN number is assigned to a core work. When the core work is a serial, episodes are identified with a non null episode segment. Versions are assigned in the version segment and refer to changes in the audiovisual content, being a different language or soundtrack, subtitles, editions, promotional trailers, and so on.</p>
<p>Since ISAN numbers are URNs [<a href="#urn">URN</a>], and hence IRIs of the <code>urn:</code> scheme [<a href="#ref-uri">URIS</a>], a vocabulary can readily be defined to allow IRI Sets to be defined based on ISAN numbers. The terms might be along the lines of:</p>
<p><code>includeRoots</code> &mdash; the value of which would be a white space separated list of hexadecimal digits and hyphens that would be matched against the first three blocks in the ISAN number.</p>
<p><code>includeEpisodes</code> &mdash; a white space separated list of hexadecimal digits and hyphens that would be matched against the 4th block of 4 digits in the ISAN number.</p>
<p><code>includeVersions</code> &mdash; a white space separated list of hexadecimal digits and hyphens that would be matched against the 5th and 6th blocks of 4 digits in the ISAN number.</p>
<p>The set of all audio visual resources that relate to two particular works might then be defined as shown in <a href="#eg3-4">Example 3-4</a>.</p>
<div class="example" id="eg3-4">
<p class="caption">Example 3-4: An IRI set definition using an ISAN number pattern and the corresponding definition using standard POWDER vocabulary</p>
<p>Custom ISAN pattern:</p>
<pre>
&lt;ex_isan:iriset&gt;
  &lt;ex_isan:includeRoots&gt;1881-66C7-3420 1881-66C7-3421&lt;/ex_isan:includeRoots&gt;
&lt;/ex_isan:iriset&gt;
</pre>
<p>Corresponding POWDER-BASE IRI Set:</p>
<pre>
&lt;iriset&gt;
 &lt;includeregex&gt;^urn:isan:(1881-66C7-3420|1881-66C7-3421)&lt;/includeregex&gt;
&lt;/iriset&gt;
</pre>
</div>

<h2 id="conformance">4 Conformance Criteria</h2>
<p>An IRI set definition is a <em>Conformant IRI set definition</em> if it adheres to the specification described in this document.</p>
<p>More precisely:</p>
<ul>
<li>An IRI set definition is an <em>XML Conformant IRI Set Definition</em> if it contains at least one of the child elements set out in Section 2 and adheres to the cardinality rules stated. These are summarized in <a href="#appA">Appendix A</a>.</li>
<li>Space separated lists of strings in an IRI set definition adhere to the rules described in <a href="#intro">Section 1</a>.</li>
<li>IRI strings in an IRI set definition adhere to the rules described in <a href="#canon">Section 2.1.3</a>.</li>
<li>Extensions to IRI set definitions adhere to the rules described in <a href="#extension">Section 3</a>.</li>
</ul>

<h2 class="nonum"><a id="section-References" name="section-References"></a>5 References</h2>
<h3 id="section-Normative-References">5.1 Normative References</h3>
<dl>
  <dt id="grddl">[GRDDL]</dt>
  <dd><cite><a href="http://www.w3.org/TR/grddl/">Gleaning Resource Descriptions from Dialects of Languages (GRDDL)</a></cite>, D. Connolly. W3C Recommendation, 11 September 2007. This document is at http://www.w3.org/TR/grddl/</dd>
  <dt id="httpcode">[HTTPCODE]</dt>
  <dd><cite><a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html">Part of Hypertext Transfer Protocol &ndash; HTTP/1.1, RFC&nbsp;2616</a></cite> Fielding, et al. This document is http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html .</dd>
  <dt id="httprdf">[HTTPRDF]</dt>
  <dd><cite><a href="http://www.w3.org/TR/HTTP-in-RDF/">HTTP Vocabulary in RDF</a></cite> J Koch, C Velasco, S Abou-Zahra. This document is at http://www.w3.org/TR/HTTP-in-RDF/</dd>
  <dt id="ref-iri">[IRIS]</dt>
  <dd><cite><a href="http://www.ietf.org/rfc/rfc3987.txt">RFC&nbsp;3987 &mdash; Internationalized Resource Identifiers (IRIs)</a></cite>, M. D&uuml;rst and M. Suignard, IETF, January 2005. This document is at http://www.ietf.org/rfc/rfc3987.txt</dd>
  <dt id="ref-rfc2119">[RFC2119]</dt>
  <dd><cite><a href="http://www.ietf.org/rfc/rfc2119">Key words for use in RFCs to Indicate Requirement Levels</a></cite>, S. Bradner. IETF, March 1997. This document is at http://www.ietf.org/rfc/rfc2119.</dd>
  <dt id="ref-uri">[URIS]</dt>
  <dd><cite><a href="http://tools.ietf.org/html/rfc3986">RFC&nbsp;3986 &mdash; Uniform Resource Identifiers (URI): Generic Syntax</a></cite>, T. Berners-Lee, R. Fielding and L. Masinter, IETF, January 2005. This document is http://tools.ietf.org/html/rfc3986.</dd>
  <dt id="urn">[URN]</dt>
  <dd><cite><a href="http://www.iana.org/assignments/urn-namespaces">Official IANA Registry of URN Namespaces</a></cite>. This document is http://www.iana.org/assignments/urn-namespaces.</dd>
  <dt id="unicode">[UNICODE]</dt>
  <dd><cite><a href="http://www.unicode.org/unicode/standard/versions/" title="http://www.unicode.org/unicode/standard/versions/">The Unicode Standard</a></cite>. 
		The Unicode Consortium, Version  5.1.0, ISBN 0-321-48091-0, as updated from time to time by the 
		publication of new versions. (See <a href="http://www.unicode.org/unicode/standard/versions" title="http://www.unicode.org/unicode/standard/versions">http://www.unicode.org/unicode/standard/versions</a> 
		for the latest version and additional information on versions of the standard and of the Unicode Character Database).</dd>  
  <dt id="rfc3490">[RFC 3490]</dt>
  <dd><cite><a href="http://www.faqs.org/rfcs/rfc3490.html"> Internationalizing Domain Names in Applications (IDNA)</a></cite>. P. Faltstrom, P. Hoffman, A. Costello. This document is at http://www.faqs.org/rfcs/rfc3490.html.</dd>

  <dt id="xqxp">[XQXP]</dt>
  <dd><cite><a href="http://www.w3.org/TR/xpath-functions/">XQuery 1.0 and XPath 2.0 Functions and Operators</a></cite>, A. Malhotra, J. Melton, N. Walsh. W3C Recommendation, 23 January 2007. This document is at http://www.w3.org/TR/xpath-functions/</dd>
  <dt id="xml">[XML]</dt>
  <dd><cite><a href="http://www.w3.org/TR/2008/REC-xml-20081126/#syntax">Extensible Markup Language (XML) 1.0 (Fifth Edition)</a></cite>. W3C Recommendation 26 November 2008. T. Bray, J. Paoli, C. M. Sperberg-McQueen, E. Maler, F. Yergeau. This document is at http://www.w3.org/TR/2008/REC-xml-20081126/.</dd>
</dl>
<h3 id="sources">5.2 Sources</h3>
<dl>
  <dt id="charmod-norm">[CHARMOD-NORM]</dt>
  <dd><cite><a href="http://www.w3.org/TR/charmod-norm/">Character Model for the World Wide Web 1.0: Normalization</a></cite>, F. Yergeau, M. J. D&uuml;rst, A. Phillips, M. Wolf, T. Texin. W3C Working Draft, 27 October 2005. This document is at http://www.w3.org/TR/charmod-norm/</dd>
  <dt id="dr">[DR]</dt>
  <dd><cite><a href="http://www.w3.org/TR/powder-dr/">Protocol for Web Description Resources (POWDER): Description Resources</a></cite>, P Archer, K. Smith, A Perego. W3C Working Draft, 15 August 2008. This document is at http://www.w3.org/TR/powder-dr/</dd>
  <dt id="formal">[FORMAL]</dt>
  <dd><a href="http://www.w3.org/TR/2008/WD-powder-formal-20080815/"><cite>Protocol for Web Description Resources (POWDER): Formal Semantics</cite></a>, S. Konstantopoulos, P. Archer. W3C Working Draft, 15 August 2008. This document is http://www.w3.org/TR/2008/WD-powder-formal-20080815/</dd>
  <dt id="isan1">[ISAN1]</dt>
  <dd><cite><a href="http://www.isan.org">International Standard Audiovisual Number</a></cite></dd>
  <dt id="isan2">[ISAN2]</dt>
  <dd><cite><a href="http://www.isan.org/portal/page?_pageid=166,41960&amp;_dad=portal&amp;_schema=PORTAL">ISAN FAQs: What is the ISAN?</a></cite> This document is at http://www.isan.org/portal/page?_pageid=166,41960&amp;_dad=portal&amp;_schema=PORTAL.</dd>
  <dt id="isan3">[ISAN3]</dt>
  <dd><cite>ISO 15706:2002, Information and Documentation &ndash; International Standard Audiovisual Number (ISAN).</cite></dd>
  <dt id="isan3-2">[ISAN3-2]</dt>
  <dd><cite>ISO 15706-2:2007, Information and Documentation &ndash; International Standard Audiovisual Number (ISAN) &ndash; Part 2: Version identifier.</cite></dd>
  <dt id="primer">[PRIMER]</dt>
  <dd><a href="http://www.w3.org/TR/powder-primer/"><cite>Protocol for Web Description Resources (POWDER): Primer</cite></a>, K. Scheppe, D. Pentecost. W3C Working Draft, 15 August 2008. This document is at http://www.w3.org/TR/powder-primer/</dd>
  <dt id="jo">[Rabin]</dt>
  <dd><a href="http://www.w3.org/2005/Incubator/wcl/matching.html"><cite>URI Pattern Matching for Groups of Resources</cite></a>, J. Rabin, Draft 0.1, 17 June 2006. This document is at http://www.w3.org/2005/Incubator/wcl/matching.html</dd>
  <dt id="testsuite">[TESTS]</dt>
  <dd><a href="http://www.w3.org/TR/powder-test/"><cite>Protocol for Web Description Resources (POWDER): Test Suite</cite></a>, A. Kukurikos. W3C Working Draft, 15 August 2008. This document is http://www.w3.org/TR/powder-test/</dd>
  <dt id="urispace">[URISpace]</dt>
  <dd><a href="http://www.w3.org/TR/urispace"><cite>URISpace 1.0</cite></a>, M. Nottingham, W3C Note, 15 February 2001. This document is http://www.w3.org/TR/urispace</dd>
  <dt id="usecases">[USECASES]</dt>
  <dd><a href="http://www.w3.org/TR/powder-use-cases/"><cite>POWDER: Use Cases and Requirements</cite></a>, P. Archer. W3C Working Group Note, 31 October 2007. This document is at http://www.w3.org/TR/powder-use-cases/</dd>
  <dt id="wclxg">[WCL-XG]</dt>
  <dd><a href="http://www.w3.org/2005/Incubator/wcl/"><cite>W3C Content Label Incubator Group</cite></a>, February 2006 &ndash; February 2007</dd>
  <dt id="wdr">[WDR]</dt>
  <dd><a href="http://www.w3.org/2007/05/powder"><cite>Protocol for Web Description Resources (POWDER): Web Description Resources XML Schema (WDR)</cite></a>, K. Smith, A. Perego. This document is at http://www.w3.org/2007/05/powder</dd>
  
  <dt id="wdrs">[WDRS]</dt>
  <dd><a href="http://www.w3.org/2007/05/powder-s"><cite>Protocol for Web Description Resources (POWDER): POWDER-S Vocabulary (WDRS)</cite></a>, A. Perego, P. Archer, S. Konstantopoulos. This document is at http://www.w3.org/2007/05/powder-s</dd>
</dl>
<h2 id="ack">6 Acknowledgments</h2>
<p>The editors duly acknowledge the earlier work in this area carried out by Jo Rabin. 
Jeremy Carroll and David Booth developed the operational and formal semantics model 
which was further developed by Stasinos Konstantopoulos. Eric Prud'hommeaux, Addison Phillips and Thomas Roessler
made significant contributions to the development of the canonicalization section.  The editors gratefully 
acknowledge the further contributions made by R&eacute;gis Flad of ISANIA and members of the POWDER Working Group.</p>
<h2 id="change">7 Change Log</h2>
<p>Changes since the <a href="http://www.w3.org/TR/2009/WD-powder-grouping-20090403/">3 April 2009 draft</a> are as follows:</p>
<ol>
  <li>Further minor tweaks to the Canonicalization sections (<a href="#idnCanon">Section 2.1.3</a> to Section 2.1.5), 
	following <a href="http://lists.w3.org/Archives/Public/public-powderwg/2009Apr/0003.html">implementation experience</a>.</li>
  <li><a href="#comment_km">Typo corrected</a> following comment from <a href="http://lists.w3.org/Archives/Public/public-powderwg/2009Apr/0000.html">Krzysztof Maczy&#324;ski</a></li>
  <li><a href="#krs">Slight correction</a> to reference to encoding of ampersands (<a href="http://lists.w3.org/Archives/Public/public-powderwg/2009Apr/0007.html">K. Smith</a>).</li>
</ol>
<h3 id="sincePR">7.1 Changes since <a href="http://www.w3.org/TR/2009/PR-powder-grouping-20090604/">Proposed Recommendation</a></h3>
<ul><li><a href="#ns_typo">XML Schema namespace</a> corrected</li>
<li>Sentence describing <a href="#whitespace">white space processing</a> rules amended to refer to, and be consistent with, XML attribute value normalization.</li>
</ul>



<h2 id="appA">Appendix A: Summary of POWDER Elements</h2>
<table style="border-collapse:collapse">
  <thead>
  <tr>
    <th>Element Name</th>
    <th>Content</th>
    <th>Attributes</th>
    <th>Cardinality</th>
    <th>Introduced</th>
  </tr>
  </thead>
  <tbody>
  <tr>
    <td><code><a name="iriset" id="iriset">iriset</a></code></td>
    <td>Any of 
      <code>includeschemes</code>, 
      <code>excludeschemes</code>, 
      <code>includehosts</code>, 
      <code>excludehosts</code>, 
      <code>includeexactpaths</code>, 
      <code>excludeexactpaths</code>, 
      <code>includepathcontains</code>, 
      <code>excludepathcontains</code>, 
      <code>includepathstartswith</code>, 
      <code>excludepathstartswith</code>, 
      <code>includepathendswith</code>, 
      <code>excludepathendswith</code>,
      <code>includeports</code>, 
      <code>excludeports</code>
    </td>
    <td></td>
    <td>At least 1 must be a child element of a <code>dr</code></td>
    <td><a href="#operationalSemantics">Section 1.3</a></td>
  </tr>
  <tr>
    <td><code><a name="includeschemes" id="includeschemes">includeschemes</a></code></td>
    <td rowspan="6">Token list</td>
    <td rowspan="6"></td>
    <td rowspan="6">0 or 1</td>
    <td rowspan="14"><a href="#byIRIcomp">Section 2.1</a></td>
  </tr>
  <tr>
    <td><code><a name="excludeschemes" id="excludeschemes">excludeschemes</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includehosts" id="includehosts">includehosts</a></code></td>
  </tr>
  <tr>
    <td><code><a name="excludehosts" id="excludehosts">excludehosts</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includeexactpaths" id="includeexactpaths">includeexactpaths</a></code></td>
  </tr>
  <tr>
    <td><code><a name="excludeexactpaths" id="excludeexactpaths">excludeexactpaths</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includepathcontains" id="includepathcontains">includepathcontains</a></code></td>
    <td rowspan="2">Token list</td>
    <td rowspan="2"></td>
    <td rowspan="2">Any number</td>
  </tr>
  <tr>
    <td><code><a name="excludepathcontains" id="excludepathcontains">excludepathcontains</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includepathstartswith" id="includepathstartswith">includepathstartswith</a></code></td>
    <td rowspan="6">Token list</td>
    <td rowspan="6"></td>
    <td rowspan="6">0 or 1</td>
  </tr>
  <tr>
    <td><code><a name="excludepathstartswith" id="excludepathstartswith">excludepathstartswith</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includepathendswith" id="includepathendswith">includepathendswith</a></code></td>
  </tr>
  <tr>
    <td><code><a name="excludepathendswith" id="excludepathendswith">excludepathendswith</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includeports" id="includeports">includeports</a></code></td>
  </tr>
  <tr>
    <td><code><a name="excludeports" id="excludeports">excludeports</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includequerycontains" id="includequerycontains">includequerycontains</a></code></td>
    <td rowspan="2">Single value</td>
    <td rowspan="2"><code>delimiter</code> (any single character). Default value is: <code>&amp;</code> (&ldquo;ampersand&rdquo;)</td>
    <td rowspan="2">0 or 1</td>
    <td rowspan="2"><a href="#query">Section 2.1.2</a></td>
  </tr>
  <tr>
    <td><code><a name="excludequerycontains" id="excludequerycontains">excludequerycontains</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includeiripattern" id="includeiripattern">includeiripattern</a></code></td>
    <td rowspan="2">Single value</td>
    <td rowspan="2"></td>
    <td rowspan="2">0 or 1</td>
    <td rowspan="2"><a href="#wild">Section 2.2</a></td>
  </tr>
  <tr>
    <td><code><a name="excludeiripattern" id="excludeiripattern">excludeiripattern</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includeregex" id="includeregex">includeregex</a></code></td>
    <td rowspan="2">Single value</td>
    <td rowspan="2"></td>
    <td rowspan="2">Any number</td>
    <td rowspan="2"><a href="#reMatch">Section 2.3</a></td>
  </tr>
  <tr>
    <td><code><a name="excluderegex" id="excluderegex">excluderegex</a></code></td>
  </tr>
  <tr>
    <td><code><a name="includeresources" id="includeresources">includeresources</a></code></td>
    <td rowspan="2">Token list</td>
    <td rowspan="2"></td>
    <td rowspan="2">0 or 1</td>
    <td rowspan="2"><a href="#listing">Section 2.5</a></td>
  </tr>
  <tr>
    <td><code><a name="excluderesources" id="excluderesources">excluderesources</a></code></td>
  </tr>
  </tbody>
</table>
</body>
</html>