HTML_Data_Improvements
33.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" dir="ltr">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta name="generator" content="MediaWiki 1.15.5" />
<meta name="keywords" content="HTML Data Improvements,Html-data-tf" />
<link rel="shortcut icon" href="/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="/wiki/opensearch_desc.php" title="W3C Wiki (en)" />
<link rel="alternate" type="application/rss+xml" title="W3C Wiki RSS feed" href="/wiki/index.php?title=Special:RecentChanges&feed=rss" />
<link rel="alternate" type="application/atom+xml" title="W3C Wiki Atom feed" href="/wiki/index.php?title=Special:RecentChanges&feed=atom" />
<title>HTML Data Improvements - W3C Wiki</title>
<link rel="stylesheet" href="/wiki/skins/common/shared.css?207" type="text/css" media="screen" />
<link rel="stylesheet" href="/wiki/skins/common/commonPrint.css?207" type="text/css" media="print" />
<link rel="stylesheet" href="/wiki/skins/w3cmonobook/main.css?207" type="text/css" media="screen" />
<!--[if lt IE 5.5000]><link rel="stylesheet" href="/wiki/skins/w3cmonobook/IE50Fixes.css?207" type="text/css" media="screen" /><![endif]-->
<!--[if IE 5.5000]><link rel="stylesheet" href="/wiki/skins/w3cmonobook/IE55Fixes.css?207" type="text/css" media="screen" /><![endif]-->
<!--[if IE 6]><link rel="stylesheet" href="/wiki/skins/w3cmonobook/IE60Fixes.css?207" type="text/css" media="screen" /><![endif]-->
<!--[if IE 7]><link rel="stylesheet" href="/wiki/skins/w3cmonobook/IE70Fixes.css?207" type="text/css" media="screen" /><![endif]-->
<link rel="stylesheet" href="/wiki/index.php?title=MediaWiki:Common.css&usemsgcache=yes&ctype=text%2Fcss&smaxage=18000&action=raw&maxage=18000" type="text/css" />
<link rel="stylesheet" href="/wiki/index.php?title=MediaWiki:Print.css&usemsgcache=yes&ctype=text%2Fcss&smaxage=18000&action=raw&maxage=18000" type="text/css" media="print" />
<link rel="stylesheet" href="/wiki/index.php?title=MediaWiki:W3cmonobook.css&usemsgcache=yes&ctype=text%2Fcss&smaxage=18000&action=raw&maxage=18000" type="text/css" />
<link rel="stylesheet" href="/wiki/index.php?title=-&action=raw&maxage=18000&gen=css" type="text/css" />
<!--[if lt IE 7]><script type="text/javascript" src="/wiki/skins/common/IEFixes.js?207"></script>
<meta http-equiv="imagetoolbar" content="no" /><![endif]-->
<script type= "text/javascript">/*<![CDATA[*/
var skin = "w3cmonobook";
var stylepath = "/wiki/skins";
var wgArticlePath = "/wiki/$1";
var wgScriptPath = "/wiki";
var wgScript = "/wiki/index.php";
var wgVariantArticlePath = false;
var wgActionPaths = {};
var wgServer = "http://www.w3.org";
var wgCanonicalNamespace = "";
var wgCanonicalSpecialPageName = false;
var wgNamespaceNumber = 0;
var wgPageName = "HTML_Data_Improvements";
var wgTitle = "HTML Data Improvements";
var wgAction = "view";
var wgArticleId = "6022";
var wgIsArticle = true;
var wgUserName = null;
var wgUserGroups = null;
var wgUserLanguage = "en";
var wgContentLanguage = "en";
var wgBreakFrames = false;
var wgCurRevisionId = 55626;
var wgVersion = "1.15.5";
var wgEnableAPI = true;
var wgEnableWriteAPI = true;
var wgSeparatorTransformTable = ["", ""];
var wgDigitTransformTable = ["", ""];
var wgRestrictionEdit = [];
var wgRestrictionMove = [];
/*]]>*/</script>
<script type="text/javascript" src="/wiki/skins/common/wikibits.js?207"><!-- wikibits js --></script>
<!-- Head Scripts -->
<script type="text/javascript" src="/wiki/skins/common/ajax.js?207"></script>
<link rel="alternate" type="application/rdf+xml" title="HTML Data Improvements" href="/wiki/index.php?title=Special:ExportRDF/HTML_Data_Improvements&xmlmime=rdf" />
<script type="text/javascript" src="/wiki/index.php?title=-&action=raw&gen=js&useskin=w3cmonobook"><!-- site js --></script>
</head>
<body class="mediawiki ltr ns-0 ns-subject page-HTML_Data_Improvements skin-w3cmonobook">
<div id="globalWrapper">
<div id="column-content">
<div id="content">
<a id="top"></a>
<h1 id="firstHeading" class="firstHeading">HTML Data Improvements</h1>
<div id="bodyContent">
<h3 id="siteSub">From W3C Wiki</h3>
<div id="contentSub"></div>
<div id="jump-to-nav">Jump to: <a href="#column-one">navigation</a>, <a href="#searchInput">search</a></div> <!-- start content -->
<p>This page holds drafts and other tracking for improvements to microdata and RDFa developed by the <a href="/wiki/Html-data-tf" title="Html-data-tf">HTML Data TF</a>.
</p>
<ul><li> <a href="http://www.w3.org/Bugs/Public/buglist.cgi?product=HTML+WG&component=HTML+Microdata+%28editor%3A+Ian+Hickson%29" class="external text" title="http://www.w3.org/Bugs/Public/buglist.cgi?product=HTML+WG&component=HTML+Microdata+%28editor%3A+Ian+Hickson%29" rel="nofollow">Microdata bugs in W3C Bugzilla</a>
</li><li> <a href="http://www.w3.org/Bugs/Public/buglist.cgi?product=HTML+WG&component=HTML%2BRDFa+%28editor%3A+Manu+Sporny%29" class="external text" title="http://www.w3.org/Bugs/Public/buglist.cgi?product=HTML+WG&component=HTML%2BRDFa+%28editor%3A+Manu+Sporny%29" rel="nofollow">HTML+RDFa bugs in W3C Bugzilla</a>
</li><li> <a href="http://www.w3.org/2010/02/rdfa/track/products/2" class="external text" title="http://www.w3.org/2010/02/rdfa/track/products/2" rel="nofollow">tracker on RDFa 1.1 Core</a>
</li></ul>
<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1"><a href="#Raised_Bugs"><span class="tocnumber">1</span> <span class="toctext">Raised Bugs</span></a>
<ul>
<li class="toclevel-2"><a href="#Additional_XSD_Datatypes"><span class="tocnumber">1.1</span> <span class="toctext">Additional XSD Datatypes</span></a></li>
<li class="toclevel-2"><a href="#Link_Relations"><span class="tocnumber">1.2</span> <span class="toctext">Link Relations</span></a>
<ul>
<li class="toclevel-3"><a href="#1._Different_registries"><span class="tocnumber">1.2.1</span> <span class="toctext">1. Different registries</span></a></li>
<li class="toclevel-3"><a href="#2._Combined_link_relations"><span class="tocnumber">1.2.2</span> <span class="toctext">2. Combined link relations</span></a></li>
<li class="toclevel-3"><a href="#3._Non-document_link_relations"><span class="tocnumber">1.2.3</span> <span class="toctext">3. Non-document link relations</span></a></li>
<li class="toclevel-3"><a href="#4._Alias_link_relations"><span class="tocnumber">1.2.4</span> <span class="toctext">4. Alias link relations</span></a></li>
<li class="toclevel-3"><a href="#5._Misinterpretation_due_to_.40vocab"><span class="tocnumber">1.2.5</span> <span class="toctext">5. Misinterpretation due to @vocab</span></a></li>
<li class="toclevel-3"><a href="#6._Misinterpretation_due_to_RDFa_context"><span class="tocnumber">1.2.6</span> <span class="toctext">6. Misinterpretation due to RDFa context</span></a></li>
</ul>
</li>
</ul>
</li>
<li class="toclevel-1"><a href="#Potential_Bugs"><span class="tocnumber">2</span> <span class="toctext">Potential Bugs</span></a>
<ul>
<li class="toclevel-2"><a href="#Structured_Values"><span class="tocnumber">2.1</span> <span class="toctext">Structured Values</span></a></li>
<li class="toclevel-2"><a href="#URLs.2C_URIs_and_IRIs"><span class="tocnumber">2.2</span> <span class="toctext">URLs, URIs and IRIs</span></a></li>
</ul>
</li>
</ul>
</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
<a name="Raised_Bugs" id="Raised_Bugs"></a><h2> <span class="mw-headline"> Raised Bugs </span></h2>
<p>The following bugs have been raised with appropriate working groups.
</p>
<a name="Additional_XSD_Datatypes" id="Additional_XSD_Datatypes"></a><h3> <span class="mw-headline"> Additional XSD Datatypes </span></h3>
<p>HTML5's <code>time</code> element supports timezones and weeks as well as the usual date/time/duration datatypes. These new datatypes will be hard to map into RDF or XML. This has been raised with the XML Schema Working Group (see <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14881" class="external text" title="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14881" rel="nofollow">bug 14881</a>) but it's likely that they will reject the bug for lack of time and that W3C Notes will be required to define new <code>xs:timezone</code> and <code>xs:yearWeek</code> types to support this data.
</p>
<a name="Link_Relations" id="Link_Relations"></a><h3> <span class="mw-headline"> Link Relations </span></h3>
<p>This is background information and examples for <a href="http://www.w3.org/2010/02/rdfa/track/issues/108" class="external text" title="http://www.w3.org/2010/02/rdfa/track/issues/108" rel="nofollow">RDFA-ISSUE-108</a>.
</p><p>According to RDFa Core, if someone uses a recognised term within a <code>@rel</code> attribute then an RDFa processor will interpret that term based on the <a href="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#T-local-term-mappings" class="external text" title="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#T-local-term-mappings" rel="nofollow">local term mappings</a>. If the term isn't recognised, it will be interpreted based on the <a href="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#T-local-default-vocabulary" class="external text" title="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#T-local-default-vocabulary" rel="nofollow">local default vocabulary</a>. In HTML+RDFa 1.1, the local term mappings are based on the <a href="http://www.w3.org/2011/rdfa-context/html-rdfa-1.1.html" class="external text" title="http://www.w3.org/2011/rdfa-context/html-rdfa-1.1.html" rel="nofollow">IANA link relations</a> and the local default vocabulary is undefined. Unrecognised terms are therefore ignored in HTML+RDFa unless the <code>@vocab</code> attribute is used to provide a local default vocabulary.
</p><p>According to HTML5, there is a set of <a href="http://dev.w3.org/html5/spec/links.html#linkTypes" class="external text" title="http://dev.w3.org/html5/spec/links.html#linkTypes" rel="nofollow">built-in link types</a> with specified (and complex) semantics which is then extended using the <a href="http://microformats.org/wiki/existing-rel-values#HTML5_link_type_extensions" class="external text" title="http://microformats.org/wiki/existing-rel-values#HTML5_link_type_extensions" rel="nofollow">microformats wiki</a> as a <a href="http://dev.w3.org/html5/spec/links.html#other-link-types" class="external text" title="http://dev.w3.org/html5/spec/links.html#other-link-types" rel="nofollow">registry</a>.
</p><p>There are also microformats, in particular <a href="http://gmpg.org/xfn/" class="external text" title="http://gmpg.org/xfn/" rel="nofollow">XFN</a>, which rely on the <code>@rel</code> attribute. XFN looks for any relations within its <a href="http://gmpg.org/xfn/11" class="external text" title="http://gmpg.org/xfn/11" rel="nofollow">known list</a> and interprets these as pointers from the author of the document in which they are found to people with whom that author has a relationship.
</p><p>There are problems with this state of affairs in that documents may be interpreted differently when they are processed with an RDFa processor from when they are processed with an HTML5 processor.
</p>
<a name="1._Different_registries" id="1._Different_registries"></a><h4> <span class="mw-headline"> 1. Different registries </span></h4>
<p>The set of link relationships recognised by RDFa is different from that recognised by HTML. Some HTML link relations will be mapped into properties by RDFa, and some not; some relations recognised by RDFa will be ignored by HTML processors and some not. Publishers need to look at different lists to know what link relations are available.
</p>
<a name="2._Combined_link_relations" id="2._Combined_link_relations"></a><h4> <span class="mw-headline"> 2. Combined link relations </span></h4>
<p>RDFa breaks up the value of the <code>@rel</code> attribute on spaces and simply considers each term that results individually. HTML5 combines <code>alternate</code> and <code>stylesheet</code> to provide a single meaning ("alternate stylesheet"). However, under RDFa processing, a document containing:
</p>
<pre>&lt;link rel="alternate stylesheet" href="/styles/mobile.css" title="Mobile"&gt;
</pre>
<p>will create the triples:
</p>
<pre>&lt;&gt;
xhv:alternate &lt;/styles/mobile.css&gt; ;
xhv:stylesheet &lt;/styles/mobile.css&gt; ;
.
</pre>
<p>when this is not the intention of the author or the meaning understood by an HTML processor.
</p><p>The link relation <code>shortcut icon</code> is similarly treated specially by HTML processors (as a synonym for just <code>icon</code>).
</p>
<a name="3._Non-document_link_relations" id="3._Non-document_link_relations"></a><h4> <span class="mw-headline"> 3. Non-document link relations </span></h4>
<p>The <code>bookmark</code> HTML link relation specifies a relationship between the nearest ancestor section of the page and the linked resource. Assuming no other RDFa markup within the page, an RDFa processor will erroneously associate the bookmark with the HTML document as a whole instead.
</p><p>The example from the spec is:
</p>
<pre>&lt;body&gt;
&lt;h1&gt;Example of permalinks&lt;/h1&gt;
&lt;div id="a"&gt;
&lt;h2&gt;First example&lt;/h2&gt;
&lt;p&gt;&lt;a href="a.html" rel="bookmark"&gt;This&lt;/a&gt; permalink applies to
only the content from the first H2 to the second H2. The DIV isn't
exactly that section, but it roughly corresponds to it.&lt;/p&gt;
&lt;/div&gt;
&lt;h2&gt;Second example&lt;/h2&gt;
&lt;article id="b"&gt;
&lt;p&gt;&lt;a href="b.html" rel="bookmark"&gt;This&lt;/a&gt; permalink applies to
the outer ARTICLE element (which could be, e.g., a blog post).&lt;/p&gt;
&lt;article id="c"&gt;
&lt;p&gt;&lt;a href="c.html" rel="bookmark"&gt;This&lt;/a&gt; permalink applies to
the inner ARTICLE element (which could be, e.g., a blog comment).&lt;/p&gt;
&lt;/article&gt;
&lt;/article&gt;
&lt;/body&gt;
</pre>
<p>This will create the triples:
</p>
<pre>&lt;&gt;
xhv:bookmark &lt;a.html&gt; ;
xhv:bookmark &lt;b.html&gt; ;
xhv:bookmark &lt;c.html&gt; ;
.
</pre>
<p>which is not the intention of the document. The intention is more like:
</p>
<pre>&lt;#a&gt; xhv:bookmark &lt;a.html&gt; .
&lt;#b&gt; xhv:bookmark &lt;b.html&gt; .
&lt;#c&gt; xhv:bookmark &lt;c.html&gt; .
</pre>
<p><br />
</p>
<a name="4._Alias_link_relations" id="4._Alias_link_relations"></a><h4> <span class="mw-headline"> 4. Alias link relations </span></h4>
<p>Some link relations are aliases for others. For example, <code>copyright</code> is an alias for <code>license</code> — it means the same thing and should be treated in the same way by HTML processors. However, an RDFa processor will not normalise to the standard term.
</p><p>In addition, alias link relations are not valid within HTML documents. If they are not recognised as terms by RDFa processing and a publisher were to use them (coupled with <code>@vocab</code> such that they did not have a prefix) then they would be creating an invalid HTML document.
</p>
<a name="5._Misinterpretation_due_to_.40vocab" id="5._Misinterpretation_due_to_.40vocab"></a><h4> <span class="mw-headline"> 5. Misinterpretation due to @vocab </span></h4>
<p>When there is a local default vocabulary indicated by a <code>@vocab</code> attribute, and the term is not listed in the set that is recognised by RDFa, RDFa will interpret a link relation based on that vocabulary, while HTML and microformats semantics will not be affected. For example, say that <code>security.example.org</code> had a vocabulary for security disclosures about vulnerabilities in operating systems, and the RDFa in the page looked like:
</p>
<pre>&lt;a vocab="http://security.example.org/"
rel="disclosure" href="/vulnerability/4389"&gt;...&lt;/a&gt;
</pre>
<p>An RDFa 1.1 processor will interpret this as:
</p>
<pre>&lt;&gt; &lt;http://security.example.org/disclosure&gt; &lt;/vulnerability/4389&gt; .
</pre>
<p>whereas based on the extended list of link relations managed by the microformats wiki, an HTML processor will assume that the link is to a "list of patent disclosures or a particular patent disclosure itself made with respect to material for which such relation type is specified".
</p><p>Similarly, if the publisher had actually meant to use the HTML definition of the link relation, they might not realise that the in-scope <code>@vocab</code> attribute actually changes the meaning of the normal link relation for RDFa processors in a way that they didn't intend.
</p><p>There are also clashes with microformat link relations. For example:
</p>
<pre> &lt;a vocab="http://purl.org/dc/terms/"
rel="date" resource="http://reference.data.gov.uk/id/day/2011-11-15"&gt;15th November 2011&lt;/a&gt;
</pre>
<p>will result in a <code>dc:date</code> relationship under RDFa processing, but will be interpreted under XFN as a link to someone the author of the page is dating.
</p><p>This arises due to the mismatch between the RDFa and HTML sets of link relations, so is a side-effect of the problem of working from different lists of link relations; if all HTML and microformats link relations were reserved, there wouldn't be a problem, but if only a subset are reserved, the above becomes a problem.
</p>
<a name="6._Misinterpretation_due_to_RDFa_context" id="6._Misinterpretation_due_to_RDFa_context"></a><h4> <span class="mw-headline"> 6. Misinterpretation due to RDFa context </span></h4>
<p>The subject of link relations will often be different under RDFa and HTML processing. The subject of the HTML link relationships is usually (but not always) the document, whereas the subject of a property in RDFa is determined through the <code>@about</code>, <code>@typeof</code> etc attributes. An example is:
</p>
<pre>&lt;figure about="picture.jpg"&gt;
&lt;img src="picture.jpg"&gt;
&lt;figcaption&gt;&lt;a rel="license"
href="http://creativecommons.org/licenses/by-nc-nd/3.0/"&gt;CC-by-nc-nd&lt;/a&gt;&lt;/figcaption&gt;
&lt;/figure&gt;
</pre>
<p>In this case, an RDFa processor will associate the license with the image (<code>picture.jpg</code>) whereas an HTML processor will associate the license with the HTML page.
</p>
<a name="Potential_Bugs" id="Potential_Bugs"></a><h2> <span class="mw-headline"> Potential Bugs </span></h2>
<p>The following haven't been raised as bugs, but might be.
</p>
<a name="Structured_Values" id="Structured_Values"></a><h3> <span class="mw-headline"> Structured Values </span></h3>
<p>Unlike microformats and RDFa, microdata doesn't have any support for values that are HTML structures. This has previously been <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=13468" class="external text" title="http://www.w3.org/Bugs/Public/show_bug.cgi?id=13468" rel="nofollow">raised as a bug</a> which was resolved without change.
</p><p>There are examples in the <a href="http://schema.org/WebPage" class="external text" title="http://schema.org/WebPage" rel="nofollow">schema.org documentation</a> which use HTML structures without seeming to realise that they will be ignored. The <code>breadcrumb</code> property is described as "A set of links that can help a user understand and navigate a website hierarchy." The sole example of it is:
</p>
<pre>&lt;div itemprop="breadcrumb"&gt;
&lt;a href="category/books.html"&gt;Books&lt;/a&gt; &gt;
&lt;a href="category/books-literature.html"&gt;Literature &amp; Fiction&lt;/a&gt; &gt;
&lt;a href="category/books-classics"&gt;Classics&lt;/a&gt;
&lt;/div&gt;
</pre>
<p>Microdata processing dictates that the value of the <code>breadcrumb</code> property in this case is <code>"Books Literature & Fiction Classics"</code>. The HTML content of this property isn't preserved by microdata processing, so it isn't actually a set of links but rather a textual description of the context of the page.
</p><p>Other examples of microdata publishers assuming that the markup within content will be carried through into the data gleaned from the page <a href="http://lists.w3.org/Archives/Public/public-html-data-tf/2011Oct/0185.html" class="external text" title="http://lists.w3.org/Archives/Public/public-html-data-tf/2011Oct/0185.html" rel="nofollow">have been identified</a>:
</p><p>From <a href="http://www.goodreads.com/book/show/14770.Neuromancer" class="external free" title="http://www.goodreads.com/book/show/14770.Neuromancer" rel="nofollow">http://www.goodreads.com/book/show/14770.Neuromancer</a>
</p>
<pre>&lt;div class="infoBoxRowItem" itemprop='awards'&gt;
&lt;a href="/award/show/9-hugo-award" class="award"&gt;Hugo Award for Best Novel (1985)&lt;/a&gt;,
&lt;a href="/award/show/23-nebula-award" class="award"&gt;Nebula Award for Best Novel (1984)&lt;/a&gt;,
&lt;a href="/award/show/326-philip-k-dick-award" class="award"&gt;Philip K. Dick Award (1984)&lt;/a&gt;,
&lt;a href="/award/show/1403-john-w-campbell-memorial-award" class="award"&gt;John W. Campbell Memorial Award Nominee for Best Science Fiction Novel (1985)&lt;/a&gt;
&lt;/div&gt;
</pre>
<p>From <a href="http://www.telegraph.co.uk/foodanddrink/restaurants/8824409/The-Butchers-Arms-Woolhope-Herefordshire-restaurant-review.html" class="external free" title="http://www.telegraph.co.uk/foodanddrink/restaurants/8824409/The-Butchers-Arms-Woolhope-Herefordshire-restaurant-review.html" rel="nofollow">http://www.telegraph.co.uk/foodanddrink/restaurants/8824409/The-Butchers-Arms-Woolhope-Herefordshire-restaurant-review.html</a>
</p>
<pre>&lt;div id="mainBodyArea" itemprop="reviewBody"&gt;
&lt;div class="firstPar"&gt;
&lt;p&gt;
&lt;strong&gt;The Butchers Arms&lt;/strong&gt;, Woolhope, Herefordshire HR1 4RF &lt;br&gt;
&lt;strong&gt;Contact &lt;/strong&gt;01432 860281; food@butchersarmswoolhope.co.uk).&lt;br&gt;
&lt;strong&gt;Price &lt;/strong&gt;Three courses with a couple of pints, or half a bottle of
wine and coffee: £35-40 per head
&lt;/p&gt;
&lt;/div&gt;
&lt;div class="secondPar"&gt;
&lt;p&gt;
For a man whose first career was in advertising, Stephen Bull is no huge fan
of the hard sell. “It’s all much of a muchness, really,”
replied the owner of The Butchers Arms near Hereford when asked to recommend a
couple of his dishes. “All pretty mediocre.”
&lt;/p&gt;
...
&lt;/div&gt;
&lt;/div&gt;
</pre>
<a name="URLs.2C_URIs_and_IRIs" id="URLs.2C_URIs_and_IRIs"></a><h3> <span class="mw-headline"> URLs, URIs and IRIs </span></h3>
<p>The HTML5 rules on URL resolution are <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14693#c1" class="external text" title="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14693#c1" rel="nofollow">known to be broken</a> and will hopefully be fixed by reference to a revised IRI spec. This section documents the particular issues that arise from the current algorithm as it applies to RDFa processing.
</p><p>HTML5 uses the term URL throughout. The <a href="http://dev.w3.org/html5/spec/urls.html#valid-url" class="external text" title="http://dev.w3.org/html5/spec/urls.html#valid-url" rel="nofollow">definition that it uses for a valid URL</a> is:
</p>
<dl><dd>A URL is a valid URL if at least one of the following conditions holds:
</dd><dd>
<ul><li> The URL is a valid URI reference [RFC3986].
</li><li> The URL is a valid IRI reference and it has no query component. [RFC3987]
</li><li> The URL is a valid IRI reference and its query component contains no unescaped non-ASCII characters. [RFC3987]
</li><li> The URL is a valid IRI reference and the character encoding of the URL's Document is UTF-8 or a UTF-16 encoding. [RFC3987]
</li></ul>
</dd></dl>
<p>This allows IRIs to appear within documents so long as the character encoding of the document in which the URL is found is UTF-8 or UTF-16.
</p><p>DOM attributes whose values reflect HTML attributes whose values are URLs (such as <code>@href</code>, <code>@src</code>, <code>@itemid</code> and so on) are then resolved through the <a href="http://dev.w3.org/html5/spec/urls.html#resolving-urls" class="external text" title="http://dev.w3.org/html5/spec/urls.html#resolving-urls" rel="nofollow">HTML5 resolution algorithm</a>. This turns all IRIs into URIs by percent-encoding characters that aren't allowed in URIs and performs resolution based on URI rules from <a href="http://tools.ietf.org/html/rfc3986" class="external text" title="http://tools.ietf.org/html/rfc3986" rel="nofollow">RFC3986</a>. The results at the moment are always valid URIs.
</p><p>In <a href="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#s_curieprocessing" class="external text" title="http://www.w3.org/2010/02/rdfa/drafts/2011/ED-rdfa-core-20111020/#s_curieprocessing" rel="nofollow">RDFa-Core</a>, resolution of IRIs in all cases is done through the standard <b>IRI</b> resolution algorithm from <a href="http://www.ietf.org/rfc/rfc3987.txt" class="external text" title="http://www.ietf.org/rfc/rfc3987.txt" rel="nofollow">RFC3987</a>. The RDF restrictions on URIs used to identify resources are documented in its <a href="http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref" class="external text" title="http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref" rel="nofollow">abstract semantics</a>. This currently normalises IRIs to URIs by percent-encoding non-ASCII characters, so currently the effective RDF generated from RDFa will contain URIs.
</p><p>The new draft of the <a href="http://www.w3.org/TR/rdf11-concepts/#section-IRI-Vocabulary" class="external text" title="http://www.w3.org/TR/rdf11-concepts/#section-IRI-Vocabulary" rel="nofollow">RDF 1.1 abstract semantics</a> allows IRIs to be used as identifiers for resources. For future RDF 1.1 implementations, the effective RDF generated from RDFa will contain IRIs. Importantly, since these IRIs are being used as identifiers, their equivalence will be assessed through <a href="http://tools.ietf.org/html/rfc3987#section-5" class="external text" title="http://tools.ietf.org/html/rfc3987#section-5" rel="nofollow">string-equivalence</a> rather than by first normalising to URIs and then comparing.
</p><p>This mismatch between HTML5 and RDF is a problem for people using RDFa 1.0 and 1.1 because the resolution of those URL attributes defined in HTML5 (<code>@href</code>, <code>@src</code> etc) differs from the resolution of URL attributes defined by RDFa (<code>@resource</code>, <code>@typeof</code> etc). Specifically, normalising IRIs to URIs and then resolving according to RFC3986 (URI resolution), which is what HTML5 does, might not (?) produce the same results as resolving IRIs according to RFC3987 (IRI resolution) and then normalising to a URI, which is what RDFa does.
</p><p>It is also a problem when people are using RDFa and microdata side-by-side (or switching between them) because the URIs they use within <code>@itemid</code>, <code>@itemtype</code> and <code>@itemprop</code> will not be handled in the same way as those within <code>@about</code>, <code>@typeof</code> and <code>@property</code>, resulting in slightly different data in the two cases.
</p><p>These discrepancies will be worse when RDF 1.1 is standardised and used with HTML+RDFa, as at that point some of the identifiers generated from HTML+RDFa processing will be normalised to URIs (those in <code>@href</code>, <code>@src</code> etc attributes) while others will be IRIs (those in <code>@resource</code>, <code>@typeof</code>, <code>@property</code> etc attributes).
</p>
<!--
NewPP limit report
Preprocessor node count: 49/1000000
Post-expand include size: 0/2097152 bytes
Template argument size: 0/2097152 bytes
Expensive parser function count: 0/100
-->
<!-- Saved in parser cache with key wikidb-esw_:pcache:idhash:6022-0!1!0!!en!2!edit=0 and timestamp 20120115170537 -->
<div id='RDFa' about='http://www.w3.org/wiki/HTML_Data_Improvements' xmlns:wiki_1='http://www.w3.org/wiki/index.php/' xmlns:wiki_1_property='http://www.w3.org/wiki/index.php/Property:' xmlns:wiki_1_category='http://www.w3.org/wiki/index.php/Category:' typeof='wiki_1_categoryHTML Data TF'>
<div property='wiki_1_property:Modification_date' content='19 November 2011 20:49:49'></div>
</div><div style='display:none' ></div><div class="printfooter">
Retrieved from "<a href="http://www.w3.org/wiki/HTML_Data_Improvements">http://www.w3.org/wiki/HTML_Data_Improvements</a>"</div>
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks"><a href="/wiki/Special:Categories" title="Special:Categories">Category</a>: <span dir='ltr'><a href="/wiki/index.php?title=Category:HTML_Data_TF&action=edit&redlink=1" class="new" title="Category:HTML Data TF (page does not exist)">HTML Data TF</a></span></div></div> <!-- end content -->
<div class="visualClear"></div>
</div>
</div>
</div>
<div id="column-one">
<div id="p-cactions" class="portlet">
<h5>Views</h5>
<div class="pBody">
<ul>
<li id="ca-nstab-main" class="selected"><a href="/wiki/HTML_Data_Improvements" title="View the content page [c]" accesskey="c">Page</a></li>
<li id="ca-talk" class="new"><a href="/wiki/index.php?title=Talk:HTML_Data_Improvements&action=edit&redlink=1" title="Discussion about the content page [t]" accesskey="t">Discussion</a></li>
<li id="ca-viewsource"><a href="/wiki/index.php?title=HTML_Data_Improvements&action=edit" title="This page is protected. You can view its source [e]" accesskey="e">View source</a></li>
<li id="ca-history"><a href="/wiki/index.php?title=HTML_Data_Improvements&action=history" title="Past revisions of this page [h]" accesskey="h">History</a></li> </ul>
</div>
</div>
<div class="portlet" id="p-personal">
<h5>Personal tools</h5>
<div class="pBody">
<ul>
<li id="pt-login"><a href="/wiki/index.php?title=Special:UserLogin&returnto=HTML_Data_Improvements" title="You are encouraged to log in; however, it is not mandatory [o]" accesskey="o">Log in</a></li>
</ul>
</div>
</div>
<div class="portlet" id="p-logo">
<a style="background-image: url(/Icons/w3c_home);" href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"></a>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<div class='generated-sidebar portlet' id='p-navigation'>
<h5>Navigation</h5>
<div class='pBody'>
<ul>
<li id="n-mainpage"><a href="/wiki/Main_Page" title="Visit the main page">Main Page</a></li>
<li id="n-Browse-categories"><a href="/wiki/Special:Categories">Browse categories</a></li>
<li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="The list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li>
<li id="n-Help"><a href="http://www.mediawiki.org/wiki/Help:Contents">Help</a></li>
</ul>
</div>
</div>
<div id="p-search" class="portlet">
<h5><label for="searchInput">Search</label></h5>
<div id="searchBody" class="pBody">
<form action="/wiki/index.php" id="searchform"><div>
<input type='hidden' name="title" value="Special:Search"/>
<input id="searchInput" name="search" type="text" title="Search W3C Wiki [f]" accesskey="f" value="" />
<input type='submit' name="go" class="searchButton" id="searchGoButton" value="Go" title="Go to a page with this exact name if exists" />
<input type='submit' name="fulltext" class="searchButton" id="mw-searchButton" value="Search" title="Search the pages for this text" />
</div></form>
</div>
</div>
<div class="portlet" id="p-tb">
<h5>Toolbox</h5>
<div class="pBody">
<ul>
<li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/HTML_Data_Improvements" title="List of all wiki pages that link here [j]" accesskey="j">What links here</a></li>
<li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/HTML_Data_Improvements" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
<li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="List of all special pages [q]" accesskey="q">Special pages</a></li>
<li id="t-print"><a href="/wiki/index.php?title=HTML_Data_Improvements&printable=yes" rel="alternate" title="Printable version of this page [p]" accesskey="p">Printable version</a></li> <li id="t-permalink"><a href="/wiki/index.php?title=HTML_Data_Improvements&oldid=55626" title="Permanent link to this revision of the page">Permanent link</a></li><li id="t-smwbrowselink"><a href="/wiki/Special:Browse/HTML_Data_Improvements" title="Special:Browse/HTML Data Improvements">Browse properties</a></li> </ul>
</div>
</div>
</div><!-- end of the left (by default at least) column -->
<div class="visualClear"></div>
<div id="footer">
<div id="f-poweredbyico"><a href="http://www.mediawiki.org/"><img src="/wiki/skins/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" /></a></div>
<ul id="f-list">
<li id="lastmod"> This page was last modified on 19 November 2011, at 20:51.</li>
<li id="viewcount">This page has been accessed 7,871 times.</li>
<li id="privacy"><a href="/wiki/W3C_Wiki:Privacy_policy" title="W3C Wiki:Privacy policy">Privacy policy</a></li>
<li id="about"><a href="/wiki/W3C_Wiki:About" title="W3C Wiki:About">About W3C Wiki</a></li>
<li id="disclaimer"><a href="/wiki/W3C_Wiki:General_disclaimer" title="W3C Wiki:General disclaimer">Disclaimers</a></li>
</ul>
</div>
</div>
<script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
<!-- Served in 0.245 secs. --></body></html>