index.html
141 KB
<!--?xml version='1.0' encoding='UTF-8'?--><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html
xmlns="http://www.w3.org/1999/xhtml"
xml:lang="en" lang="en" dir="ltr">
<head>
<title>HTML Data Guide</title>
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
<!--
=== NOTA BENE === For the three scripts below, if your spec resides on dev.w3 you can check them
out in the same tree and use relative links so that they'll work offline, -->
<style type="text/css">/*****************************************************************
* ReSpec CSS
* Robin Berjon (robin at berjon dot com)
* v0.05 - 2009-07-31
*****************************************************************/
/* --- INLINES --- */em.rfc2119 {
text-transform: lowercase;
font-variant: small-caps;
font-style: normal;
color: #900;}
h1 acronym, h2 acronym, h3 acronym, h4 acronym, h5 acronym, h6 acronym, a acronym,h1 abbr, h2 abbr, h3 abbr, h4 abbr, h5 abbr, h6 abbr, a abbr {
border: none;}
dfn {
font-weight: bold;}
a.internalDFN {
color: inherit;
border-bottom: 1px solid #99c;
text-decoration: none;}
a.externalDFN {
color: inherit;
border-bottom: 1px dotted #ccc;
text-decoration: none;}
a.bibref {
text-decoration: none;}
code {
color: #ff4500;}
/* --- WEB IDL --- */pre.idl {
border-top: 1px solid #90b8de;
border-bottom: 1px solid #90b8de;
padding: 1em;
line-height: 120%;}
pre.idl::before {
content: "WebIDL";
display: block;
width: 150px;
background: #90b8de;
color: #fff;
font-family: initial;
padding: 3px;
font-weight: bold;
margin: -1em 0 1em -1em;}
.idlType {
color: #ff4500;
font-weight: bold;
text-decoration: none;}
/*.idlModule*//*.idlModuleID*//*.idlInterface*/.idlInterfaceID, .idlDictionaryID {
font-weight: bold;
color: #005a9c;}
.idlSuperclass {
font-style: italic;
color: #005a9c;}
/*.idlAttribute*/.idlAttrType, .idlFieldType, .idlMemberType {
color: #005a9c;}.idlAttrName, .idlFieldName, .idlMemberName {
color: #ff4500;}.idlAttrName a, .idlFieldName a, .idlMemberName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;}
/*.idlMethod*/.idlMethType {
color: #005a9c;}.idlMethName {
color: #ff4500;}.idlMethName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;}
/*.idlParam*/.idlParamType {
color: #005a9c;}.idlParamName {
font-style: italic;}
.extAttr {
color: #666;}
/*.idlConst*/.idlConstType {
color: #005a9c;}.idlConstName {
color: #ff4500;}.idlConstName a {
color: #ff4500;
border-bottom: 1px dotted #ff4500;
text-decoration: none;}
/*.idlException*/.idlExceptionID {
font-weight: bold;
color: #c00;}
.idlTypedefID, .idlTypedefType {
color: #005a9c;}
.idlRaises, .idlRaises a.idlType, .idlRaises a.idlType code, .excName a, .excName a code {
color: #c00;
font-weight: normal;}
.excName a {
font-family: monospace;}
.idlRaises a.idlType, .excName a.idlType {
border-bottom: 1px dotted #c00;}
.excGetSetTrue, .excGetSetFalse, .prmNullTrue, .prmNullFalse, .prmOptTrue, .prmOptFalse {
width: 45px;
text-align: center;}.excGetSetTrue, .prmNullTrue, .prmOptTrue { color: #0c0; }.excGetSetFalse, .prmNullFalse, .prmOptFalse { color: #c00; }
.idlImplements a {
font-weight: bold;}
dl.attributes, dl.methods, dl.constants, dl.fields, dl.dictionary-members {
margin-left: 2em;}
.attributes dt, .methods dt, .constants dt, .fields dt, .dictionary-members dt {
font-weight: normal;}
.attributes dt code, .methods dt code, .constants dt code, .fields dt code, .dictionary-members dt code {
font-weight: bold;
color: #000;
font-family: monospace;}
.attributes dt code, .fields dt code, .dictionary-members dt code {
background: #ffffd2;}
.attributes dt .idlAttrType code, .fields dt .idlFieldType code, .dictionary-members dt .idlMemberType code {
color: #005a9c;
background: transparent;
font-family: inherit;
font-weight: normal;
font-style: italic;}
.methods dt code {
background: #d9e6f8;}
.constants dt code {
background: #ddffd2;}
.attributes dd, .methods dd, .constants dd, .fields dd, .dictionary-members dd {
margin-bottom: 1em;}
table.parameters, table.exceptions {
border-spacing: 0;
border-collapse: collapse;
margin: 0.5em 0;
width: 100%;}table.parameters { border-bottom: 1px solid #90b8de; }table.exceptions { border-bottom: 1px solid #deb890; }
.parameters th, .exceptions th {
color: #fff;
padding: 3px 5px;
text-align: left;
font-family: initial;
font-weight: normal;
text-shadow: #666 1px 1px 0;}.parameters th { background: #90b8de; }.exceptions th { background: #deb890; }
.parameters td, .exceptions td {
padding: 3px 10px;
border-top: 1px solid #ddd;
vertical-align: top;}
.parameters tr:first-child td, .exceptions tr:first-child td {
border-top: none;}
.parameters td.prmName, .exceptions td.excName, .exceptions td.excCodeName {
width: 100px;}
.parameters td.prmType {
width: 120px;}
table.exceptions table {
border-spacing: 0;
border-collapse: collapse;
width: 100%;}
/* --- TOC --- */.toc a {
text-decoration: none;}
a .secno {
color: #000;}
/* --- TABLE --- */table.simple {
border-spacing: 0;
border-collapse: collapse;
border-bottom: 3px solid #005a9c;}
.simple th {
background: #005a9c;
color: #fff;
padding: 3px 5px;
text-align: left;}
.simple th[scope="row"] {
background: inherit;
color: inherit;
border-top: 1px solid #ddd;}
.simple td {
padding: 3px 10px;
border-top: 1px solid #ddd;}
.simple tr:nth-child(even) {
background: #f0f6ff;}
/* --- DL --- */.section dd > p:first-child {
margin-top: 0;}
.section dd > p:last-child {
margin-bottom: 0;}
.section dd {
margin-bottom: 1em;}
.section dl.attrs dd, .section dl.eldef dd {
margin-bottom: 0;}
/* --- EXAMPLES --- */pre.example {
border-top: 1px solid #ff4500;
border-bottom: 1px solid #ff4500;
padding: 1em;
margin-top: 1em;}
pre.example::before {
content: "Example";
display: block;
width: 150px;
background: #ff4500;
color: #fff;
font-family: initial;
padding: 3px;
font-weight: bold;
margin: -1em 0 1em -1em;}
/* --- EDITORIAL NOTES --- */.issue {
padding: 1em;
margin: 1em 0em 0em;
border: 1px solid #f00;
background: #ffc;}
.issue::before {
content: "Issue";
display: block;
width: 150px;
margin: -1.5em 0 0.5em 0;
font-weight: bold;
border: 1px solid #f00;
background: #fff;
padding: 3px 1em;}
.note {
margin: 1em 0em 0em;
padding: 1em;
border: 2px solid #cff6d9;
background: #e2fff0;}
.note::before {
content: "Note";
display: block;
width: 150px;
margin: -1.5em 0 0.5em 0;
font-weight: bold;
border: 1px solid #cff6d9;
background: #fff;
padding: 3px 1em;}
/* --- Best Practices --- */div.practice {
border: solid #bebebe 1px;
margin: 2em 1em 1em 2em;}
span.practicelab {
margin: 1.5em 0.5em 1em 1em;
font-weight: bold;
font-style: italic;}
span.practicelab { background: #dfffff; }
span.practicelab {
position: relative;
padding: 0 0.5em;
top: -1.5em;}
p.practicedesc {
margin: 1.5em 0.5em 1em 1em;}
@media screen {
p.practicedesc {
position: relative;
top: -2em;
padding: 0;
margin: 1.5em 0.5em -1em 1em;
}}
/* --- SYNTAX HIGHLIGHTING --- */pre.sh_sourceCode {
background-color: white;
color: black;
font-style: normal;
font-weight: normal;}
pre.sh_sourceCode .sh_keyword { color: #005a9c; font-weight: bold; } /* language keywords */pre.sh_sourceCode .sh_type { color: #666; } /* basic types */pre.sh_sourceCode .sh_usertype { color: teal; } /* user defined types */pre.sh_sourceCode .sh_string { color: red; font-family: monospace; } /* strings and chars */pre.sh_sourceCode .sh_regexp { color: orange; font-family: monospace; } /* regular expressions */pre.sh_sourceCode .sh_specialchar { color: #ffc0cb; font-family: monospace; } /* e.g., \n, \t, \\ */pre.sh_sourceCode .sh_comment { color: #A52A2A; font-style: italic; } /* comments */pre.sh_sourceCode .sh_number { color: purple; } /* literal numbers */pre.sh_sourceCode .sh_preproc { color: #00008B; font-weight: bold; } /* e.g., #include, import */pre.sh_sourceCode .sh_symbol { color: blue; } /* e.g., *, + */pre.sh_sourceCode .sh_function { color: black; font-weight: bold; } /* function calls and declarations */pre.sh_sourceCode .sh_cbracket { color: red; } /* block brackets (e.g., {, }) */pre.sh_sourceCode .sh_todo { font-weight: bold; background-color: #00FFFF; } /* TODO and FIXME */
/* Predefined variables and functions (for instance glsl) */pre.sh_sourceCode .sh_predef_var { color: #00008B; }pre.sh_sourceCode .sh_predef_func { color: #00008B; font-weight: bold; }
/* for OOP */pre.sh_sourceCode .sh_classname { color: teal; }
/* line numbers (not yet implemented) */pre.sh_sourceCode .sh_linenum { display: none; }
/* Internet related */pre.sh_sourceCode .sh_url { color: blue; text-decoration: underline; font-family: monospace; }
/* for ChangeLog and Log files */pre.sh_sourceCode .sh_date { color: blue; font-weight: bold; }pre.sh_sourceCode .sh_time, pre.sh_sourceCode .sh_file { color: #00008B; font-weight: bold; }pre.sh_sourceCode .sh_ip, pre.sh_sourceCode .sh_name { color: #006400; }
/* for Prolog, Perl... */pre.sh_sourceCode .sh_variable { color: #006400; }
/* for LaTeX */pre.sh_sourceCode .sh_italics { color: #006400; font-style: italic; }pre.sh_sourceCode .sh_bold { color: #006400; font-weight: bold; }pre.sh_sourceCode .sh_underline { color: #006400; text-decoration: underline; }pre.sh_sourceCode .sh_fixed { color: green; font-family: monospace; }pre.sh_sourceCode .sh_argument { color: #006400; }pre.sh_sourceCode .sh_optionalargument { color: purple; }pre.sh_sourceCode .sh_math { color: orange; }pre.sh_sourceCode .sh_bibtex { color: blue; }
/* for diffs */pre.sh_sourceCode .sh_oldfile { color: orange; }pre.sh_sourceCode .sh_newfile { color: #006400; }pre.sh_sourceCode .sh_difflines { color: blue; }
/* for css */pre.sh_sourceCode .sh_selector { color: purple; }pre.sh_sourceCode .sh_property { color: blue; }pre.sh_sourceCode .sh_value { color: #006400; font-style: italic; }
/* other */pre.sh_sourceCode .sh_section { color: black; font-weight: bold; }pre.sh_sourceCode .sh_paren { color: red; }pre.sh_sourceCode .sh_attribute { color: #006400; }
</style><link charset="utf-8" type="text/css" rel="stylesheet" href="http://www.w3.org/StyleSheets/TR/W3C-WD" />
</head>
<body style="display: inherit; ">
<div class="head">
<p><a href="http://www.w3.org/"><img width="72" height="48" alt="W3C" src="http://www.w3.org/Icons/w3c_home" /></a></p>
<h1 id="title" class="title">HTML Data Guide</h1>
<h2 id="w3c-working-draft-12-january-2012"><acronym title="World Wide Web Consortium">W3C</acronym>
Working Draft 12 January 2012</h2>
<dl>
<dt>This version:</dt>
<dd><a href="http://www.w3.org/TR/2012/WD-html-data-guide-20120112/">http://www.w3.org/TR/2012/WD-html-data-guide-20120112/</a></dd>
<dt>Latest published version:</dt>
<dd><a href="http://www.w3.org/TR/html-data-guide/">http://www.w3.org/TR/html-data-guide/</a></dd>
<dt>Latest editor's draft:</dt>
<dd><a href="https://dvcs.w3.org/hg/htmldata/raw-file/default/html-data-guide/index.html">https://dvcs.w3.org/hg/htmldata/raw-file/default/html-data-guide/index.html</a></dd>
<dt>Editor:</dt>
<dd><a href="http://www.jenitennison.com/blog/">Jeni Tennison</a>,
Independent</dd>
</dl>
<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a>
© 2012 <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>®</sup>
(<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>,
<a href="http://www.ercim.eu/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>,
<a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. <acronym
title="World Wide Web Consortium">W3C</acronym>
<a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
<a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a>
and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document
use</a> rules apply.</p>
<hr /></div>
<div class="introductory section" id="abstract">
<h2>Abstract</h2>
<p> Microformats, RDFa and microdata all enable consumers to extract data
from HTML pages. This data may be embedded within enhanced search engine
results, exposed to users through browser extensions, aggregated across
websites or used by scripts running within those HTML pages. </p>
<p> This guide aims to help publishers and consumers of HTML data use it
well. With several <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
and <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
to choose from, it provides guidance about how to decide which meets the
publisher's or consumer's needs. It discusses when it is necessary to
mix syntaxes and vocabularies and how to publish and consume data that
uses multiple formats. It describes how to create vocabularies that can
be used in multiple syntaxes and general best practices about the
publication and consumption of HTML data. </p>
</div>
<div class="introductory section" id="sotd">
<h2>Status of This Document</h2>
<p><em>This section describes the status of this document at the time of
its publication. Other documents may supersede this document. A list
of current <acronym title="World Wide Web Consortium">W3C</acronym>
publications and the latest revision of this technical report can be
found in the <a href="http://www.w3.org/TR/"><acronym title="World Wide Web Consortium">W3C</acronym>
technical reports index</a> at http://www.w3.org/TR/.</em></p>
<p>This document was published by the <a href="http://www.w3.org/2001/sw/interest/">HTML
Data Task Force, Semantic Web Interest Group</a> as a First Public
Working Draft. If you wish to make comments regarding this document,
please send them to <a href="mailto:public-html-data-tf@w3.org">public-html-data-tf@w3.org</a>
(<a href="mailto:public-html-data-tf-request@w3.org?subject=subscribe">subscribe</a>,
<a href="http://lists.w3.org/Archives/Public/public-html-data-tf/">archives</a>).
All feedback is welcome.</p>
<p>Publication as a Working Draft does not imply endorsement by the <acronym
title="World Wide Web Consortium">W3C</acronym>
Membership. This is a draft document and may be updated, replaced or
obsoleted by other documents at any time. It is inappropriate to cite
this document as other than work in progress. The disclosure obligations
of the Participants of this group are described in the <a href="http://www.w3.org/2003/12/swa/swig-charter">charter</a>.<br />
</p>
</div>
<div class="section" id="toc">
<h2 class="introductory">Table of Contents</h2>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#introduction"><span class="secno">1.
</span>Introduction</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#scope"><span class="secno">1.1
</span>Scope</a></li>
<li class="tocline"><a class="tocxref" href="#terminology"><span class="secno">1.2
</span>Terminology</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#publishers"><span class="secno">2.
</span>Publishers</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#choosing-a-publishing-format"><span
class="secno">2.1
</span>Choosing a Publishing Format</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#syntax-considerations"><span
class="secno">2.1.1
</span>Syntax Considerations</a></li>
<li class="tocline"><a class="tocxref" href="#vocabulary-considerations"><span
class="secno">2.1.2
</span>Vocabulary Considerations</a></li>
<li class="tocline"><a class="tocxref" href="#usability-considerations"><span
class="secno">2.1.3
</span>Usability Considerations</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#publishing-in-multiple-formats"><span
class="secno">2.2
</span>Publishing in Multiple Formats</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies"><span
class="secno">2.2.1
</span>Mixing Vocabularies</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies-in-microformats"><span
class="secno">2.2.1.1
</span>Mixing Vocabularies in Microformats</a></li>
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies-in-rdfa"><span
class="secno">2.2.1.2
</span>Mixing Vocabularies in RDFa</a></li>
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies-microdata"><span
class="secno">2.2.1.3
</span>Mixing Vocabularies in Microdata</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#mixing-syntaxes"><span
class="secno">2.2.2
</span>Mixing Syntaxes</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#dates-and-times"><span
class="secno">2.2.2.1
</span>Dates and Times</a></li>
<li class="tocline"><a class="tocxref" href="#link-relations"><span
class="secno">2.2.2.2
</span>Link relations</a></li>
<li class="tocline"><a class="tocxref" href="#microdata-and-rdfa-equivalencies"><span
class="secno">2.2.2.3
</span>Microdata and RDFa Equivalencies</a></li>
<li class="tocline"><a class="tocxref" href="#properties-within-links"><span
class="secno">2.2.2.4
</span>Properties Within Links</a></li>
<li class="tocline"><a class="tocxref" href="#datatypes"><span
class="secno">2.2.2.5
</span>Datatypes</a></li>
<li class="tocline"><a class="tocxref" href="#iris"><span class="secno">2.2.2.6
</span>IRIs</a></li>
</ul>
</li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#good-publishing-practice"><span
class="secno">2.3
</span>Good Publishing Practice</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#valid-html"><span
class="secno">2.3.1
</span>Valid HTML</a></li>
<li class="tocline"><a class="tocxref" href="#context-independence"><span
class="secno">2.3.2
</span>Context Independence</a></li>
<li class="tocline"><a class="tocxref" href="#testing"><span class="secno">2.3.3
</span>Testing</a></li>
<li class="tocline"><a class="tocxref" href="#clear-licensing"><span
class="secno">2.3.4
</span>Clear Licensing</a></li>
</ul>
</li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#consumers"><span class="secno">3.
</span>Consumers</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#choosing-a-syntax-to-consume"><span
class="secno">3.1
</span>Choosing a Syntax to Consume</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#application-considerations"><span
class="secno">3.1.1
</span>Application Considerations</a></li>
<li class="tocline"><a class="tocxref" href="#tooling-considerations"><span
class="secno">3.1.2
</span>Tooling Considerations</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#microdata-microformats-2-data-model"><span
class="secno">3.1.2.1
</span>Microdata/Microformats-2 Data Model</a></li>
<li class="tocline"><a class="tocxref" href="#rdf-data-model"><span
class="secno">3.1.2.2
</span>RDF Data Model</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#data-model-considerations"><span
class="secno">3.1.3
</span>Data Model Considerations</a></li>
<li class="tocline"><a class="tocxref" href="#usability-considerations-1"><span
class="secno">3.1.4
</span>Usability Considerations</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#consuming-multiple-formats"><span
class="secno">3.2
</span>Consuming Pages with Multiple Formats</a></li>
<li class="tocline"><a class="tocxref" href="#good-consumption-practice"><span
class="secno">3.3
</span>Good Consumption Practice</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#vocabulary-authors"><span
class="secno">4.
</span>Vocabulary Authors</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#extending-vocabularies"><span
class="secno">4.1
</span>Extending Vocabularies</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#extending-microformats"><span
class="secno">4.1.1
</span>Extending Microformats</a></li>
<li class="tocline"><a class="tocxref" href="#extending-rdf-vocabularies"><span
class="secno">4.1.2
</span>Extending RDF Vocabularies</a></li>
<li class="tocline"><a class="tocxref" href="#extending-microdata-vocabularies"><span
class="secno">4.1.3
</span>Extending Microdata Vocabularies</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#designing-vocabularies"><span
class="secno">4.2
</span>Designing Vocabularies</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#syntax-specific-requirements"><span
class="secno">4.2.1
</span>Syntax-Specific Requirements</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#microformat-vocabularies"><span
class="secno">4.2.1.1
</span>Microformat Vocabularies</a></li>
<li class="tocline"><a class="tocxref" href="#microdata-vocabularies"><span
class="secno">4.2.1.2
</span>Microdata Vocabularies</a></li>
<li class="tocline"><a class="tocxref" href="#rdfa-vocabularies"><span
class="secno">4.2.1.3
</span>RDFa Vocabularies</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#syntax-neutral-vocabularies"><span
class="secno">4.2.2
</span>Syntax-Neutral Vocabularies</a></li>
<li class="tocline"><a class="tocxref" href="#good-vocabulary-design-practices"><span
class="secno">4.2.3
</span>Good Vocabulary Design Practices</a></li>
</ul>
</li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#acknowledgements"><span class="secno">A.
</span>Acknowledgements</a></li>
<li class="tocline"><a class="tocxref" href="#multiple-types-microdata"><span
class="secno">B.
</span>Multiple Item Types in Microdata</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies-using-a-type-property"><span
class="secno">B.1
</span>Mixing Vocabularies using a Type Property</a></li>
<li class="tocline"><a class="tocxref" href="#mixing-vocabularies-using-repeated-content"><span
class="secno">B.2
</span>Mixing Vocabularies using Repeated Content</a></li>
</ul>
</li>
<li class="tocline"><a class="tocxref" href="#references"><span class="secno">C.
</span>References</a>
<ul class="toc">
<li class="tocline"><a class="tocxref" href="#normative-references"><span
class="secno">C.1
</span>Normative references</a></li>
<li class="tocline"><a class="tocxref" href="#informative-references"><span
class="secno">C.2
</span>Informative references</a></li>
</ul>
</li>
</ul>
</div>
<div class="section" id="introduction">
<!-- OddPage -->
<h2><span class="secno">1. </span>Introduction</h2>
<p> The first formal methods of embedding data within HTML pages were
those pioneered by the microformats community. These sought to
regularise the existing use of semantic classes and link relations
within HTML markup for common subject areas such as people,
organisations and events. </p>
<p> Since then, the practice of embedding HTML data within web pages has
gradually grown, particularly bolstered by search engines using embedded
data to supplement the appearance of entries within their result pages
and by the open linked data community seeking to bridge the gap between
documents and data on the web. HTML data is used in a variety of ways,
as evinced by the <a href="http://lists.w3.org/Archives/Public/public-html/2009May/0207.html">use
cases collected during the design of microdata</a>. Consumers of HTML
data include: </p>
<ul>
<li>scripting libraries</li>
<li>browsers and browser extensions</li>
<li>general-purpose search engines</li>
<li>vertical or domain-specific search engines</li>
<li>data reusers known and unknown to the publisher of the data</li>
</ul>
<p> There are currently three main <a class="internalDFN" href="#dfn-syntax"
title="syntax">syntaxes</a>
for embedding data within HTML pages: </p>
<dl>
<dt><a href="http://microformats.org">microformats</a></dt>
<dd>microformats use <code>@class</code>, <code>@rel</code> and other
attributes to encode data using standard HTML markup, and can be used
with other markup languages that have <code>@class</code> attributes.
Traditionally, different microformat <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
have followed different parsing rules, but <a href="http://microformats.org/wiki/microformats-2">microformats-2</a>
provides a standard parsing algorithm.</dd>
<dt><a href="http://www.w3.org/TR/rdfa-in-html/">RDFa</a></dt>
<dd>RDFa reuses existing HTML attributes such as <code>@href</code> and
<code>@rel</code> and adds a few of its own to enable data to be
extracted from HTML pages as RDF. RDFa was originally designed for
XHTML 1.1; its latest version (RDFa 1.1) is also usable with HTML5 and
other markup languages such as SVG.</dd>
<dt><a href="http://www.w3.org/TR/microdata/">microdata</a></dt>
<dd>Microdata adds attributes to HTML5 to provide machine-readable
descriptions of items within the page in terms of <a class="internalDFN"
href="#dfn-property"
title="property">properties</a>
and <a class="internalDFN" href="#dfn-value" title="value">values</a>
for those properties. It is designed to be used alongside detailed
specifications of how these descriptions should be processed by
consumers.</dd>
</dl>
<p> The three <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
are similar in goals but differ in approach. This document provides
guidance about how to choose between them and use them together as well
as some good practices for publishing, consuming and designing
vocabularies for HTML data. However, it is not intended to be a
general-purpose introduction to any of these syntaxes. As well as the
specifications themselves, examples and explanations can be found
within: </p>
<ul>
<li><a href="http://microformats.org/wiki">the microformats wiki</a></li>
<li><a href="http://www.w3.org/TR/rdfa-primer/">the RDFa primer</a></li>
<li><a href="http://schema.org">the schema.org website</a></li>
</ul>
<div class="section" id="scope">
<h3><span class="secno">1.1 </span>Scope</h3>
<p> There are many ways of publishing data on the web such that it
can be discovered from HTML pages or used by scripts and stylesheets
that operate over your page. </p>
<p> First, publishers may link to alternative versions of a document,
using different syntax, through a <code>link</code> element. The <code>@rel</code>
attribute should take the value <code>alternate</code> and the <code>@type</code>
attribute should provide the mime type of the alternative
representation. For example: </p>
<pre>&lt;link rel="alternate" type="text/calendar" href="calendar.ics" /&gt;</pre>
<p> Second, publishers may embed data within the <code>head</code> of
an HTML document, nested inside a <code>script</code> element with an
appropriate <code>@type</code> attribute. This can be used for
text-based formats, such as JSON or Turtle, as well as XML-based
formats. For example: </p>
<pre><strong>&lt;script type="text/turtle"&gt;</strong>
@prefix foaf: &lt;http://xmlns.com/foaf/0.1/&gt; .
@prefix gr: &lt;http://purl.org/goodrelations/v1#&gt; .
@prefix vcard: &lt;http://www.w3.org/2006/vcard/ns#&gt; .
@prefix xsd: &lt;http://www.w3.org/2001/XMLSchema#&gt; .
&lt;#company&gt; gr:hasPOS &lt;#store&gt; .
&lt;#store&gt; a gr:Location ;
gr:name "Hair Masters" ;
vcard:adr [
a vcard:Address ;
vcard:country-name "USA" ;
vcard:locality "Sebastopol" ;
vcard:postal-code "95472" ;
vcard:street-address "6980 Mckinley Ave" ;
] ;
foaf:page &lt;&gt; ;
.<strong>&lt;/script&gt;</strong></pre>
<p> Third, data can be embedded through <a href="http://dev.w3.org/html5/spec/global-attributes.html#custom-data-attribute">custom
data attributes</a>. These must not be used by third parties, but
can be useful when the only consumers of the data are scripts and
stylesheets used by the page. For example: </p>
<pre>&lt;div class="spaceship" <strong>data-ship-id="92432"
data-weapons="laser 2" data-shields="50%"
data-x="30" data-y="10" data-z="90"</strong>&gt;
&lt;button class="fire"
onclick="spaceships[this.parentNode.dataset.shipId].fire()"&gt;
Fire
&lt;/button&gt;&lt;/div&gt;</pre>
<p> This document focuses on methods of data markup that reuse visible
data within the page. Embedding data within an HTML page avoids
repetition, enables access through scripts and stylesheets, and makes
the data more easily discoverable by browsers and search engines, which
regularly consume HTML documents. </p>
</div>
<div class="section" id="terminology">
<h3><span class="secno">1.2 </span>Terminology</h3>
<p> Within this document, a <dfn id="dfn-format">format</dfn> is a
combination of a <a class="internalDFN" href="#dfn-syntax">syntax</a>
and <a class="internalDFN" href="#dfn-type" title="type">types</a>
and <a class="internalDFN" href="#dfn-property" title="property">properties</a>
from one or more <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>.
Traditional microformats do not make the distinction between syntax
and vocabulary, but RDFa, microdata and microformats-2 do make this
distinction. </p>
<p> In this document, a <dfn id="dfn-syntax">syntax</dfn> is a set of
conventions for parsing data from an HTML page into a data structure.
The three syntaxes discussed in this document are RDFa, microdata and
microformats-2. Each of these can be used with different <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>.
</p>
<p> A <dfn id="dfn-vocabulary">vocabulary</dfn> is a set of terms for
describing <a class="internalDFN" href="#dfn-entity" title="entity">entities</a>
within a particular domain. Different mechanisms are used for
describing vocabularies. A microformat vocabulary is described within
a wiki page. An RDFa vocabulary might be described through an RDFS
schema or OWL ontology provided at the vocabulary's URI. A microdata
vocabulary must be described within a specification that describes how
it is processed. </p>
<p> All three <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
follow a similar data model. Each is used to describe <dfn id="dfn-entity"
title="entity">entities</dfn>
— things such as people or events (RDFa calls these resources,
microdata calls these items). These entities each have one or more <dfn
id="dfn-type"
title="type">types</dfn>
which indicate what kind of thing they are and a number of <dfn id="dfn-property"
title="property">properties</dfn>
that have <dfn id="dfn-value" title="value">values</dfn>, which
provide the data about the entity. The main difference is that in the
RDF generated from RDFa, the entities are arranged in a graph, whereas
the default data model for microformats and microdata is a tree. </p>
<p> <a class="internalDFN" href="#dfn-type" title="type">Types</a>, <a
class="internalDFN"
href="#dfn-property"
title="property">properties</a>
and <a class="internalDFN" href="#dfn-entity" title="entity">entities</a>
can be identified in different ways. Microformats use short names.
RDFa, like RDF, uses <a class="externalDFN" href="http://tools.ietf.org/html/rfc3987">IRIs</a>,
while microdata uses <a class="externalDFN" href="http://dev.w3.org/html5/spec/urls.html#url">URLs</a>
as defined in HTML5. This document tries to use the appropriate term
(IRI or URL) when discussing identifiers, but sometimes uses the term
URL to mean a URL or IRI. See also <a class="sectionRef" href="#iris">section
2.2.2.6 IRIs</a> for more detail around the use of identifiers in
microdata and RDFa. </p>
</div>
</div>
<div class="section" id="publishers">
<!-- OddPage -->
<h2><span class="secno">2. </span>Publishers</h2>
<p> If you are publishing HTML data, you are likely to find that the
markup within your pages is simpler and easier to maintain if you only
use one <a class="internalDFN" href="#dfn-format">format</a> (<a class="internalDFN"
href="#dfn-syntax">syntax</a>
and <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>)
within each page. To decide which to use, your first consideration has
to be which consumers will read the data within your web pages, and
which formats they support. These may include: </p>
<ul>
<li>scripting libraries</li>
<li>browsers and browser extensions</li>
<li>general-purpose search engines</li>
<li>vertical or domain-specific search engines</li>
<li>data reusers with whom you have agreements</li>
</ul>
<p> Your second consideration may be the current state of the tooling to
support a particular format. For example: </p>
<dl>
<dt>Are you able to publish using HTML5?</dt>
<dd> If you are using a content-management system that doesn't support
adding new attributes such as <code>@itemprop</code> or <code>@typeof</code>
then you will be constrained to using microformats. </dd>
<dt>Are there development tools available?</dt>
<dd> Because it is not visible within a web page, it can be hard to tell
whether HTML data has been written correctly. Consumers should provide
validators that enable you to check that your data has been correctly
detected and interpreted, but you may also want to consider tool
support for generating the HTML data. </dd>
</dl>
<p class="note"> Microdata requires the use of attributes which are
introduced by HTML5 and RDFa can be used with XHTML 1.1 or HTML5, while
microformats can be used with all versions of HTML. Your organisation's
publishing guidelines may need to be brought up to date to sanction use
of microdata or RDFa. </p>
<p> Once you have considered both your target consumers and the tooling
support that is available, you will be in one of four situations: </p>
<ol>
<li><strong>with a single choice of format</strong> in which case there
are no further choices to be made</li>
<li><strong>unable to publish HTML data that your target consumers
understand</strong> in which case you either have to lobby those
consumers to add support for the format(s) you can publish in, or
consider changing your toolset so that you can publish in something
they understand</li>
<li><strong>still with a choice between a number of formats</strong> in
which case you will want to pick one to use; this is covered in <a class="sectionRef"
href="#choosing-a-publishing-format">section
2.1 Choosing a Publishing Format</a></li>
<li><strong>having to use multiple <a class="internalDFN" href="#dfn-format"
title="format">formats</a>
at the same time to provide data to all your target consumers</strong>
in which case you will need to mix formats within your pages; this is
covered in <a class="sectionRef" href="#publishing-in-multiple-formats">section
2.2 Publishing in Multiple Formats</a></li>
</ol>
<div class="section" id="choosing-a-publishing-format">
<h3><span class="secno">2.1 </span>Choosing a Publishing Format</h3>
<p> This section addresses a situation where all your target consumers
recognise a set of <a class="internalDFN" href="#dfn-format" title="format">formats</a>
(each with a particular <a class="internalDFN" href="#dfn-syntax">syntax</a>
and vocabulary), your toolset supports publishing in all of them, and
you need to make a choice about which of these formats to use. It's
assumed that you will want to choose a single format rather than
mixing multiple formats as described in <a class="sectionRef" href="#publishing-in-multiple-formats">section
2.2 Publishing in Multiple Formats</a>, as this will mean less
markup in your page and make your publishing task easier. </p>
<div class="section" id="syntax-considerations">
<h4><span class="secno">2.1.1 </span>Syntax Considerations</h4>
<p> The different <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
— microformats, microdata and RDFa — have different capabilities
which may inform your choice. </p>
<dl>
<dt>Structured HTML values</dt>
<dd> Under appropriate conditions, RDFa and microformats will use
markup within the content of an element to provide a <a class="internalDFN"
href="#dfn-property">property</a>
<a class="internalDFN" href="#dfn-value">value</a>; in microdata
values never retain markup. If property values within your page
contain markup (for example a <code>description</code> property
containing emphasized text, multiple paragraphs, tables and so
on), you may want to use RDFa or microformats to ensure that
structure is available to consumers of your pages. In RDFa, this
is done through adding <code>datatype="rdf:XMLLiteral"</code> to
the relevant element. In traditional microformats, the handling of
the content of an element is determined by the property; in
microformats-2, those that retain the HTML structure are named
with an <code>e-*</code> prefix, such as <code>e-content</code>.
</dd>
<dt>Language support</dt>
<dd> Microformats and RDFa use the language of the HTML elements in
the page (from the <code>@lang</code> attribute) to indicate the
language of relevant <a class="internalDFN" href="#dfn-value" title="value">values</a>.
In microdata, the <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
has to provide a separate mechanism to indicate a language. If you
have multi-lingual information in your pages, you may find it
easier to use microformats or RDFa than microdata. </dd>
<dt>CSS support</dt>
<dd> Because microformats generally use classes to mark up data
within an HTML page, it is easy to use CSS to style those elements
based on their type. For example <code>.hcard .n { font-weight:
bold; }</code> will embolden any person's name. This is a little
harder with microdata where the selector might be something like
<pre>[itemtype~="http://microformats.org/profile/hcard"] [itemprop~="n"]</pre>
or RDFa where it might be
<pre>[typeof~="foaf:Person"] [property~="foaf:name"]</pre>
If you are planning to style your page based on the data embedded
within it, you may find it easier to use microformats than either
microdata or RDFa; if you do style RDFa, you should plan for
dependencies between your CSS documents and any prefixes used
within it. </dd>
</dl>
<p class="issue"> The handling of language by microdata <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14470">may
change in the future</a>. </p>
</div>
<div class="section" id="vocabulary-considerations">
<h4><span class="secno">2.1.2 </span>Vocabulary Considerations</h4>
<p> <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">Vocabularies</a>
and <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
are closely tied together, especially in the case of microformats.
Aspects of a vocabulary to bear in mind are: </p>
<ul>
<li>How closely does it match with the information that you have?</li>
<li>How much support does it have? Are there tools for validating
and viewing it? Is there good documentation?</li>
<li>How stable is it? Who has control to make changes to it? How
frequently might those changes be made?</li>
<li>Are other consumers likely to adopt it in the future?</li>
</ul>
</div>
<div class="section" id="usability-considerations">
<h4><span class="secno">2.1.3 </span>Usability Considerations</h4>
<p> The usability of a particular <a class="internalDFN" href="#dfn-format">format</a>
is likely to depend on your existing expertise and the match between
the structure and content of your web pages and the required
structure and content of the format. The best thing to do is to try
using the format to mark up an example page from your site. </p>
</div>
</div>
<div class="section" id="publishing-in-multiple-formats">
<h3><span class="secno">2.2 </span>Publishing in Multiple Formats</h3>
<p> Publishing in multiple <a class="internalDFN" href="#dfn-format" title="format">formats</a>
can be easy. For example, it may be that different consumers expect
HTML data to appear in different places within the page, such as
Facebook requiring Open Graph Protocol data to appear within the <code>head</code>
of an HTML page, while schema.org markup appears in the <code>body</code>
of the page. Or it may be that the items that you need to mark up on
the page appear in different places — events listed in a sidebar while
company details are provided in a footer, for example. </p>
<p> Different <a class="internalDFN" href="#dfn-format" title="format">formats</a>
and <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
can be used independently in these circumstances. Consumers of the
data within your pages might read additional data if it is in a <a class="internalDFN"
href="#dfn-syntax">syntax</a>
that they recognise — for example, a processor that recognises both
RDFa and microdata will interpret all such markup in the page — but it
should ignore information that is in a vocabulary that it doesn't
understand rather than giving an error. </p>
<p> Publishing can be harder when there are multiple consumers of
information that require different <a class="internalDFN" href="#dfn-format"
title="format">formats</a>.
If your target consumers will all accept the same <a class="internalDFN"
href="#dfn-syntax">syntax</a>,
it is usually easiest to use that single syntax in your pages.
However, microdata does not support multiple <a class="internalDFN" href="#dfn-type"
title="type">types</a>
from different vocabularies for a single <a class="internalDFN" href="#dfn-entity">entity</a>, so
if your target consumers expect different <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
to be used for the same entities you may find it easier to mix
syntaxes or to use RDFa or microformats, which do support multiple
vocabularies. </p>
<div class="section" id="mixing-vocabularies">
<h4><span class="secno">2.2.1 </span>Mixing Vocabularies</h4>
<p> Methods for marking up the same data in a page using different <a
class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
in the same <a class="internalDFN" href="#dfn-syntax">syntax</a>
vary by syntax. </p>
<div class="section" id="mixing-vocabularies-in-microformats">
<h5><span class="secno">2.2.1.1 </span>Mixing Vocabularies in
Microformats</h5>
<p> As microformats are simply indicated through classes, it's
possible to mix several within the same set of content. An example
is the <a rel="nofollow" href="http://www.bbc.co.uk/worldservice/bangladeshboat/">BBC
Bangladesh River Journey</a> page which includes hAtom,
hCalendar and geo microformats: </p>
<pre><li class="<strong>hentry</strong> <strong>vevent</strong> xfolkentry postid-f2068841910">
<h3 class="<strong>entry-title</strong> <strong>summary</strong>">
<a href="http://www.flickr.com/photos/bangladeshboat/2068841910" title="The final picture (on Flickr)">The final picture</a>
</h3>
<div class="<strong>entry-content</strong>">
<p class="photo">
<a rel="<strong>bookmark</strong>" class="taggedlink <strong>url</strong>" href="http://www.flickr.com/photos/bangladeshboat/2068841910" title="The final picture (on Flickr)">
<img src="http://farm3.static.flickr.com/2175/2068841910_1162a8086b_s.jpg"
alt="The final picture (on Flickr)" title="The final picture (on Flickr)" width="64" height="64" />
</a>
</p>
<p class="<strong>description</strong>">As the BBC team prepare to disembark the boat, the sun sets overhead, and indeed on the trip itself.</p>
</div>
<ul class="meta">
<li class="date"><abbr class="published <strong>dtstart</strong>" <strong>title="2007-11-26T02:11:51+06:00"</strong>>2 days ago</abbr></li>
<li class="location"><abbr class="<strong>geo</strong> point-22" <strong>title="+22.47157;+89.59534"</strong>>Mongla, Bangladesh</abbr></li>
</ul></li></pre>
</div>
<div class="section" id="mixing-vocabularies-in-rdfa">
<h5><span class="secno">2.2.1.2 </span>Mixing Vocabularies in RDFa</h5>
<p> RDFa is designed to be used with multiple <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>:
</p>
<ul>
<li><a class="internalDFN" href="#dfn-type" title="type">types</a>
and <a class="internalDFN" href="#dfn-property" title="property">properties</a>
are given IRIs as names, so do not have to be disambiguated;
IRIs do not have to be written out in full (see below)</li>
<li> an <a class="internalDFN" href="#dfn-entity">entity</a> can
be assigned multiple <a class="internalDFN" href="#dfn-type" title="type">types</a>
from different <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
by listing them within the <code>@typeof</code> attribute</li>
<li> attributes that indicate <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
(<code>@property</code>, <code>@rel</code> and <code>@rev</code>)
can take multiple space-separated properties which may be from
different vocabularies</li>
</ul>
<p> Writing out IRIs in full can clutter HTML so RDFa provides three
mechanisms to shorten IRIs: </p>
<ul>
<li>There are several built-in prefixes which can be used for
popular vocabularies. These are listed as part of the <a href="http://www.w3.org/2011/rdfa-context/rdfa-1.1">RDFa
1.1 Core initial context</a>. Any IRI within one of these <a
class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
can be abbreviated using the <code>prefix:name</code> notation.</li>
<li>The <code>@prefix</code> attribute can be used to define
additional prefixes for other vocabularies.</li>
<li>The <code>@vocab</code> attribute defines a default <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
within its scope; any IRIs that begin with this vocabulary can
be abbreviated to a short name (the remainder of the IRI after
the vocabulary IRI).</li>
</ul>
<p> Note that if you use either of the last two mechanisms, the
shortened IRIs can only be understood when they are within the
scope of the relevant attributes. These can be easy to mislay when
people copy and paste HTML from one place to another, or as the
result of template changes in a content-management system. We
therefore recommend that these attributes are avoided where
possible — use the built-in prefixes or full IRIs in preference —
and, where they are used, placed on elements that represent <a class="internalDFN"
href="#dfn-entity"
title="entity">entities</a>
(those with <code>@about</code> or <code>@typeof</code>
attributes) and repeated on each entity element rather than being
inherited from an ancestor element. For more details, see <a class="sectionRef"
href="#context-independence">section
2.3.2 Context Independence</a>. </p>
</div>
<div class="section" id="mixing-vocabularies-microdata">
<h5><span class="secno">2.2.1.3 </span>Mixing Vocabularies in
Microdata</h5>
<p> Microdata is designed such that each piece of information in a
page is assigned <a class="internalDFN" href="#dfn-type" title="type">types</a>
from a single <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>,
though each <a class="internalDFN" href="#dfn-entity">entity</a>
may have multiple types and have <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
from other vocabularies. </p>
<p> <a class="internalDFN" href="#dfn-property" title="property">Properties</a>
in microdata are either short names (in which case they are scoped
to the <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
of the <a class="internalDFN" href="#dfn-type" title="type">types</a>
of the entity) or URLs. A URL property has no relationship to a
given short name property unless that relationship is specified
within the vocabulary that defines the properties. </p>
<p> You might find that you need to target two consumers who each
recognise items using <a class="internalDFN" href="#dfn-type" title="type">types</a>
from different <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>. For
example, you might want to both target schema.org and use the
vEvent vocabulary when providing data about an event. </p>
<p> In this case there are three options available to you. The
first, if consumers support it, is to use a different <a class="internalDFN"
href="#dfn-syntax">syntax</a>
for one of the <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>.
For example, the vEvent vocabulary is only supported in microdata
but schema.org can be consumed from either microdata or RDFa, so
it would be possible to mark up the data using the vEvent
vocabulary in microdata and the schema.org vocabulary in RDFa.
This approach is described in more detail in <a class="sectionRef"
href="#mixing-syntaxes">section
2.2.2 Mixing Syntaxes</a>. Mixing syntaxes within a single page
is rarely a good option but in some circumstances it may be
preferable to the other workarounds described here. </p>
<p> The second option is to use a <a class="internalDFN" href="#dfn-property">property</a>
that is treated by consumers as providing the <a class="internalDFN"
href="#dfn-type">type</a>
for an item, as if the <code>@itemtype</code> attribute had been
used. This requires <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
authors to define such a property for a given vocabulary. </p>
<p> The third option is to repeat the data markup, once in visible
content and once in hidden markup (either through <code>link</code>
and <code>meta</code> elements or in a section hidden using CSS).
</p>
<p> The second and third options are described in detail within <a class="sectionRef"
href="#multiple-types-microdata">section
B. Multiple Item Types in Microdata</a>. </p>
</div>
</div>
<div class="section" id="mixing-syntaxes">
<h4><span class="secno">2.2.2 </span>Mixing Syntaxes</h4>
<p> A requirement to support a large range of consumers can mean that
it becomes necessary to publish using not only multiple <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
but multiple syntaxes. </p>
<p> RDFa, microformats and microdata all share the same basic <a class="internalDFN"
href="#dfn-entity">entity</a>/<a
class="internalDFN"
href="#dfn-property">property</a>/<a
class="internalDFN"
href="#dfn-value">value</a>
model, so in many cases it is possible to mirror attributes across
the syntaxes. The following example shows the same content marked up
with: </p>
<ul>
<li>hCalendar (microformat)</li>
<li>schema.org (RDFa)</li>
<li>vEvent (microdata)</li>
</ul>
<pre><div <strong>class="vevent"</strong>
<strong>itemscope itemtype="http://microformats.org/profile/hcalendar#vevent"</strong>
<strong>vocab="http://schema.org/" typeof="Event"</strong>>
<a <strong>class="url" itemprop="url" property="url"</strong> href="nba-miami-philadelphia-game3.html">
NBA Eastern Conference First Round Playoff Tickets:
<span <strong>class="summary" itemprop="summary" property="name"</strong>> Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) </span>
</a>
<strong><time itemprop="dtstart" property="startDate" content="2016-04-21T20:00:00"></strong>
<abbr <strong>class="dtstart" title="2016-04-21T20:00:00"</strong>>
Thu, 04/21/16
8:00 p.m.
</abbr>
</time>
<div <strong>class="location" itemprop="location"
vocab="http://schema.org/" property="location" typeof="Place"</strong>>
<a <strong>property="url"</strong> href="wells-fargo-center.html">
Wells Fargo Center
</a>
<div <strong>property="address" vocab="http://schema.org/" typeof="PostalAddress"</strong>>
<span <strong>property="addressLocality"</strong>>Philadelphia</span>,
<span <strong>property="addressRegion"</strong>>PA</span>
</div>
</div></div></pre>
<p> A microformats processor will extract the data: </p>
<pre>{
"type": [ "vevent" ],
"properties": {
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"summary": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"dtstart": [ "2016-04-21T20:00:00" ],
"location": [ "\n \n \n Wells Fargo Center\n \n \n Philadelphia,\n PA\n \n \n " ]
}
}</pre>
<p> A microdata processor will extract something very similar, the
only difference being the URL of the type: </p>
<pre>{
"type": [ "http://microformats.org/profile/hcalendar#vevent" ],
"properties": {
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"summary": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"dtstart": [ "2016-04-21T20:00:00" ],
"location": [ "\n \n Wells Fargo Center\n \n \n Philadelphia,\n PA\n \n " ]
}
}</pre>
<p> while processors that <a href="https://dvcs.w3.org/hg/htmldata/raw-file/default/microdata-rdf/index.html">map
microdata to RDF</a> would extract the following RDF from the
microdata markup: </p>
<pre>@prefix hcal: &lt;http://microformats.org/profile/hcalendar#&gt; .
[] a hcal:vevent ;
hcal:url <http://example.com/nba-miami-philadelphia-game3.html> ;
hcal:summary " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ;
hcal:dtstart "2016-04-21T20:00:00"^^xsd:dateTime ;
hcal:location "\n \n Wells Fargo Center\n \n \n Philadelphia,\n PA\n \n " ;
.</pre>
<p> and an RDFa processor will extract the data provided through the
schema.org vocabulary: </p>
<pre>[] a schema:Event;
schema:location [
a schema:Place ;
schema:address [
a schema:PostalAddress ;
schema:addressLocality "Philadelphia" ;
schema:addressRegion "PA" ;
] ;
schema:url <http://example.com/wells-fargo-center.html> ;
] ;
schema:name " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ;
schema:startDate "2016-04-21T20:00:00"^^xsd:dateTime ;
schema:url <http://example.com/nba-miami-philadelphia-game3.html> ;
.</pre>
<p> It is particularly important to check pages in which <a class="internalDFN"
href="#dfn-syntax"
title="syntax">syntaxes</a>
are mixed together using an appropriate validator for each format. </p>
<p> The following guidelines may help when creating pages in which
different <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
are mixed together. </p>
<div class="section" id="dates-and-times">
<h5><span class="secno">2.2.2.1 </span>Dates and Times</h5>
<p> Microformats do not use <code>link</code> or <code>meta</code>
elements within the content of the page and in some cases require
particular elements to be used to encode information. In
particular, <code>abbr</code> must be used to support the <a href="http://microformats.org/wiki/datetime-design-pattern">datetime-design-pattern</a>.
Conversely, properties that hold dates and times must be marked up
using the <code>time</code> element in microdata. Using the <code>time</code>
element is also advantageous in RDFa, as it automatically confers
the appropriate datatype to the value. So when using both
microformats and RDFa or microdata, you must nest a <code>time</code>
element within an <code>abbr</code> element or vice versa, as
shown here: </p>
<pre><strong><time itemprop="dtstart" property="startDate" content="2016-04-21T20:00:00"></strong>
<abbr <strong>class="dtstart" title="2016-04-21T20:00:00"</strong>>
Thu, 04/21/16 8:00 p.m.
</abbr></time></pre>
<p> RDFa <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
are typically stricter in the range of <a class="internalDFN" href="#dfn-value"
title="value">values</a>
that they accept for <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
that take dates and times; it is best to use the syntax <code>YYYY-MM-DD</code>
for dates, <code>hh:mm:ss</code> for times and <code>YYYY-MM-DDThh:mm:ss</code>
for dateTimes to be compliant with the <a href="http://www.w3.org/TR/xmlschema-2/#dateTime">XML
Schema dates and times</a> which RDFa-based vocabularies will
typically use. </p>
<p class="issue"> It is likely that the HTML5 <code>time</code>
element will accept types of values that do not have an equivalent
XML Schema datatype. These should be avoided when using RDFa. See
<a href="https://www.w3.org/Bugs/Public/show_bug.cgi?id=14881">bug
14881</a>. </p>
</div>
<div class="section" id="link-relations">
<h5><span class="secno">2.2.2.2 </span>Link relations</h5>
<p> In (X)HTML5 markup, unprefixed values in the <code>@rel</code>
attribute will usually be ignored by RDFa processing unless there
is a <code>@vocab</code> attribute in scope, the exceptions being
<code>describedby</code>, <code>license</code> and <code>role</code>
which will be recognised as being part of the HTML <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>.
In RDFa in XHTML 1.1, some additional unprefixed values are
recognised as known terms and used to create triples. </p>
<p> Link relations required in certain microformats, particularly
XFN, clash with the use of RDFa's <code>@vocab</code> attribute.
For example: </p>
<pre><a vocab="http://purl.org/dc/terms/"
rel="date" href="http://reference.data.gov.uk/id/day/2011-11-15">15th November 2011</a></pre>
<p> will result in a <code>dc:date</code> relationship based on
RDFa processing, but XFN processing will assume that the link is
to someone whom the author of the HTML page is dating. </p>
<p> To avoid the <code>@rel</code> attribute being misinterpreted,
it is best to avoid using <code>@vocab</code> on any ancestor of
an element that contains a <code>@rel</code> attribute: use <code>@property</code>
instead to provide RDFa properties, and if you need to use <code>@rel</code>
attributes on your links, use prefixes instead of <code>@vocab</code>
in the RDFa markup. </p>
</div>
<div class="section" id="microdata-and-rdfa-equivalencies">
<h5><span class="secno">2.2.2.3 </span>Microdata and RDFa
Equivalencies</h5>
<p> When marking up RDFa alongside microdata, the following
equivalencies between attributes generally hold true: </p>
<ul>
<li><code>@itemid</code> = <code>@resource</code></li>
<li><code>@itemtype</code> = <code>@typeof</code> (+ <code>@vocab</code>
to enable the use of short names for properties)</li>
<li><code>@itemprop</code> + <code>@itemscope</code> = <code>@property</code>
+ an empty <code>@typeof</code> if there's no <code>@itemtype</code></li>
<li><code>@itemprop</code> otherwise = <code>@property</code></li>
</ul>
<p class="issue"> The guidance above does not adhere to the RDFa 1.1
Lite set of attributes, because of the use of the <code>@resource</code>
attribute rather than the <code>@about</code> attribute. However,
using <code>@resource</code> gives a more natural mapping when
mixing RDFa and microdata within a page. See <a href="http://www.w3.org/2010/02/rdfa/track/issues/119">ISSUE-119</a>.
</p>
</div>
<div class="section" id="properties-within-links">
<h5><span class="secno">2.2.2.4 </span>Properties Within Links</h5>
<p> When using RDFa, any <code>@property</code> attributes within
an element with a <code>@href</code> (i.e. a link) will be taken as
providing <a class="internalDFN" href="#dfn-property" title="property">properties</a>
of the <a class="internalDFN" href="#dfn-entity">entity</a>
identified by the URL in that <code>@href</code>. This is not the
case in microdata or microformats, where the <code>@href</code>
attribute is only ever used to provide a <a class="internalDFN" href="#dfn-value">value</a>
for a property. For example, the microdata: </p>
<pre><div itemscope itemtype="http://schema.org/AggregateRating">
Ratings:
<a href="ratings"
title="23,201 IMDb users have given an average vote of 7.2/10">
<span itemprop="ratingCount">23,201</span> users</a></div></pre>
<p> will generate an <code>http://schema.org/AggregateRating</code>
whose <code>ratingCount</code> is <code>23,201</code>. However,
the similar RDFa: </p>
<pre><div vocab="http://schema.org/" typeof="AggregateRating">
Ratings:
<a href="ratings"
title="23,201 IMDb users have given an average vote of 7.2/10">
<span property="ratingCount">23,201</span> users</a></div></pre>
<p> creates two unconnected statements: </p>
<pre>[] a schema:AggregateRating .
&lt;http://example.com/ratings&gt; schema:ratingCount "23,201" .</pre>
<p> If the link doesn't have a <code>@rel</code> attribute, as in
this example, you can avoid the <code>@href</code> attribute
creating a new subject by adding an empty <code>@property</code>
attribute to the link: </p>
<pre><div vocab="http://schema.org/" typeof="AggregateRating">
Ratings:
<a href="ratings" <strong>property=""</strong>
title="23,201 IMDb users have given an average vote of 7.2/10">
<span property="ratingCount">23,201</span> users</a></div></pre>
<p> If the link <em>does</em> have a <code>@rel</code> attribute,
it is usually easiest to move the relevant property outside the
link, for example: </p>
<pre><div vocab="http://schema.org/" typeof="AggregateRating">
Ratings:
<strong><span property="ratingCount" content="23201"></span></strong>
<a href="ratings" rel="nofollow"
title="23,201 IMDb users have given an average vote of 7.2/10">
23,201 users</a></div></pre>
<p> The alternative is to identify the subject explicitly using a <code>@resource</code>
attribute on both the outer element and the link element: </p>
<pre><div vocab="http://schema.org/" typeof="AggregateRating"
<strong>resource="_:rating"</strong>>
Ratings:
<a href="ratings" rel="nofollow" <strong>resource="_:rating"</strong>
title="23,201 IMDb users have given an average vote of 7.2/10">
<span property="ratingCount">23,201</span> users</a></div></pre>
<p> These three methods all generate the same RDF: </p>
<pre>[] a schema:AggregateRating ;
schema:ratingCount "23,201" ;
.</pre>
</div>
<div class="section" id="datatypes">
<h5><span class="secno">2.2.2.5 </span>Datatypes</h5>
<p> The <code>@datatype</code> attribute might be required for some
RDFa <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>/consumers;
others will coerce <a class="internalDFN" href="#dfn-value" title="value">values</a>
into the appropriate datatype based on the <a class="internalDFN"
href="#dfn-property">property</a>
itself. However, if a property takes a structured value, the
property element must have <code>datatype="rdf:XMLLiteral"</code>
for that structure to be preserved. </p>
</div>
<div class="section" id="iris">
<h5><span class="secno">2.2.2.6 </span>IRIs</h5>
<p> HTML defines some attributes, such as <code>@href</code> and <code>@src</code>,
as holding URLs. The <a href="http://dev.w3.org/html5/spec/urls.html#urls">currently
specified processing</a> of these URLs results in non-URI
characters within IRIs being percent-encoded. This also happens
with microdata attributes such as <code>@itemid</code> and <code>@itemtype</code>.
</p>
<p> This normalisation does not happen in attributes defined in
RDFa, such as <code>@resource</code> and <code>@property</code>:
IRIs provided in these attributes will be passed into the
extracted RDF as IRIs. </p>
<p> This discrepancy means that when using RDFa, you have to be
careful to use URIs only (by percent-encoding IRIs) or avoid using
the HTML-defined attributes such as <code>@href</code> or <code>@src</code>.
For example: </p>
<pre><p resource="#menu">
<a property="eg:wine" <strong>href="#rosé"</strong>>Rosé</a>
...</p>
...<p <strong>resource="#rosé"</strong>>
<span property="eg:description">This Californian wine...</span></p></pre>
<p> will result in the RDF: </p>
<pre>&lt;#menu&gt; eg:wine &lt;#ros%E9&gt; .
&lt;#rosé&gt; eg:description "This Californian wine..." .</pre>
<p> The URL in the <code>@href</code> attribute is percent-encoded,
while the one from the <code>@resource</code> attribute is not;
while the URLs appear identical in the HTML, in the RDF, they
refer to distinct <a class="internalDFN" href="#dfn-entity" title="entity">entities</a>.
</p>
<p> This can be avoided by percent-encoding the non-URI characters
within the original HTML: </p>
<pre><p resource="#menu">
<a property="eg:wine" <strong>href="#ros%E9"</strong>>Rosé</a>
...</p>
...<p <strong>resource="#ros%E9"</strong>>
<span property="eg:description">This Californian wine...</span></p></pre>
<p> which will result in: </p>
<pre>&lt;#menu&gt; eg:wine &lt;#ros%E9&gt; .
&lt;#ros%E9&gt; eg:description "This Californian wine..." .</pre>
<p> or by using the <code>@resource</code> attribute to provide the
IRI value for a property: </p>
<pre><p resource="#menu">
<a property="eg:wine" <strong>resource="#rosé"</strong> href="#rosé">Rosé</a>
...</p>
...<p <strong>resource="#rosé"</strong>>
<span property="eg:description">This Californian wine...</span></p></pre>
<p> which will result in: </p>
<pre>&lt;#menu&gt; eg:wine &lt;#rosé&gt; .
&lt;#rosé&gt; eg:description "This Californian wine..." .</pre>
<p> Similar considerations apply when mixing microdata or
microformats with RDFa, since the identifiers used within the
microdata or microformats will be URIs rather than IRIs. </p>
<p> It is good practice for vocabulary authors to state whether any
further normalisation occurs when interpreting URL values, and to
either avoid using IRIs for property names or explicitly state the
equivalence between IRIs and the percent-encoded URI versions of <a
class="internalDFN"
href="#dfn-property">property</a>
and <a class="internalDFN" href="#dfn-type">type</a> identifiers
that will be generated from microdata markup. </p>
</div>
</div>
</div>
<div class="section" id="good-publishing-practice">
<h3><span class="secno">2.3 </span>Good Publishing Practice</h3>
<p> There are a number of practices which can help ensure good quality
HTML Data that can be easily reused by consumers. </p>
<div class="section" id="valid-html">
<h4><span class="secno">2.3.1 </span>Valid HTML</h4>
<p> Valid HTML is particularly important in pages that contain
embedded markup. All methods of embedding data within HTML use the
structure of the HTML to determine the meaning of the additional
markup. For example, in microdata the item to which an element with
an <code>@itemprop</code> attribute assigns a <a class="internalDFN"
href="#dfn-property">property</a>
is usually the closest ancestor element with an <code>@itemscope</code>
attribute. </p>
<p> In some cases, elements can be moved when HTML is parsed into a
DOM. This can lead to <a class="internalDFN" href="#dfn-property" title="property">properties</a>
unexpectedly referring to the wrong entity, and, if you are serving
your documents as XHTML (with an <code>application/xhtml+xml</code>
MIME type), it can cause discrepancies between the data gleaned by
XML-based consumers and HTML-aware consumers. There are two causes
for this: </p>
<ul>
<li> Error correction in HTML parsing can restructure invalid HTML
to make it valid; for example, non-table markup
within a table is moved to before the table. This includes <code>link</code>
and <code>meta</code> elements that are directly within the <code>table</code>
element. You can avoid this restructuring by making sure that your
HTML is valid so that it is not needed. </li>
<li> Firefox 3.5 and 3.6 move <code>meta</code> elements in the <code>body</code>
of an HTML document to within the <code>head</code> element,
because they cannot validly appear within the body in older
versions of HTML. If you are targeting consumers which run within
these old browsers, such as scripts or extensions, you can avoid
this restructuring by using empty <code>span</code> or other
elements instead of <code>link</code> or <code>meta</code>;
other consumers should be using an up-to-date HTML5 parser which
will not do this. </li>
</ul>
</div>
<div class="section" id="context-independence">
<h4><span class="secno">2.3.2 </span>Context Independence</h4>
<p> One of the ways in which people learn how to publish information
on the web is to view the source of other web pages and copy
portions of their contents into their own pages. It is also common
for web pages to be constructed from templates and for these to
change as the result of site redesigns. In both these situations, it
can be easy to lose any context information that is used to
interpret the HTML Data embedded within the page. </p>
<p> To help preserve relevant context information: </p>
<ul>
<li>when using microformats, use microformats-2 if possible as the
prefixed classnames are less likely to be changed during site
redesigns; use the top-most microformat class as near as possible
to the properties of the relevant <a class="internalDFN" href="#dfn-entity">entity</a></li>
<li>when using RDFa, avoid using namespace declarations, <code>@prefix</code>
or <code>@vocab</code>; if you do use them, add them as close to
the elements that use the prefixes or vocabulary as possible</li>
<li>when using microdata, add the <code>@itemscope</code> attribute
as closely as possible to the data and use <code>@itemtype</code>
where a relevant <a class="internalDFN" href="#dfn-type">type</a>
is available rather than relying on consumers to infer the type</li>
</ul>
</div>
<div class="section" id="testing">
<h4><span class="secno">2.3.3 </span>Testing</h4>
<p> It is good practice to test the data that you expose within your
page against a parser that will show you the data your page
contains. It is also good practice to test the data that you expose
using a tool that understands the <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
you are using. Consumers may provide testing tools and validators
for this purpose, or you may need to check the way that
vocabulary-specific tools behave with your data. </p>
<p> If you are constructing your page from a database, another good
testing approach is to compare the data extracted from the page with
the data extracted directly from the database. </p>
</div>
<div class="section" id="clear-licensing">
<h4><span class="secno">2.3.4 </span>Clear Licensing</h4>
<p> The goal of publishing HTML data is to enable consumers to reuse
it. To make it clear how the HTML data you publish can be reused,
you should include information about the rights holder and license
that the information is made under. There are a number of <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
that enable you to do this, such as schema.org, rel-license,
Creative Commons and Dublin Core. Your target consumers should
indicate which <a class="internalDFN" href="#dfn-format" title="format">formats</a>
they understand when it comes to expressing licensing information
and which licenses they know about, and you should choose a relevant
<a class="internalDFN" href="#dfn-format">format</a> in the same way
as you do for the core data that you are publishing. </p>
</div>
</div>
</div>
<div class="section" id="consumers">
<!-- OddPage -->
<h2><span class="secno">3. </span>Consumers</h2>
<p> You will find it easier to consume and combine data published using a
single <a class="internalDFN" href="#dfn-format">format</a> (syntax and
vocabulary). To decide which to consume, you should first look at what
formats your target publishers are currently using. It may be that these
contain sufficient information for your application. </p>
<p> If the publishers whom you are targeting are already publishing using
multiple formats, you may want to consume from all those formats (see <a
class="sectionRef"
href="#consuming-multiple-formats">section
3.2 Consuming Pages with Multiple Formats</a>) in order to maximise
the data that you can collect while minimising the impact on the
publishers who are providing that information. If you are consuming
microdata and storing the results as RDF, you should follow a <a href="https://dvcs.w3.org/hg/htmldata/raw-file/default/microdata-rdf/index.html">standard
mapping</a>. </p>
<p> If current <a class="internalDFN" href="#dfn-format" title="format">formats</a>
do not encode the information you need in the detail you need for
your application, publishers will be more likely to publish extra data
for you to consume if you: </p>
<ul>
<li><a href="#extending-vocabularies">extend existing common
vocabularies</a> they are already using</li>
<li>consume data from a <a class="internalDFN" href="#dfn-syntax">syntax</a>
they already use</li>
</ul>
<p> If you cannot simply extend an existing <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>,
you will need to create your own vocabulary and choose which <a class="internalDFN"
href="#dfn-syntax"
title="syntax">syntaxes</a>
to support with that vocabulary. </p>
<div class="section" id="choosing-a-syntax-to-consume">
<h3><span class="secno">3.1 </span>Choosing a Syntax to Consume</h3>
<p> As you choose a syntax, you should take into account the following
considerations. </p>
<div class="section" id="application-considerations">
<h4><span class="secno">3.1.1 </span>Application Considerations</h4>
<p> Microdata, RDFa and microformats-2 all use a generic <a class="internalDFN"
href="#dfn-syntax">syntax</a>,
which means that it's possible to have generic parsers operate over
them to extract data. In the case of microdata and microformats-2,
the data has a JSON structure; data extracted from RDFa has an RDF
structure (microdata can also be converted into RDF). </p>
<p> Generic applications can work in the browser to do things such as
highlighting markup that follows a particular syntax or enabling
users to download the data embedded within a page into a separate
file. These can also use the context in which the HTML data is found
to provide additional features. For example, generic consumers may
detect that each row in a table is associated with a distinct <a class="internalDFN"
href="#dfn-entity">entity</a>,
and each cell with a particular <a class="internalDFN" href="#dfn-property">property</a>,
and enable users to sort that table based on property <a class="internalDFN"
href="#dfn-value"
title="value">values</a>.
In this case, a consumer could ensure that when values are marked up
as dates, times or durations using the <code>time</code> element,
the items are sorted by date/time/duration rather than
alphabetically. </p>
<p> Both microformats-2 and RDFa provide additional facilities that
enable publishers to indicate the datatypes of <a class="internalDFN" href="#dfn-value" title="value">values</a> to
support generic consumers. Microformats-2 properties have a prefix
that can indicate when a value is a URL (<code>u-*</code>), a
date/time (<code>dt-*</code>), extended HTML (<code>e-*</code>) or a
string (<code>p-*</code>). RDFa supports a <code>@datatype</code>
attribute that publishers can use to indicate the datatype of a
value, usually an XML Schema datatype such as <code>xsd:integer</code>
or <code>xsd:language</code>. Note that once microformats-2 data is
extracted from a page into JSON, these prefixes are no longer
available, so a consumer of the JSON has to know the <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
to tell whether a given value should be interpreted as a string or
as HTML markup, for example. In contrast, the datatypes used to
annotate RDFa values are carried within the RDF data. </p>
<p> RDFa also adheres to a follow-your-nose principle, whereby <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
authors are encouraged to provide a machine-readable description of
<a class="internalDFN" href="#dfn-type" title="type">types</a> and <a
class="internalDFN"
href="#dfn-property"
title="property">properties</a>
at the URL used for the type or property. This can enable generic
processors to automatically pick up additional information about the
type or property such as labels, help text, supertypes, property
cardinality and ranges and so on. While microdata also uses URLs for
types and properties, microdata consumers are not permitted to
dereference URLs that they do not already recognise. </p>
</div>
<div class="section" id="tooling-considerations">
<h4><span class="secno">3.1.2 </span>Tooling Considerations</h4>
<p> Applications vary widely in terms of the tooling that they need. A
script that runs in a publisher's page needs easy access to data
through a DOM API. A crawler that creates a store of data from a set
of distributed pages requires a server-side parser and good storage
and querying support. </p>
<p> As a consumer, you will be led by the requirements you have for
your application and the experience that you have with different
technology sets. It's important, however, to also consider the
experience and capabilities of the publishers that are providing you
with data, and which <a class="internalDFN" href="#dfn-format" title="format">formats</a>
they will find easy to publish given their tooling. You should also
consider the ease with which you can provide support tools for the
format, such as validators or previewers that make it easy for
publishers to tell whether they have published data correctly within
their pages. </p>
<p> There are several specifications that can be used to provide
standard mechanisms for accessing, manipulating, querying and
validating data gleaned from HTML pages. However, you should check
what has been implemented in your environment: it may be that there
isn't an implementation that follows a standard, but there is one
that provides its own API which enables you to do what you need to
do. </p>
<div class="section" id="microdata-microformats-2-data-model">
<h5><span class="secno">3.1.2.1 </span>Microdata/Microformats-2
Data Model</h5>
<p> Microdata and microformats-2 can be mapped to the same <a rel="nofollow"
href="http://dev.w3.org/html5/md/Overview.html#json">basic
(JSON) data model</a>. Processing JSON into native programming
structures, in JavaScript and other languages, is usually very
easy. Vocabularies are usually described in specification prose
rather than a formal language. </p>
<ul>
<li><a href="http://dev.w3.org/html5/md/Overview.html#microdata-dom-api">microdata
DOM API</a> — part of microdata specification (<acronym title="World Wide Web Consortium">W3C</acronym>
Last Call Working Draft)</li>
<li><a href="http://tools.ietf.org/html/draft-zyp-json-schema-03">JSON
Schema</a> — schema language for JSON (IETF Internet Draft)</li>
</ul>
</div>
<div class="section" id="rdf-data-model">
<h5><span class="secno">3.1.2.2 </span>RDF Data Model</h5>
<p> RDFa processors extract an RDF data model and processors can
also generate <a href="https://dvcs.w3.org/hg/htmldata/raw-file/default/microdata-rdf/index.html">RDF
from microdata</a>. There are a number of standards for
alternative serialisations of RDF graphs that target different
toolchains, formally expressing RDF <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
and querying RDF, and drafts in progress for DOM-based
manipulation of RDFa content. </p>
<ul>
<li><a href="http://www.w3.org/TR/REC-rdf-syntax/">RDF/XML</a> —
XML-based serialisation of RDF (<acronym title="World Wide Web Consortium">W3C</acronym>
Recommendation)</li>
<li><a href="http://www.w3.org/TR/turtle/">Turtle</a> — text-based
serialisation of RDF (<acronym title="World Wide Web Consortium">W3C</acronym>
Working Draft)</li>
<li> <a href="http://json-ld.org/spec/latest/">JSON-LD</a> —
JSON-based serialisation of RDF (Unofficial Draft)</li>
<li><a href="http://www.w3.org/TR/rdf-schema/">RDFS</a> — vocabulary
description language for RDF (<acronym title="World Wide Web Consortium">W3C</acronym>
Recommendation)</li>
<li><a href="http://www.w3.org/TR/owl-primer/">OWL</a> — ontology
language for RDF (<acronym title="World Wide Web Consortium">W3C</acronym>
Recommendation)</li>
<li><a href="http://www.w3.org/TR/rdf-sparql-query/">SPARQL</a> —
query language for RDF (<acronym title="World Wide Web Consortium">W3C</acronym>
Recommendation)</li>
<li><a href="http://www.w3.org/TR/sparql11-overview/">SPARQL 1.1</a>
— <acronym title="World Wide Web Consortium">W3C</acronym>
Working Draft</li>
<li><a href="http://www.w3.org/TR/rdfa-api/">RDFa API</a> — <acronym
title="World Wide Web Consortium">W3C</acronym>
Working Draft</li>
</ul>
</div>
</div>
<div class="section" id="data-model-considerations">
<h4><span class="secno">3.1.3 </span>Data Model Considerations</h4>
<p> Microdata uses a JSON-based data model of a tree of objects which
may be identified through a URI, with <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
whose <a class="internalDFN" href="#dfn-value" title="value">values</a>
are strings. Microformats-2 uses a similar JSON-based data model of
a tree of objects, but they do not have identifiers and their
property values may be strings, URLs, date/times or structured HTML
values. RDFa uses RDF as its data model, which is a graph of objects
identified by URLs with properties whose values may be other
objects, lists or literal values which can be tagged with a language
or any datatype. These different models have different capabilities.
</p>
<dl>
<dt>Structured HTML values</dt>
<dd> Under appropriate conditions, RDFa and microformats will use
markup within the content of an element to provide a <a class="internalDFN"
href="#dfn-property">property</a>
value; in microdata <a class="internalDFN" href="#dfn-value" title="value">values</a>
never retain markup. If you wish to consume data that may contain
markup — be it structures such as multiple paragraphs, list items,
tables, or inline markup such as emphases, links or ruby markup —
you will need publishers to use RDFa or microformats to mark up
that data. In RDFa, this is done by publishers adding <code>datatype="rdf:XMLLiteral"</code>
to elements whose markup should be preserved. In microformats, the
handling of the content of an element is determined by the
property; in microformats-2, those that retain the HTML structure
are named with an <code>e-*</code> prefix, such as <code>e-content</code>.
</dd>
<dt>Language support</dt>
<dd> Microformats and RDFa use the language of the HTML elements in
the page (from the <code>@lang</code> attribute) to indicate the
language of relevant values. In microdata, the <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
has to provide a separate mechanism to indicate a language. If you
are consuming information about the same things from pages that
use different languages, or anticipate publishers using multiple
languages in their pages to describe a particular entity, you can
automatically pick up the language of the content of the page if
publishers use microformats or RDFa. If you consume microdata, you
need to provide specific <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
in your vocabulary that publishers can use to indicate the
language of the content. </dd>
</dl>
<p class="issue"> The handling of language by microdata <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=14470">may
change in the future</a>. </p>
</div>
<div class="section" id="usability-considerations-1">
<h4><span class="secno">3.1.4 </span>Usability Considerations</h4>
<p> Publishing data within HTML can be a challenge for publishers,
simply because the structure of the data that they publish is not
immediately visible within their pages. The publishers you are
targeting will have different levels of skill and experience, which
may influence your choice of <a class="internalDFN" href="#dfn-syntax">syntax</a>
and the way in which you design your vocabulary. If you can, you
should try to work closely with a few target publishers to better
understand their requirements and constraints. Experimenting with
marking up a few of their existing pages will often highlight issues
with both syntax and vocabulary. </p>
<p> Some usability issues may be addressed by restricting the set of
attributes that you instruct publishers how to use, or by
restricting their location to provide more consistency. For example:
</p>
<ul>
<li><a rel="nofollow" href="http://www.w3.org/2010/02/rdfa/sources/rdfa-lite/Overview-src.html">RDFa
1.1 Lite</a> is an authoring profile of RDFa 1.1 that is
sufficient for most data publishing</li>
<li> most microdata markup does not require <code>@itemid</code> or
<code>@itemref</code></li>
<li>constraining data markup to the <code>head</code> of an HTML
document can make it easier to author and protect it from
templating changes, although it also runs the risk of getting out
of sync with the content of the page, increases repetition, and is
hard to use for anything but flat data structures</li>
</ul>
<p> Profiling microdata and RDFa is useful for documentation, but
consumers should still recognise and understand the full set of
syntactic constructs described by the standards. This ensures that
those publishers who find that they need the more advanced
constructs to mark up their pages can do so, and means that
publishers can use general-purpose tools and documentation rather
than just those that you provide. </p>
</div>
</div>
<div class="section" id="consuming-multiple-formats">
<h3><span class="secno">3.2 </span>Consuming Pages with Multiple
Formats</h3>
<p> In attempting to provide information to multiple consumers,
publishers may use several <a class="internalDFN" href="#dfn-format"
title="format">formats</a>
within a single page. Consumers should ignore data in <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
that they do not recognise and only raise errors for unexpected <a class="internalDFN"
href="#dfn-property"
title="property">properties</a>
in the vocabularies that they do recognise. </p>
<p> Consumers of HTML data may recognise several <a class="internalDFN"
href="#dfn-format"
title="format">formats</a>
embedded within a given page, and even within the same part of a page.
In these cases, consumers should merge data from the different formats;
in the example above, a consumer should recognise that the data in
vEvent, hCalendar and schema.org is about a single event rather
than interpreting it as three events and merge <a class="internalDFN"
href="#dfn-property">property</a>
<a class="internalDFN" href="#dfn-value" title="value">values</a> so
that the event ends up having a single URL rather than several.
Different formats may provide information about different aspects of
an <a class="internalDFN" href="#dfn-entity">entity</a> to different
levels of fidelity — in the example above, the schema.org RDFa
provided extra details about the location of the event compared to the vEvent
or hCalendar formats — and consumers should seek to use whatever gives
them the most detailed information. </p>
</div>
<div class="section" id="good-consumption-practice">
<h3><span class="secno">3.3 </span>Good Consumption Practice</h3>
<p> It is good practice for a consumer to provide tools that help
publishers to see how the data within their pages is interpreted by
the consumer and that highlight any errors in the markup, such as
invalid <a class="internalDFN" href="#dfn-value" title="value">values</a>
or missing required <a class="internalDFN" href="#dfn-property" title="property">properties</a>.
</p>
<p> It is good practice for consumers to ignore markup that uses <a class="internalDFN"
href="#dfn-syntax">syntax</a>
or <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
that they do not understand. Properties and <a class="internalDFN" href="#dfn-type"
title="type">types</a>
in unrecognised vocabularies should be ignored by consumers. </p>
<p> The presence of HTML data within a website does not imply that the
data can be used without restriction. Publishers may license the
information provided through HTML data, for example to restrict it to
non-commercial use or to use only with attribution. Legally, consumers
must honour licenses and it is good practice for consumers to indicate
to publishers which <a class="internalDFN" href="#dfn-format" title="format">formats</a>
they recognise for expressing licensing information within HTML pages,
and which licenses they recognise as indicating that the data within
the page is consumable. Typical <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
for expressing this information are schema.org, rel-license, Creative
Commons or Dublin Core. </p>
<p> Even when the use of data is unrestricted, it is good practice for
consumers to record the source of the information that they use and,
when republishing that data, provide metadata about the rights holder,
source and license under which the information is available, using the
same <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
as those listed above. </p>
</div>
</div>
<div class="section" id="vocabulary-authors">
<!-- OddPage -->
<h2><span class="secno">4. </span>Vocabulary Authors</h2>
<p> Designing <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
is a complex craft, and this document does not cover all aspects of how
to go about it. There are several existing more general resources for
vocabulary creators, such as: </p>
<ul>
<li><a rel="nofollow" href="http://microformats.org/wiki/process">the
microformats process</a></li>
<li><a rel="nofollow" href="http://www.w3.org/2001/sw/interest/webschema.html">SWIG
Web Schemas Task Force</a></li>
</ul>
<div class="section" id="extending-vocabularies">
<h3><span class="secno">4.1 </span>Extending Vocabularies</h3>
<p> There are already many <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
in existence, particularly for common domains such as people,
organisations, events, products, reviews, recipes and so on. Reusing
these vocabularies benefits consumers because it saves design time and
means they do not have to create supporting tools and materials such
as validators, previewers or documentation. It also benefits
publishers because it increases the likelihood that the data within
their pages can be consumed by other useful tools. It is therefore
good practice to extend existing vocabularies rather than creating new
ones, where possible. </p>
<p> This section describes some of the issues that <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
authors who extend existing vocabularies need to be aware of. </p>
<div class="section" id="extending-microformats">
<h4><span class="secno">4.1.1 </span>Extending Microformats</h4>
<p> Microformats are developed using an iterative process whereby
proposals for extensions are <a href="http://microformats.org/wiki/process#Brainstorm_Proposals">brainstormed</a>
and eventually either accepted or rejected by the microformats
community. It is not appropriate to create unilateral extensions to
microformats. On the other hand, publishers should use semantic
classes within their HTML, whether or not they are used within
current microformats. Evidence of use of semantic classes within
HTML pages is one input to the microformats standardisation process. </p>
</div>
<div class="section" id="extending-rdf-vocabularies">
<h4><span class="secno">4.1.2 </span>Extending RDF Vocabularies</h4>
<p> RDF <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>,
which are used within RDFa, use IRIs for <a class="internalDFN" href="#dfn-type"
title="type">types</a>
and <a class="internalDFN" href="#dfn-property" title="property">properties</a>.
Any resource in RDFa can be extended by adding new types to the <code>@typeof</code>
attribute and/or adding new properties from different vocabularies.
However, it is not general practice to allow RDF vocabularies
themselves to be extended with new types or properties by third
parties. </p>
<p> One pattern that is quite common is for one <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
to accept a string for a <a class="internalDFN" href="#dfn-property">property</a>,
such as an address, and for an extension to provide more structure
for that property. In this case, a useful pattern is to nest the
more structured property inside the textual property within the
HTML. For example: </p>
<pre>&lt;div <strong>property="location"</strong>&gt;
&lt;address <strong>property="http://example.org/address" vocab="http://example.org/" typeof="Address"</strong>&gt;
&lt;span property="name"&gt;The White House&lt;/span&gt;&lt;br&gt;
&lt;span property="street"&gt;1600 Pennsylvania Avenue NW&lt;/span&gt;&lt;br&gt;
&lt;span property="city"&gt;Washington&lt;/span&gt;, &lt;span property="state"&gt;DC&lt;/span&gt; &lt;span property="zip"&gt;20500&lt;/span&gt;
&lt;/address&gt;&lt;/div&gt;</pre>
<p> This pattern also works for <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
whose <a class="internalDFN" href="#dfn-value" title="value">values</a>
are XML literals; in this case, the XML literal will include the
RDFa markup. </p>
</div>
<div class="section" id="extending-microdata-vocabularies">
<h4><span class="secno">4.1.3 </span>Extending Microdata Vocabularies</h4>
<p> Microdata items can have both <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
that are scoped to the <a class="internalDFN" href="#dfn-type">type</a>
of the item and <a class="internalDFN" href="#dfn-property" title="property">properties</a>
that have absolute URLs. There are two ways you can extend a type by
adding new properties: </p>
<ul>
<li>use a property that is an absolute URL</li>
<li>if the vocabulary allows it, use a new short-name property</li>
</ul>
<p> Third parties who wish to extend an existing type with new
properties should check the constraints of the type being extended
to work out whether it's possible to use a non-URL property or not.
Note that there is always a possibility, if you do use a non-URL
property name, that your extension will conflict with an extension
made by someone else; properties whose names are absolute URLs do
not have this issue but are more verbose when used in markup. </p>
<p> Microdata does not allow items to have multiple <a class="internalDFN"
href="#dfn-type"
title="type">types</a>
from different vocabularies. Some vocabularies, such as schema.org,
may permit third parties to freely extend existing types within that
vocabulary. In this case, items should be assigned both the
supertype and the extension type within the <code>@itemtype</code>
attribute. For example, schema.org describes a <a href="http://schema.org/docs/extension.html">method
of extending its vocabulary</a> that involves identifying an
appropriate supertype or superproperty and appending a <code>/</code>
and then the name of a subtype or subproperty. Schema.org also
permits anyone to create additional non-URL properties on these new
types. To extend schema.org's types with a type for a member of
parliament, a <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
author might use the URI <code>http://schema.org/Person/MP</code>,
and mark up their page with </p>
<pre>&lt;p itemscope itemtype="<strong>http://schema.org/Person http://schema.org/Person/MP</strong>"&gt;
&lt;span itemprop="<strong>name</strong>"&gt;David Cameron&lt;/span&gt; is the member of parliament for &lt;span itemprop="<strong>constituency</strong>"&gt;Witney&lt;/span&gt;.&lt;/p&gt;</pre>
<p> Here, both <code>http://schema.org/Person</code> and <code>http://schema.org/Person/MP</code>
are given as <a class="internalDFN" href="#dfn-type" title="type">types</a>,
and the non-URL <code>constituency</code> <a class="internalDFN" href="#dfn-property">property</a>
is used despite it not being defined within the schema.org
vocabulary. </p>
<p> Other microdata <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
do not enable third parties to extend the vocabulary. In these
cases, third parties should use a URL <a class="internalDFN" href="#dfn-property">property</a>
to specify the additional <a class="internalDFN" href="#dfn-type">type</a>
for the item. For compatibility with RDF, we recommend using <code>http://www.w3.org/1999/02/22-rdf-syntax-ns#type</code>
for this property, and using a full URL for the type. An alternative
to the example above that didn't use the schema.org extension
mechanism would be: </p>
<pre>&lt;p itemscope itemtype="http://schema.org/Person"&gt;
<strong>&lt;link itemprop="http://www.w3.org/1999/02/22-rdf-syntax-ns#type" href="http://gov.example.org/uk/MP"&gt;</strong>
&lt;span itemprop="<strong>name</strong>"&gt;David Cameron&lt;/span&gt; is the member of parliament for &lt;span itemprop="<strong>http://gov.example.org/uk/constituency</strong>"&gt;Witney&lt;/span&gt;.&lt;/p&gt;</pre>
<p> More details about the use and limitations of this technique can
be found in <a class="sectionRef" href="#mixing-vocabularies-microdata">section
2.2.1.3 Mixing Vocabularies in Microdata</a>. </p>
<p> The technique described for RDFa above, of nesting a <a class="internalDFN"
href="#dfn-property">property</a>
that contains more structure within a property that has less, can
also be used with microdata content. </p>
</div>
</div>
<div class="section" id="designing-vocabularies">
<h3><span class="secno">4.2 </span>Designing Vocabularies</h3>
<p> This section looks at the particular requirements of different HTML
data <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a> on <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>,
and how to create vocabularies that can be used across HTML data
syntaxes. </p>
<div class="section" id="syntax-specific-requirements">
<h4><span class="secno">4.2.1 </span>Syntax-Specific Requirements</h4>
<p> Each HTML data <a class="internalDFN" href="#dfn-syntax">syntax</a>
brings with it a set of constraints on both how <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
are designed and their documentation. </p>
<div class="section" id="microformat-vocabularies">
<h5><span class="secno">4.2.1.1 </span>Microformat Vocabularies</h5>
<p> The <a href="http://microformats.org/wiki/microformats-2">microformats
2</a> page describes the constraints on the design of
microformat vocabularies, and the <a href="http://microformats.org/wiki/process">microformats
process</a> describes additional procedural guidelines on how to
create a new microformat. </p>
</div>
<div class="section" id="microdata-vocabularies">
<h5><span class="secno">4.2.1.2 </span>Microdata Vocabularies</h5>
<p> Microdata <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
must define, within a specification for that vocabulary,
processing rules to be followed by consumers of that vocabulary,
using the terms given by the <a href="http://dev.w3.org/html5/md/">microdata
specification</a>. These include: </p>
<ul>
<li>what <a class="internalDFN" href="#dfn-type" title="type">types</a>
the <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
includes</li>
<li>which <a class="internalDFN" href="#dfn-type" title="type">types</a>
support <code>@itemid</code> to provide global identifiers for
items</li>
<li>whether and how two items described using microdata should be
considered a single item by a consumer (such as when they have
the same <code>@itemid</code>) and if so, how two items within
an HTML page should be merged</li>
<li>whether URL <a class="internalDFN" href="#dfn-value" title="value">values</a>
that have the same value as an <code>@itemid</code> should be
treated the same as if the item had been nested within the page</li>
<li>which non-URL <a class="internalDFN" href="#dfn-property" title="property">properties</a>
(<b>defined property names</b>) are permitted on each of those
types, whether there are equivalent URL properties for them, and
how properties will be merged if both are used</li>
<li>how many and what kinds of <a class="internalDFN" href="#dfn-value"
title="value">values</a>
are allowed for each <a class="internalDFN" href="#dfn-property">property</a>,
and what consumers should do if there are more or fewer values
than required, how the values are parsed, and what happens when
the values are of the wrong type</li>
<li>whether items that are the <a class="internalDFN" href="#dfn-value">value</a>
of a <a class="internalDFN" href="#dfn-property">property</a>
must explicitly have a <a class="internalDFN" href="#dfn-type">type</a>
or if this can be inferred by consumers</li>
<li>what to do when an item has a <a class="internalDFN" href="#dfn-property">property</a>
that it should not have</li>
<li>whether <a class="internalDFN" href="#dfn-type">type</a> and
<a class="internalDFN" href="#dfn-property">property</a> URLs
can be dereferenced</li>
<li>how consumers should recognise items belonging to the <a class="internalDFN"
href="#dfn-vocabulary">vocabulary</a>
(whether purely by <code>@itemtype</code> or through some other
mechanism)</li>
</ul>
<p> An example of a microdata <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
description is available for <a rel="nofollow" href="http://www.heppnetz.de/ontologies/goodrelations/v1.html#microdata">GoodRelations</a>.
There are also example microdata vocabularies within the <a rel="nofollow"
href="http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata.html#mdvocabs">WHATWG
version of the microdata specification</a>. </p>
<p> Microdata does not support the use of the HTML <code>@lang</code>
attribute to provide language information for textual values; if
this is important, a microdata <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
must provide a mechanism for supplying a language separately. This
can be done by: </p>
<ul>
<li>having a <a class="internalDFN" href="#dfn-property">property</a>
that indicates the language used in the data for the item; this
only works if all the data uses the same language</li>
<li>defining a <code>LanguageString</code> <a class="internalDFN"
href="#dfn-type">type</a>
that has properties for both content and language and specifying
the use of items of that type as a <a class="internalDFN" href="#dfn-value">value</a>
for any appropriate property</li>
</ul>
<p> Microdata does not support structured HTML values. Where these
need to be captured, <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
can instead use URLs that reference fragments of HTML in the page.
For example: </p>
<pre><strong>&lt;link itemprop="breadcrumb" href="#breadcrumb"&gt;</strong>&lt;div <strong>id="breadcrumb"</strong>&gt;
&lt;a href="category/books.html"&gt;Books&lt;/a&gt; &gt;
&lt;a href="category/books-literature.html"&gt;Literature &amp; Fiction&lt;/a&gt; &gt;
&lt;a href="category/books-classics"&gt;Classics&lt;/a&gt;&lt;/div&gt;</pre>
</div>
<div class="section" id="rdfa-vocabularies">
<h5><span class="secno">4.2.1.3 </span>RDFa Vocabularies</h5>
<p> RDFa is used to create RDF graphs, so <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
used within RDFa should bear in mind the constraints and
conventions that commonly apply to RDF vocabularies. These
include: </p>
<ul>
<li><a class="internalDFN" href="#dfn-type" title="type">types</a>
should be named using CapitalCamelCase, and <a class="internalDFN"
href="#dfn-property"
title="property">properties</a>
using lowerCamelCase</li>
<li><a class="internalDFN" href="#dfn-type" title="type">types</a>
and <a class="internalDFN" href="#dfn-property" title="property">properties</a>
in the same <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
should share an IRI prefix — the vocabulary IRI — which should
end in a <code>#</code> or a <code>/</code>; the local part of
a <a class="internalDFN" href="#dfn-type">type</a> or property
IRI, after this prefix, should be a valid <a href="http://www.w3.org/TR/REC-xml-names/#NT-NCName">NCName</a>
so that it can be used within RDF/XML serialisations</li>
<li>the IRIs used for <a class="internalDFN" href="#dfn-type" title="type">types</a>
and <a class="internalDFN" href="#dfn-property" title="property">properties</a>
should resolve into documentation and/or (through content
negotiation) an <a href="http://www.w3.org/TR/rdf-schema/">RDFS
schema</a> or <a href="http://www.w3.org/TR/owl-overview/">OWL
ontology</a> that describes the types and properties</li>
</ul>
<p> In addition, the authors of <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
designed to be used with RDFa should specify whether IRIs and
percent-encoded URIs should be treated as equivalent when used for
<a class="internalDFN" href="#dfn-property">property</a> and <a class="internalDFN"
href="#dfn-type">type</a>
identifiers or <a class="internalDFN" href="#dfn-value" title="value">values</a>.
</p>
<p> More guidelines and patterns for modelling using RDF are
available within <a rel="nofollow" href="http://patterns.dataincubator.org/book/modelling-patterns.html">Linked
Data Patterns</a>. </p>
</div>
</div>
<div class="section" id="syntax-neutral-vocabularies">
<h4><span class="secno">4.2.2 </span>Syntax-Neutral Vocabularies</h4>
<p> Syntax-neutral <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
must have variants for each <a class="internalDFN" href="#dfn-syntax">syntax</a>
that meet the requirements for the syntax as described above, but
the capabilities of each variant do not have to be identical. </p>
<p> For example, a syntax-neutral review <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
could specify a required <code>reviewLanguage</code> <a class="internalDFN"
href="#dfn-property">property</a>
to give the language of a review in microdata, but say that if
microformats or RDFa were used, and this were left unspecified, the
language would be assumed. Publishers who had content that included
multiple languages in the review itself (which couldn't be
represented using a property providing a language for the entire
review) would be able to use microformats or RDFa to mark up the
review. </p>
<p> There are a number of measures that make it easier for <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
to be used across <a class="internalDFN" href="#dfn-syntax" title="syntax">syntaxes</a>
in ways that make it easier for consumers to combine data whichever
<a class="internalDFN" href="#dfn-syntax">syntax</a> is used. </p>
<dl>
<dt>Naming Conventions</dt>
<dd> Adopt consistent names across <a class="internalDFN" href="#dfn-syntax"
title="syntax">syntaxes</a>,
even if the naming conventions between the syntaxes differ. For
example, microformats uses lowercase-hyphenated-names whereas RDF
uses lowerCamelCase; all that is needed is a clear mapping between
them. Although microdata allows defined <a class="internalDFN" href="#dfn-property">property</a>
names to contain any character except <code>:</code> and <code>.</code>,
non-URL properties should have names that are <a href="http://www.w3.org/TR/REC-xml-names/#NT-NCName">NCNames</a>
so that they can be used in microformats and RDFa. Note that
microdata's restrictions mean that <code>.</code>s should be
avoided in these names. </dd>
<dt>Entity Identity</dt>
<dd> Microformats and microdata have a limited notion of <a class="internalDFN"
href="#dfn-entity">entity</a>
identity: entities may have identifiers (in microdata, from the <code>@itemid</code>
attribute) but these are not used within the data model to combine
entities or link them together into graphs. Syntax-neutral <a class="internalDFN"
href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
use the RDF concept of identity whereby entities with the same
identifier are the same entity, and references to that entity's
identifier serve to create a graph of entities. This should be
reflected in the definition of the microdata variant of the
vocabulary, which should allow <code>@itemid</code> on all items,
and specify that consumers should combine and link to items to
create a graph. </dd>
</dl>
</div>
<div class="section" id="good-vocabulary-design-practices">
<h4><span class="secno">4.2.3 </span>Good Vocabulary Design Practices</h4>
<p> It is good practice for <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
creators to collaborate with others who are consuming or publishing
information in the relevant domains in order to create a vocabulary
that can be used widely across an industry. </p>
<p> It is good practice for <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
creators to make available a validation tool that enables publishers
who use a vocabulary to check that their HTML pages contain data
that is valid against that vocabulary. </p>
<p> It is good practice for <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
creators to make available test suites that enable implementers to
check the behaviour of their implementations. These test suites
should cover error handling as well as the correct interpretation of
valid data. </p>
</div>
</div>
</div>
<div id="acknowledgements" class="appendix section">
<!-- OddPage -->
<h2><span class="secno">A. </span>Acknowledgements</h2>
<p> Many thanks to the members of the HTML Data Task Force for their
contributions to this document. </p>
</div>
<div id="multiple-types-microdata" class="appendix section">
<!-- OddPage -->
<h2><span class="secno">B. </span>Multiple Item Types in Microdata</h2>
<p> As discussed in <a href="#mixing-vocabularies-microdata" class="sectionRef">section
2.2.1.3 Mixing Vocabularies in Microdata</a>, microdata does not
support providing multiple <a class="internalDFN" href="#dfn-type" title="type">types</a>
from different <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
to a given item within the <code>@itemtype</code> attribute. There are
two work-arounds for this, which are discussed here using the example of
targeting both the schema.org and vEvent vocabularies with the
original HTML: </p>
<pre>&lt;a href="nba-miami-philadelphia-game3.html"&gt;
NBA Eastern Conference First Round Playoff Tickets:
Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)&lt;/a&gt;
Thu, 04/21/16
8:00 p.m.
&lt;a href="wells-fargo-center.html"&gt;
Wells Fargo Center&lt;/a&gt;
Philadelphia, PA</pre>
<div class="section" id="mixing-vocabularies-using-a-type-property">
<h3><span class="secno">B.1 </span>Mixing Vocabularies using a Type
Property</h3>
<p> Some <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
may define a <a class="internalDFN" href="#dfn-property">property</a>
through which <a class="internalDFN" href="#dfn-type" title="type">types</a>
from that vocabulary can be assigned to items that are in a different
vocabulary. For example, schema.org could define a <code>http://schema.org/type</code>
property. It could say that the value of <code>http://schema.org/type</code>
must be the URL for a schema.org type. And further, that if the
property <code>http://schema.org/type</code> has the value <code>http://schema.org/Person</code>,
say, then the item will be interpreted exactly as if the <code>@itemtype</code>
attribute held the value <code>http://schema.org/Person</code>. </p>
<p class="note"> At time of writing schema.org does not specify a <code><a
rel="nofollow"
href="http://schema.org/type">http://schema.org/type</a></code>
property, and this explanation is hypothetical. </p>
<p> When using this technique, the types specified in the <code>@itemtype</code>
attribute are the <b>primary types</b> of the item and those
specified through the type property are the <b>secondary types</b>. </p>
<p> If the schema.org <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
also stated that <a class="internalDFN" href="#dfn-property">property</a>
URLs that begin with <code>http://schema.org/</code> must be treated
in the same way as equivalent short-name properties on items with a
schema.org type, the schema.org <a class="internalDFN" href="#dfn-vocabulary">vocabulary</a>
could be mixed in with an item marked up using vEvent: </p>
<pre>&lt;div itemscope itemtype="http://microformats.org/profile/hcalendar#vevent"&gt;
<strong>&lt;link itemprop="http://schema.org/type" href="http://schema.org/Event"&gt;</strong>
&lt;a itemprop="url <strong>http://schema.org/url</strong>" href="nba-miami-philadelphia-game3.html"&gt;
NBA Eastern Conference First Round Playoff Tickets:
&lt;span itemprop="summary <strong>http://schema.org/name</strong>"&gt; Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) &lt;/span&gt;
&lt;/a&gt;
&lt;meta itemprop="dtstart <strong>http://schema.org/startDate</strong>" content="2016-04-21T20:00"&gt;
Thu, 04/21/16
8:00 p.m.
&lt;div itemprop="location"&gt;
<strong>&lt;div itemprop="http://schema.org/location" itemscope itemtype="http://schema.org/Place"&gt;
&lt;a itemprop="url" href="wells-fargo-center.html"&gt;
Wells Fargo Center
&lt;/a&gt;
&lt;div itemprop="address" itemscope itemtype="http://schema.org/PostalAddress"&gt;
&lt;span itemprop="addressLocality"&gt;Philadelphia&lt;/span&gt;,
&lt;span itemprop="addressRegion"&gt;PA&lt;/span&gt;
&lt;/div&gt;
&lt;/div&gt;</strong>
&lt;/div&gt;&lt;/div&gt;</pre>
<p class="note"> The vEvent <code>location</code> <a class="internalDFN"
href="#dfn-property">property</a>
takes text while the schema.org <code>location</code> property takes
structured information about the location. These are combined by
having an element for the property which requires structured
information nested within the property that requires text. </p>
<p> This generates the JSON: </p>
<pre>{
"type": [ "http://microformats.org/profile/hcalendar#vevent" ],
"properties": {
"http://schema.org/type": [ "http://schema.org/Event" ],
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"http://schema.org/url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"summary": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"http://schema.org/name": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"dtstart": [ "2016-04-21T20:00" ],
"http://schema.org/startDate": [ "2016-04-21T20:00" ],
"location": [ "\n \n \n Wells Fargo Center\n \n \n Philadelphia,\n PA\n \n \n " ],
"http://schema.org/location": [{
"type": [ "http://schema.org/Place" ],
"properties": {
"url": [ "http://example.com/wells-fargo-center.html" ],
"address": [{
"type": [ "http://schema.org/PostalAddress" ],
"properties": {
"addressLocality": [ "Philadelphia" ],
"addressRegion": [ "PA" ]
}
}]
}
}]
}
}</pre>
<p> The schema.org consumer would ignore the vEvent vocabulary but
recognise the use of the <code>http://schema.org/type</code>
property, and therefore treat this data in the same way as if the JSON
were: </p>
<pre>{
"type": [ "http://schema.org/Event" ],
"properties": {
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"name": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"startDate": [ "2016-04-21T20:00" ],
"location": [{
"type": [ "http://schema.org/Place" ],
"properties": {
"url": [ "http://example.com/wells-fargo-center.html" ],
"address": [{
"type": [ "http://schema.org/PostalAddress" ],
"properties": {
"addressLocality": [ "Philadelphia" ],
"addressRegion": [ "PA" ]
}
}]
}
}]
}
}</pre>
<p> Also note that in this example the <code>http://schema.org/type</code>
<a class="internalDFN" href="#dfn-property">property</a> is only used
where necessary, on the item which needs to be marked as an event in
both <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>.
Where possible, the schema.org <a class="internalDFN" href="#dfn-type">type</a>
for an entity is provided explicitly through the <code>@itemtype</code>
attribute. </p>
<p> This method of mixing <a class="internalDFN" href="#dfn-vocabulary"
title="vocabulary">vocabularies</a>
requires vocabularies to specify how consumers should recognise items
of a particular <a class="internalDFN" href="#dfn-type">type</a>. It
is recommended that vocabulary authors define an <code>@itemtype</code>-equivalent
<a class="internalDFN" href="#dfn-property">property</a>, and that,
for better integration with RDF tools, this property is <code>http://www.w3.org/1999/02/22-rdf-syntax-ns#type</code>.
</p>
<p> A particular disadvantage of this approach is that there is no
support within the microdata API for retrieving items based on the <a
class="internalDFN"
href="#dfn-value">value</a>
of a <a class="internalDFN" href="#dfn-property">property</a>. In the
example above, it would be possible to retrieve the event using: </p>
<pre>document.getItems('http://microformats.org/profile/hcalendar#vevent')</pre>
<p> but not through: </p>
<pre>document.getItems('http://schema.org/Event')</pre>
<p> Scripts that extract microdata information using the DOM will be
faster if they can use the primary <a class="internalDFN" href="#dfn-type"
title="type">types</a>
for an item, specified within the <code>@itemtype</code> attribute,
so you should specify types accessed through scripts within <code>@itemtype</code>
rather than through a <a class="internalDFN" href="#dfn-property">property</a>
wherever possible. </p>
</div>
<div class="section" id="mixing-vocabularies-using-repeated-content">
<h3><span class="secno">B.2 </span>Mixing Vocabularies using Repeated
Content</h3>
<p> The second method of supporting multiple <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
is to have the <a class="internalDFN" href="#dfn-entity">entity</a>
represented by two (or more) microdata items on the page. To enable
dragging and dropping the data from these items, they should be nested
inside each other. Properties can be set on the outer element using <code>link</code>
and <code>meta</code> elements which are hidden from users, while the
visible content of the page is marked up by the inner element. </p>
<pre>&lt;div itemscope itemtype="<strong>http://microformats.org/profile/hcalendar#vevent</strong>"&gt;
<strong>&lt;link itemprop="url" href="nba-miami-philadelphia-game3.html"&gt;
&lt;meta itemprop="summary" content="Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)"&gt;
&lt;meta itemprop="dtstart" content="2016-04-21T20:00"&gt;
&lt;meta itemprop="location" content="Wells Fargo Center, Philadelphia, PA"&gt;</strong>
&lt;div itemscope itemtype="<strong>http://schema.org/Event</strong>"&gt;
&lt;a itemprop="url" href="nba-miami-philadelphia-game3.html"&gt;
NBA Eastern Conference First Round Playoff Tickets:
&lt;span itemprop="name"&gt; Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) &lt;/span&gt;
&lt;/a&gt;
&lt;meta itemprop="startDate" content="2016-04-21T20:00"&gt;
Thu, 04/21/16
8:00 p.m.
&lt;div itemprop="location" itemscope itemtype="http://schema.org/Place"&gt;
&lt;a itemprop="url" href="wells-fargo-center.html"&gt;
Wells Fargo Center
&lt;/a&gt;
&lt;div itemprop="address" itemscope itemtype="http://schema.org/PostalAddress"&gt;
&lt;span itemprop="addressLocality"&gt;Philadelphia&lt;/span&gt;,
&lt;span itemprop="addressRegion"&gt;PA&lt;/span&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/div&gt;</pre>
<p> This generates two items: </p>
<pre>{
"items": [{
"type": [ "http://microformats.org/profile/hcalendar#vevent" ],
"properties": {
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"summary": [ "Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)" ],
"dtstart": [ "2016-04-21T20:00" ],
"location": [ "Wells Fargo Center, Philadelphia, PA" ]
}
}, {
"type": [ "http://schema.org/Event" ],
"properties": {
"url": [ "http://example.com/nba-miami-philadelphia-game3.html" ],
"name": [ " Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1) " ],
"startDate": [ "2016-04-21T20:00" ],
"location": [{
"type": [ "http://schema.org/Place" ],
"properties": {
"url": [ "http://example.com/wells-fargo-center.html" ],
"address": [{
"type": [ "http://schema.org/PostalAddress" ],
"properties": {
"addressLocality": [ "Philadelphia" ],
"addressRegion": [ "PA" ]
}
}]
}
}]
}
}]
}</pre>
<p> This method does not require any special <a class="internalDFN" href="#dfn-property"
title="property">properties</a>
to be defined in the <a class="internalDFN" href="#dfn-vocabulary" title="vocabulary">vocabularies</a>
used to mark up the page, and the two items are directly assigned the
relevant <a class="internalDFN" href="#dfn-type">type</a> and are
thus accessible to scripts through the <code>document.getItems()</code>
method. </p>
<p> The disadvantages of this method are that the page contains more
items than there are <a class="internalDFN" href="#dfn-entity" title="entity">entities</a>
(in the above example, two items representing the same event), and it
requires repetition of data within the page. </p>
</div>
</div>
<div class="appendix section" id="references">
<!-- OddPage -->
<h2><span class="secno">C. </span>References</h2>
<div class="section" id="normative-references">
<h3><span class="secno">C.1 </span>Normative references</h3>
<p>No normative references.</p>
</div>
<div class="section" id="informative-references">
<h3><span class="secno">C.2 </span>Informative references</h3>
<p>No informative references.</p>
</div>
</div>
</body></html>