index.html
55.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>Unicode block names for use in XSD regular expressions</title>
<style type="text/css">
code { font-family: monospace; }
div.constraint,
div.issue,
div.note,
div.notice { margin-left: 2em; }
ol.enumar { list-style-type: decimal; }
ol.enumla { list-style-type: lower-alpha; }
ol.enumlr { list-style-type: lower-roman; }
ol.enumua { list-style-type: upper-alpha; }
ol.enumur { list-style-type: upper-roman; }
div.exampleInner pre { margin-left: 1em;
margin-top: 0em; margin-bottom: 0em}
div.exampleOuter {border: 4px double gray;
margin: 0em; padding: 0em}
div.exampleInner { background-color: #d5dee3;
border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
border-bottom-width: 4px;
border-bottom-style: double;
border-bottom-color: #d3d3d3;
padding: 4px; margin: 0em }
div.exampleWrapper { margin: 4px }
div.exampleHeader { font-weight: bold;
margin: 4px}
div.odiff-nsq-add { background-color: #DFDFDF; }
div.odiff-nsq-del { display: none; background-color: #FFDFDF }
div.idiff-nsq-del { display: none; text-decoration: line-through }
div.odiff-nsq-chg { background-color: #DFDFDF }
div.diff-nsq-off { }
span.odiff-nsq-add { background-color: #DFDFDF; }
span.diff-nsq-add { background-color: #DFDFDF; }
span.odiff-nsq-del { display: none; background-color: #FFDFDF }
span.idiff-nsq-del { display: none; text-decoration: line-through }
span.diff-nsq-del { display: none; background-color: #FFDFDF ; text-decoration: line-through }
span.odiff-nsq-chg { background-color: #DFDFDF }
span.diff-nsq-chg { background-color: #DFFFDF }
span.diff-nsq-off { }
td.odiff-nsq-add { background-color: #DFDFDF; }
td.odiff-nsq-del { display: none; background-color: #FFDFDF }
td.odiff-nsq-chg { background-color: #DFDFDF }
td.diff-nsq-off { }
table { width: 100%; }
img { color: white; border: none }
span.rfc2119 { font-variant: small-caps }
span.nav { float: right}
span.arrow { font-style: normal; font-weight: bold }
span.enumval { font-style: italic; font-weight: bold }
code { font-family: monospace; font-size: 100%}
span.propdef { font-weight: bold; font-family: monospace }
span.termdef {color: #850021}
div.termdef {color: #850021}
a.termref:visited, a.termref:link {font-family: sans-serif;
font-style: normal;
color: black;
text-decoration: none }
a.eltref:visited, a.eltref:link { font-family: sans-serif;
color: black;
text-decoration: none }
a.propref:visited, a.xpropref:visited, a.propref:link, a.xpropref:link { color: black; text-decoration: none;
font-family: sans-serif }
div.component {border: 2px solid black; margin-top: 1ex}
span.propdef { font-weight: bold; font-family: monospace }
div.ownDesc {margin-top: -2ex; margin-bottom: -2ex}
a.compref {font-family: sans-serif;
font-style: normal;
color: black;
text-decoration: none}
dl.props, dl.psvi {margin-bottom: .5em; margin-top: 0em}
div.toc1 {margin-left: 5ex}
div.toc2 {margin-left: 2ex}
div.tocLine{margin: 0em; text-indent: -6ex}
h3.withToc {margin-bottom: 0em}
div.constraintnote { margin-top: 1em }
div.constraint {
margin-left: 1em; }
div.constraintlist {
margin-left: 1em; margin-bottom: 0em
}
div.clnumber {
text-indent: -1em;
margin-top: 0em; margin-bottom: 0em }
div.schemaComp { border: 4px double gray;
margin: 0em 1em; padding: 0em }
div.scHead { border: 4px double gray;
border-bottom: 0px;
text-align: center;
margin-left: 1em; padding: .5em }
div.compHeader { margin: 4px;
font-weight: bold }
span.schemaComp { color: #A52A2A }
div.compBody {
border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
padding: 4px ; margin: 0em}
div.psviDef { border: 4px double gray;
margin: 1em 1em; padding: 0em }
div.psviHeader { margin: 4px;
font-weight: bold }
span.psviDef { color: #A52A2A }
div.psviBody { border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
padding: 4px ; margin: 0em}
div.reprdef { border: 4px double gray;
margin: 0em 1em; padding: 0em }
div.reprHeader { margin: 4px;
font-weight: bold }
span.reprdef { color: #A52A2A }
div.reprBody, div.reprcompmulti, div.reprdep {
border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
padding: 4px ; margin: 0em}
div.reprcomp {padding: 4px ; margin: 0em}
div.reprHead { text-align: center; }
div.mapSep { font-size: 50% ; clear: both}
div.mapProp {clear: left; float: left; width: 5em;
max-width: 12em; min-width: 5em }
div.mapRepr { margin-left: 6.5em }
p.element-syntax-1 { font-family: monospace;
margin-top: 0em; margin-bottom: .5em }
p.element-syntax { font-family: monospace;
border-top-width: 1px;
border-top-style: solid;
border-top-color: #d3d3d3;
padding: 4px ; margin: 0em}
div.exampleInner pre { margin-left: 1em;
margin-top: 0em; margin-bottom: 0em}
div.exampleOuter {border: 4px double gray;
margin: 0em;
margin-bottom: 0.6em;
padding: 0em}
div.exampleInner { background-color: #d5dee3;
border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
border-bottom-width: 4px;
border-bottom-style: double;
border-bottom-color: #d3d3d3;
padding: 4px; margin: 0em }
div.exampleWrapper { margin: 4px }
div.exampleHeader { font-weight: bold;
margin: 4px}
table.restricts { margin-top: 1em; margin-bottom: 1em; margin-left: -2em}
table.restricts th { margin-left: 0em }
table.ubc td, table.ubc th { font-size: smaller }
table.dtdemo th { text-align: center;
background-color: #d5dee3}
table.dtdemo pre { margin-left: 0em; margin-bottom: 0em}
table.dtdemo td {background-color: #bedce6}
table.scrap {margin: .5em; background-color: #f5dcb3}
table.defset {background-color: #ffeedd }
table.defset thead, table.diffed-defset thead { color: red; font-weight: bold }
img { color: white; border: none }
span.nav { float: right}
span.arrow { font-style: normal; font-weight: bold }
.shrink {font-size: 80% ; }
.defset ul { margin-top: 0 ;
margin-bottom: 0 ; }
div.defset { margin: 4px ;
border-width: 4px ;
border-style: double ;
border-color: gray ; }
div.aux { background-color: #eeeeee ;
color: #333333 ; }
div.defset-head { font-weight: bold ;
padding: 0.6em ;
border-bottom-width: 4px ;
border-bottom-style: double ;
border-color: #cfcfcf ; }
div.deftop {background-color: #d5dee3 ;
margin-top: 1.5em;
padding-bottom: 0.3em }
div.defindent { margin-left: 1em ;
margin-top: 0em ;
margin-bottom: 0em ; }
div.defargs { margin-left: 3em ; }
div.prod { margin: 1em ;
margin-left: 5em ; }
.lhs { margin-left: -4em ; }
table table, .defset table { margin: 0 ;
border: 0 ;
padding: 0 ; }
.note { margin-left: 2em ;
margin-top: 1em ;
margin-bottom: 1em ; }
div.issue { background-color: #d5bbbb}
.giLabel, .pdName { margin-bottom: 0 ; font-weight: bold }
.giDef, .pdDef { margin-left: 2.5em ; margin-top: 0}
div.pvlist { border: 4px double gray;
margin-bottom: .5em; margin-left: 1em; padding: .5em;
padding-right: 1em; padding-bottom: 1em }
div.pvVal div.pvlist {
border: 4px double gray;
margin-top: 1.2em;
margin-bottom: .5em;
/* margin-left: -5em; */
margin-left: -1.3em;
padding: .5em;
padding-right: 1em;
padding-bottom: 1em;
}
div.clnumber div.pvlist { border: 4px double gray;
margin-bottom: .5em; margin-left: 1em; padding-top: .5em;
text-indent: 0;
padding-right: 1em; padding-bottom: 1em }
div.mapRepr div.pvlist { border: 4px double gray; margin-top: .5em;
margin-bottom: .5em; padding: .5em;
padding-right: 1em; padding-bottom: 1em }
div.pvSep { font-size: 50% ; clear: both}
div.pvProp {clear: left; float: left; width: 7em;
max-width: 12em; min-width: 7em }
div.pvVal { margin-left: 8em }
div.pvpair {
clear: both;
padding: 0.3em;
padding-right: 0;
}
div.sfsScrap { border: 4px double gray;
margin: 0em 1em; padding: 0em }
div.sfsHead { margin: 4px;
font-weight: bold }
div.sfsBody {
border-top-width: 4px;
border-top-style: double;
border-top-color: #d3d3d3;
padding: 4px ; margin: 0em}
div.ednote {
display: block;
margin: 1.33em 0;
}
a.scrapref {
font-family: serif, sans-serif;
}
/* Added 2008-01-30. Value may be tweaked, but whatever it is,
* make it the same for these three different ways of saying
* 'paragraph'.
*/
p, div.p, div.block { margin: 1em 0; }
p.image-caption {
margin-left: 2em;
margin-right: 2em;
margin-bottom: 3em;
font-style: italic;
}
var {
/* color: green; */
color: navy; /* or perhaps try MediumBlue */
font-style: italic;
font-weight: bold;
}
table.blocknames,
table.blocknames td,
table.blocknames th {
border-style: solid;
border-width: thin;
empty-cells: show;
}
table.blocknames td,
table.blocknames th {
padding: 0.2em;
}
</style><link rel="stylesheet" type="text/css" href="http://www.w3.org/StyleSheets/TR/W3C-WG-NOTE.css"/></head>
<body>
<div class="head"><p><a href="http://www.w3.org/"><img src="http://www.w3.org/Icons/w3c_home" alt="W3C" height="48" width="72"/></a></p>
<h1><a name="title" id="title"/>Unicode block names for use in XSD regular expressions</h1>
<h2><a name="w3c-doctype" id="w3c-doctype"/>W3C Working Group Note 9 June 2011</h2><dl><dt>This version:</dt><dd><a href="http://www.w3.org/TR/2011/NOTE-xsd-unicode-blocknames-20110609/">http://www.w3.org/TR/2011/NOTE-xsd-unicode-blocknames-20110609/</a></dd><dt>Latest version:</dt><dd><a href="http://www.w3.org/TR/xsd-unicode-blocknames/">http://www.w3.org/TR/xsd-unicode-blocknames/</a></dd><dt>Editor:</dt><dd>C. M. Sperberg-McQueen, Black Mesa Technologies LLC <a href="mailto:cmsmcq@blackmesatech.com">&lt;cmsmcq@blackmesatech.com&gt;</a></dd></dl><p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 2011 <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>®</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.eu/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p></div><hr/><div>
<h2><a name="abstract" id="abstract"/>Abstract</h2><p>
This document lists the names of character categories
and character blocks defined by Unicode and used
in the regular expression language defined by XSD 1.0
and XSD 1.1.
</p></div><div class="sotd">
<h2><a name="status" id="status"/>Status of this Document</h2><p><em>This section describes the status of this document at
the time of its publication. Other documents may supersede
this document. A list of current W3C publications and the
latest revision of this technical report can be found in the
<a href="http://www.w3.org/TR/">W3C technical reports
index</a> at http://www.w3.org/TR/.</em></p><p>This document is a W3C
<a href="http://www.w3.org/2005/10/Process-20051014/tr.html#maturity-levels">Working Group Note</a> as described in the
<a href="http://www.w3.org/2005/10/Process-20051014/cover.html">World Wide Web Consortium Process Document</a>.
It lists the names of the Unicode character blocks that can
be referred to in the regular expression language defined
by XSD 1.0 and XSD 1.1.
</p><p>In its current state, this document lists the block names
that have appeared in various versions of the Unicode
database. Some of this material has appeared in working
drafts of <a href="#datatypes-1.1">[XSD 1.1 Part 2: Datatypes]</a>; some has not.
This document is substantially complete in its current form;
future updates, if any, may include changes made in later
versions of the Unicode database.</p><p>
Comments on this document should be sent to the W3C XML Schema
comments mailing list, <a href="mailto:www-xml-schema-comments@w3.org">www-xml-schema-comments@w3.org</a> (<a href="http://lists.w3.org/Archives/Public/www-xml-schema-comments/">archive</a>). Each email message should contain only one
comment.
</p><p>Publication as a Working Group Note does not imply
endorsement by the W3C Membership. This is a draft document and may be
updated, replaced or obsoleted by other documents at any time. It is
inappropriate to cite this document as other than work in
progress.</p><p>
This document has been produced by the
<a href="http://www.w3.org/XML/Schema">W3C XML Schema Working Group</a>
as part of the W3C <a href="http://www.w3.org/XML/Activity">XML
Activity</a>. The authors of this document are
the members of the XML Schema Working Group.
</p><p>This document was produced by a group operating under the
<a href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5
February 2004 W3C Patent Policy</a>. W3C maintains a <a href="http://www.w3.org/2004/01/pp-impl/19482/status">public list of any patent disclosures</a> made in
connection with the deliverables of the group; that page also
includes instructions for disclosing a patent. An individual
who has actual knowledge of a patent which the individual
believes contains <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential">Essential Claim(s)</a> must disclose the information in
accordance with <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section 6 of the W3C Patent Policy</a>. </p></div><div class="toc">
<h2><a name="contents" id="contents"/>Table of Contents</h2><div class="toc">1 <a href="#intro">Introduction</a><br/>
2 <a href="#blocks">Named blocks</a><br/>
</div>
<h3><a name="appendices" id="appendices"/>Appendices</h3><p class="toc">A <a href="#refs">References</a><br/>
B <a href="#acknowledgments">Acknowledgements (non-normative)</a><br/>
</p></div><hr/><div class="body"><div class="div1">
<h2><a name="intro" id="intro"/>1 Introduction</h2><p>
XSD (the XML Schema Definition Language) defines a notation
for regular expressions to be used in the <code>pattern</code>
facet of simple type definitions; this regular-expression
language has also been used, with some modifications, in other
specifications.
</p><p>
The character-class escapes of XSD regular expressions allow
regular expressions to refer conveniently to all UCS
characters which share values for certain properties;
in particular, XSD
provides <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccescat">category
escapes</a>, which allow reference to the
"general category" property of an entry in the
Unicode database, and <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesblock">block escapes</a>,
which identify characters on the basis of named UCS blocks.
</p><p>Experience has shown that it is cumbersome to revise the
relevant passages in the <a href="#datatypes-1.1">[XSD 1.1 Part 2: Datatypes]</a>
each time the Unicode database is revised, so the
version-specific information about Unicode block names
present in earlier
descriptions of XSD regular expressions has been factored out
of <a href="#datatypes-1.1">[XSD 1.1 Part 2: Datatypes]</a> and moved into this
document for more convenient updates.</p><p>
This document does not, however, contain any normative
information.
The normative specification of the XSD regular expression
language is in <a href="#datatypes-1.1">[XSD 1.1 Part 2: Datatypes]</a>.
The normative specification of Unicode block
names for any version of Unicode is in the Unicode database
for that version.
</p><div class="note"><div class="p"><b>Note:</b> The definitions of <a href="http://www.w3.org/TR/xmlschema11-2#dt-cces1">single-character
escapes</a> and <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesN">multi-character
escapes</a> do not vary from version to version of
Unicode and are not recapitulated here; the definitions are
given in <a href="#datatypes-1.1">[XSD 1.1 Part 2: Datatypes]</a>. The possible
values for the <em>General Category</em> property of
each character (used in <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccescat">category
escapes</a>) are also stable across versions (there
has been only one change over the history of the Unicode
database) and are also not given here.
</div></div></div><div class="div1">
<h2><a name="blocks" id="blocks"/>2 Named blocks</h2><p>The Unicode database <a href="#UnicodeDB">[Unicode Database]</a> groups the
code points of the Universal Character Set (UCS) into a number
of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement,
Hangul Jamo, CJK Compatibility, etc. The block-escape construct
allows regular expressions to refer to sets of characters by the
name of the block in which they appear, using a <a href="#dt-normalized-block-name" class="termref"><span class="arrow">·</span>normalized block name<span class="arrow">·</span></a>.
</p><p>
<span class="termdef"><a name="dt-normalized-block-name" id="dt-normalized-block-name" title="">[Definition:] </a>
For any Unicode block, the <b>normalized block name</b> of that
block is the string of characters formed by stripping out white space
and underbar characters from the block name as given in <a href="#UnicodeDB">[Unicode Database]</a>, while retaining hyphens and preserving case
distinctions.</span>
</p><p>
<span class="termdef"><a name="dt-ccesblock" id="dt-ccesblock" title="">[Definition:] </a>
A <b>block escape</b> expression denotes the set of characters
in a given Unicode block. For any Unicode block <var>B</var>, with <a href="#dt-normalized-block-name" class="termref"><span class="arrow">·</span>normalized block name<span class="arrow">·</span></a> <var>X</var>, the set containing all
characters defined in block <var>B</var> can be identified with the <b>block
escape</b> <code>\p{Is</code><var>X</var><code>}</code> (using lower-case
'<code>p</code>'). The complement of this set is denoted by the
<b>block escape</b>
<code>\P{Is</code><var>X</var><code>}</code>
(using upper-case
'<code>P</code>'). For all <var>X</var>, if <var>X</var> is a normalized block name
recognized by the processor, then
<code>[\P{Is</code><var>X</var><code>}]</code> =
<code>[^\p{Is</code><var>X</var><code>}]</code>.
</span>
</p><p>
For example, the <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesblock">block escape</a> for identifying the ASCII
characters is
"<code>\p{IsBasicLatin}</code>".</p><div class="note"><div class="p"><b>Note:</b> Current versions of the Unicode database recommend that
whenever block names are being matched, hyphens, underbars,
and white space should be dropped and letters folded to a
single case, so both the string '<code>BasicLatin</code>'
and the string '<code>-- basic LATIN --</code>' will match
the block name "Basic Latin".
</div><div class="p">The handling of block names in XSD block escapes differs
from this behavior in two ways. First, the normalized block
names defined in XSD do not suppress hyphens in the Unicode
block names and do not level case distinctions. The
normalized form of the block name '<code>Latin-1
Supplement</code>', for example, is thus
'<code>Latin-1Supplement</code>', not
'<code>latin1supplement</code>' or
'<code>LATIN1SUPPLEMENT</code>'. Second, XSD processors
are not required to perform any normalization at all upon
the block name as given in the <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesblock">block escape</a>, so
'<code>\p{IsLatin-1Supplement}</code>' will be recognized as
a reference to the Latin-1 Supplement block, but
'<code>\p{Is Latin-1 supplement}</code>' will not.
</div></div><div class="ednote"><div class="note"><p><b>Editorial Note: </b><span class="edtext">What happens when <var>X</var> is not a recognized
block name? It's not an error, strictly speaking,
since the grammar accepts any sequence of Basic
Latin alphanumerics and hyphens.
</span></p></div></div><p>
The following table lists the block names in the versions of
<a href="#UnicodeDB">[Unicode Database]</a> cited in the references
(<a href="#refs">References (§A)</a>); the normative authority for any
given version of <a href="#UnicodeDB">[Unicode Database]</a> is the Unicode
database itself; in current versions, see the "Blocks.txt"
file.
The "Versions" column indicates which versions
of <a href="#UnicodeDB">[Unicode Database]</a> have a block with the name
and endpoints indicated; if the column is blank, all versions
have such a block.
</p><p>
When these block names are used in <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesblock">block escapes</a>, blanks and
underbars should be removed and the letters
"<code>Is</code>" should be prepended: the block
name "Basic Latin" appears in a <a href="http://www.w3.org/TR/xmlschema11-2#dt-ccesblock">block escape</a> as
"<code>\p{IsBasicLatin}</code>".
</p><table xmlns:u="http://www.unicode.org/ns/2003/ucd/1.0" class="blocknames"><col width="70%"/><col width="30%"/><tbody><tr><th>Start–End, Block name</th><th>Versions</th></tr>
<tr><td>#x0000–#x007F Basic Latin</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0080–#x00FF Latin-1 Supplement</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0100–#x017F Latin Extended-A</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0180–#x024F Latin Extended-B</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0250–#x02AF IPA Extensions</td><td> <!--* all versions *--></td></tr>
<tr><td>#x02B0–#x02FF Spacing Modifier Letters</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0300–#x036F Combining Diacritical Marks</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0370–#x03FF Greek</td><td>before 3.2.0 (i.e. 2.0.0 through 3.1.1)</td></tr>
<tr><td>#x0370–#x03FF Greek and Coptic</td><td>3.2.0 and later</td></tr>
<tr><td>#x0400–#x04FF Cyrillic</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0500–#x052F Cyrillic Supplementary</td><td>3.2.0, 4.0.0</td></tr>
<tr><td>#x0500–#x052F Cyrillic Supplement</td><td>4.0.1 and later</td></tr>
<tr><td>#x0530–#x058F Armenian</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0590–#x05FF Hebrew</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0600–#x06FF Arabic</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0700–#x074F Syriac</td><td>3.2.0 and later</td></tr>
<tr><td>#x0750–#x077F Arabic Supplement</td><td>4.1.0 and later</td></tr>
<tr><td>#x0780–#x07BF Thaana</td><td>3.0.0 and later</td></tr>
<tr><td>#x07C0–#x07FF NKo</td><td>5.0.0 and later</td></tr>
<tr><td>#x0800–#x083F Samaritan</td><td>5.2.0 and later</td></tr>
<tr><td>#x0840–#x085F Mandaic</td><td>6.0.0</td></tr>
<tr><td>#x0900–#x097F Devanagari</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0980–#x09FF Bengali</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0A00–#x0A7F Gurmukhi</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0A80–#x0AFF Gujarati</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0B00–#x0B7F Oriya</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0B80–#x0BFF Tamil</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0C00–#x0C7F Telugu</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0C80–#x0CFF Kannada</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0D00–#x0D7F Malayalam</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0D80–#x0DFF Sinhala</td><td>3.0.0 and later</td></tr>
<tr><td>#x0E00–#x0E7F Thai</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0E80–#x0EFF Lao</td><td> <!--* all versions *--></td></tr>
<tr><td>#x0F00–#x0FBF Tibetan</td><td>2.0.0 through 2.1.9</td></tr>
<tr><td>#x0F00–#x0FFF Tibetan</td><td>3.0.0 and later</td></tr>
<tr><td>#x1000–#x109F Myanmar</td><td>3.2.0 and later</td></tr>
<tr><td>#x10A0–#x10FF Georgian</td><td> <!--* all versions *--></td></tr>
<tr><td>#x1100–#x11FF Hangul Jamo</td><td> <!--* all versions *--></td></tr>
<tr><td>#x1200–#x137F Ethiopic</td><td>3.0.0 and later</td></tr>
<tr><td>#x1380–#x139F Ethiopic Supplement</td><td>4.1.0 and later</td></tr>
<tr><td>#x13A0–#x13FF Cherokee</td><td>3.0.0 and later</td></tr>
<tr><td>#x1400–#x167F Unified Canadian Aboriginal Syllabics</td><td>3.0.0 and later</td></tr>
<tr><td>#x1680–#x169F Ogham</td><td>3.0.0 and later</td></tr>
<tr><td>#x16A0–#x16FF Runic</td><td>3.0.0 and later</td></tr>
<tr><td>#x1700–#x171F Tagalog</td><td>3.2.0 and later</td></tr>
<tr><td>#x1720–#x173F Hanunoo</td><td>3.2.0 and later</td></tr>
<tr><td>#x1740–#x175F Buhid</td><td>3.2.0 and later</td></tr>
<tr><td>#x1760–#x177F Tagbanwa</td><td>3.2.0 and later</td></tr>
<tr><td>#x1780–#x17FF Khmer</td><td>3.0.0 and later</td></tr>
<tr><td>#x1800–#x18AF Mongolian</td><td>3.0.0 and later</td></tr>
<tr><td>#x18B0–#x18FF Unified Canadian Aboriginal Syllabics Extended</td><td>5.2.0 and later</td></tr>
<tr><td>#x1900–#x194F Limbu</td><td>4.0.0 and later</td></tr>
<tr><td>#x1950–#x197F Tai Le</td><td>4.0.0 and later</td></tr>
<tr><td>#x1980–#x19DF New Tai Lue</td><td>4.1.0 and later</td></tr>
<tr><td>#x19E0–#x19FF Khmer Symbols</td><td>4.0.0 and later</td></tr>
<tr><td>#x1A00–#x1A1F Buginese</td><td>4.1.0 and later</td></tr>
<tr><td>#x1A20–#x1AAF Tai Tham</td><td>5.2.0 and later</td></tr>
<tr><td>#x1B00–#x1B7F Balinese</td><td>5.0.0 and later</td></tr>
<tr><td>#x1B80–#x1BBF Sundanese</td><td>5.1.0 and later</td></tr>
<tr><td>#x1BC0–#x1BFF Batak</td><td>6.0.0</td></tr>
<tr><td>#x1C00–#x1C4F Lepcha</td><td>5.1.0 and later</td></tr>
<tr><td>#x1C50–#x1C7F Ol Chiki</td><td>5.1.0 and later</td></tr>
<tr><td>#x1CD0–#x1CFF Vedic Extensions</td><td>5.2.0 and later</td></tr>
<tr><td>#x1D00–#x1D7F Phonetic Extensions</td><td>4.0.0 and later</td></tr>
<tr><td>#x1D80–#x1DBF Phonetic Extensions Supplement</td><td>4.1.0 and later</td></tr>
<tr><td>#x1DC0–#x1DFF Combining Diacritical Marks Supplement</td><td>4.1.0 and later</td></tr>
<tr><td>#x1E00–#x1EFF Latin Extended Additional</td><td> <!--* all versions *--></td></tr>
<tr><td>#x1F00–#x1FFF Greek Extended</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2000–#x206F General Punctuation</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2070–#x209F Superscripts and Subscripts</td><td> <!--* all versions *--></td></tr>
<tr><td>#x20A0–#x20CF Currency Symbols</td><td> <!--* all versions *--></td></tr>
<tr><td>#x20D0–#x20FF Combining Marks for Symbols</td><td>before 3.2.0 (i.e. 2.0.0 through 3.1.1)</td></tr>
<tr><td>#x20D0–#x20FF Combining Diacritical Marks for Symbols</td><td>3.2.0 and later</td></tr>
<tr><td>#x2100–#x214F Letterlike Symbols</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2150–#x218F Number Forms</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2190–#x21FF Arrows</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2200–#x22FF Mathematical Operators</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2300–#x23FF Miscellaneous Technical</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2400–#x243F Control Pictures</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2440–#x245F Optical Character Recognition</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2460–#x24FF Enclosed Alphanumerics</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2500–#x257F Box Drawing</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2580–#x259F Block Elements</td><td> <!--* all versions *--></td></tr>
<tr><td>#x25A0–#x25FF Geometric Shapes</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2600–#x26FF Miscellaneous Symbols</td><td> <!--* all versions *--></td></tr>
<tr><td>#x2700–#x27BF Dingbats</td><td> <!--* all versions *--></td></tr>
<tr><td>#x27C0–#x27EF Miscellaneous Mathematical Symbols-A</td><td>3.2.0 and later</td></tr>
<tr><td>#x27F0–#x27FF Supplemental Arrows-A</td><td>3.2.0 and later</td></tr>
<tr><td>#x2800–#x28FF Braille Patterns</td><td>3.0.0 and later</td></tr>
<tr><td>#x2900–#x297F Supplemental Arrows-B</td><td>3.2.0 and later</td></tr>
<tr><td>#x2980–#x29FF Miscellaneous Mathematical Symbols-B</td><td>3.2.0 and later</td></tr>
<tr><td>#x2A00–#x2AFF Supplemental Mathematical Operators</td><td>3.2.0 and later</td></tr>
<tr><td>#x2B00–#x2BFF Miscellaneous Symbols and Arrows</td><td>4.0.0 and later</td></tr>
<tr><td>#x2C00–#x2C5F Glagolitic</td><td>4.1.0 and later</td></tr>
<tr><td>#x2C60–#x2C7F Latin Extended-C</td><td>5.0.0 and later</td></tr>
<tr><td>#x2C80–#x2CFF Coptic</td><td>4.1.0 and later</td></tr>
<tr><td>#x2D00–#x2D2F Georgian Supplement</td><td>4.1.0 and later</td></tr>
<tr><td>#x2D30–#x2D7F Tifinagh</td><td>4.1.0 and later</td></tr>
<tr><td>#x2D80–#x2DDF Ethiopic Extended</td><td>4.1.0 and later</td></tr>
<tr><td>#x2DE0–#x2DFF Cyrillic Extended-A</td><td>5.1.0 and later</td></tr>
<tr><td>#x2E00–#x2E7F Supplemental Punctuation</td><td>4.1.0 and later</td></tr>
<tr><td>#x2E80–#x2EFF CJK Radicals Supplement</td><td>3.0.0 and later</td></tr>
<tr><td>#x2F00–#x2FDF Kangxi Radicals</td><td>3.0.0 and later</td></tr>
<tr><td>#x2FF0–#x2FFF Ideographic Description Characters</td><td>3.0.0 and later</td></tr>
<tr><td>#x3000–#x303F CJK Symbols and Punctuation</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3040–#x309F Hiragana</td><td> <!--* all versions *--></td></tr>
<tr><td>#x30A0–#x30FF Katakana</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3100–#x312F Bopomofo</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3130–#x318F Hangul Compatibility Jamo</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3190–#x319F Kanbun</td><td> <!--* all versions *--></td></tr>
<tr><td>#x31A0–#x31BF Bopomofo Extended</td><td>3.0.0 and later</td></tr>
<tr><td>#x31C0–#x31EF CJK Strokes</td><td>4.1.0 and later</td></tr>
<tr><td>#x31F0–#x31FF Katakana Phonetic Extensions</td><td>3.2.0 and later</td></tr>
<tr><td>#x3200–#x32FF Enclosed CJK Letters and Months</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3300–#x33FF CJK Compatibility</td><td> <!--* all versions *--></td></tr>
<tr><td>#x3400–#x4DB5 CJK Unified Ideographs Extension A</td><td>3.0.0 through 3.1.1</td></tr>
<tr><td>#x3400–#x4DBF CJK Unified Ideographs Extension A</td><td>3.2.0 and later</td></tr>
<tr><td>#x4DC0–#x4DFF Yijing Hexagram Symbols</td><td>4.0.0 and later</td></tr>
<tr><td>#x4E00–#x9FFF CJK Unified Ideographs</td><td> <!--* all versions *--></td></tr>
<tr><td>#xA000–#xA48F Yi Syllables</td><td>3.0.0 and later</td></tr>
<tr><td>#xA490–#xA4CF Yi Radicals</td><td>3.0.0 and later</td></tr>
<tr><td>#xA4D0–#xA4FF Lisu</td><td>5.2.0 and later</td></tr>
<tr><td>#xA500–#xA63F Vai</td><td>5.1.0 and later</td></tr>
<tr><td>#xA640–#xA69F Cyrillic Extended-B</td><td>5.1.0 and later</td></tr>
<tr><td>#xA6A0–#xA6FF Bamum</td><td>5.2.0 and later</td></tr>
<tr><td>#xA700–#xA71F Modifier Tone Letters</td><td>4.1.0 and later</td></tr>
<tr><td>#xA720–#xA7FF Latin Extended-D</td><td>5.0.0 and later</td></tr>
<tr><td>#xA800–#xA82F Syloti Nagri</td><td>4.1.0 and later</td></tr>
<tr><td>#xA830–#xA83F Common Indic Number Forms</td><td>5.2.0 and later</td></tr>
<tr><td>#xA840–#xA87F Phags-pa</td><td>5.0.0 and later</td></tr>
<tr><td>#xA880–#xA8DF Saurashtra</td><td>5.1.0 and later</td></tr>
<tr><td>#xA8E0–#xA8FF Devanagari Extended</td><td>5.2.0 and later</td></tr>
<tr><td>#xA900–#xA92F Kayah Li</td><td>5.1.0 and later</td></tr>
<tr><td>#xA930–#xA95F Rejang</td><td>5.1.0 and later</td></tr>
<tr><td>#xA960–#xA97F Hangul Jamo Extended-A</td><td>5.2.0 and later</td></tr>
<tr><td>#xA980–#xA9DF Javanese</td><td>5.2.0 and later</td></tr>
<tr><td>#xAA00–#xAA5F Cham</td><td>5.1.0 and later</td></tr>
<tr><td>#xAA60–#xAA7F Myanmar Extended-A</td><td>5.2.0 and later</td></tr>
<tr><td>#xAA80–#xAADF Tai Viet</td><td>5.2.0 and later</td></tr>
<tr><td>#xAB00–#xAB2F Ethiopic Extended-A</td><td>6.0.0</td></tr>
<tr><td>#xABC0–#xABFF Meetei Mayek</td><td>5.2.0 and later</td></tr>
<tr><td>#xAC00–#xD7A3 Hangul Syllables</td><td>before 3.2.0 (i.e. 2.0.0 through 3.1.1)</td></tr>
<tr><td>#xAC00–#xD7AF Hangul Syllables</td><td>3.2.0 and later</td></tr>
<tr><td>#xD7B0–#xD7FF Hangul Jamo Extended-B</td><td>5.2.0 and later</td></tr>
<tr><td>#xD800–#xDB7F High Surrogates</td><td> <!--* all versions *--></td></tr>
<tr><td>#xDB80–#xDBFF High Private Use Surrogates</td><td> <!--* all versions *--></td></tr>
<tr><td>#xDC00–#xDFFF Low Surrogates</td><td> <!--* all versions *--></td></tr>
<tr><td>#xE000–#xF8FF Private Use</td><td>before 3.2.0 (i.e. 2.0.0 through 3.1.1)</td></tr>
<tr><td>#xE000–#xF8FF Private Use Area</td><td>3.2.0 and later</td></tr>
<tr><td>#xF900–#xFAFF CJK Compatibility Ideographs</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFB00–#xFB4F Alphabetic Presentation Forms</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFB50–#xFDFF Arabic Presentation Forms-A</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFE00–#xFE0F Variation Selectors</td><td>3.2.0 and later</td></tr>
<tr><td>#xFE10–#xFE1F Vertical Forms</td><td>4.1.0 and later</td></tr>
<tr><td>#xFE20–#xFE2F Combining Half Marks</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFE30–#xFE4F CJK Compatibility Forms</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFE50–#xFE6F Small Form Variants</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFE70–#xFEFE Arabic Presentation Forms-B</td><td>2.1.9 through 3.1.1</td></tr>
<tr><td>#xFE70–#xFEFF Arabic Presentation Forms-B</td><td>2.0.0 through 2.1.8, also 3.2.0 and later (i.e. not 2.1.9 through 3.1.1)</td></tr>
<tr><td>#xFEFF–#xFEFF Specials</td><td>before 3.2.0 (i.e. 2.0.0 through 3.1.1)</td></tr>
<tr><td>#xFF00–#xFFEF Halfwidth and Fullwidth Forms</td><td> <!--* all versions *--></td></tr>
<tr><td>#xFFF0–#xFFFD Specials</td><td>2.1.9 through 3.1.1</td></tr>
<tr><td>#xFFF0–#xFFFF Specials</td><td>2.0.0 through 2.1.8, also 3.2.0 and later (i.e. not 2.1.9 through 3.1.1)</td></tr>
<tr><td>#x10000–#x1007F Linear B Syllabary</td><td>4.0.0 and later</td></tr>
<tr><td>#x10080–#x100FF Linear B Ideograms</td><td>4.0.0 and later</td></tr>
<tr><td>#x10100–#x1013F Aegean Numbers</td><td>4.0.0 and later</td></tr>
<tr><td>#x10140–#x1018F Ancient Greek Numbers</td><td>4.1.0 and later</td></tr>
<tr><td>#x10190–#x101CF Ancient Symbols</td><td>5.1.0 and later</td></tr>
<tr><td>#x101D0–#x101FF Phaistos Disc</td><td>5.1.0 and later</td></tr>
<tr><td>#x10280–#x1029F Lycian</td><td>5.1.0 and later</td></tr>
<tr><td>#x102A0–#x102DF Carian</td><td>5.1.0 and later</td></tr>
<tr><td>#x10300–#x1032F Old Italic</td><td>3.1.0 and later</td></tr>
<tr><td>#x10330–#x1034F Gothic</td><td>3.1.0 and later</td></tr>
<tr><td>#x10380–#x1039F Ugaritic</td><td>4.0.0 and later</td></tr>
<tr><td>#x103A0–#x103DF Old Persian</td><td>4.1.0 and later</td></tr>
<tr><td>#x10400–#x1044F Deseret</td><td>3.1.0 and later</td></tr>
<tr><td>#x10450–#x1047F Shavian</td><td>4.0.0 and later</td></tr>
<tr><td>#x10480–#x104AF Osmanya</td><td>4.0.0 and later</td></tr>
<tr><td>#x10800–#x1083F Cypriot Syllabary</td><td>4.0.0 and later</td></tr>
<tr><td>#x10840–#x1085F Imperial Aramaic</td><td>5.2.0 and later</td></tr>
<tr><td>#x10900–#x1091F Phoenician</td><td>5.0.0 and later</td></tr>
<tr><td>#x10920–#x1093F Lydian</td><td>5.1.0 and later</td></tr>
<tr><td>#x10A00–#x10A5F Kharoshthi</td><td>4.1.0 and later</td></tr>
<tr><td>#x10A60–#x10A7F Old South Arabian</td><td>5.2.0 and later</td></tr>
<tr><td>#x10B00–#x10B3F Avestan</td><td>5.2.0 and later</td></tr>
<tr><td>#x10B40–#x10B5F Inscriptional Parthian</td><td>5.2.0 and later</td></tr>
<tr><td>#x10B60–#x10B7F Inscriptional Pahlavi</td><td>5.2.0 and later</td></tr>
<tr><td>#x10C00–#x10C4F Old Turkic</td><td>5.2.0 and later</td></tr>
<tr><td>#x10E60–#x10E7F Rumi Numeral Symbols</td><td>5.2.0 and later</td></tr>
<tr><td>#x11000–#x1107F Brahmi</td><td>6.0.0</td></tr>
<tr><td>#x11080–#x110CF Kaithi</td><td>5.2.0 and later</td></tr>
<tr><td>#x12000–#x123FF Cuneiform</td><td>5.0.0 and later</td></tr>
<tr><td>#x12400–#x1247F Cuneiform Numbers and Punctuation</td><td>5.0.0 and later</td></tr>
<tr><td>#x13000–#x1342F Egyptian Hieroglyphs</td><td>5.2.0 and later</td></tr>
<tr><td>#x16800–#x16A3F Bamum Supplement</td><td>6.0.0</td></tr>
<tr><td>#x1B000–#x1B0FF Kana Supplement</td><td>6.0.0</td></tr>
<tr><td>#x1D000–#x1D0FF Byzantine Musical Symbols</td><td>3.1.0 and later</td></tr>
<tr><td>#x1D100–#x1D1FF Musical Symbols</td><td>3.1.0 and later</td></tr>
<tr><td>#x1D200–#x1D24F Ancient Greek Musical Notation</td><td>4.1.0 and later</td></tr>
<tr><td>#x1D300–#x1D35F Tai Xuan Jing Symbols</td><td>4.0.0 and later</td></tr>
<tr><td>#x1D360–#x1D37F Counting Rod Numerals</td><td>5.0.0 and later</td></tr>
<tr><td>#x1D400–#x1D7FF Mathematical Alphanumeric Symbols</td><td>3.1.0 and later</td></tr>
<tr><td>#x1F000–#x1F02F Mahjong Tiles</td><td>5.1.0 and later</td></tr>
<tr><td>#x1F030–#x1F09F Domino Tiles</td><td>5.1.0 and later</td></tr>
<tr><td>#x1F0A0–#x1F0FF Playing Cards</td><td>6.0.0</td></tr>
<tr><td>#x1F100–#x1F1FF Enclosed Alphanumeric Supplement</td><td>5.2.0 and later</td></tr>
<tr><td>#x1F200–#x1F2FF Enclosed Ideographic Supplement</td><td>5.2.0 and later</td></tr>
<tr><td>#x1F300–#x1F5FF Miscellaneous Symbols And Pictographs</td><td>6.0.0</td></tr>
<tr><td>#x1F600–#x1F64F Emoticons</td><td>6.0.0</td></tr>
<tr><td>#x1F680–#x1F6FF Transport And Map Symbols</td><td>6.0.0</td></tr>
<tr><td>#x1F700–#x1F77F Alchemical Symbols</td><td>6.0.0</td></tr>
<tr><td>#x20000–#x2A6D6 CJK Unified Ideographs Extension B</td><td>3.1.0, 3.1.1</td></tr>
<tr><td>#x20000–#x2A6DF CJK Unified Ideographs Extension B</td><td>3.2.0 and later</td></tr>
<tr><td>#x2A700–#x2B73F CJK Unified Ideographs Extension C</td><td>5.2.0 and later</td></tr>
<tr><td>#x2B740–#x2B81F CJK Unified Ideographs Extension D</td><td>6.0.0</td></tr>
<tr><td>#x2F800–#x2FA1F CJK Compatibility Ideographs Supplement</td><td>3.1.0 and later</td></tr>
<tr><td>#xE0000–#xE007F Tags</td><td>3.1.0 and later</td></tr>
<tr><td>#xE0100–#xE01EF Variation Selectors Supplement</td><td>4.0.0 and later</td></tr>
<tr><td>#xF0000–#xFFFFD Private Use</td><td>3.1.0, 3.1.1</td></tr>
<tr><td>#xF0000–#xFFFFF Supplementary Private Use Area-A</td><td>3.2.0 and later</td></tr>
<tr><td>#x100000–#x10FFFD Private Use</td><td>3.1.0, 3.1.1</td></tr>
<tr><td>#x100000–#x10FFFF Supplementary Private Use Area-B</td><td>3.2.0 and later</td></tr>
</tbody></table><div class="note"><div class="p"><b>Note:</b> The blocks mentioned above include the
<code>HighSurrogates</code>, <code>LowSurrogates</code>, and
<code>HighPrivateUseSurrogates</code> blocks. These blocks
identify <em>surrogate</em> characters, which do not occur
at the level of the character abstraction that XML instance
documents operate on. For that reason, block escapes using
these block names will never match any characters in an XML
document.</div></div><p>
As indicated in the "Versions" column,
<a href="#UnicodeDB">[Unicode Database]</a> has been revised over time.
Implementors of the XSD regular expression language
are encouraged to support the block names
defined in all versions of the Unicode
Standard. When the implementation supports multiple versions
of the Unicode database, and they differ in salient respects
(e.g. different characters are assigned to a given block in
different versions of the database), then it is implementation-defined
which set of block definitions is used for any given
assessment episode.
</p></div></div><div class="back"><div class="div1">
<h2><a name="refs" id="refs"/>A References</h2><dl><dt class="label"><a name="UnicodeDB" id="UnicodeDB"/>Unicode Database</dt><dd>
The Unicode Consortium. <em>Unicode Character Database</em>.
Current version available at:
<a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>
</dd><dt class="label"><a name="UnicodeDB-2.0.0" id="UnicodeDB-2.0.0"/>Unicode Database 2.0.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 2.0.0.
[n.p.]: The Unicode Consortium, 1996.
List of components at
<a href="http://www.unicode.org/versions/components-2.0.0.html">http://www.unicode.org/versions/components-2.0.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/2.0-Update/UnicodeData-2.0.14.txt">http://www.unicode.org/Public/2.0-Update/UnicodeData-2.0.14.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/2.0-Update/Blocks-1.txt">http://www.unicode.org/Public/2.0-Update/Blocks-1.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-2.1.2" id="UnicodeDB-2.1.2"/>Unicode Database 2.1.2</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 2.1.2.
[n.p.]: The Unicode Consortium, 1998.
List of components at
<a href="http://www.unicode.org/versions/components-2.1.2.html">http://www.unicode.org/versions/components-2.1.2.html</a>.
Character data at
<a href="http://www.unicode.org/Public/2.1-Update/UnicodeData-2.1.2.txt">http://www.unicode.org/Public/2.1-Update/UnicodeData-2.1.2.txt</a>.
Blocks data as for 2.0.0.
</dd><dt class="label"><a name="UnicodeDB-2.1.5" id="UnicodeDB-2.1.5"/>Unicode Database 2.1.5</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 2.1.5.
[n.p.]: The Unicode Consortium, 1998.
List of components at
<a href="http://www.unicode.org/versions/components-2.1.5.html">http://www.unicode.org/versions/components-2.1.5.html</a>.
Character data at
<a href="http://www.unicode.org/Public/2.1-Update2/UnicodeData-2.1.5.txt">http://www.unicode.org/Public/2.1-Update2/UnicodeData-2.1.5.txt</a>.
Blocks data as for 2.0.0.
</dd><dt class="label"><a name="UnicodeDB-2.1.8" id="UnicodeDB-2.1.8"/>Unicode Database 2.1.8</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 2.1.8.
[n.p.]: The Unicode Consortium, 1998.
List of components at
<a href="http://www.unicode.org/versions/components-2.1.8.html">http://www.unicode.org/versions/components-2.1.8.html</a>.
Character data at
<a href="http://www.unicode.org/Public/2.1-Update3/UnicodeData-2.1.8.txt">http://www.unicode.org/Public/2.1-Update3/UnicodeData-2.1.8.txt</a>.
Blocks data as for 2.0.0.
</dd><dt class="label"><a name="UnicodeDB-2.1.9" id="UnicodeDB-2.1.9"/>Unicode Database 2.1.9</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 2.1.9.
[n.p.]: The Unicode Consortium, 1999.
List of components at
<a href="http://www.unicode.org/versions/components-2.1.9.html">http://www.unicode.org/versions/components-2.1.9.html</a>.
Character data at
<a href="http://www.unicode.org/Public/2.1-Update4/UnicodeData-2.1.9.txt">http://www.unicode.org/Public/2.1-Update4/UnicodeData-2.1.9.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/2.1-Update4/Blocks-2.txt">http://www.unicode.org/Public/2.1-Update4/Blocks-2.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-3.0.0" id="UnicodeDB-3.0.0"/>Unicode Database 3.0.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 3.0.0.
[n.p.]: The Unicode Consortium, 1999.
List of components at
<a href="http://www.unicode.org/versions/components-3.0.0.html">http://www.unicode.org/versions/components-3.0.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt">http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/3.0-Update/Blocks-3.txt">http://www.unicode.org/Public/3.0-Update/Blocks-3.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-3.0.1" id="UnicodeDB-3.0.1"/>Unicode Database 3.0.1</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 3.0.1.
[n.p.]: The Unicode Consortium, 2000.
List of components at
<a href="http://www.unicode.org/versions/components-3.0.1.html">http://www.unicode.org/versions/components-3.0.1.html</a>.
Character data at
<a href="http://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.txt">http://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.txt</a>.
Blocks data as for 3.0.0.
</dd><dt class="label"><a name="UnicodeDB-3.1.0" id="UnicodeDB-3.1.0"/>Unicode Database 3.1.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 3.1.0.
[n.p.]: The Unicode Consortium, 2001.
List of components at
<a href="http://www.unicode.org/versions/components-3.1.0.html">http://www.unicode.org/versions/components-3.1.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt">http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/3.1-Update/Blocks-4.txt">http://www.unicode.org/Public/3.1-Update/Blocks-4.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-3.1.1" id="UnicodeDB-3.1.1"/>Unicode Database 3.1.1</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 3.1.1.
[n.p.]: The Unicode Consortium, 2001.
List of components at
<a href="http://www.unicode.org/versions/components-3.1.1.html">http://www.unicode.org/versions/components-3.1.1.html</a>.
Character data and blocks data as for 3.1.0.
</dd><dt class="label"><a name="UnicodeDB-3.2.0" id="UnicodeDB-3.2.0"/>Unicode Database 3.2.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 3.2.0.
[n.p.]: The Unicode Consortium, 2002.
List of components at
<a href="http://www.unicode.org/versions/components-3.2.0.html">http://www.unicode.org/versions/components-3.2.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.txt">http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/3.2-Update/Blocks-3.2.0.txt">http://www.unicode.org/Public/3.2-Update/Blocks-3.2.0.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-4.0.0" id="UnicodeDB-4.0.0"/>Unicode Database 4.0.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 4.0.0.
[n.p.]: The Unicode Consortium, 2003.
List of components at
<a href="http://www.unicode.org/versions/components-4.0.0.html">http://www.unicode.org/versions/components-4.0.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt">http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/4.0-Update/Blocks-4.0.0.txt">http://www.unicode.org/Public/4.0-Update/Blocks-4.0.0.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-4.0.1" id="UnicodeDB-4.0.1"/>Unicode Database 4.0.1</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 4.0.1.
[n.p.]: The Unicode Consortium, 2004.
List of components at
<a href="http://www.unicode.org/versions/components-4.0.1.html">http://www.unicode.org/versions/components-4.0.1.html</a>.
Character data at
<a href="http://www.unicode.org/Public/4.0-Update1/UnicodeData-4.0.1.txt">http://www.unicode.org/Public/4.0-Update1/UnicodeData-4.0.1.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/4.0-Update1/Blocks-4.0.1.txt">http://www.unicode.org/Public/4.0-Update1/Blocks-4.0.1.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-4.1.0" id="UnicodeDB-4.1.0"/>Unicode Database 4.1.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 4.1.0.
[n.p.]: The Unicode Consortium, 2005.
List of components at
<a href="http://www.unicode.org/versions/components-4.1.0.html">http://www.unicode.org/versions/components-4.1.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/4.1.0/ucd/UnicodeData.txt">http://www.unicode.org/Public/4.1.0/ucd/UnicodeData.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt">http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-5.0.0" id="UnicodeDB-5.0.0"/>Unicode Database 5.0.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 5.0.0.
[n.p.]: The Unicode Consortium, 2006.
List of components at
<a href="http://www.unicode.org/versions/components-5.0.0.html">http://www.unicode.org/versions/components-5.0.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt">http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/5.0.0/ucd/Blocks.txt">http://www.unicode.org/Public/5.0.0/ucd/Blocks.txt</a>.
</dd><dt class="label"><a name="UnicodeDB-5.1.0" id="UnicodeDB-5.1.0"/>Unicode Database 5.1.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 5.1.0.
[n.p.]: The Unicode Consortium, 2008.
List of components at
<a href="http://www.unicode.org/versions/components-5.1.0.html">http://www.unicode.org/versions/components-5.1.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt">http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/5.1.0/ucd/Blocks.txt">http://www.unicode.org/Public/5.1.0/ucd/Blocks.txt</a>.
XML versions of the database at
<a href="http://www.unicode.org/Public/5.1.0/ucdxml/">http://www.unicode.org/Public/5.1.0/ucdxml/</a>.
</dd><dt class="label"><a name="UnicodeDB-5.2.0" id="UnicodeDB-5.2.0"/>Unicode Database 5.2.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 5.2.0.
[n.p.]: The Unicode Consortium, 2009.
List of components at
<a href="http://www.unicode.org/versions/components-5.2.0.html">http://www.unicode.org/versions/components-5.2.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt">http://www.unicode.org/Public/5.2.0/ucd/UnicodeData.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/5.2.0/ucd/Blocks.txt">http://www.unicode.org/Public/5.2.0/ucd/Blocks.txt</a>.
XML versions of the database at
<a href="http://www.unicode.org/Public/5.2.0/ucdxml/">http://www.unicode.org/Public/5.2.0/ucdxml/</a>.
</dd><dt class="label"><a name="UnicodeDB-6.0.0" id="UnicodeDB-6.0.0"/>Unicode Database 6.0.0</dt><dd>
The Unicode Consortium. <em>The Unicode Character
Database</em>, version 6.0.0.
[n.p.]: The Unicode Consortium, 2010.
List of components at
<a href="http://www.unicode.org/versions/components-6.0.0.html">http://www.unicode.org/versions/components-6.0.0.html</a>.
Character data at
<a href="http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt">http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt</a>.
Blocks data at
<a href="http://www.unicode.org/Public/6.0.0/ucd/Blocks.txt">http://www.unicode.org/Public/6.0.0/ucd/Blocks.txt</a>.
XML versions of the database at
<a href="http://www.unicode.org/Public/6.0.0/ucdxml/">http://www.unicode.org/Public/6.0.0/ucdxml/</a>.
</dd><dt class="label"><a name="unicodeRegEx" id="unicodeRegEx"/>Unicode Regular Expression Guidelines</dt><dd>
Mark Davis. <em>Unicode Regular Expression Guidelines</em>, 1998.
Available at: <a href="http://www.unicode.org/unicode/reports/tr18/">
http://www.unicode.org/unicode/reports/tr18/</a>
</dd><dt class="label"><a name="UnicodeVersions" id="UnicodeVersions"/>Unicode Versions</dt><dd>
Unicode Consortium. <em>Enumerated Versions of The Unicode
Standard</em>,
2011.
Available at:
<a href="http://www.unicode.org/versions/enumeratedversions.html">http://www.unicode.org/versions/enumeratedversions.html</a>
</dd><dt class="label"><a name="structures-1.0" id="structures-1.0"></a>XSD 1.0 Part 1: Structures</dt><dd>
World Wide Web Consortium.
<em>XML Schema Part 1: Structures</em>,
ed. Henry Thompson et al.
W3C Recommendation 2 May 2001.
Available at:
<a href="http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/">http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/</a>
</dd><dt class="label"><a name="datatypes-1.0" id="datatypes-1.0"></a>XSD 1.0 Part 2: Datatypes</dt><dd>
World Wide Web Consortium.
<em>XML Schema Part 2: Datatypes</em>,
ed. Paul V. Biron and Ashok Malhotra.
W3C Recommendation 2 May 2001.
Available at:
<a href="http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/">http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/</a>
</dd><dt class="label"><a name="structures-1.1" id="structures-1.1"></a>XSD 1.1 Part 1: Structures</dt><dd>
World Wide Web Consortium.
<em>W3C XML Schema Definition Language (XSD) 1.1 Part 1:
Structures</em>, ed. Shudi (Sandy) Gao 高殊镝,
C. M. Sperberg-McQueen,
and Henry S. Thompson.
W3C Working Draft 3 December 2009.
Available at:
<a href="http://www.w3.org/TR/xmlschema11-1">http://www.w3.org/TR/xmlschema11-1</a>
</dd><dt class="label"><a name="datatypes-1.1" id="datatypes-1.1"></a>XSD 1.1 Part 2: Datatypes</dt><dd>
World Wide Web Consortium.
<em>W3C XML Schema Definition Language (XSD) 1.1 Part 2:
Datatypes</em>,
ed. David Peterson et al.
W3C Working Draft 3 December 2009.
Available at:
<a href="http://www.w3.org/TR/xmlschema11-2">http://www.w3.org/TR/xmlschema11-2</a>
</dd></dl></div><div class="div1">
<h2><a name="acknowledgments" id="acknowledgments"></a>B Acknowledgements (non-normative)</h2><p>This document was prepared by the W3C
XML Schema Working Group. The
members at the time of publication were:</p><ul><li>Gioele Barabucci, University of Bologna</li><li>Paul V. Biron, Invited expert</li><li>David Ezell, National Association of Convenience Stores (NACS) (<i>chair</i>) </li><li>Shudi (Sandy) Gao 高殊镝, IBM</li><li>Mary Holstege, Mark Logic</li><li>Sam Idicula, Oracle</li><li>Michael Kay, Invited expert</li><li>Nan Ma, China Electronics Standardization Institute</li><li>Paolo Marinelli, University of Bologna</li><li>Jim Melton, Oracle</li><li>Noah Mendelsohn, Invited expert</li><li>Dave Peterson, Invited expert</li><li>Liam Quin, W3C</li><li>C. M. Sperberg-McQueen, Black Mesa Technologies (for W3C) (<i>staff contact</i>) </li><li>Henry S. Thompson, University of Edinburgh</li><li>Scott Tsao, The Boeing Company</li><li>Fabio Vitali, University of Bologna</li><li>Stefano Zacchiroli, University of Bologna</li><li>Kongyi Zhou, Oracle</li></ul></div></div></body>
</html>