why-html-5-matters.html
68.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style type="text/css" media="all">
@import "/QA/2006/01/blogstyle.css";
</style>
<meta name="keywords" content='' />
<meta name="description" content="This is a simple story. The story of an HTML bug. Like every story, it could start with… Once upon a time, there was a bug. The bug and its consequences A known HTML page contains a similar piece of..." />
<meta name="revision" content="$Id: why-html-5-matters.html,v 1.128 2011/12/16 02:58:30 gerald Exp $" />
<link rel="alternate" type="application/atom+xml" title="Atom" href="http://www.w3.org/QA/atom.xml" />
<link rel="alternate" type="application/rss+xml" title="RSS 1.0" href="http://www.w3.org/QA/news.rss" />
<title>Why HTML 5 Specification Matters? - W3C Blog</title>
<link rel="start" href="http://www.w3.org/QA/" title="Home" />
<link rel="prev" href="http://www.w3.org/QA/2007/07/html_classes_of_products_and_a.html" title="HTML Classes of Products and Authoring" />
<link rel="next" href="http://www.w3.org/QA/2007/07/the_way_of_web_standards.html" title="Web Standards Do - the Way of Web Standards" />
<!--
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Description
rdf:about="http://www.w3.org/QA/2007/07/why-html-5-matters.html"
trackback:ping="http://www.w3.org/QA/sununga/mt-tb.cgi/65"
dc:title="Why HTML 5 Specification Matters?"
dc:identifier="http://www.w3.org/QA/2007/07/why-html-5-matters.html"
dc:subject="HTML"
dc:description="This is a simple story. The story of an HTML bug. Like every stories, it could start with… Once upon a time, there was a bug. The bug and its consequences A known HTML page contains a similar piece of..."
dc:creator="Karl Dubost"
dc:date="2007-07-06T06:30:23+00:00" />
</rdf:RDF>
-->
<!-- <script type="text/javascript" src="http://www.w3.org/QA/mt.js"></script>-->
</head>
<body class="layout-one-column">
<div id="banner">
<h1 id="title">
<a href="http://www.w3.org/"><img height="48" alt="W3C" id="logo" src="http://www.w3.org/Icons/WWW/w3c_home_nb" /></a>
W3C Blog
</h1>
</div>
<ul class="navbar" id="menu">
<li><strong><a href="/QA/" title="W3C Blog Home">[ W3C Blog ]</a></strong></li>
<li><a href="/QA/Library/" title="Documents and Publications on Web and Quality">Documents</a></li>
<li><a href="/QA/Tools/" accesskey="3" title="Validators and other Tools">Tools</a></li>
<li><a href="/2007/12/qa-blog-help/index#feedback">Feedback</a></li>
</ul>
<div id="searchbox">
<form method="get" action="http://www.google.com/custom" enctype="application/x-www-form-urlencoded">
<p id="formbox"><input type="text" size="15" class="textfield" name="q" accesskey="E" maxlength="255" /> <input type="submit" class="submitfield" value="Search" id="goButton" name="sa" accesskey="G" /> <input type="hidden" name="cof" value="T:black;LW:72;ALC:#ff3300;L:http://www.w3.org/Icons/w3c_home;LC:#000099;LH:48;BGC:white;AH:left;VLC:#660066;GL:0;AWFID:0b9847e42caf283e;" /><input type="hidden" id="searchW3C" name="sitesearch" checked="checked" value="www.w3.org/QA" /><input type="hidden" name="domains" value="www.w3.org/QA" /></p>
</form>
</div>
<div id="main"><!-- This DIV encapsulates everything in this page - necessary for the positioning -->
<p class="content-nav">
<a href="http://www.w3.org/QA/2007/07/html_classes_of_products_and_a.html">« HTML Classes of Products and Authoring</a> |
<a href="http://www.w3.org/QA/">Main</a>
| <a href="http://www.w3.org/QA/2007/07/the_way_of_web_standards.html">Web Standards Do - the Way of Web Standards »</a>
</p>
<h2 class="entry-header">Why HTML 5 Specification Matters?</h2>
<div class="entry-body">
<p>This is a simple story. The story of an HTML bug. Like every story, it could start with… Once upon a time, <a href="http://bugs.webkit.org/show_bug.cgi?id=12740" title="Bug 12740 - www.bmw.com page doesn't work">there was a bug</a>.</p>
<h3 id="bug">The bug and its consequences</h3>
<p>A known HTML page contains a similar piece of code:</p>
<pre class="html">
&lt;div style="display:none"&gt;
&lt;table&gt;
&lt;div&gt;
&lt;table&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;
</pre>
<p>The HTML code is invalid. It means that a browser which "reads" this page has to recover from the errors and recreate something logical, simply to display it to the user in the best case, or to apply JavaScript and CSS code in the worst case. The browser implementer is then facing a question: How do I recreate the structure of the content? How do I catch the error and turn it into something usable?</p>
<p>Some browsers will find a recovery strategy, but not necessarily the same one. Some browsers will fail on the page. In the end, this means two things:</p>
<ul>
<li>Users get unpredictable results, which leads to usability problems and an erosion of trust.</li>
<li>Interoperability issues for browser implementers, and then a risk of losing market share ("It works with browser A but not with browser B.")</li>
</ul>
<h3 id="repair">How to repair?</h3>
<p>The HTML 4.01 specification is not much help in this case. It doesn't define a precise <a href="http://www.w3.org/TR/html401/appendix/notes.html#h-B.1" title="Performance, Implementation, and Design Notes">error recovery mechanism for invalid documents</a>. So each browser implementer has to create their own strategy, with the consequences we have just talked about.</p>
<p>The HTML 5.0 Editor's draft defines a very precise mechanism for <a href="http://www.w3.org/html/wg/html5/#stack">recovering from invalid markup</a>. As we <a href="http://bugs.webkit.org/show_bug.cgi?id=12740#c11" title="Bug 12740 - www.bmw.com page doesn't work">can see in the comment about the bug</a>, Dave Hyatt says: <q>Easy, the html5 spec covers this.</q></p>
<p>The browser implementer had clear instructions for this type of error, was able to implement them, and thus created an interoperable recovery system for this type of mistake. Web users were finally able to access the Web site without trouble and in the same way as with other browsers. <strong><a href="http://www.w3.org/html/wg/html5/">HTML 5</a> Specification matters because it creates more interoperability when recovering from errors</strong>.</p>
</div>
<div id="more" class="entry-more">
</div>
<p class="postinfo">Filed by <a href="http://www.w3.org/People/karl/">Karl Dubost</a> on July 6, 2007 6:30 AM in <a href="http://www.w3.org/QA/archive/technology/html/">HTML</a><br />
<span class="separator">|</span> <a class="permalink" href="http://www.w3.org/QA/2007/07/why-html-5-matters.html">Permalink</a>
| <a href="http://www.w3.org/QA/2007/07/why-html-5-matters.html#comments">Comments (44)</a>
| <a href="http://www.w3.org/QA/2007/07/why-html-5-matters.html#trackback">TrackBacks (0)</a>
</p>
<h3 class="comments-header" id="comments">Comments</h3>
<div class="comment" id="comment-53319">
<p class="comment-meta" id="c053319">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c053319">#</a> 2007-07-06</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>
<p>Unfortunately, this is a very one-sided perspective. The consequences of silent error handling in this case are:</p>
<ol>
<li><p>Invalid HTML markup is still online and not fixed.</p></li>
<li><p>This does nothing to make the Web page render correctly in current / legacy browsers.</p></li>
<li><p>The Web page author learned nothing. He/she remains ignorant of the mistake and will continue to make similar mistakes on other pages.</p></li>
</ol>
</div>
</div>
<div class="comment" id="comment-53363">
<p class="comment-meta" id="c053363">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c053363">#</a> 2007-07-06</span>
</p>
<div class="comment-bulk">
<p>Dubost--</p>
<p>Thank you very much for the hard example that supports your position on the need for HTML 5.0 to efficiently render "hobby content" within browsers. That one example, for what it is worth, sold me on the need to further the development of the 5.0 spec.</p>
<p>I still, adamantly, support the basis for Holzschlag's call to let things catch up to full implementation of existing specs before attempts are made to implement any parts of the proposed 5.0 spec.</p>
<p>Whether, I adopt HTML 5.0 -- that is too far down the road. XHTML 1.1 works for me, my clients and the clients' markets and customers. [Does it very well and without any "angry" e-Mails.] </p>
<p>Again, thanks for that hardcore example.</p>
</div>
</div>
<div class="comment" id="comment-54238">
<p class="comment-meta" id="c054238">
<span class="comment-meta-author"><strong>karl dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c054238">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Hi Vlad,</p>
<p>I do not disagree with you. But let's be practical in a business sense. </p>
<ol>
<li><p>Invalid markup is still online, and most of the time, it will stay online for a long time. Bear with me, I'm all for fixing the markup, but unfortunately I do not see any practical solutions to do that.</p></li>
<li><p>What would be your practical proposal for "no silent recovery"?</p></li>
<li><p>Legacy browsers. Indeed that is a very good point to keep in mind. Agreed with you.</p></li>
<li><p>The Web page author will not learn anything with a browser. Or at least in a common web browser. The issue here is that Web authors should use appropriate tools. Either</p>
<ul>
<li>appropriate authoring tools</li>
<li>appropriate quality checking in the development process</li>
<li>appropriate checking tools of the code.</li>
</ul></li>
</ol>
<p>Browsers are not tools to check your work. They are the very final step, used to see the rendering. Do not trust browsers. They are meant to be used by everyone.</p>
</div>
</div>
<div class="comment" id="comment-54302">
<p class="comment-meta" id="c054302">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c054302">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>
<blockquote>
<p>What would be your practical proposal for "no silent recovery"?</p>
</blockquote>
<p>Let's back up and look at the big picture. The real discussion is about the future of the Web. Let's take W3C's vision of the future - the Semantic Web. If you can honestly tell me that a Semantic Web can be successfully built on top of invalid HTML, then I will take back my objections to your original post.</p>
<p>However, I suspect most future W3C technologies will not work well in an invalid HTML world. If this is the case, then HTML 5 is a diversion from building the future Web.</p>
<p>So how do you make valid markup? From our experience as an authoring tool vendor, "active error feedback" is the only way to ensure content is authored according to specification.</p>
<p>What is the practical way forward towards a Web with valid markup? You need a new spec that is not backwards compatible. Specs don't need to be backwards compatible. It is user-agents that need to be backwards compatible by supporting multiple specs.</p>
</div>
</div>
<div class="comment" id="comment-54414">
<p class="comment-meta" id="c054414">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c054414">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Of course semantic Internet content can be built. What logical reasons are there that the entire Web needs to be homogenous -- it doesn't need nor should it be.</p>
<p>The BMW example, if Bavarian Motor Works wishes to take advantage of the benefits of semantic content, avail itself to future technologies and communicate with the broadest spectrum of its customer base, BMW will have to place the same level of quality of engineering into their Internet communication as they do into their product line. The market place will decide that .. not you, I nor the W3C.</p>
<p>The main thrust of HTML 5 is to manage non-standard compliant markup, as I understand it, albeit with some extra bells and whistles tossed in. </p>
<p>To believe that the Web is broken and by some magical formula and divine right it will get 'fixed' is a pretty big stretch and egotistical view for anyone who believes they or a collective group has that power or capability.</p>
<p>If HTML 5, ultimately, eases the way for interoperability of non-standard content so that user agents, technologies, whatever, can re-focus efforts on standards compliance, communication tools and technologies built around and upon that compliance and thus move the Web field forward, I am all for it. </p>
</div>
</div>
<div class="comment" id="comment-54611">
<p class="comment-meta" id="c054611">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c054611">#</a> 2007-07-11</span>
</p>
<div class="comment-bulk">
<p>No intention to hog up this particular post--</p>
<p>One significant concern was the possibility that a new HTML 5.0 spec would detract or become a de facto standard to XHTML and thus stall development.</p>
<p>The recent interview between Berners-Lee and IDG Now put that concern to rest:</p>
<p><a href="http://www.itworld.com/Tech/4535/070709future/pfindex.html" rel="nofollow">http://www.itworld.com/Tech/4535/070709future/pfindex.html</a></p>
</div>
</div>
<div class="comment" id="comment-54690">
<p class="comment-meta" id="c054690">
<span class="comment-meta-author"><strong>Dark Phoenix </strong></span>
<span class="comment-meta-date"><a href="#c054690">#</a> 2007-07-12</span>
</p>
<div class="comment-bulk">
<p>Actually, when it comes to error checking, I am of the opinion that the browser should record errors SOMEWHERE. Maybe browsers ought to have a mode where every HTML error gets displayed to the user (preferably which can be turned off, so people don't start complaining about being hassled)?</p>
</div>
</div>
<div class="comment" id="comment-54712">
<p class="comment-meta" id="c054712">
<span class="comment-meta-author"><strong>Martin Hassman </strong></span>
<span class="comment-meta-date"><a href="#c054712">#</a> 2007-07-13</span>
</p>
<div class="comment-bulk">
<p>html5lib logs parsing errors, see at the bottom of <a href="http://james.html5.org/cgi-bin/parsetree/parsetree.py?uri=http%3A%2F%2Fbugs.webkit.org%2Fattachment.cgi%3Fid%3D14511" rel="nofollow">http://james.html5.org/cgi-bin/parsetree/parsetree.py?uri=http%3A%2F%2Fbugs.webkit.org%2Fattachment.cgi%3Fid%3D14511</a> </p>
<p>If browsers start to log these errors like they do with JavaScript and CSS errors, it will be really great.</p>
</div>
</div>
<div class="comment" id="comment-55142">
<p class="comment-meta" id="c055142">
<span class="comment-meta-author"><strong>Michael Daines </strong></span>
<span class="comment-meta-date"><a href="#c055142">#</a> 2007-07-15</span>
</p>
<div class="comment-bulk">
<p>Maybe this is naive, but isn't it possible that we can have the semantic web without worrying so much about whether documents are valid? For example, given a user agent that uses or is informed in some way by (let's say) the HTML5 spec, how badly would you have to mess up your hCalendar to make it too broken or ambiguous to interpret or use?</p>
</div>
</div>
<div class="comment" id="comment-56486">
<p class="comment-meta" id="c056486">
<span class="comment-meta-author"><strong>Stuart Jones </strong></span>
<span class="comment-meta-date"><a href="#c056486">#</a> 2007-07-23</span>
</p>
<div class="comment-bulk">
<p>With regards to feedback of errors or recovery of errors...</p>
<p>Developers should have feedback of errors in their code - but that does not necessarily mean that you can't have browsers that are able to produce consistent content even if there are some errors in the underlying code.
All it means is that there needs to be a differentiation between your average user's browser and a developer's browser.</p>
<p>This differentiation is already starting to come about with some of the developer plugins that are available for some browsers.</p>
</div>
</div>
<div class="comment" id="comment-56607">
<p class="comment-meta" id="c056607">
<span class="comment-meta-author"><strong>Vijayakumar Subburaj </strong></span>
<span class="comment-meta-date"><a href="#c056607">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>
<p>Recovering from errors?! First, why allowing errors?!</p>
<p>Why not just say the document is invalid, like xml?!</p>
</div>
</div>
<div class="comment" id="comment-56611">
<p class="comment-meta" id="c056611">
<span class="comment-meta-author"><strong>Karl Dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c056611">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>Hi,</p>
<p>You asked why browsers recover from errors instead of reporting them right away. There are two reasons: the browsers market and the companies market.</p>
<ul>
<li>companies market: </li>
</ul>
<p>An advertisement in a magazine gives a URI to your commercial web site. Your web site depends on a lot of third-party software and on employees creating content. In one part of this software, someone has to create small chunks of HTML for editing the Web site. The person is tired, publishes the content quickly before the week-end but unfortunately it is invalid. The consumer tries to access the page without success; it shows a big error message instead. The consumer goes to the site of your competitor.</p>
<ul>
<li>Browsers market: </li>
</ul>
<p>The company Opeziri makes a very strict browser which doesn't accept any errors in the markup. Each page which is bogus is not displayed and has a big red message saying error. Your grandfather is using this Opeziri browser but he's getting tired of using it. Something around 95% of the Web is not viewable because everything is invalid, though he doesn't know that. He just sees the error message. Then there is a competitor Moslacker which accepts all pages. Moslacker starts to get more market share than Opeziri, which sees its business model going down.</p>
</div>
</div>
<div class="comment" id="comment-56650">
<p class="comment-meta" id="c056650">
<span class="comment-meta-author"><strong>Gustaf Liljegren </strong></span>
<span class="comment-meta-date"><a href="#c056650">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>First, I think it's a great idea to standardize how browsers should behave when coming upon broken documents. Second, I think we all ought to strive for a well-formed web, because well-formed code is easier to read, for humans and machines alike.</p>
<p>It's obvious that browsers can't parse HTML as strict as we do other XML documents. However, I don't agree that browsers are therefore not good for teaching users how to write well-formed and valid documents. Browsers could assist users to a great extent on this task. Here's how:</p>
<p>Whenever the browser encounters an error in the syntax or grammar, make a small icon appear on the status bar. Hover your mouse pointer over it, and it says "This page is not valid. Click to validate this page". Click and you find yourself at W3C's then improved validator, which tells you not only what is wrong, but why it's wrong and what ought to be done about it.</p>
<p>Whenever this icon appears on a public site, it puts the expert author to shame with his fellows. It lets the curious hobby user learn from other's mistakes. And most importantly: it lets your grand father ignore parsing errors. It doesn't prevent the page from rendering, along the lines of HTML 5 error handling. If you are the author, you get valuable feedback from the end user environment.</p>
<p>It is my opinion that this kind of feedback ought to be a SHOULD requirement in the HTML 5 spec.</p>
</div>
</div>
<div class="comment" id="comment-56751">
<p class="comment-meta" id="c056751">
<span class="comment-meta-author"><strong>nomad </strong></span>
<span class="comment-meta-date"><a href="#c056751">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>
<p>Re your first case (publish invalid document, customer goes to competitor), that's <em>exactly</em> what I'd like to see. This reason puts market pressure on businesses to publish only valid documents and to check their validity, and that is a good thing.</p>
<p>And if you publish invalid document you risk having your document inaccessible anyway, because not every error is recoverable.</p>
<p>By specifying unified error recovery mechanism you give authors more leeway to ignore their error, practically provoking them to use invalid HTML.</p>
</div>
</div>
<div class="comment" id="comment-56753">
<p class="comment-meta" id="c056753">
<span class="comment-meta-author"><strong>Sean Farrell </strong></span>
<span class="comment-meta-date"><a href="#c056753">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>I am reading over and over and over the phrase "that your grandmother / grandfather should be able to publish on the web" as an excuse to have invalid HTML hanging around. This is total nonsense!</p>
<p>You can differentiate two types of people, coders and non-coders. Coders either write the HTML or tools (scripts) that output HTML, and non-coders only use tools that generate HTML. A non-coder will never write a line (tag) of HTML, nor understand it. He/She simply does not have to, since there are tools that can be used. (If the tool does not output 100% valid HTML it is broken...)</p>
<p>The reason why 95% of the web is invalid (not broken) is because browsers allowed showing broken pages. The creators of tools simply did not care to check if their tool wrote 100% valid HTML.
The other problem was that in the early specs the emphasis was never put on the actual way to display HTML and so browsers differed slightly. Additionally browser extensions did not make life easier.</p>
<p>My proposal to get people to comply in the future is to display a red bar at the top of the page if it is ill-formed, just like pop-up blockers do. The content is still viewable, but the authors take a little blame. There is a good chance that the manager that browses the corporate site goes to the web developer and asks "Why is our site non-conformant?".</p>
</div>
</div>
<div class="comment" id="comment-56767">
<p class="comment-meta" id="c056767">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c056767">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Can we give names to different error handling options so that it's easier to discuss?</p>
<p>Gustaf, let's call your suggestion "passive error feedback". Let's call the XML approach "active error feedback". And the HTML approach "no error feedback".</p>
<p>Karl, nobody is suggesting that browsers should use "active error feedback" for existing specs like HTML 4.x, but only for new specs. Web site developers can choose to use the new spec or the old spec. If all browser vendors agree to respect the rules of the new spec, then market share is not affected.</p>
</div>
</div>
<div class="comment" id="comment-56775">
<p class="comment-meta" id="c056775">
<span class="comment-meta-author"><strong>David </strong></span>
<span class="comment-meta-date"><a href="#c056775">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Gustaf is correct that a browser should notify the user in some manner that it is "fixing" the document in order to have it display "properly". I use quotes because it is impossible to correctly guess the intended results 100% of the time. I also know many web developers that use a browser as their primary means of testing their code. Their main concern is how the page looks in browsers people are actually using, not whether it validates. Also, checking that it validates is an additional step, one which might break the layout when the HTML is modified to comply.
This brings me to my ultimate point, which is a bit off-topic: Why is the W3C developing yet another specification when the available browsers fail to support the current ones? (source: <a href="http://www.webdevout.net/browser-support-summary" rel="nofollow">http://www.webdevout.net/browser-support-summary</a> ) Apparently I live in a fantasy world, because I would like the ability to create a web page that both validates and looks how I intended in browser X and know that everyone else will see the same thing I see as long as they are using a browser that fully supports the correct specifications. This may be an unreasonable request, but why doesn't the W3C spend their time and resources to create an opensource reference browser that web developers can use to test page layout and browser developers can use as a guide for fixing their current browsers?</p>
</div>
</div>
<div class="comment" id="comment-56983">
<p class="comment-meta" id="c056983">
<span class="comment-meta-author"><strong>Tony E </strong></span>
<span class="comment-meta-date"><a href="#c056983">#</a> 2007-07-26</span>
</p>
<div class="comment-bulk">
<p>I'm personally a bit stuck on the fence on whether a browser should, or should not fix or auto-repair website code. I am leaning on the thought that browsers should not, however, simply for the fact that if browsers allow for broken code, why fix it? I recall old Netscape 2 vs. Internet Explorer debates when IE auto-repaired tables and Netscape Communicator did not, for example. People designing for Netscape &amp; similar browsers knew it worked properly, "for the most part".</p>
<p>My train of thought on this issue is like that of purchasing a vehicle. If you buy a brand new car, and the door doesn't open. Do you send it back? or just roll down the window with your neat remote window open/close mechanism, and climb in? Allowing broken html to exist is the same to me, as climbing in the window. The manufacturer has no clue they just sold a defective product, because you're driving away with a smile on your face.</p>
<p>I fail to see how that is acceptable. Let Microsoft do its own thing, they will anyway (in my humble opinion)... but browsers like Firefox/Mozilla, Netscape, Opera, Safari, they seem to be willing to work with standards, why not have them choke on errors? Then people will build better code. Perhaps browser developers can opt to add an option "Enable Code/DOM debugging?" so it <em>will</em> pop up a window describing any code that choked or is incorrect?</p>
<p>Just an idea and train of thought of one small time web developer. And I also do believe a good developer runs their code through Tidy or some other code checker, but obviously Joe Shmoe 14yo just got hired by Jim Bob's Used Car Sales, and isn't a pro.</p>
</div>
</div>
<div class="comment" id="comment-57145">
<p class="comment-meta" id="c057145">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c057145">#</a> 2007-07-27</span>
</p>
<div class="comment-bulk">
<p>I am not so sure that, while handy for developers for example, that any additional burdens should be placed upon browsers and their development to notify a Web user of invalid code.</p>
<p>A browser's primary function is to serve the user of the Internet. There are enough tools designed and in-place for the developer to use in generating standards compliant code.</p>
<p>What will drive and continue to drive increased production of standard compliance is education at the developer level and the market and economic pressures that are exerted upon the business and how it impacts their performance and relationships with their customers.</p>
<p>Browser developers need to focus on implementation of standards, evolution of standards and browser based technologies and upon security for the purpose of delivering Web based communication to the end user. That in and of itself is more than enough to keep their plate full.</p>
<p>However, for the hobbyist and for businesses, such as BMW which was referenced by Dubost, it is the responsibility and function, I believe, of the CMS and development applications to serve the function of notifying the developer of invalid code.</p>
<p>David--</p>
<p>You came close to answering your own question of why another spec:</p>
<blockquote><p>checking that it validates is an additional step, one which might break the layout when the HTML is modified to comply.</p></blockquote>
<p>The primary function of the HTML 5 spec is to adapt to the ways that the vast majority of content is being coded and allow it to comply to a standard.</p>
<p>In theory, and hopefully practicality, this will ease the way for browsers to render, for example, tag soup, consistently.</p>
<p>There are a lot of hurdles, as history has pointed out, to achieve and implement any standard. </p>
<p>You make a very valid point, along with Molly Holzschlag, that full implementation of all existing standards should and needs to happen, quickly.</p>
</div>
</div>
<div class="comment" id="comment-57349">
<p class="comment-meta" id="c057349">
<span class="comment-meta-author"><strong>eekee </strong></span>
<span class="comment-meta-date"><a href="#c057349">#</a> 2007-07-28</span>
</p>
<div class="comment-bulk">
<p>Lot of wrangling over details in these comments, with the bulk of opinion seeming to go to something that I believe would be actively harmful to individuals trying to earn a living. Do you guys who want the web browser to flag bad pages think it's alright to publicly show a working person's mistakes to the world? Granted, sloppy work is no good thing, but everyone makes mistakes, and in correcting those mistakes one reaches a point of diminishing returns:</p>
<p>To the individual site creator, the effort put in matters, and I don't want to have my judgment of an individual colored by his or her html skill!</p>
<p>Likewise with the company, good html coders I'm sure cost money, and I do not want to have my judgment of a company influenced by how good a coder –how good a shop window fitter – they could afford!</p>
<p>I also, as my last and least point, don't want my browsing being bothered by a little icon in the corner saying "Oh dear, this web page author made a mistake!" I do, however, want to be able to enable just such an icon for checking my own pages. It would be a handy browser feature but it would be a browser feature, not something that belongs in the standard. (Of course, if one wishes to see the validity of every page, one could simply leave that feature enabled.)</p>
<p>I read the original post as something with a much, much nobler goal than pointing fingers. HTML 5 will require that all browsers respond to any given error in the same way. This, if implemented properly, removes an artificial and meaningless source of differences between IE, Gecko, Opera, khtml, and all other html renderers out there.</p>
</div>
</div>
<div class="comment" id="comment-58041">
<p class="comment-meta" id="c058041">
<span class="comment-meta-author"><strong>Gustaf Liljegren </strong></span>
<span class="comment-meta-date"><a href="#c058041">#</a> 2007-07-31</span>
</p>
<div class="comment-bulk">
<p>Even though most of us want a more well-formed web, it appears we are divided on how to get there. One extreme is to let browsers silently fix all markup errors, like today. The other extreme is to make it a requirement for conforming browsers to refuse to show pages with syntax/grammar errors. The first doesn't lead to a more well-formed web, which is our ultimate goal. The second is not realistic, because it breaks the web.</p>
<p>Anything that breaks the web (i.e. makes much of today's web unreadable) will surely not be implemented, and for a good reason. We can't have a transitional period where most websites won't work in the latest browser. The first priority of HTML 5 must be to accommodate the masses, and that means making old tagsoup show up pretty in next-gen browsers.</p>
<p>However, browsers could still promote well-formedness in discreet ways. The icon on the status bar I suggested earlier would have this effect. People wouldn't see it unless they looked for it, but it would have an impact on webmasters. The specification doesn't need to be too specific on how to notify the user of validation errors, but it could state that a browser by default SHOULD notify the user, and give an option to validate the page.</p>
</div>
</div>
<div class="comment" id="comment-58100">
<p class="comment-meta" id="c058100">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c058100">#</a> 2007-08-01</span>
</p>
<div class="comment-bulk">
<p>Gustaf, please do not misrepresent the side that supports "active error feedback". NOBODY is suggesting that Web browsers should stop rendering HTML 4.x/XHTML 1.x Web pages because they are invalid. Supporters of "active error feedback" suggest this approach only for NEW specs.</p>
</div>
</div>
<div class="comment" id="comment-58486">
<p class="comment-meta" id="c058486">
<span class="comment-meta-author"><strong>JP Fiset </strong></span>
<span class="comment-meta-date"><a href="#c058486">#</a> 2007-08-02</span>
</p>
<div class="comment-bulk">
<p>I find myself leaning towards the comments that allow rendering of bad markup. Users want to see content that companies are offering. Creating tools that hinder this relation is not good.</p>
<p>Besides, if you have worked long enough in private companies, you can envisage the examples given above turning into situations where an expert author is coming back to the office on a long week end to fix something broken by a novice author. Please, display the page.</p>
<p>I also want to see a valid web. The discussion so far is focused on letting the user know that a page is broken. The user can not do anything about it and I suspect he/she will not take the time to send an e-mail. </p>
<p>How about letting the originating site know that the page is broken? Could there be any mechanism put in place to help with that?</p>
</div>
</div>
<div class="comment" id="comment-58503">
<p class="comment-meta" id="c058503">
<span class="comment-meta-author"><strong>Cecil Ward </strong></span>
<span class="comment-meta-date"><a href="#c058503">#</a> 2007-08-02</span>
</p>
<div class="comment-bulk">
<p>As we all know, the reason that this situation ever came about is that early web browsers were unreasonably forgiving and these browsers were the only testing tools that non-professional web authors were exposed to. These browsers failed in their duty to web authors.
We should not keep becoming distracted by the issue of what happens when end users view invalid pages in their browsers. What is important to focus on when considering how things must change is sorting out the experience that <em>web authors</em> at home (say) have when they are writing HTML and using their browsers as test tools.
It is time to draw a line and bring the era of pervasive broken markup to an end and this is to be achieved by a combination of measures.
One strategy towards this is to pressure browser manufacturers and vendors of other tools to include accurate and updatable validation tools in their products. This is an idea worth pursuing, but this strategy on its own will not succeed because only the more knowledgeable web authors will know to obtain such validators or know to turn them on.
Rather, I believe that it is vital to proceed as follows
(i) W3C to version-mark HTML5 now, so that forthcoming browsers can recognise it as such
(ii) W3C to require browser manufacturers to fail with an error (just like XML) if they see an invalid page bearing that version marker.
(iii) W3C to act quickly to get browser manufacturers to sign up for this, and fast-track the release of a small spec covering this issue well ahead of the HTML5 spec timescale. Such a spec would be small, and a technique similar to DOCTYPE-switching would serve. Aim to get it into Firefox 3 and IE8 without fail.
There is no need to worry about any bad impact on end-users' experience in this respect, because in the new scenario the web author would never have been able to unknowingly release an HTML5-version-marked page that was invalid. Failing to signal errors to home web authors was never a way of helping them, it was actually a behaviour that was letting them down, and it’s time to acknowledge that, and put an end to the old era.</p>
<p>Cecil Ward.</p>
</div>
</div>
<div class="comment" id="comment-58896">
<p class="comment-meta" id="c058896">
<span class="comment-meta-author"><strong>Olivier Wehner </strong></span>
<span class="comment-meta-date"><a href="#c058896">#</a> 2007-08-05</span>
</p>
<div class="comment-bulk">
<p>There is something spooky about these "should browsers render tag soup" discussions: Yes, this would make the life of everyone easier. No, browser vendors will never stick to that rule. Why should they?</p>
<p>The people who write the spec do not write the browser code, the spec is not law and the browser market is too competitive to give purity, purism or beauty a chance. </p>
</div>
</div>
<div class="comment" id="comment-58983">
<p class="comment-meta" id="c058983">
<span class="comment-meta-author"><strong>mina86 </strong></span>
<span class="comment-meta-date"><a href="#c058983">#</a> 2007-08-05</span>
</p>
<div class="comment-bulk">
<p>So what, now invalid markup will have to be rendered in given way? Why is it called "invalid" then? If specification says how to render it why should anyone care to produce valid markup?</p>
<p>This only impose new restrictions on the user agents which may only do harm, ie. bloat them, make them bigger, slower and introduce new bugs.</p>
<p>After pointing out invalid markup to a web developer, s/he could say: "According to HTML5 this has to be rendered the way X Web Browser renders it so your Y Web Browser is wrong and I'm right."</p>
<p>I don't mean to offend anyone but putting error recovery into the spec is an absurd and the second most stupid thing that happened in HTML after the FONT tag.</p>
</div>
</div>
<div class="comment" id="comment-59442">
<p class="comment-meta" id="c059442">
<span class="comment-meta-author"><strong>Francis </strong></span>
<span class="comment-meta-date"><a href="#c059442">#</a> 2007-08-08</span>
</p>
<div class="comment-bulk">
<p>I recently joined a company, working with all MS .NET developers. I was shocked to discover that none of them knew that HTML/CSS Specifications existed, nor of XML and XML Schema. OMG they don't even know what W3C is.</p>
<p>Effort should be given to promote W3C Standards.</p>
</div>
</div>
<div class="comment" id="comment-60715">
<p class="comment-meta" id="c060715">
<span class="comment-meta-author"><strong>SuperKoko </strong></span>
<span class="comment-meta-date"><a href="#c060715">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<blockquote>
<p>
Karl, nobody is suggesting that browsers should use "active error feedback" for existing specs like HTML 4.x, but only for new specs. Web site developers can choose to use the new spec or the old spec. If all browser vendors agree to respect the rules of the new spec, then market share is not affected.
</p>
</blockquote>
<p>Unfortunately, you forget one thing: New specs will be recognized by new browsers, through the DOCTYPE declaration, and new browsers will be able to use "active error feedback", but existing browsers (e.g. IE6) ignores the unrecognized DOCTYPE, and will read the new HTML as if it were HTML 4.x, and will actually render something viewable, because new specs usually are quite "backward-compatible".</p>
<ol>
<li> Bad web designers who use IE6 to test their pages may use the new DOCTYPE, and think that "it works", while new browsers won't display it.</li>
<li> Users will be frustrated by behavior of new browsers and claim they're buggy, and argue that IE6 does a better job, because: "It displays more pages".</li>
<li> Users will get back to IE6.</li>
<li> Browser vendors learn the lesson, and their next browser provides no error checking or passive error checking.</li>
</ol>
<p>That happened with XHTML (though, XHTML served as application/xhtml+xml gets active error checking with some browsers). That won't happen with next specs of HTML, because browser vendors won't even try to provide active error checking: They've already learned the lesson.
</p>
<p>
Active error checking can only be put in a spec that isn't backward compatible (i.e. that current browsers don't display). XHTML served as application/xhtml+xml is one example: This MIME type is not recognized by IE6 which doesn't render it, but just asks "Do you want to save it to disk?".
</p>
<p>Passive error checking is a great idea, in my opinion. I wouldn't go as far as putting it as a SHOULD in the HTML spec. I find it more sensible to humbly request this feature to be added in specific browsers.
</p>
<blockquote>
<p>
This may be an unreasonable request, but why doesn't the W3C spend their time and resources to create an opensource reference browser that web developers can use to test page layout and browser developers can use as a guide for fixing their current browsers?
</p>
</blockquote>
<p>Because their parsing &amp; rendering bugs would become the <em>de facto</em> standard.
</p>
<p>I wish browsers would use a true SGML parser. Such parsers do exist. Unfortunately, they don't have enough error recovery mechanism. An OSS SGML parser could be adapted and that would make the browser conforming to the HTML spec.
</p>
<blockquote>
<p>
So what, now invalid markup will have to be rendered in given way? Why is it called "invalid" then? If specification says how to render it why should anyone care to produce valid markup?
</p>
</blockquote>
<p>I agree. HTML is a contract between HTML document writers and user agents.</p>
<p> HTML5 contract is:</p>
<p> If you do your job (web developer) you'll get your money (proper layout in user agent), but if you do half of the job (but, you MUST not), you'll get the same amount of money.</p>
<p> Is not it equivalent to?</p>
<p> If you do half of your job or more, you'll get your money?</p>
<blockquote>
<p>
This only impose new restrictions on the user agents which may only do harm, ie. bloat them, make them bigger, slower and introduce new bugs.
</p>
</blockquote>
<p>Exactly. The <em>de facto standard</em> "extended HTML tag soup" would be so complex, have so many quirks, that it would not be accessible to small tools written in three days by a normal developer. Only big company could produce parsers able to parse one of the most complex computer language ever.
</p>
</div>
</div>
<div class="comment" id="comment-60741">
<p class="comment-meta" id="c060741">
<span class="comment-meta-author"><strong>Ether </strong></span>
<span class="comment-meta-date"><a href="#c060741">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>Well, I fail to see what you people are arguing about. The main ideas were mostly said by others:</p>
<p>x] To create a specification of how should browsers render invalid HTML is a good idea, which should bring some uniformity to the browsers. But hey, admit it, browser don't render the same even valid documents, so where's the certainty that this time they will listen to the specifications?</p>
<p>x] I don't know why it should apply for the NEW "version" of HTML. An invalid HTML5 document shouldn't be rendered at all. That way, old invalid HTML4 pages will stay mostly the same and new HTML5 pages will be valid and no error recovery will be needed for it.</p>
<p>x] When I code an application in PHP, it doesn't even try to recover from syntax errors (which is the essence of tag soup), so why the coders of HTML should be given more? Yes, they are coders (and with all browser displaying i.e. CSS the same way, it would be easier for them to obey the specifications).</p>
<p>x] When someone cannot or doesn't want to become a coder, there are still applications that will have to create valid HTML, because when page exported from editor A won't be displayed and the same page exported from editor B will, no-one will blame HTML5 strict specifications, they'll blame editor A.</p>
</div>
</div>
<div class="comment" id="comment-60756">
<p class="comment-meta" id="c060756">
<span class="comment-meta-author"><strong>karl dubost, w3c </strong></span>
<span class="comment-meta-date"><a href="#c060756">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>In the comment of SuperKoko, he/she has used cite="first_name last_name" which has made the comment non HTML conformant. (I have fixed it.) The value of a cite attribute must be a URI.</p>
<p>What a modern browser should have done with a non conformant markup like this ? Should it carry a message saying the whole page was non conformant ?</p>
<p>btw, the doctype will not carry a version in HTML 5. It will simply be <code>&lt;!DOCTYPE html&gt;</code>. Among browser vendors, Chris Wilson (Microsoft) and Dave Hyatt (Apple) have advocated for versioning in HTML 5. Ian Hickson (Google), Anne (Opera), Maciej (Apple) are against.</p>
<p>The issue is not on the browser level, but authoring level. The issue is that authoring tools (and authors) should be strict in what they produce, that was part of the sense of my article in the craft of HTML. </p>
<p>Though I will disagree on your last statement, developers going on the HTML market <em>have</em> to recover non conformant HTML markup. It means they have to find techniques to recover the content. What HTML 5 offer for the first time is a precise description on how to recover.</p>
<p>What I would now like on the WG is more developers of CMS, and authoring tools. All people implementing the production of HTML code. The sanitization of the HTML code is in the production tools.</p>
</div>
</div>
<div class="comment" id="comment-60757">
<p class="comment-meta" id="c060757">
<span class="comment-meta-author"><strong>karl dubost, w3c </strong></span>
<span class="comment-meta-date"><a href="#c060757">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>@ether</p>
<p>if we really want to be strict :)
Your comment is non conformant you are using paragraphs instead of a real list. ul/li</p>
<p>Luckily enough, it is something which is almost impossible to check by machine. So if we push a bit further, this article should not have been displayed because of your comment.</p>
</div>
</div>
<div class="comment" id="comment-60809">
<p class="comment-meta" id="c060809">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c060809">#</a> 2007-08-16</span>
</p>
<div class="comment-bulk">
<p>Dubost--</p>
<p>I have a bunch of questions, hope you don't mind.</p>
<p>Why the differences of opinion between version reference and generic HTML 5 DOCTYPE?</p>
<p>You sold me on the need for the HTML 5 spec based solely on error and recovering handling. A key component on this are the CMS and authoring tool developers. What are the reasons for their non-participation within the HTML 5 working groups and how can this critical element be resolved?</p>
<p>Secondly, why are there two working groups on this spec?</p>
<p>The article by Anne van Kesteren [ <a href="http://www.w3.org/html/wg/html5/diff/" rel="nofollow">http://www.w3.org/html/wg/html5/diff/</a> ]is very succinct, clear and ideal for non-technical people such as myself versus the WHAT spec draft. Is it possible that van Kesteren will keep this article current on a monthly basis or as needed? </p>
<p>The W3C projects a final recommendation date of the 4th quarter of 2010. Why can't this be compressed to the 4th quarter of 2008? </p>
<p>Wouldn't incremental releases of the spec be more practical with focus initially upon error/recovery handling and security with the initial release?</p>
<p>Why is there even consideration of depreciation of elements and/or attributes based upon frequency of use or confusion/misunderstanding of use? Isn't this more of a matter of education rather than assumption that use reflects practicality or functionality of the element/attribute?</p>
<p>What impact will the HTML 5 spec have upon the existing XHTML specifications?</p>
<p>Finally, what are your thoughts about Holzschlag's suggestions that she presented within her most recent post on her blog?</p>
<p>Don't get discouraged, I have a gazillion more questions.</p>
<p>Thank you very much.</p>
</div>
</div>
<div class="comment" id="comment-60957">
<p class="comment-meta" id="c060957">
<span class="comment-meta-author"><strong>Bennett McElwee </strong></span>
<span class="comment-meta-date"><a href="#c060957">#</a> 2007-08-16</span>
</p>
<div class="comment-bulk">
<p>Consistent recovery from invalid markup is indeed a useful and important thing to specify. But it should not be part of the HTML 5 specification.</p>
<p>The original article says, "HTML 5 Specification matters because it creates more interoperability when recovering from errors." Actually, it's only the spec's definition of "a very precise mechanism for recovering invalid markup" that does this. This is indeed a useful thing to specify; but it's not logically part of the specification of the HTML 5 language.</p>
<p>W3C could instead just create an "HTML 4 Invalid Markup Recovery" spec. This spec could act as a helpful guide to browser implementors, who currently all use their own algorithms for rendering invalid markup. It would apply to both HTML 4.x and XHTML 1.x served as text/html.</p>
<p>If there is still really a need for HTML 5, then it could be accompanied by a "HTML 5 Invalid Markup Recovery" spec. But perhaps an even better approach would be to forget HTML 5 and simply allow XHTML 2 to be served as text/xhtml (or for backward compatibility, text/html). Then publish an "XHTML 2 Invalid Markup Recovery" spec.</p>
</div>
</div>
<div class="comment" id="comment-61855">
<p class="comment-meta" id="c061855">
<span class="comment-meta-author"><strong>Ether </strong></span>
<span class="comment-meta-date"><a href="#c061855">#</a> 2007-08-22</span>
</p>
<div class="comment-bulk">
<p>@karl dubost] I was talking about syntax errors, which make the document unparsable, not about the semantic ones. Anyway, I didn't write the 'p' tags; it was done automatically.</p>
<p>x] Shouldn't the browser display this page because of errors? Sure, but the script shouldn't have put it there in the first place. There are ways to detect that the posted code is invalid before putting it on the page. And when the recovery algorithm will be available, the receiving script should be able to apply it easily and make the posted code valid.</p>
<p>x]Well, I don't know that much about DOCTYPEs, SGML vs. XML and such, but a HTML5 document could be always recognized using the DTD clausule. I know that this won't solve the thing SuperKoko wrote about, but I'm no pro, right? What about creating strict (with active error feedback) and transitional or such (with passive error feedback) standards?</p>
</div>
</div>
<div class="comment" id="comment-62376">
<p class="comment-meta" id="c062376">
<span class="comment-meta-author"><strong>karl dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c062376">#</a> 2007-08-27</span>
</p>
<div class="comment-bulk">
<p>Hi,</p>
<p>Just a quick note, that I see sometimes some comments made anonymously. These comments will not be moderated positively.</p>
</div>
</div>
<div class="comment" id="comment-62629">
<p class="comment-meta" id="c062629">
<span class="comment-meta-author"><strong>Stuart Metcalfe </strong></span>
<span class="comment-meta-date"><a href="#c062629">#</a> 2007-08-28</span>
</p>
<div class="comment-bulk">
<p>I second <a href="#c060957" rel="nofollow">Bennett McElwee's comment</a>.</p>
<p>That browser vendors want to be able to recover from minor errors is entirely understandable and I don't think many people are going to lose any sleep if they write a recovery mechanism 'on top of' the HTML specification to improve the user experience. That an agreed 'standard' for this is established is good in this case. I strongly believe, however, that this mechanism has no place in the main HTML specification which should be clean, clear and above all require correct implementation. Poor quality code should be optionally recoverable but never explicitly accommodated.</p>
</div>
</div>
<div class="comment" id="comment-69750">
<p class="comment-meta" id="c069750">
<span class="comment-meta-author"><strong>Felix Schins </strong></span>
<span class="comment-meta-date"><a href="#c069750">#</a> 2007-10-02</span>
</p>
<div class="comment-bulk">
<p>Hi!</p>
<p>I think, it would be very good, if HTML5 uses some of the good ideas of XHTML2. Like e.g. the &lt;h&gt;-&lt;section&gt;-model that is better than &lt;h1&gt; - &lt;h6&gt;, &lt;separator&gt; instead of &lt;hr&gt;, removing the &lt;font&gt;-element and the &lt;iframe&gt;-tag, and so on...</p>
<p>A list of more great things, that HTML5 could use from XHTML2 is found here:
<a href="http://www.xhtml.com/en/future/x-html-5-versus-xhtml-2/" rel="nofollow">http://www.xhtml.com/en/future/x-html-5-versus-xhtml-2/</a></p>
<p>Best wishes for the standards-development...
Felix</p>
</div>
</div>
<div class="comment" id="comment-119961">
<p class="comment-meta" id="c119961">
<span class="comment-meta-author"><strong>Erik Reppen </strong></span>
<span class="comment-meta-date"><a href="#c119961">#</a> 2008-03-02</span>
</p>
<div class="comment-bulk">
<p>Why advocate any recovery from invalid markup for new technologies? It only leeches time and energy away from browser development and makes everybody's lives more difficult. How is a user who can't even master simple SGML-based syntax supposed to correct behavior if a given recovery-process has failed to properly assess their intentions?</p>
<p>People who don't want to learn anything new can stick to older technologies which will no doubt continue to be supported indefinitely as the resources expended on doing so gradually become even more negligible.</p>
<p>The carrot offered for taking the very minor step of learning to stick to lower-case, nest, and close properly will be the advantages that new technologies offer. Asking people to validate their markup for a to-the-line error check hardly seems like a major barrier to entry.</p>
<p>Sloppy syntax allowances in something as basic as an SGML-based language is a waste of resources that can only hurt accessibility, aggravate proper indexing in search engines and slow the implementation of all new technologies. Why bring the evolution of the web to a snail's pace for people who can't be bothered to do the amateur web designer's equivalent of a spell check?</p>
</div>
</div>
<div class="comment" id="comment-120061">
<p class="comment-meta" id="c120061">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c120061">#</a> 2008-03-02</span>
</p>
<div class="comment-bulk">
<p>Hi Erik,</p>
<p>Nothing forbids an author to stick to lowercase, strict guidelines, quoted attributes, etc for writing HTML. I would even personally encourage this. It is good design and practices when sharing work in a Team. </p>
<p>That said, browser developers also need to recover broken markup in their implementations. If you stop recovering from broken markup, we will not be able to access 95% of the Web.</p>
<p>There are really two things to separate:</p>
<ol>
<li>Authoring HTML which can be strict with a well defined content model</li>
<li>Parsing HTML which has to cope with errors.</li>
</ol>
<p>For the 1., I invited people to <a href="http://www.w3.org/QA/2008/02/authoring-html5" rel="nofollow">commit their time to write the HTML 5 Authoring guidelines</a>. It means people <strong>actually writing prose</strong> and not only discussing about the why and when and how. The only way to move forward on this is really to create the document for it.</p>
</div>
</div>
<div class="comment" id="comment-120158">
<p class="comment-meta" id="c120158">
<span class="comment-meta-author"><strong>Erik Reppen </strong></span>
<span class="comment-meta-date"><a href="#c120158">#</a> 2008-03-03</span>
</p>
<div class="comment-bulk">
<p>"That said, browser developers also need to recover broken markup in their implementations."</p>
<p>This is the part I'm confused on. Why?</p>
<p>Ever since I first caught wind of the new spec, I've been trying to understand whether I've misunderstood the goal of standards all along or if there's been a change of plan. I thought the idea was to ultimately transition to strict syntax. Period. Not for the purpose of "enforcing" proper coding practice out of sheer priggishness but to improve the quality of the development and usage environment for everybody involved by making sure that if something is live, its code can be easily read by machine, code, and developer. We can continue to allow for old mistakes through the use of proper doctype recognition and proprietary opt-in browser targeting (in the case of IE exclusivists).</p>
<p>But if sloppy markup continues to be allowed to go live, accessibility, indexing, barrier to entry and standards as a whole are all impacted in a negative manner in my eyes. I just don't see who it benefits. Certainly not the new markup coder who is trying to figure this stuff out for the first time but can't because a browser is incorrectly guessing at what his sloppy syntax is supposed to mean rather than simply pointing out where it needs to be corrected before rendering anything at all.</p>
<p>If the browser devs don't think that's good enough for less experienced aspiring web developers, all that's really needed is the equivalent of an SGML spellchecker that suggests rather than automatically assumes it knows the proper code. Although I'd expect most new devs could make do with something similar to the validation process.</p>
<p>So help me out here. Am I under some sort of mistaken impression about how things work or what the W3C's goals are? I'd love to have a better understanding of everybody's priorities in these matters, especially those of the browser devs (MS mostly).</p>
<p>Thanks for your response thus far and feel free to direct me to a more appropriate place for this discussion if there is one. It just seems to me like strict syntax is win-win for everybody and I don't see the cons of it.</p>
</div>
</div>
<div class="comment" id="comment-120742">
<p class="comment-meta" id="c120742">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c120742">#</a> 2008-03-05</span>
</p>
<div class="comment-bulk">
<blockquote><p>I've been trying to understand whether I've misunderstood the goal of standards all along or if there's been a change of plan. </p></blockquote>
<p>The goal of a standard is to be implemented by a good share of the market so that people can benefit of smooth interoperability when they are working with documents. It is a practical exercise with social, economical, technical constraints.</p>
<blockquote><p>But if sloppy markup continues to be allowed to go live, accessibility, indexing, barrier to entry and standards as a whole are all impacted in a negative manner in my eyes. I just don't see who it benefits.</p></blockquote>
<p>I will try to use another metaphor, because there is a misunderstanding. </p>
<p>In my native language, French, I make mistakes (typos, grammar, etc.). The rules for French are strict and defined. Someone who is listening to me or who is reading me is still able to understand me even when I make typos and grammar errors (except if my content becomes really garbage). The person has applied an automatic recovery process to make the discussion possible. In a teaching context, if the person is a professor, she/he will fix my mistakes (note that he/she has been able to understand my broken content in the first place). My <strong>author responsibility</strong> is to create correct content following the rules.</p>
<p>There are billions of documents (<strong>95%</strong>) on the Web with incorrect syntax. Two solutions:</p>
<ol>
<li>Browsers stop processing any document which is written with an incorrect syntax. It means that most of the Web sites on the Web will not be displayed anymore, your favorite travel agency, your favorite search engine, etc. <em>With the previous metaphor, nobody understands you as soon as you make a mistake.</em></li>
<li>We create a specification which explains to browsers and fixing libraries how to recover the content available on the Web in an <strong>interoperable way</strong>. <em>With the previous metaphor, everyone has a formal process to recover what you said incorrectly. Useful for teachers (validators, checkers), useful for your buddies (browsers).</em></li>
</ol>
<p><strong>That said</strong> nobody forbids you to apply your <strong>author responsibility</strong> and create strict markup. The content model of HTML 5 (rules for writing in html and xhtml) is not yet finished. A specification which makes it obvious for authors is needed. A volunteer editor, who commits time, is what we need for now.</p>
</div>
</div>
<div class="comment" id="comment-120849">
<p class="comment-meta" id="c120849">
<span class="comment-meta-author"><strong>Tom Aman </strong></span>
<span class="comment-meta-date"><a href="#c120849">#</a> 2008-03-05</span>
</p>
<div class="comment-bulk">
<p>First of all, most of the comments here refer to browsers. Instead of browsers, think user-agents. While it is reasonable to continue to have user-agents attempt to fix bad html, I think it would be great to at last insist that any new version of HTML <strong>MUST</strong> be valid. One reason that there is so much bad code out there is that user-agents (mainly browsers) have been so forgiving and have done their best to cope with errors by guessing at the repair and many page creators never validate their code (often are not aware that W3C offers free validation). The problem with allowing the errors is that it makes it difficult to write any user-agent to cope, greatly increasing the code needed to parse a document and, at the same time, slowing the rendering. In addition, allowing the errors will just perpetuate the present situation.</p>
<p>Essentially, we can't do much with existing documents except carry on as we have and display the pages as best we can but we can insist that any document that purports to be HTML 5 or higher will NOT display unless the markup is correct (and good browsers will, as a minimum, identify the line containing the error, preferably will also tell what is wrong).</p>
</div>
</div>
<div class="comment" id="comment-121757">
<p class="comment-meta" id="c121757">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c121757">#</a> 2008-03-09</span>
</p>
<div class="comment-bulk">
<p>The spec already mandates that the content which is produced must be valid.</p>
<p>For the second part of your comment, do not display content written for HTML 5 which is invalid? How do you know if an invalid document has been written with HTML 4.01, HTML 5 or nothing specific in mind? It's almost impossible to know that except if you are the author yourself (or the tool which is producing the content.)</p>
</div>
</div>
<div class="comment" id="comment-164823">
<p class="comment-meta" id="c164823">
<span class="comment-meta-author"><strong>Strick </strong></span>
<span class="comment-meta-date"><a href="#c164823">#</a> 2008-09-03</span>
</p>
<div class="comment-bulk">
<blockquote><p>How do you know if an invalid document has been written with HTML 4.01, HTML 5 or nothing specific in mind? It's almost impossible to know that except if you are the author yourself (or the tool which is producing the content.)</p></blockquote>
<p>All they would have to do is add some sort of attribute to an existing tag to put what spec you are using. (Kinda like what MS is doing with IE 8: <a href="http://support.microsoft.com/kb/956197" rel="nofollow">http://support.microsoft.com/kb/956197</a>)</p>
<p>I'm all for forcing the new standard. I'm tired of trying to maintain code that uses tags to design. </p>
</div>
</div>
<div class="comments-open" id="comments-open">
<h3 class="comments-open-header">Leave a comment</h3>
<div class="comments-open-moderated">
<p>
Note: this blog is intended to foster <strong>polite
on-topic discussions</strong>. Comments failing these
requirements and spam will not get published. Please,
enter your real name and email address. Every
individual comment is reviewed by the W3C staff.
This may take some time, thank you for your patience.
</p>
<p>
You can use the following HTML markup (a href, b, i,
br/, p, strong, em, ul, ol, li, blockquote, pre)
and/or <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a>.</p>
</div>
<div id="comments-open-data">
<form method="post" action="http://www.w3.org/QA/sununga/beach.pl" id="comments-form">
<h4>Your comment</h4>
<div id="comments-open-text">
<textarea id="comment-text" name="text" rows="20" cols="100"></textarea><br />
<label for="comment-text">Write your comment text here. Remember, keep the discussion on topic and courteous.</label>
</div>
<h4>About you</h4>
<div id="comment-form-name">
<input type="hidden" name="static" value="1" />
<input type="hidden" name="entry_id" value="69" />
<input type="hidden" name="__lang" value="en" />
<label for="comment-author">Your Name</label>
<input id="comment-author" name="author" size="30" value="" />
</div>
<div id="comment-form-email">
<label for="comment-email">Your Email Address</label>
<input id="comment-email" name="email" size="30" value="" />
</div>
<div id="comments-open-footer">
<input type="submit" accesskey="s" name="post" id="comment-submit" value="Submit" />
</div>
</form>
</div>
</div>
<p id="gentime">This page was last generated on $Date: 2011/12/16 02:58:30 $</p>
</div><!-- End of "main" DIV. -->
<address>
This blog is written by W3C staff and working group participants,<br />
and maintained by <a href="/People/CMercier/">Coralie Mercier</a>.<br />
Authorized parties may <a href="/QA/new">log in</a> to create a new entry.<br />
<span id="poweredby">Powered by Movable Type, magpierss and a lot of Web Technology</span>
</address>
<p class="copyright">
<a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2011
<a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a>®
(<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>,
<a href="http://www.ercim.eu/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>,
<a href="http://www.keio.ac.jp/">Keio</a>),
All Rights Reserved.
W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
<a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a>,
<a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a>
and <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-software">software licensing</a>
rules apply. Your interactions with this site are in accordance
with our <a href="http://www.w3.org/Consortium/Legal/privacy-statement#Public">public</a> and
<a href="http://www.w3.org/Consortium/Legal/privacy-statement#Members">Member</a> privacy
statements.
</p>
</body>
</html>