why-html-5-matters.html 68.1 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <style type="text/css" media="all">
    @import "/QA/2006/01/blogstyle.css";
    </style>
    <meta name="keywords" content='' />
    <meta name="description" content="This is a simple story. The story of an HTML bug. Like every story, it could start with… Once upon a time, there was a bug. The bug and its consequences A known HTML page contains a similar piece of..." />
    <meta name="revision" content="$Id: why-html-5-matters.html,v 1.128 2011/12/16 02:58:30 gerald Exp $" />    
   <link rel="alternate" type="application/atom+xml" title="Atom" href="http://www.w3.org/QA/atom.xml" />
   <link rel="alternate" type="application/rss+xml" title="RSS 1.0" href="http://www.w3.org/QA/news.rss" />   
   <title>Why HTML 5 Specification Matters? - W3C Blog</title>

   <link rel="start" href="http://www.w3.org/QA/" title="Home" />
   <link rel="prev" href="http://www.w3.org/QA/2007/07/html_classes_of_products_and_a.html" title="HTML Classes of Products and Authoring" />
   <link rel="next" href="http://www.w3.org/QA/2007/07/the_way_of_web_standards.html" title="Web Standards Do - the Way of Web Standards" />

   <!--
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"
         xmlns:dc="http://purl.org/dc/elements/1.1/">
<rdf:Description
    rdf:about="http://www.w3.org/QA/2007/07/why-html-5-matters.html"
    trackback:ping="http://www.w3.org/QA/sununga/mt-tb.cgi/65"
    dc:title="Why HTML 5 Specification Matters?"
    dc:identifier="http://www.w3.org/QA/2007/07/why-html-5-matters.html"
    dc:subject="HTML"
    dc:description="This is a simple story. The story of an HTML bug. Like every story, it could start with… Once upon a time, there was a bug. The bug and its consequences A known HTML page contains a similar piece of..."
    dc:creator="Karl Dubost"
    dc:date="2007-07-06T06:30:23+00:00" />
</rdf:RDF>
-->

    <!-- <script type="text/javascript" src="http://www.w3.org/QA/mt.js"></script>-->

</head>
<body class="layout-one-column">
      <div id="banner">
      <h1 id="title">
	<a href="http://www.w3.org/"><img height="48" alt="W3C" id="logo" src="http://www.w3.org/Icons/WWW/w3c_home_nb" /></a>
W3C Blog
</h1>
    </div>
    
    <ul class="navbar" id="menu">
        <li><strong><a href="/QA/" title="W3C Blog Home">[ W3C Blog ]</a></strong></li>
        <li><a href="/QA/Library/" title="Documents and Publications on Web and Quality">Documents</a></li>
        <li><a href="/QA/Tools/" accesskey="3" title="Validators and other Tools">Tools</a></li>
        <li><a href="/2007/12/qa-blog-help/index#feedback">Feedback</a></li>
    </ul>
<div id="searchbox">
<form method="get" action="http://www.google.com/custom" enctype="application/x-www-form-urlencoded">
<p id="formbox"><input type="text" size="15" class="textfield" name="q" accesskey="E" maxlength="255" /> <input type="submit" class="submitfield" value="Search" id="goButton" name="sa" accesskey="G" /> <input type="hidden" name="cof" value="T:black;LW:72;ALC:#ff3300;L:http://www.w3.org/Icons/w3c_home;LC:#000099;LH:48;BGC:white;AH:left;VLC:#660066;GL:0;AWFID:0b9847e42caf283e;" /><input type="hidden" id="searchW3C" name="sitesearch" checked="checked" value="www.w3.org/QA" /><input type="hidden" name="domains" value="www.w3.org/QA" /></p>
</form>
</div>


    <div id="main"><!-- This DIV encapsulates everything in this page - necessary for the positioning -->

                     <p class="content-nav">
                        <a href="http://www.w3.org/QA/2007/07/html_classes_of_products_and_a.html">&laquo; HTML Classes of Products and Authoring</a> |
                        <a href="http://www.w3.org/QA/">Main</a>
                        | <a href="http://www.w3.org/QA/2007/07/the_way_of_web_standards.html">Web Standards Do - the Way of Web Standards &raquo;</a>
                     </p>

                        <h2 class="entry-header">Why HTML 5 Specification Matters?</h2>
                           <div class="entry-body">
                              <p>This is a simple story. The story of an HTML bug. Like every story, it could start with… Once upon a time, <a href="http://bugs.webkit.org/show_bug.cgi?id=12740" title="Bug 12740 - www.bmw.com page doesn't work">there was a bug</a>.</p>

<h3 id="bug">The bug and its consequences</h3>
<p>A known HTML page contains a similar piece of code:</p>

<pre class="html">
&lt;div style=&quot;display:none&quot;&gt;
  &lt;table&gt;
    &lt;div&gt;
      &lt;table&gt;
      &lt;/table&gt;
    &lt;/div&gt;
&lt;/div&gt;
</pre>

<p>The HTML code is invalid. It means that a browser which "reads" this page has to recover the errors and recreate something logical to simply display it to the user in the best case, to apply javascript and CSS code in the worst case. The browser implementer is then facing a question. How do I recreate the structure of the content? How do I catch the error and make it something usable?</p>

<p>Some browsers will find a recovery strategy, but not necessarily the same one. Some browsers will fail on the page. In the end, it means two things.</p>

<ul>
    <li>Users with unpredictable results, then usability problems and erosion of trust.</li>
    <li>Interoperability issues for browser implementers, and then a risk of losing market share (It is working with browser A and not browser B.)</li>
</ul>

<h3 id="repair">How to repair?</h3>

<p>HTML 4.01 specification is not that much help in this case. It doesn't define a precise <a href="http://www.w3.org/TR/html401/appendix/notes.html#h-B.1" title="Performance, Implementation, and Design Notes">error recovery mechanism for invalid documents</a>. So the browser implementer has to create its own strategy with the consequences we have just talked about.</p>

<p>HTML 5.0 Editor's draft defines a very precise mechanism for <a href="http://www.w3.org/html/wg/html5/#stack">recovering invalid markup</a>. As we <a href="http://bugs.webkit.org/show_bug.cgi?id=12740#c11" title="Bug 12740 - www.bmw.com page doesn't work">can see in the comment about the bug</a>, Dave Hyatt says: <q>Easy, the html5 spec covers this.</q></p>

<p>The browser implementer had clear instructions for this type, was able to implement it, and then to create an interoperable recovery system for this type of mistake. The Web users finally were able to access the Web site without trouble and in the same way as with other browsers. <strong><a href="http://www.w3.org/html/wg/html5/">HTML 5</a> Specification matters because it creates more interoperability when recovering from errors</strong>.</p>


                           </div>
                           <div id="more" class="entry-more">
                              
                           </div>
                       <p class="postinfo">Filed by <a href="http://www.w3.org/People/karl/">Karl Dubost</a> on July  6, 2007  6:30 AM in <a href="http://www.w3.org/QA/archive/technology/html/">HTML</a><br />
<span class="separator">|</span> <a class="permalink" href="http://www.w3.org/QA/2007/07/why-html-5-matters.html">Permalink</a>
                                 | <a href="http://www.w3.org/QA/2007/07/why-html-5-matters.html#comments">Comments (44)</a>
                                 | <a href="http://www.w3.org/QA/2007/07/why-html-5-matters.html#trackback">TrackBacks (0)</a>
</p>



<h3 class="comments-header" id="comments">Comments</h3>
<div class="comment" id="comment-53319">
<p class="comment-meta" id="c053319">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c053319">#</a> 2007-07-06</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>

<p>Unfortunately, this is a very one-sided perspective. The consequences of silent error handling in this case are:</p>

<ol>
<li><p>Invalid HTML markup is still online and not fixed.</p></li>
<li><p>This does nothing to make the Web page render correctly in current / legacy browsers.</p></li>
<li><p>The Web page author learned nothing. He/she remains ignorant of the mistake and will continue to make similar mistakes on other pages.</p></li>
</ol>

</div>
</div>


<div class="comment" id="comment-53363">
<p class="comment-meta" id="c053363">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c053363">#</a> 2007-07-06</span>
</p>
<div class="comment-bulk">
<p>Dubost--</p>

<p>Thank you very much for the hard example that supports your position on the need for HTML 5.0 to efficiently render "hobby content" within browsers. That one example, for what it is worth, sold me on the need to further the development of the 5.0 spec.</p>

<p>I still, adamantly, support the basis for Holzschlag's call to let things catch up to full implementation of existing specs before attempts are made to implement any parts of the proposed 5.0 spec.</p>

<p>Whether, I adopt HTML 5.0 -- that is too far down the road. XHTML 1.1 works for me, my clients and the clients' markets and customers. [Does it very well and without any "angry" e-Mails.] </p>

<p>Again, thanks for that hardcore example.</p>

</div>
</div>


<div class="comment" id="comment-54238">
<p class="comment-meta" id="c054238">
<span class="comment-meta-author"><strong>karl dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c054238">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Hi Vlad,</p>

<p>I do not disagree with you. But let's be practical in a business sense. </p>

<ol>
<li><p>Invalid markup is still online, and most of the time, it will stay online for a long time. Bear with me, I'm all for fixing the markup, but unfortunately I do not see any practical solutions to do that.</p></li>
<li><p>What would be your practical proposal for "no silent recovery"?</p></li>
<li><p>Legacy browsers. Indeed that is a very good point to keep in mind. Agreed with you.</p></li>
<li><p>The Web page author will not learn anything with a browser. Or at least in a common web browser. The issue here is that Web authors should use appropriate tools. Either:</p>
<ul>
<li>appropriate authoring tools</li>
<li>appropriate quality checking in the development process</li>
<li>appropriate checking tools of the code.</li>
</ul></li>
</ol>

<p>Browsers are not tools to check your work. They are the very final step to see the rendering. Do not trust browsers. They are meant to be used by everyone.</p>

</div>
</div>


<div class="comment" id="comment-54302">
<p class="comment-meta" id="c054302">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c054302">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>

<blockquote>
  <p>What would be your practical proposal for "no silent recovery"?</p>
</blockquote>

<p>Let's back up and look at the big picture. The real discussion is about the future of the Web. Let's take W3C's vision of the future - the Semantic Web. If you can honestly tell me that a Semantic Web can be successfully built on top of invalid HTML, then I will take back my objections to your original post.</p>

<p>However, I suspect most future W3C technologies will not work well in an invalid HTML world. If this is the case, then HTML 5 is a diversion from building the future Web.</p>

<p>So how do you make valid markup? From our experience as an authoring tool vendor, "active error feedback" is the only way to ensure content is authored according to specification.</p>

<p>What is the practical way forward towards a Web with valid markup? You need a new spec that is not backwards compatible. Specs don't need to be backwards compatible. It is user-agents that need to be backwards compatible by supporting multiple specs.</p>

</div>
</div>


<div class="comment" id="comment-54414">
<p class="comment-meta" id="c054414">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c054414">#</a> 2007-07-09</span>
</p>
<div class="comment-bulk">
<p>Of course semantic Internet content can be built. What logical reasons are there that the entire Web needs to be homogenous -- it doesn't need nor should it be.</p>

<p>The BMW example, if Bavarian Motor Works wishes to take advantage of the benefits of semantic content, avail itself to future technologies and communicate with the broadest spectrum of its customer base, BMW will have to place the same level of quality of engineering into their Internet communication as they do into their product line.  The market place will decide that .. not you, I nor the W3C.</p>

<p>The main thrust of HTML 5 is to manage non-standard compliant markup, as I understand it, albeit with some extra bells and whistles tossed in. </p>

<p>To believe that the Web is broken and by some magical formula and divine right it will get 'fixed' is a pretty big stretch and egotistical view for anyone who believes they or a collective group has that power or capability.</p>

<p>If HTML 5, ultimately, eases the way for interoperability of non-standard content so that user agents, technologies, whatever, can re-focus efforts on standards compliance, communication tools and technologies built around and upon that compliance and thus move the Web field forward, I am all for it. </p>

</div>
</div>


<div class="comment" id="comment-54611">
<p class="comment-meta" id="c054611">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c054611">#</a> 2007-07-11</span>
</p>
<div class="comment-bulk">
<p>No intention to hog up this particular post--</p>

<p>One significant concern was the possibility that a new HTML 5.0 spec would detract or become a de facto standard to XHTML and thus stall development.</p>

<p>The recent interview between Berners-Lee and IDG Now put that concern to rest:</p>

<p><a href="http://www.itworld.com/Tech/4535/070709future/pfindex.html" rel="nofollow">http://www.itworld.com/Tech/4535/070709future/pfindex.html</a></p>

</div>
</div>


<div class="comment" id="comment-54690">
<p class="comment-meta" id="c054690">
<span class="comment-meta-author"><strong>Dark Phoenix </strong></span>
<span class="comment-meta-date"><a href="#c054690">#</a> 2007-07-12</span>
</p>
<div class="comment-bulk">
<p>Actually, when it comes to error checking, I am of the opinion that the browser should record errors SOMEWHERE.  Maybe browsers ought to have a mode where every HTML error gets displayed to the user (preferably which can be turned off, so people don't start complaining about being hassled)?</p>

</div>
</div>


<div class="comment" id="comment-54712">
<p class="comment-meta" id="c054712">
<span class="comment-meta-author"><strong>Martin Hassman </strong></span>
<span class="comment-meta-date"><a href="#c054712">#</a> 2007-07-13</span>
</p>
<div class="comment-bulk">
<p>html5lib logs parsing errors, see at the bottom of <a href="http://james.html5.org/cgi-bin/parsetree/parsetree.py?uri=http%3A%2F%2Fbugs.webkit.org%2Fattachment.cgi%3Fid%3D14511" rel="nofollow">http://james.html5.org/cgi-bin/parsetree/parsetree.py?uri=http%3A%2F%2Fbugs.webkit.org%2Fattachment.cgi%3Fid%3D14511</a> </p>

<p>If browsers start to log these errors like they do with JavaScript and CSS errors, it will be really great.</p>

</div>
</div>


<div class="comment" id="comment-55142">
<p class="comment-meta" id="c055142">
<span class="comment-meta-author"><strong>Michael Daines </strong></span>
<span class="comment-meta-date"><a href="#c055142">#</a> 2007-07-15</span>
</p>
<div class="comment-bulk">
<p>Maybe this is naive, but isn't it possible that we can have the semantic web without worrying so much about whether documents are valid? For example, given a user agent that uses or is informed in some way by (let's say) the HTML5 spec, how badly would you have to mess up your hCalendar to make it too broken or ambiguous to interpret or use?</p>

</div>
</div>


<div class="comment" id="comment-56486">
<p class="comment-meta" id="c056486">
<span class="comment-meta-author"><strong>Stuart Jones </strong></span>
<span class="comment-meta-date"><a href="#c056486">#</a> 2007-07-23</span>
</p>
<div class="comment-bulk">
<p>With regards to feedback of errors or recovery of errors...</p>

<p>Developers should have feedback of errors in their code - but that does not necessarily mean that you can't have browsers that are able to produce consistent content even if there are some errors in the underlying code.
All it means is that there needs to be a differentiation between your average user's browser and a developer's browser.</p>

<p>This differentiation is already starting to come about with some of the developer plugins that are available for some browsers.</p>

</div>
</div>


<div class="comment" id="comment-56607">
<p class="comment-meta" id="c056607">
<span class="comment-meta-author"><strong>Vijayakumar Subburaj </strong></span>
<span class="comment-meta-date"><a href="#c056607">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>

<p>Recovering from errors?! First, why allowing errors?!</p>

<p>Why not just say the document is invalid, like xml?!</p>

</div>
</div>


<div class="comment" id="comment-56611">
<p class="comment-meta" id="c056611">
<span class="comment-meta-author"><strong>Karl Dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c056611">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>Hi,</p>

<p>You asked why browsers recover from errors instead of reporting them right away. There are two reasons: the browser market and the companies market.</p>

<ul>
<li>companies market: </li>
</ul>

<p>An advertisement in a magazine gives a URI to your commercial web site. Your web site is dependent on many third-party software packages and employees creating content. In one part of this software, someone has to create small chunks of HTML for editing the Web site. The person is tired, publishes the content quickly before the weekend, but unfortunately it is invalid. The consumer tries to access the page without success; it shows a big error message instead. The consumer goes to the site of your competitor.</p>

<ul>
<li>Browsers market: </li>
</ul>

<p>The company Opeziri makes a very strict browser which doesn't accept any errors in the markup. Each page which is bogus is not displayed and has a big red message saying error. Your grandfather is using this Opeziri browser but he's getting tired of using it. Something around 95% of the Web is not viewable because everything is invalid, though he doesn't know that. He just sees the error message. Then there is a competitor Moslacker which accepts all pages. Moslacker starts to get more market share than Opeziri, which sees its business model going down.</p>

</div>
</div>


<div class="comment" id="comment-56650">
<p class="comment-meta" id="c056650">
<span class="comment-meta-author"><strong>Gustaf Liljegren </strong></span>
<span class="comment-meta-date"><a href="#c056650">#</a> 2007-07-24</span>
</p>
<div class="comment-bulk">
<p>First, I think it's a great idea to standardize how browsers should behave when coming upon broken documents. Second, I think we all ought to strive for a well-formed web, because well-formed code is easier to read, for humans and machines alike.</p>

<p>It's obvious that browsers can't parse HTML as strict as we do other XML documents. However, I don't agree that browsers are therefore not good for teaching users how to write well-formed and valid documents. Browsers could assist users to a great extent on this task. Here's how:</p>

<p>Whenever the browser encounters an error in the syntax or grammar, make a small icon appear on the status bar. Hover your mouse pointer over it, and it says "This page is not valid. Click to validate this page". Click and you find yourself at W3C's then improved validator, which tells you not only what is wrong, but why it's wrong and what ought to be done about it.</p>

<p>Whenever this icon appears on a public site, it puts the expert author to shame with his fellows. It lets the curious hobby user learn from others' mistakes. And most importantly: it lets your grandfather ignore parsing errors. It doesn't prevent the page from rendering, along the lines of HTML 5 error handling. If you are the author, you get valuable feedback from the end user environment.</p>

<p>It is my opinion that this kind of feedback ought to be a SHOULD requirement in the HTML 5 spec.</p>

</div>
</div>


<div class="comment" id="comment-56751">
<p class="comment-meta" id="c056751">
<span class="comment-meta-author"><strong>nomad </strong></span>
<span class="comment-meta-date"><a href="#c056751">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Hi Karl,</p>

<p>Re your first case (publish invalid document, customer goes to competitor), that's <em>exactly</em> what I'd like to see. This reason puts market pressure on businesses to publish only valid documents and to check their validity, and that is a good thing.</p>

<p>And if you publish invalid document you risk having your document inaccessible anyway, because not every error is recoverable.</p>

<p>By specifying unified error recovery mechanism you give authors more leeway to ignore their error, practically provoking them to use invalid HTML.</p>

</div>
</div>


<div class="comment" id="comment-56753">
<p class="comment-meta" id="c056753">
<span class="comment-meta-author"><strong>Sean Farrell </strong></span>
<span class="comment-meta-date"><a href="#c056753">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>I am reading over and over and over the phrase "that your grandmother / grandfather should be able to publish in the web" as an excuse to have invalid HTML hanging around. This is total nonsense!</p>

<p>You can differentiate two types of people, coders and non-coders. Coders either write the HTML or tools (scripts) that output HTML and non-coders only use tools that generate HTML. A non-coder will never write a line (tag) of HTML, nor understand it. He/She simply does not have to, since there are tools that can be used. (If the tool does not output 100% valid HTML it is broken...)</p>

<p>The reason why 95% of the web is invalid (not broken) is because browsers allowed showing broken pages. The creators of tools simply did not care to check if their tool wrote 100% valid HTML.
The other problem was that in the early specs the emphasis was never put on the actual way to display HTML and so browsers differed slightly. Additionally browser extensions did not make life easier.</p>

<p>My proposal to get people to comply in the future is to display a red bar at the top of the page if it is ill-formed, just like pop-up blockers do. The content is still viewable, but the authors take a little blame. There is a good chance that the manager that browses the corporate site goes to the web developer and asks "Why is our site non-conformant?".</p>

</div>
</div>


<div class="comment" id="comment-56767">
<p class="comment-meta" id="c056767">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c056767">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Can we give names to different error handling options so that it's easier to discuss?</p>

<p>Gustaf, let's call your suggestion "passive error feedback". Let's call the XML approach "active error feedback". And the HTML approach "no error feedback".</p>

<p>Karl, nobody is suggesting that browsers should use "active error feedback" for existing specs like HTML 4.x, but only for new specs. Web site developers can choose to use the new spec or the old spec. If all browser vendors agree to respect the rules of the new spec, then market share is not affected.</p>

</div>
</div>


<div class="comment" id="comment-56775">
<p class="comment-meta" id="c056775">
<span class="comment-meta-author"><strong>David </strong></span>
<span class="comment-meta-date"><a href="#c056775">#</a> 2007-07-25</span>
</p>
<div class="comment-bulk">
<p>Gustaf is correct that a browser should notify the user in some manner that it is "fixing" the document in order to have it display "properly". I use quotes because it is impossible to correctly guess the intended results 100% of the time. I also know many web developers that use a browser as their primary means of testing their code. Their main concern is how the page looks in browsers people are actually using, not whether it validates. Also, checking that it validates is an additional step, one which might break the layout when the HTML is modified to comply.
This brings me to my ultimate point, which is a bit off-topic: Why is the W3C developing yet another specification when the available browsers fail to support the current ones? (source: <a href="http://www.webdevout.net/browser-support-summary" rel="nofollow">http://www.webdevout.net/browser-support-summary</a> ) Apparently I live in a fantasy world, because I would like the ability to create a web page that both validates and looks how I intended in browser X and know that everyone else will see the same thing I see as long as they are using a browser that fully supports the correct specifications. This may be an unreasonable request, but why doesn't the W3C spend their time and resources to create an opensource reference browser that web developers can use to test page layout and browser developers can use as a guide for fixing their current browsers?</p>

</div>
</div>


<div class="comment" id="comment-56983">
<p class="comment-meta" id="c056983">
<span class="comment-meta-author"><strong>Tony E </strong></span>
<span class="comment-meta-date"><a href="#c056983">#</a> 2007-07-26</span>
</p>
<div class="comment-bulk">
<p>I'm personally a bit stuck on the fence on whether a browser should, or should not fix or auto-repair website code. I am leaning on the thought that browsers should not, however, simply for the fact that if browsers allow for broken code, why fix it?  I recall old Netscape 2 vs. Internet Explorer debates when IE auto-repaired tables and Netscape Communicator did not, for example.  People designing for Netscape &amp; similar browsers knew it worked properly, "for the most part".</p>

<p>My train of thought on this issue is like that of purchasing a vehicle.  If you buy a brand new car, and the door doesn't open. Do you send it back? or just roll down the window with your neat remote window open/close mechanism, and climb in?  Allowing broken html to exist is the same to me, as climbing in the window.  The manufacturer has no clue they just sold a defective product, because you're driving away with a smile on your face.</p>

<p>I fail to see how that is acceptable.  Let Microsoft do its own thing, they will anyway (in my humble opinion)... but browsers like Firefox/Mozilla, Netscape, Opera, Safari, they seem to be willing to work with standards, why not have them choke on errors? Then people will build better code.  Perhaps browser developers can opt to add an option "Enable Code/DOM debugging?" so it <em>will</em> pop up a window describing any code that choked or is incorrect?</p>

<p>Just an idea and train of thought of one small time web developer.  And I also do believe a good developer runs their code through Tidy or some other code checker, but obviously Joe Shmoe 14yo just got hired by Jim Bob's Used Car Sales, and isn't a pro.</p>

</div>
</div>


<div class="comment" id="comment-57145">
<p class="comment-meta" id="c057145">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c057145">#</a> 2007-07-27</span>
</p>
<div class="comment-bulk">
<p>I am not so sure, while handy for developers for example, that any additional burdens should be placed upon browsers and their development to notify a Web user of invalid code.</p>

<p>A browser's primary function is to serve the user of the Internet. There are enough tools designed and in-place for the developer to use in generating standards compliant code.</p>

<p>What will drive and continue to drive increased production of standard compliance is education at the developer level and the market and economic pressures that are exerted upon the business and how it impacts their performance and relationships with their customers.</p>

<p>Browser developers need to focus on implementation of standards, evolution of standards and browser based technologies and upon security for the purpose of delivering Web based communication to the end user. That in and of itself is more than enough to keep their plate full.</p>

<p>However, for the hobbyist and for businesses, such as BMW which was referenced by Dubost, it is the responsibility and function, I believe, of the CMS and development applications to serve the function of notifying the developer of invalid code.</p>

<p>David--</p>

<p>You came close to answering your own question of why another spec:</p>

<blockquote><p>checking that it validates is an additional step, one which might break the layout when the HTML is modified to comply.</p></blockquote>

<p>The primary function of the HTML 5 spec is to adapt to the ways that the vast majority of content is being coded and allow it to comply to a standard.</p>

<p>In theory, and hopefully practicality, this  will ease the way for browsers to render, for example, tag soup, consistently.</p>

<p>There are a lot of hurdles, as history has pointed out, to achieve and implement any standard. </p>

<p>You make a very valid point, along with Molly Holzschlag, that full implementation of all existing standards should and needs to happen, quickly.</p>

</div>
</div>


<div class="comment" id="comment-57349">
<p class="comment-meta" id="c057349">
<span class="comment-meta-author"><strong>eekee </strong></span>
<span class="comment-meta-date"><a href="#c057349">#</a> 2007-07-28</span>
</p>
<div class="comment-bulk">
<p>Lot of wrangling over details in these comments, with the bulk of opinion seeming to go to something that I believe would be actively harmful to individuals trying to earn a living. Do you guys who want the web browser to flag bad pages think it's alright to publicly show a working person's mistakes to the world? Granted, sloppy work is no good thing, but everyone makes mistakes, and in correcting those mistakes one reaches a point of diminishing returns:</p>

<p>To the individual site creator, the effort put in matters, and I don't want to have my judgment of an individual colored by his or her html skill!</p>

<p>Likewise with the company, good html coders I'm sure cost money, and I do not want to have my judgment of a company influenced by how good a coder –how good a shop window fitter – they could afford!</p>

<p>I also, as my last and least point, don't want my browsing being bothered by a little icon in the corner saying "Oh dear, this web page author made a mistake!" I do, however, want to be able to enable just such an icon for checking my own pages. It would be a handy browser feature but it would be a browser feature, not something that belongs in the standard. (Of course, if one wishes to see the validity of every page, one could simply leave that feature enabled.)</p>

<p>I read the original post as something with a much, much nobler goal than pointing fingers. HTML 5 will require that all browsers respond to any given error in the same way. This, if implemented properly, removes an artificial and meaningless source of differences between IE, Gecko, Opera, khtml, and all other html renderers out there.</p>

</div>
</div>


<div class="comment" id="comment-58041">
<p class="comment-meta" id="c058041">
<span class="comment-meta-author"><strong>Gustaf Liljegren </strong></span>
<span class="comment-meta-date"><a href="#c058041">#</a> 2007-07-31</span>
</p>
<div class="comment-bulk">
<p>Even though most of us want a more well-formed web, it appears we are divided on how to get there. One extreme is to let browsers silently fix all markup errors, like today. The other extreme is to make it a requirement for conforming browsers to refuse to show pages with syntax/grammar errors. The first doesn't lead to a more well-formed web, which is our ultimate goal. The second is not realistic, because it breaks the web.</p>

<p>Anything that breaks the web (i.e. makes much of today's web unreadable) will surely not be implemented, and for a good reason. We can't have a transitional period where most websites won't work in the latest browser. The first priority of HTML 5 must be to accommodate the masses, and that means making old tagsoup show up pretty in next-gen browsers.</p>

<p>However, browsers could still promote well-formedness in discreet ways. The icon on the status bar I suggested earlier would have this effect. People wouldn't see it unless they looked for it, but it would have an impact on webmasters. The specification doesn't need to be too specific on how to notify the user of validation errors, but it could state that a browser by default SHOULD notify the user, and give an option to validate the page.</p>

</div>
</div>


<div class="comment" id="comment-58100">
<p class="comment-meta" id="c058100">
<span class="comment-meta-author"><strong>Vlad Alexander </strong></span>
<span class="comment-meta-date"><a href="#c058100">#</a> 2007-08-01</span>
</p>
<div class="comment-bulk">
<p>Gustaf, please do not misrepresent the side that supports "active error feedback". NOBODY is suggesting that Web browsers should stop rendering HTML 4.x/XHTML 1.x Web pages because they are invalid. Supporters of "active error feedback" suggest this approach only for NEW specs.</p>

</div>
</div>


<div class="comment" id="comment-58486">
<p class="comment-meta" id="c058486">
<span class="comment-meta-author"><strong>JP Fiset </strong></span>
<span class="comment-meta-date"><a href="#c058486">#</a> 2007-08-02</span>
</p>
<div class="comment-bulk">
<p>I find myself leaning towards the comments that allow rendering of bad markup. Users want to see content that companies are offering. Creating tools that hinder this relation is not good.</p>

<p>Besides, if you have worked long enough in private companies, you can envisage the examples given above turning into situations where an expert author is coming back to the office on a long week end to fix something broken by a novice author. Please, display the page.</p>

<p>I also want to see a valid web. The discussion so far is focused on letting the  user know that a page is broken. The user can not do anything about it and I suspect he/she will not take the time to send an e-mail. </p>

<p>How about letting the originating site know that the page is broken? Could there be any mechanism put in place to help with that?</p>

</div>
</div>


<div class="comment" id="comment-58503">
<p class="comment-meta" id="c058503">
<span class="comment-meta-author"><strong>Cecil Ward </strong></span>
<span class="comment-meta-date"><a href="#c058503">#</a> 2007-08-02</span>
</p>
<div class="comment-bulk">
<p>As we all know, the reason that this situation ever came about is that early web browsers were unreasonably forgiving and these browsers were the only testing tools that non-professional web authors were exposed to. These browsers failed in their duty to web authors.
We should not keep becoming distracted by the issue of what happens when end users view invalid pages in their browsers. What is important to focus on when considering how things must change is sorting out the experience that <em>web authors</em> at home (say)  have when they are writing HTML and using their browsers as test tools.
It is time to draw a line and bring the era of pervasive broken markup to an end and this is to be achieved by a combination of measures.
One strategy towards this is to pressure browser manufacturers and vendors of other tools to include accurate and updatable validation tools in their products. This is an idea worth pursuing, but this strategy on its own will not succeed because only the more knowledgeable web authors will know to obtain such validators or know to turn them on.
Rather, I believe that it is vital to proceed as follows
(i) W3C to version-mark HTML5 now, so that forthcoming browsers can recognise it as such
(ii)    W3C to require browser manufacturers to fail with an error (just like XML) if they see an invalid page bearing that version marker.
(iii)   W3C to act quickly to get browser manufacturers to sign up for this, and fast-track the release of a small spec covering this issue well ahead of the HTML5 spec timescale.  Such a spec would be small, and a technique similar to DOCTYPE-switching would serve. Aim to get it into Firefox 3 and IE8 without fail.
There is no need to worry about any bad impact on end-users' experience in this respect, because in the new scenario the web author would never have been able to unknowingly release an HTML5-version-marked page that was invalid. Failing to signal errors to home web authors was never a way of helping them, it was actually a behaviour that was letting them down, and it’s time to acknowledge that, and put an end to the old era.</p>

<p>Cecil Ward.</p>

</div>
</div>


<div class="comment" id="comment-58896">
<p class="comment-meta" id="c058896">
<span class="comment-meta-author"><strong>Olivier Wehner </strong></span>
<span class="comment-meta-date"><a href="#c058896">#</a> 2007-08-05</span>
</p>
<div class="comment-bulk">
<p>There is something spooky about these "should browsers render tag soup" discussions: Yes, this would make the life of everyone easier. No, browser vendors will never stick to that rule. Why should they?</p>

<p>The people who write the spec do not write the browser code, the spec is not law and the browser market is too competitive to give purity, purism or beauty a chance. </p>

</div>
</div>


<div class="comment" id="comment-58983">
<p class="comment-meta" id="c058983">
<span class="comment-meta-author"><strong>mina86 </strong></span>
<span class="comment-meta-date"><a href="#c058983">#</a> 2007-08-05</span>
</p>
<div class="comment-bulk">
<p>So what, now invalid markup will have to be rendered in given way? Why is it called "invalid" then? If specification says how to render it why should anyone care to produce valid markup?</p>

<p>This only impose new restrictions on the user agents which may only do harm, ie. bloat them, make them bigger, slower and introduce new bugs.</p>

<p>After pointing out invalid markup to a web developer, s/he could say: "According to HTML5 this has to be rendered the way X Web Browser renders it so your Y Web Browser is wrong and I'm right."</p>

<p>I don't mean to offend anyone but putting error recovery into the spec is an absurd and the second most stupid thing that happened in HTML after the FONT tag.</p>

</div>
</div>


<div class="comment" id="comment-59442">
<p class="comment-meta" id="c059442">
<span class="comment-meta-author"><strong>Francis </strong></span>
<span class="comment-meta-date"><a href="#c059442">#</a> 2007-08-08</span>
</p>
<div class="comment-bulk">
<p>I recently joined a company, working with all MS .NET developers. I was shocked to discover that none of them knew that HTML/CSS Specifications existed, nor about XML and XML Schema. OMG they don't even know what W3C is.</p>

<p>Effort should be given to promote W3C Standards.</p>

</div>
</div>


<div class="comment" id="comment-60715">
<p class="comment-meta" id="c060715">
<span class="comment-meta-author"><strong>SuperKoko </strong></span>
<span class="comment-meta-date"><a href="#c060715">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<blockquote>
<p>
Karl, nobody is suggesting that browsers should use "active error feedback" for existing specs like HTML 4.x, but only for new specs. Web site developers can to use the new spec or the old spec. If all browser vendors agree to respect the rules of the new spec, then market share is not affected.
</p>
</blockquote>

<p>Unfortunately, you forget one thing: New specs will be recognized by new browsers, through the DOCTYPE declaration, and new browsers will be able to use "active error feedback", but existing browsers (e.g. IE6) ignore the unrecognized DOCTYPE, and will read the new HTML as if it were HTML 4.x, and will actually render something viewable, because new specs usually are quite "backward-compatible".</p>

<ol>
<li> Bad web designers who use IE6 to test their pages may use the new DOCTYPE, and think that "it works", while new browsers won't display it.</li>
<li> Users will be frustrated by the behavior of new browsers and claim they're buggy, and argue that IE6 does a better job, because: "It displays more pages".</li>
<li> Users will get back to IE6.</li>
<li> Browser vendors learn the lesson, and their next browser provides no error checking or passive error checking.</li>
</ol>

<p>That happened with XHTML (though, XHTML served as application/xhtml+xml gets active error checking with some browsers). That won't happen with next specs of HTML, because browser vendors won't even try to provide active error checking: They've already learned the lesson.
</p>

<p>
Active error checking can only be put in a spec that isn't backward compatible (i.e. that current browsers don't display). XHTML served as application/xhtml+xml is one example: This MIME type is not recognized by IE6 which doesn't render it, but just asks "Do you want to save it to disk?".
</p>

<p>Passive error checking is a great idea, in my opinion. I wouldn't go as far as putting it as a SHOULD in the HTML spec. I find it more sensible to humbly request this feature to be added in specific browsers.
</p>

<blockquote>
<p>
This may be an unreasonable request, but why doesn't the W3C spend their time and resources to create an opensource reference browser that web developers can use to test page layout and browser developers can use as a guide for fixing their current browsers?
</p>
</blockquote>

<p>Because their parsing &amp; rendering bugs would become <em>de facto</em> standard.
</p>

<p>I wish browsers would use a true SGML parser. Such parsers do exist. Unfortunately, they don't have enough error recovery mechanism. An OSS SGML parser could be adapted and that would make the browser conforming to the HTML spec.
</p>

<blockquote>
<p>
So what, now invalid markup will have to be rendered in given way? Why is it called "invalid" then? If specification says how to render it why should anyone care to produce valid markup?
</p>
</blockquote>

<p>I agree. HTML is a contract between HTML document writers and user agents.</p>
<p> HTML5 contract is:</p>
<p> If you do your job (web developer) you'll get your money (proper layout in user agent), but if you do half of the job (but, you MUST not), you'll get the same amount of money.</p>
<p> Is it not equivalent to:</p>
<p> If you do half of your job or more, you'll get your money?</p>

<blockquote>
<p>
This only impose new restrictions on the user agents which may only do harm, ie. bloat them, make them bigger, slower and introduce new bugs.
</p>

</blockquote>

<p>Exactly. The <em>de facto standard</em> "extended HTML tag soup" would be so complex, have so many quirks, that it would not be accessible to small tools written in three days by a normal developer. Only big companies could produce parsers able to parse one of the most complex computer languages ever.
</p>

</div>
</div>


<div class="comment" id="comment-60741">
<p class="comment-meta" id="c060741">
<span class="comment-meta-author"><strong>Ether </strong></span>
<span class="comment-meta-date"><a href="#c060741">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>Well, I fail to see what you people are arguing about. The main ideas were mostly said by others:</p>

<p>x] To create a specification of how should browsers render invalid HTML is a good idea, which should bring some uniformity to the browsers. But hey, admit it, browser don't render the same even valid documents, so where's the certainty that this time they will listen to the specifications?</p>

<p>x] I don't know why it should apply for the NEW "version" of HTML. An invalid HTML5 document shouldn't be rendered at all. That way, old invalid HTML4 pages will stay mostly the same and new HTML5 pages will be valid and no error recovery will be needed for it.</p>

<p>x] When I code an application in PHP, it doesn't even try to recover from syntax errors (which is the essence of tag soup), so why the coders of HTML should be given more? Yes, they are coders (and with all browser displaying i.e. CSS the same way, it would be easier for them to obey the specifications).</p>

<p>x] When someone cannot or doesn't want to become a coder, there are still applications that will have to create valid HTML, because when page exported from editor A won't be displayed and the same page exported from editor B will, no-one will blame HTML5 strict specifications, they'll blame editor A.</p>

</div>
</div>


<div class="comment" id="comment-60756">
<p class="comment-meta" id="c060756">
<span class="comment-meta-author"><strong>karl dubost, w3c </strong></span>
<span class="comment-meta-date"><a href="#c060756">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>In the comment of SuperKoko, he/she has used cite="first_name last_name" which has made the comment non HTML conformant. (I have fixed it.) The value of a cite attribute must be a URI.</p>

<p>What should a modern browser have done with non-conformant markup like this? Should it carry a message saying the whole page was non-conformant?</p>

<p>btw, the doctype will not carry a version in HTML 5. It will simply be &lt;!DOCTYPE html&gt;. Among browser vendors, Chris Wilson (Microsoft) and Dave Hyatt (Apple) have advocated for versioning in HTML 5. Ian Hickson (Google), Anne (Opera), Maciej (Apple) are against.</p>

<p>The issue is not on the browser level, but authoring level. The issue is that authoring tools (and authors) should be strict in what they produce, that was part of the sense of my article in the craft of HTML. </p>

<p>Though I will disagree on your last statement, developers going on the HTML market <em>have</em> to recover non conformant HTML markup. It means they have to find techniques to recover the content. What HTML 5 offer for the first time is a precise description on how to recover.</p>

<p>Now what I would like on the WG is more developers of CMS and authoring tools: all the people implementing the production of HTML code. The sanitization of the HTML code is in the production tools.</p>

</div>
</div>


<div class="comment" id="comment-60757">
<p class="comment-meta" id="c060757">
<span class="comment-meta-author"><strong>karl dubost, w3c </strong></span>
<span class="comment-meta-date"><a href="#c060757">#</a> 2007-08-15</span>
</p>
<div class="comment-bulk">
<p>@ether</p>

<p>if we really want to be strict :)
Your comment is non conformant you are using paragraphs instead of a real list. ul/li</p>

<p>Luckily enough, this is something which is almost impossible to check by machine. So if we push a bit further, this article should not have been displayed because of your comment.</p>

</div>
</div>


<div class="comment" id="comment-60809">
<p class="comment-meta" id="c060809">
<span class="comment-meta-author"><strong>thacker </strong></span>
<span class="comment-meta-date"><a href="#c060809">#</a> 2007-08-16</span>
</p>
<div class="comment-bulk">
<p>Dubost--</p>

<p>I have a bunch of questions, hope you don't mind.</p>

<p>Why the differences of opinion between version reference and generic HTML 5 DOCTYPE?</p>

<p>You sold me on the need for the HTML 5 spec based solely on error and recovering handling. A key component on this are the CMS and authoring tool developers. What are the reasons for their non-participation within the HTML 5 working groups and how can this critical element be resolved?</p>

<p>Secondly, why are there two working groups on this spec?</p>

<p>The article by Anne van Kesteren [ <a href="http://www.w3.org/html/wg/html5/diff/" rel="nofollow">http://www.w3.org/html/wg/html5/diff/</a> ] is very succinct, clear and ideal for non-technical people such as myself versus the WHAT spec draft. Is it possible that van Kesteren will keep this article current on a monthly basis or as needed? </p>

<p>The W3C projects a final recommendation date of the 4th quarter of 2010. Why can't this be compressed to the 4th quarter of 2008?  </p>

<p>Wouldn't incremental releases of the spec be more practical with focus initially upon error/recovery handling and security with the initial release?</p>

<p>Why is there even consideration of deprecation of elements and/or attributes  based upon frequency of use or confusion/misunderstanding of use?  Isn't this more of a matter of education rather than assumption that use reflects practicality or functionality of the element/attribute?</p>

<p>What impact will the HTML 5 spec have upon the existing XHTML specifications?</p>

<p>Finally, what are your thoughts about Holzschlag's suggestions that she presented within her most recent post on her blog?</p>

<p>Don't get discouraged, I have a gazillion more questions.</p>

<p>Thank you very much.</p>

</div>
</div>


<div class="comment" id="comment-60957">
<p class="comment-meta" id="c060957">
<span class="comment-meta-author"><strong>Bennett McElwee </strong></span>
<span class="comment-meta-date"><a href="#c060957">#</a> 2007-08-16</span>
</p>
<div class="comment-bulk">
<p>Consistent recovery from invalid markup is indeed a useful and important thing to specify. But it should not be part of the HTML 5 specification.</p>

<p>The original article says, "HTML 5 Specification matters because it creates more interoperability when recovering from errors." Actually, it's only the spec's definition of "a very precise mechanism for recovering invalid markup" that does this. This is indeed a useful thing to specify; but it's not logically part of the specification of the HTML 5 language.</p>

<p>W3C could instead just create an "HTML 4 Invalid Markup Recovery" spec. This spec could act as a helpful guide to browser implementors, who currently all use their own algorithms for rendering invalid markup. It would apply to both HTML 4.x and XHTML 1.x served as text/html.</p>

<p>If there is still really a need for HTML 5, then it could be accompanied by a "HTML 5 Invalid Markup Recovery" spec. But perhaps an even better approach would be to forget HTML 5 and simply allow XHTML 2 to be served as text/xhtml (or for backward compatibility, text/html). Then publish an "XHTML 2 Invalid Markup Recovery" spec.</p>

</div>
</div>


<div class="comment" id="comment-61855">
<p class="comment-meta" id="c061855">
<span class="comment-meta-author"><strong>Ether </strong></span>
<span class="comment-meta-date"><a href="#c061855">#</a> 2007-08-22</span>
</p>
<div class="comment-bulk">
<p>@karl dubost] I was talking about syntax errors, which make the document unparsable, not about the semantic ones. Anyway, I haven't written the 'p' tags, it was done automatically.</p>

<p>x] Shouldn't the browser display this page because of errors? Sure, but the script shouldn't have put it there in the first place. There are ways to detect that the posted code is invalid before putting it on the page. And when the recovery algorithm will be available, the receiving script should be able to apply it easily and make the posted code valid.</p>

<p>x] Well, I don't know that much about DOCTYPEs, SGML vs. XML and such, but an HTML5 document could always be recognized using the DTD clause. I know that this won't solve the thing SuperKoko wrote about, but I'm no pro, right? What about creating strict (with active error feedback) and transitional or such (with passive error feedback) standards?</p>

</div>
</div>


<div class="comment" id="comment-62376">
<p class="comment-meta" id="c062376">
<span class="comment-meta-author"><strong>karl dubost, W3C </strong></span>
<span class="comment-meta-date"><a href="#c062376">#</a> 2007-08-27</span>
</p>
<div class="comment-bulk">
<p>Hi,</p>

<p>Just a quick note, that I see sometimes some comments made anonymously. These comments will not be moderated positively.</p>

</div>
</div>


<div class="comment" id="comment-62629">
<p class="comment-meta" id="c062629">
<span class="comment-meta-author"><strong>Stuart Metcalfe </strong></span>
<span class="comment-meta-date"><a href="#c062629">#</a> 2007-08-28</span>
</p>
<div class="comment-bulk">
<p>I second <a href="#c060957" rel="nofollow">Bennett McElwee's comment</a>.</p>

<p>That browser vendors want to be able to recover from minor errors is entirely understandable and I don't think many people are going to lose any sleep if they write a recovery mechanism 'on top of' the HTML specification to improve the user experience.  That an agreed 'standard' for this is established is good in this case.  I strongly believe, however, that this mechanism has no place in the main HTML specification which should be clean, clear and above all require correct implementation.  Poor quality code should be optionally recoverable but never explicitly accommodated.</p>

</div>
</div>


<div class="comment" id="comment-69750">
<p class="comment-meta" id="c069750">
<span class="comment-meta-author"><strong>Felix Schins </strong></span>
<span class="comment-meta-date"><a href="#c069750">#</a> 2007-10-02</span>
</p>
<div class="comment-bulk">
<p>Hi!</p>

<p>I think, it would be very good, if HTML5 uses some of the good ideas of XHTML2. Like e.g. the &lt;h&gt;-&lt;section&gt;-model that is better than &lt;h1&gt; - &lt;h6&gt;, &lt;separator&gt; instead of &lt;hr&gt;, removing the &lt;font&gt;-element and the &lt;iframe&gt;-tag, and so on...</p>

<p>A list of more great things, that HTML5 could use from XHTML2 is found here:
<a href="http://www.xhtml.com/en/future/x-html-5-versus-xhtml-2/" rel="nofollow">http://www.xhtml.com/en/future/x-html-5-versus-xhtml-2/</a></p>

<p>Best wishes for the standards-development...
Felix</p>

</div>
</div>


<div class="comment" id="comment-119961">
<p class="comment-meta" id="c119961">
<span class="comment-meta-author"><strong>Erik Reppen </strong></span>
<span class="comment-meta-date"><a href="#c119961">#</a> 2008-03-02</span>
</p>
<div class="comment-bulk">
<p>Why advocate any recovery from invalid markup for new technologies? It only leeches time and energy away from browser development and makes everybody's lives more difficult. How is a user who can't even master simple SGML-based syntax supposed to correct behavior if a given recovery-process has failed to properly assess their intentions?</p>

<p>People who don't want to learn anything new can stick to older technologies which will no doubt continue to be supported indefinitely as the resources expended on doing so gradually become even more negligible.</p>

<p>The carrot offered for taking the very minor step of learning to stick to lower-case, nest, and close properly will be the advantages that new technologies offer. Asking people to validate their markup for a to-the-line error check hardly seems like a major barrier to entry.</p>

<p>Sloppy syntax allowances in something as basic as an SGML-based language is a waste of resources that can only hurt accessibility, aggravate proper indexing in search engines and slow the implementation of all new technologies. Why bring the evolution of the web to a snail's pace for people who can't be bothered to do the amateur web designer's equivalent of a spell check?</p>

</div>
</div>


<div class="comment" id="comment-120061">
<p class="comment-meta" id="c120061">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c120061">#</a> 2008-03-02</span>
</p>
<div class="comment-bulk">
<p>Hi Erik,</p>

<p>Nothing forbids an author to stick to lowercase, strict guidelines, quoted attributes, etc for writing HTML. I would even personally encourage this. It is good design and practices when sharing work in a Team. </p>

<p>That said, browser developers also need to recover broken markup in their implementations. If you stop recovering from broken markup, we will not be able to access 95% of the Web.</p>

<p>There are really two things to separate:</p>

<ol>
<li>Authoring HTML which can be strict with a well defined content model</li>
<li>Parsing HTML which has to cope with errors.</li>
</ol>

<p>For the 1., I invited people to <a href="http://www.w3.org/QA/2008/02/authoring-html5" rel="nofollow">commit their time to write the HTML 5 Authoring guidelines</a>. It means people <strong>actually writing prose</strong> and not only discussing about the why and when and how. The only way to move forward on this is really to create the document for it.</p>

</div>
</div>


<div class="comment" id="comment-120158">
<p class="comment-meta" id="c120158">
<span class="comment-meta-author"><strong>Erik Reppen </strong></span>
<span class="comment-meta-date"><a href="#c120158">#</a> 2008-03-03</span>
</p>
<div class="comment-bulk">
<p>"That said, browser developers also need to recover broken markup in their implementations."</p>

<p>This is the part I'm confused on. Why?</p>

<p>Ever since I first caught wind of the new spec, I've been trying to understand whether I've misunderstood the goal of standards all along or if there's been a change of plan. I thought the idea was to ultimately transition to strict syntax. Period. Not for the purpose of "enforcing" proper coding practice out of sheer priggishness but to improve the quality of the development and usage environment for everybody involved by making sure that if something is live, its code can be easily read by machine, code, and developer. We can continue to allow for old mistakes through the use of proper doctype recognition and proprietary opt-in browser targeting (in the case of IE exclusivists).</p>

<p>But if sloppy markup continues to be allowed to go live, accessibility, indexing, barrier to entry and standards as a whole are all impacted in a negative manner in my eyes. I just don't see who it benefits. Certainly not the new markup coder who is trying to figure this stuff out for the first time but can't because a browser is incorrectly guessing at what his sloppy syntax is supposed to mean rather than simply pointing out where it needs to be corrected before rendering anything at all.</p>

<p>If the browser devs don't think that's good enough for less experienced aspiring web developers, all that's really needed is the equivalent of an SGML spellchecker that suggests rather than automatically assumes it knows the proper code. Although I'd expect most new devs could make do with something similar to the validation process.</p>

<p>So help me out here. Am I under some sort of mistaken impression about how things work or what the W3C's goals are? I'd love to have a better understanding of everybody's priorities in these matters, especially those of the browser devs (MS mostly).</p>

<p>Thanks for your response thus far and feel free to direct me to a more appropriate place for this discussion if there is one. It just seems to me like strict syntax is win-win for everybody and I don't see the cons of it.</p>

</div>
</div>


<div class="comment" id="comment-120742">
<p class="comment-meta" id="c120742">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c120742">#</a> 2008-03-05</span>
</p>
<div class="comment-bulk">
<blockquote><p>I've been trying to understand whether I've misunderstood the goal of standards all along or if there's been a change of plan. </p></blockquote>

<p>The goal of a standard is to be implemented by a good share of the market so that people can benefit from smooth interoperability when they are working with documents. It is a practical exercise with social, economic, and technical constraints.</p>

<blockquote><p>But if sloppy markup continues to be allowed to go live, accessibility, indexing, barrier to entry and standards as a whole are all impacted in a negative manner in my eyes. I just don't see who it benefits.</p></blockquote>

<p>I will try to use another metaphor, because there is a misunderstanding. </p>

<p>In my native language, French, I make mistakes (typos, grammar, etc.). The rules for French are strict and defined. Someone who is listening to me or who is reading me is still able to understand me even when I make typos and grammar errors (except if my content becomes really garbage). The person has applied an automatic recovery process to make the discussion possible. In a teaching context, if the person is a professor, she/he will fix my mistakes (note that he/she has been able to understand my broken content in the first place). My <strong>author responsibility</strong> is to create correct content following the rules.</p>

<p>There are billions of documents (<strong>95%</strong>) on the Web with incorrect syntax. Two solutions:</p>

<ol>
<li>Browsers stop processing any document which is written with an incorrect syntax. It means that most of the Web sites on the Web will not be displayed anymore, your favorite travel agency, your favorite search engine, etc. <em>With the previous metaphor, nobody understands you as soon as you make a mistake.</em></li>
<li>We create a specification which explains to browsers and fixing libraries how to recover the content available on the Web in an <strong>interoperable way</strong>. <em>With the previous metaphor, everyone has a formal process to recover what you said incorrectly. Useful for teachers (validators, checkers), useful for your buddies (browsers).</em></li>
</ol>

<p><strong>That said</strong> nobody forbids you from applying your <strong>author responsibility</strong> and creating strict markup. The content model of HTML 5 (rules for writing in html and xhtml) is not yet finished. A specification which makes it obvious for authors is needed. A volunteer editor, who commits time, is what we need for now.</p>

</div>
</div>


<div class="comment" id="comment-120849">
<p class="comment-meta" id="c120849">
<span class="comment-meta-author"><strong>Tom Aman </strong></span>
<span class="comment-meta-date"><a href="#c120849">#</a> 2008-03-05</span>
</p>
<div class="comment-bulk">
<p>First of all, most of the comments here refer to browsers.  Instead of browsers, think user-agents.  While it is reasonable to continue to have user-agents attempt to fix bad html, I think it would be great to at last insist that any new version of HTML <strong>MUST</strong> be valid.  One reason that there is so much bad code out there is that user-agents (mainly browsers) have been so forgiving and have done their best to cope with errors by guessing at the repair and many page creators never validate their code (often are not aware that W3C offers free validation).  The problem with allowing the errors is that it makes it difficult to write any user-agent to cope, greatly increasing the code needed to parse a document and, at the same time, slowing the rendering.  In addition, allowing the errors will just perpetuate the present situation.</p>

<p>Essentially, we can't do much with existing documents except carry on as we have and display the pages as best we can but we can insist that any document that purports to be HTML 5 or higher will NOT display unless the markup is correct (and good browsers will, as a minimum, identify the line containing the error, preferably will also tell what is wrong).</p>

</div>
</div>


<div class="comment" id="comment-121757">
<p class="comment-meta" id="c121757">
<span class="comment-meta-author"><strong>Karl Dubost <a class="commenter-profile" href="http://www.w3.org/People/karl/"><img alt="Author Profile Page" src="http://www.w3.org/QA/sununga/mt-static/images/comment/mt_logo.png" width="16" height="16" /></a></strong></span>
<span class="comment-meta-date"><a href="#c121757">#</a> 2008-03-09</span>
</p>
<div class="comment-bulk">
<p>The spec already mandates that the content which is produced must be valid.</p>

<p>For the second part of your comment, do not display content written for HTML 5 which is invalid? How do you know if an invalid document has been written with HTML 4.01, HTML 5 or nothing specific in mind? It's almost impossible to know that except if you are the author yourself (or the tool which is producing the content.)</p>

</div>
</div>


<div class="comment" id="comment-164823">
<p class="comment-meta" id="c164823">
<span class="comment-meta-author"><strong>Strick </strong></span>
<span class="comment-meta-date"><a href="#c164823">#</a> 2008-09-03</span>
</p>
<div class="comment-bulk">
<blockquote><p>How do you know if an invalid document has been written with HTML 4.01, HTML 5 or nothing specific in mind? It's almost impossible to know that except if you are the author yourself (or the tool which is producing the content.)</p></blockquote>

<p>All they would have to do is add some sort of attribute to an existing tag to put what spec you are using. (Kinda like what MS is doing with IE 8 <a href="http://support.microsoft.com/kb/956197" rel="nofollow">http://support.microsoft.com/kb/956197</a>)</p>

<p>I'm all for forcing the new standard.  I'm tired of trying to maintain code that uses tags to design.  </p>

</div>
</div>



  <div class="comments-open" id="comments-open">
<h3 class="comments-open-header">Leave a comment</h3>

<div class="comments-open-moderated">
   <p>
   Note: this blog is intended to foster <strong>polite
   on-topic discussions</strong>. Comments failing these
   requirements and spam will not get published. Please,
   enter your real name and email address. Every
   individual comment is reviewed by the W3C staff.
   This may take some time, thank you for your patience.
   </p>
   <p>
   You can use the following HTML markup (a href, b, i, 
   br/, p, strong, em, ul, ol, li, blockquote, pre) 
   and/or <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a>.</p>
</div>

<div id="comments-open-data">
<form method="post" action="http://www.w3.org/QA/sununga/beach.pl" id="comments-form">
<h4>Your comment</h4>
<div id="comments-open-text">
  <textarea id="comment-text" name="text" rows="20" cols="100"></textarea><br />
<label for="comment-text">Write your comment text here. Remember, keep the discussion on topic and courteous.</label>
</div>

<h4>About you</h4>
<div id="comment-form-name">
  <input type="hidden" name="static" value="1" />
<input type="hidden" name="entry_id" value="69" />
<input type="hidden" name="__lang" value="en" /> 
<label for="comment-author">Your Name</label>
<input id="comment-author" name="author" size="30" value="" />
</div>
<div id="comment-form-email">
<label for="comment-email">Your Email Address</label>
<input id="comment-email" name="email" size="30" value="" />
</div>

<div id="comments-open-footer">
<input type="submit" accesskey="s" name="post" id="comment-submit" value="Submit" />

</div>
</form>
</div>
</div>



<p id="gentime">This page was last generated on $Date: 2011/12/16 02:58:30 $</p> 

      </div><!-- End of "main" DIV. -->

<address>

This blog is written by W3C staff and working group participants,<br />
&nbsp;and maintained by <a href="/People/CMercier/">Coralie Mercier</a>.<br />
Authorized parties may <a href="/QA/new">log in</a> to create a new entry.<br/>
<span id="poweredby">Powered by Movable Type, magpierss and a lot of Web Technology</span>
    </address>


    
    <p class="copyright">
      <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &copy; 1994-2011
      <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a>&reg;
      (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>,
      <a href="http://www.ercim.eu/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>,
      <a href="http://www.keio.ac.jp/">Keio</a>),
      All Rights Reserved.
      W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
      <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a>,
      <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a>
      and <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-software">software licensing</a>
      rules apply. Your interactions with this site are in accordance
      with our <a href="http://www.w3.org/Consortium/Legal/privacy-statement#Public">public</a> and
      <a href="http://www.w3.org/Consortium/Legal/privacy-statement#Members">Member</a> privacy
      statements.
    </p>

  </body>
</html>