robots.txt
1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#
# robots.txt for http://www.w3.org/
#
# $Id: robots.txt,v 1.62 2010/08/23 18:35:40 ted Exp $
#
# Crawler-specific groups. Each group is separated from the next by a blank
# line: the original robots exclusion standard uses blank lines as record
# separators, so legacy parsers need them to tell the groups apart.

# For use by search.w3.org: these two crawlers may fetch everything except
# /Out-Of-Date.
User-agent: W3C-gsa
Disallow: /Out-Of-Date

User-agent: W3T_SE
Disallow: /Out-Of-Date

# This crawler is excluded from the entire site.
User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)
Disallow: /

# W3C Link checker: an empty Disallow value means nothing is disallowed.
User-agent: W3C-checklink
Disallow:


# Default rules for every crawler not matched by a more specific group.
#
# Keep this as ONE "User-agent: *" group. The original robots exclusion
# standard does not allow more than one "*" record, and parsers that follow it
# may honor only the first one and silently ignore the rest.
# Do not put blank lines inside the group: legacy parsers treat a blank line
# as the end of the record.
#
# Exclude some access-controlled areas.
User-agent: *
Disallow: /2004/ontaria/basic
Disallow: /Team/
Disallow: /Project
Disallow: /Web
Disallow: /Systems
Disallow: /History
Disallow: /Out-Of-Date
Disallow: /2002/02/mid
Disallow: /mid/
Disallow: /2005/06/blog/
Disallow: /2004/08/W3CTalks
Disallow: /2007/11/Talks/search
Disallow: /People/all/
Disallow: /RDF/Validator/ARPServlet
Disallow: /2003/03/Translations/byLanguage
Disallow: /2003/03/Translations/byTechnology
Disallow: /2005/11/Translations/Query
# Disabled rules, kept for reference:
#Disallow: /2003/glossary/subglossary/
#Disallow: /2001/07/pubrules-checker
# shouldn't get transparent proxies but will ml links of things like pubrules
# NOTE(review): the wording of the comment above ("will ml links") is unclear
# in the original; confirm the intended meaning with the file's maintainer.
Disallow: /2000/06/webdata/xslt
Disallow: /2000/09/webdata/xslt
Disallow: /2005/08/online_xslt/xslt
Disallow: /Bugs/
Disallow: /Search/Mail/Public/
Disallow: /2006/02/chartergen
Disallow: /2004/01/pp-impl
Disallow: /Consortium/supporters
Disallow: /2007/08/pyRdfa/
Disallow: /WAI/PF/comments/
# Exclude some wikis that are full of spam (same "*" group as above).
Disallow: /2001/sw/sweo/group/wiki/
Disallow: /2005/Incubator/geo/Wiki/
Disallow: /2005/Incubator/mmsem/wiki/
Disallow: /2005/Incubator/urw3/wiki/
Disallow: /2005/MWI/BPWG/techs/
Disallow: /2005/MWI/Steer/wiki/
Disallow: /2006/appformats/group/
Disallow: /2006/tsdtf/
Disallow: /2006/ubiweb-wiki/
Disallow: /2006/webapi/group/
Disallow: /2007/xmlsec/wiki/
Disallow: /MarkUp/Forms/Group/wiki/