Parser.py
5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
@author Georg Hopp
"""
import re
from Message import Message
class Parser(object):
    """
    Incremental parser that fills a message object from raw HTTP data.

    The parser itself only holds compiled regular expressions; all parsing
    progress (state flags, headers, chunk bookkeeping, body) is stored on
    the message object that is passed to each method.
    """

    def __init__(self):
        """
        Compile the regular expressions used by the parse methods.
        """
        # "Name: value" header line.
        self._header_exp = re.compile(r"([^:]+):(.+)\r\n")
        # Chunk-size line: hexadecimal size (either letter case), followed
        # by optional chunk extensions that are ignored.
        self._chunk_exp = re.compile(r"([\da-fA-F]+).*\r\n")
        # Request line: "<METHOD> <uri> HTTP/x.y".
        # Message.METHODS is presumably a sequence of method name strings
        # (it is joined with '|' here and searched with index() later).
        self._req_exp = re.compile(
            r".*(%s) +([^ ]+) +(HTTP/\d\.\d)\r\n" % '|'.join(Message.METHODS))
        # Status line: "HTTP/x.y <3 digit code> <reason>".
        self._state_exp = re.compile(r".*(HTTP/\d\.\d) *(\d{3}) *(.*)\r\n")

    def parse(self, message, data):
        """
        Parse data into the given message.

        Runs the start line, header and body stages in order, each one only
        if the message reports that the previous stage is ready and the
        stage itself is not.

        Returns the amount of data consumed, 0 when nothing was consumed
        (e.g. the message is already complete).

        @message: The message instance the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        if 0 == message.state:
            # A message without any state flags that still carries request
            # or response data is recycled before parsing a new start line.
            if message.isRequest() or message.isResponse():
                message.reset()
            end += self.parseStartLine(message, data)

        if message.startlineReady() and not message.headersReady():
            end += self.parseHeaders(message, data[end:])

        if message.headersReady() and not message.bodyReady():
            end += self.parseBody(message, data[end:])

        return end

    def parseStartLine(self, message, data):
        """
        Parse data into the HTTP message start line, either a Request- or
        a Status-Line. On a match the Message.START_READY flag is set in
        message.state. If neither matches, the first line (if complete) is
        treated as invalid and skipped.

        Returns the position of the data that is not parsed.

        @message: The message instance the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        match = self._parseRequest(message, data)
        if match:
            end = match.end()

        # The status line is tried as well; if it matches, its end wins.
        match = self._parseResponse(message, data)
        if match:
            end = match.end()

        if 0 != end:
            message.state |= Message.START_READY
        else:
            end = self._checkInvalid(message, data[end:])

        return end

    def parseHeaders(self, message, data):
        """
        Parse data into the headers of a message.

        Header lines are consumed until an empty line (a lone CRLF) is
        reached. At that point the body handling is chosen from the
        Content-Length or Transfer-Encoding header and the
        Message.HEADERS_READY flag is set.

        Returns the position of the data that is not parsed.

        @message: The message instance the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        match = self._header_exp.match(data[end:])
        while match and "\r\n" != data[end:end+2]:
            message.setHeader(match.group(1).strip(), match.group(2).strip())
            end += match.end()
            match = self._header_exp.match(data[end:])

        if "\r\n" == data[end:end+2]:
            # a single \r\n at the beginning indicates end of headers.
            if message.headerKeyExists('Content-Length'):
                # The remaining body length is tracked in _chunk_size.
                message._chunk_size = int(message.getHeader('Content-Length'))
            elif message.headerKeyExists('Transfer-Encoding') and \
                    'chunked' in message.getHeader('Transfer-Encoding'):
                message._chunked = True
            else:
                # Neither a length nor chunked encoding: no body to read.
                message.state |= Message.BODY_READY
            message.state |= Message.HEADERS_READY
            end += 2
        else:
            end += self._checkInvalid(message, data[end:])

        return end

    def parseBody(self, message, data):
        """
        Parse data into the body of a message. This is also capable of
        handling chunked bodies as defined for HTTP/1.1.

        message._chunk_size holds the number of bytes still expected, either
        for the whole body (Content-Length) or for the current chunk.

        Returns the position of the data that is not parsed.

        @message: The message instance the data is parsed into.
        @data:    The data to be parsed.
        """
        readlen = 0

        if message._chunked and 0 == message._chunk_size:
            # Start of a new chunk: read the chunk-size line first.
            match = self._chunk_exp.match(data)
            if match is None:
                # Chunk-size line not (completely) available yet.
                return 0

            message._chunk_size = int(match.group(1), 16)
            readlen += match.end()
            data = data[match.end():]

            # The chunk size lives on the message, not on the parser.
            if 0 == message._chunk_size:
                # A zero sized chunk terminates the body; the extra 2
                # accounts for the CRLF that follows it.
                message.state |= Message.BODY_READY
                return readlen + 2

        # Take as much as is available, but never more than is expected.
        available_data = len(data[0:message._chunk_size])
        message._chunk_size -= available_data
        readlen += available_data
        message._body += data[0:available_data]

        if 0 == message._chunk_size:
            if not message._chunked:
                message.state |= Message.BODY_READY
                return readlen
            else:
                # Skip the CRLF that terminates the chunk data.
                readlen += 2

        return readlen

    def _parseRequest(self, message, data):
        """
        Search data for a request line and, if found, store the method
        (as its index in Message.METHODS), the URI and the HTTP version on
        the message.

        Returns the match object or None.
        """
        match = self._req_exp.search(data)
        if match:
            message._method = Message.METHODS.index(match.group(1))
            message._uri = match.group(2)
            message._http = match.group(3)
        return match

    def _parseResponse(self, message, data):
        """
        Search data for a status line and, if found, store the HTTP
        version, the numeric status code and the reason text on the message.

        Returns the match object or None.
        """
        match = self._state_exp.search(data)
        if match:
            message._http = match.group(1)
            message._code = int(match.group(2))
            message._message = match.group(3)
        return match

    def _checkInvalid(self, message, data):
        """
        Handle data that could not be parsed.

        If data contains a complete line, the message is reset and the
        line is skipped. Without a line end nothing is consumed, so the
        caller can retry once more data has arrived.

        Returns the amount of data to skip.
        """
        end = 0
        nl = data.find("\r\n")
        if -1 != nl:
            # We received an invalid message...ignore it and start again
            # TODO This should be logged.
            message.reset()
            end = nl + 2
        return end
# vim: set ft=python et ts=8 sw=4 sts=4: