# Parser.py 5.36 KB
"""
@author Georg Hopp

"""

import re
from Message import Message

class Parser(object):
    """
    Incremental parser for HTTP messages.

    Reads a start line (Request- or Status-Line), the headers and the body
    (plain or chunked) out of a string and stores what it finds in a
    message object. The parse state is kept in the message, not in the
    parser.
    """

    # "<name>:<value>\r\n"
    _header_exp = re.compile(r"([^:]+):(.+)\r\n")
    # Chunk size line: hex digits (either case), the rest of the line is
    # skipped.
    _chunk_exp  = re.compile(r"([\da-fA-F]+).*\r\n")
    # Status-Line: "<http version> <3 digit code> <reason>\r\n"
    _state_exp  = re.compile(r".*(HTTP/\d\.\d) *(\d{3}) *(.*)\r\n")

    def __init__(self):
        """
        Build the Request-Line expression from Message.METHODS.
        """
        # Request-Line: "<method> <uri> <http version>\r\n"
        self._req_exp = re.compile(
                r".*(%s) +([^ ]+) +(HTTP/\d\.\d)\r\n"%'|'.join(Message.METHODS))

    def parse(self, message, data):
        """
        Parse data into this message.

        Runs the start line, header and body steps one after the other, each
        one only if the message state asks for it, and hands every step the
        data the previous step left over.

        Returns 0 when the Message is already complete or the amount of the
        successfully parsed data.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        if 0 == message.state:
            # A message with state 0 that still looks like a request or a
            # response is cleared before it is filled again.
            if message.isRequest() or message.isResponse():
                message.reset()
            end += self.parseStartLine(message, data)

        if message.startlineReady() and not message.headersReady():
            end += self.parseHeaders(message, data[end:])

        if message.headersReady() and not message.bodyReady():
            end += self.parseBody(message, data[end:])

        return end

    def parseStartLine(self, message, data):
        """
        Parse data into the HTTP message startline, either a Request- or a
        Statusline. Both expressions are tried. If at least one matches,
        Message.START_READY is set in the message state. When both match,
        the end of the Statusline match is the one that is returned.

        If none matches the data is handed to _checkInvalid.

        Returns the position of the data that is not parsed.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        match = self._parseRequest(message, data)
        if match:
            end = match.end()

        match = self._parseResponse(message, data)
        if match:
            end = match.end()

        if 0 != end:
            message.state |= Message.START_READY
        else:
            end = self._checkInvalid(message, data)

        return end

    def parseHeaders(self, message, data):
        """
        Parse data into the headers of a message.

        Header lines are stored with setHeader until a line starts with
        "\\r\\n". At that point Message.HEADERS_READY is set and the body
        handling is chosen: a Content-Length header sets the amount of body
        data to read, a Transfer-Encoding containing 'chunked' turns on
        chunked mode, and without either Message.BODY_READY is set as well.

        Data that is neither a header nor the empty line is handed to
        _checkInvalid.

        Returns the position of the data that is not parsed.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        end = 0

        # Match at an offset instead of slicing, so data is not copied for
        # every header.
        match = self._header_exp.match(data, end)
        while match and "\r\n" != data[end:end+2]:
            message.setHeader(match.group(1).strip(), match.group(2).strip())
            end   = match.end()
            match = self._header_exp.match(data, end)

        if "\r\n" == data[end:end+2]:
            # a single \r\n at the beginning indicates end of headers.
            if message.headerKeyExists('Content-Length'):
                message._chunk_size = int(message.getHeader('Content-Length'))
            elif message.headerKeyExists('Transfer-Encoding') and \
                    'chunked' in message.getHeader('Transfer-Encoding'):
                message._chunked = True
            else:
                message.state |= Message.BODY_READY

            message.state |= Message.HEADERS_READY
            end += 2
        else:
            end += self._checkInvalid(message, data[end:])

        return end

    def parseBody(self, message, data):
        """
        Parse data into the body of a message. This is also capable of
        handling chunked bodies as defined for HTTP/1.1.

        message._chunk_size holds the amount of data that is still missing,
        either for the whole body or for the current chunk. At most one
        chunk is handled per call.

        Returns the position of the data that is not parsed, 0 if a chunk
        size line is expected but not found at the start of data.

        @message: An HttpMessage instance where the data is parsed into.
        @data:    The data to be parsed.
        """
        readlen = 0

        if message._chunked and 0 == message._chunk_size:
            # A new chunk starts, so its size line has to come first.
            match = self._chunk_exp.match(data)

            if match is None:
                return 0

            message._chunk_size  = int(match.group(1), 16)
            readlen             += match.end()
            data                 = data[match.end():]

            # The chunk size is kept in the message, the parser itself has
            # no such attribute.
            if 0 == message._chunk_size:
                # A chunk of size 0 ends the body.
                message.state |= Message.BODY_READY
                # NOTE(review): the + 2 counts a closing "\r\n" without
                # checking that it is in data; trailers are not handled.
                return readlen + 2

        # Take what is there, but not more than is still missing.
        available_data       = len(data[0:message._chunk_size])
        message._chunk_size -= available_data
        readlen             += available_data
        message._body       += data[0:available_data]

        if 0 == message._chunk_size:
            if not message._chunked:
                message.state |= Message.BODY_READY
            else:
                # NOTE(review): counts the "\r\n" behind the chunk data
                # without checking that it is already in data.
                readlen += 2

        return readlen

    def _parseRequest(self, message, data):
        """
        Search data for a Request-Line and on success store the index of the
        method in Message.METHODS, the uri and the http version in message.

        Returns the match object or None.
        """
        match = self._req_exp.search(data)
        if match:
            message._method = Message.METHODS.index(match.group(1))
            message._uri    = match.group(2)
            message._http   = match.group(3)
        return match

    def _parseResponse(self, message, data):
        """
        Search data for a Status-Line and on success store the http version,
        the status code (as int) and the reason text in message.

        Returns the match object or None.
        """
        match = self._state_exp.search(data)
        if match:
            message._http    = match.group(1)
            message._code    = int(match.group(2))
            message._message = match.group(3)
        return match

    def _checkInvalid(self, message, data):
        """
        Skip a line that could not be parsed.

        If data contains a "\\r\\n" the message is reset and the position
        right behind the first "\\r\\n" is returned. Without a "\\r\\n"
        nothing is changed and 0 is returned.
        """
        end = 0
        nl  = data.find("\r\n")
        if -1 != nl:
            # We received an invalid message...ignore it and start again
            # TODO This should be logged.
            message.reset()
            end = nl + 2
        return end

# vim: set ft=python et ts=8 sw=4 sts=4: