Added ability to use LF, not only CRLF, as the delimiter for response Headers and Body #115
Changes from 10 commits
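Before the diff itself, a minimal sketch (not part of the PR) of the behaviour this change enables at the ReceiveBuffer level: a header block terminated by a bare-LF blank line is now extracted the same way a CRLF-terminated one is. The import path is an assumption about the package layout (h11._receivebuffer); adjust it to wherever ReceiveBuffer lives in this repository.

# Illustrative only: exercising the patched ReceiveBuffer with LF-only line
# endings. The import path is an assumption about the package layout.
from h11._receivebuffer import ReceiveBuffer

buf = ReceiveBuffer()
buf += b"HTTP/1.1 200 OK\n"
buf += b"Content-Length: 4\n"
buf += b"\n"  # bare LF blank line now terminates the header block
buf += b"body"

lines = buf.maybe_extract_lines()
assert lines == [b"HTTP/1.1 200 OK", b"Content-Length: 4"]
assert bytes(buf) == b"body"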
_receivebuffer.py

@@ -1,10 +1,12 @@
+import re
 import sys
 
 __all__ = ["ReceiveBuffer"]
 
 
 # Operations we want to support:
-# - find next \r\n or \r\n\r\n, or wait until there is one
+# - find next \r\n or \r\n\r\n (\n or \n\n are also acceptable),
+#   or wait until there is one
 # - read at-most-N bytes
 # Goals:
 # - on average, do this fast
@@ -38,13 +40,22 @@
 # slightly clever thing where we delay calling compress() until we've
 # processed a whole event, which could in theory be slightly more efficient
 # than the internal bytearray support.)
 
+blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
+
+
+def rstrip_line(line):
+    return line.rstrip(b"\r")
+
+
 class ReceiveBuffer(object):
     def __init__(self):
         self._data = bytearray()
         # These are both absolute offsets into self._data:
         self._start = 0
         self._looked_at = 0
-        self._looked_for = b""
+
+        self._looked_for_regex = blank_line_delimiter_regex
 
     def __bool__(self):
         return bool(len(self))
@@ -79,21 +90,29 @@ def maybe_extract_at_most(self, count):
         self._start += len(out)
         return out
 
-    def maybe_extract_until_next(self, needle):
+    def maybe_extract_until_next(self, needle_regex, max_needle_length):
         # Returns extracted bytes on success (advancing offset), or None on
         # failure
-        if self._looked_for == needle:
-            search_start = max(self._start, self._looked_at - len(needle) + 1)
+        if self._looked_for_regex == needle_regex:
+            looked_at = max(self._start, self._looked_at - max_needle_length)
         else:
-            search_start = self._start
-        offset = self._data.find(needle, search_start)
-        if offset == -1:
+            looked_at = self._start
+            self._looked_for_regex = needle_regex
+
+        delimiter_match = next(
+            self._looked_for_regex.finditer(self._data, looked_at), None
+        )
+
+        if delimiter_match is None:
             self._looked_at = len(self._data)
-            self._looked_for = needle
             return None
-        new_start = offset + len(needle)
-        out = self._data[self._start : new_start]
-        self._start = new_start
+
+        _, end = delimiter_match.span(0)
+
+        out = self._data[self._start : end]
+
+        self._start = end
+
         return out
 
     # HTTP/1.1 has a number of constructs where you keep reading lines until
@@ -102,11 +121,14 @@ def maybe_extract_lines(self):
         if self._data[self._start : self._start + 2] == b"\r\n":
             self._start += 2
             return []
+        elif self._data[self._start : self._start + 1] == b"\n":
+            self._start += 1
+            return []
         else:
-            data = self.maybe_extract_until_next(b"\r\n\r\n")
+            data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3)
             if data is None:
                 return None
-            lines = data.split(b"\r\n")
-            assert lines[-2] == lines[-1] == b""
-            del lines[-2:]
+
+            lines = list(map(rstrip_line, data.rstrip(b"\r\n").split(b"\n")))
+
             return lines
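Between the two files, a quick standalone check (illustrative, not from the PR) of what the new blank_line_delimiter_regex accepts as the end of a header block; the sample header bytes here are invented for the example:

import re

# Same pattern as in the diff: an LF, an optional CR, then another LF.
blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)

samples = (
    b"A: 1\r\nB: 2\r\n\r\nrest",  # CRLF everywhere (matches the trailing b"\n\r\n")
    b"A: 1\nB: 2\n\nrest",        # bare LF everywhere (matches b"\n\n")
    b"A: 1\r\nB: 2\n\r\nrest",    # mixed line endings
)
for head in samples:
    match = blank_line_delimiter_regex.search(head)
    assert match is not None
    # Everything up to match.end() is the header block; the rest is the body.
    assert head[match.end():] == b"rest"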
tests/test_receivebuffer.py

@@ -1,3 +1,7 @@
+import re
+
+import pytest
+
 from .._receivebuffer import ReceiveBuffer
 
 
@@ -35,23 +39,23 @@ def test_receivebuffer():
 
     b += b"12345a6789aa"
 
-    assert b.maybe_extract_until_next(b"a") == b"12345a"
+    assert b.maybe_extract_until_next(re.compile(b"a"), 1) == b"12345a"
     assert bytes(b) == b"6789aa"
 
-    assert b.maybe_extract_until_next(b"aaa") is None
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
     assert bytes(b) == b"6789aa"
 
     b += b"a12"
-    assert b.maybe_extract_until_next(b"aaa") == b"6789aaa"
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"6789aaa"
     assert bytes(b) == b"12"
 
     # check repeated searches for the same needle, triggering the
     # pickup-where-we-left-off logic
     b += b"345"
-    assert b.maybe_extract_until_next(b"aaa") is None
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
 
     b += b"6789aaa123"
-    assert b.maybe_extract_until_next(b"aaa") == b"123456789aaa"
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"123456789aaa"
     assert bytes(b) == b"123"
 
     ################################################################
@@ -76,3 +80,54 @@ def test_receivebuffer():
     b += b"\r\ntrailing"
     assert b.maybe_extract_lines() == []
     assert bytes(b) == b"trailing"
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        pytest.param(
+            (
+                b"HTTP/1.1 200 OK\r\n",
+                b"Content-type: text/plain\r\n",
+                b"Connection: close\r\n",
+                b"\r\n",
+                b"Some body",
+            ),
+            id="with_crlf_delimiter",
+        ),
+        pytest.param(
+            (
+                b"HTTP/1.1 200 OK\n",
+                b"Content-type: text/plain\n",
+                b"Connection: close\n",
+                b"\n",
+                b"Some body",
+            ),
+            id="with_lf_only_delimiter",
+        ),
+        pytest.param(
+            (
+                b"HTTP/1.1 200 OK\n",
+                b"Content-type: text/plain\r\n",
+                b"Connection: close\n",
+                b"\n",
+                b"Some body",
+            ),
+            id="with_mixed_crlf_and_lf",
+        ),
+    ],
+)
+def test_receivebuffer_for_invalid_delimiter(data):
+    b = ReceiveBuffer()
+
+    for line in data:
+        b += line
+
+    lines = b.maybe_extract_lines()
+
+    assert lines == [
+        b"HTTP/1.1 200 OK",
+        b"Content-type: text/plain",
+        b"Connection: close",
+    ]
+    assert bytes(b) == b"Some body"
Should this actually be min_needle_length, or have I got that back-to-front?
I.e. always resume searching from the first possible point where there could still be a match.
Ah yeah, I see it: max_needle_length, because that pushes the restart back as far as possible.
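A standalone worked example of the amount being discussed (the names mirror the diff, but this is an illustration, not the PR code): after a failed search it is enough to back up max_needle_length bytes from where scanning stopped, because any match not seen yet has to run past the old end of the buffer.

import re

data = bytearray(b"6789aa")  # a search for b"aaa" fails here
looked_at = len(data)        # == 6: everything scanned so far
data += b"a12"               # new bytes arrive; b"aaa" now straddles the boundary

needle_regex = re.compile(b"aaa")
max_needle_length = 3

# Resume max_needle_length bytes before the old end of the data.
resume_at = max(0, looked_at - max_needle_length)  # == 3

match = next(needle_regex.finditer(data, resume_at), None)
assert match is not None and match.start() == 4  # found the straddling match

# Resuming at looked_at itself would have skipped it entirely.
assert next(needle_regex.finditer(data, looked_at), None) is None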