Skip to content

Added ability to use LF, not only CRLF, as the delimiter for response headers and body #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion h11/_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Remember that this has to run in O(n) time -- so e.g. the bytearray cast is
# critical.
# Whitespace that continues an obsolete folded header line (obs-fold);
# consumed by _obsolete_line_fold below.
obs_fold_re = re.compile(br"[ \t]+")
# Strict CRLF-only line terminator — used for chunk headers, which (unlike
# response headers/body below) are not given lenient bare-LF treatment.
# NOTE(review): re.MULTILINE has no effect here — the pattern has no anchors.
strict_line_delimiter_regex = re.compile(b"\r\n", re.MULTILINE)


def _obsolete_line_fold(lines):
Expand Down Expand Up @@ -153,7 +154,7 @@ def __call__(self, buf):
assert self._bytes_to_discard == 0
if self._bytes_in_chunk == 0:
# We need to refill our chunk count
chunk_header = buf.maybe_extract_until_next(b"\r\n")
chunk_header = buf.maybe_extract_until_next(strict_line_delimiter_regex, 2)
if chunk_header is None:
return None
matches = validate(
Expand Down
54 changes: 38 additions & 16 deletions h11/_receivebuffer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import re
import sys

__all__ = ["ReceiveBuffer"]


# Operations we want to support:
# - find next \r\n or \r\n\r\n, or wait until there is one
# - find next \r\n or \r\n\r\n (\n or \n\n are also acceptable),
# or wait until there is one
# - read at-most-N bytes
# Goals:
# - on average, do this fast
Expand Down Expand Up @@ -38,13 +40,22 @@
# slightly clever thing where we delay calling compress() until we've
# processed a whole event, which could in theory be slightly more efficient
# than the internal bytearray support.)

# The blank line that terminates a header block: matches b"\n\n" or
# b"\n\r\n", i.e. accepts bare-LF line endings as well as CRLF.
# NOTE(review): re.MULTILINE has no effect here — the pattern has no anchors.
blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)


def rstrip_line(line):
    """Return *line* with every trailing b"\\r" byte removed."""
    while line.endswith(b"\r"):
        line = line[:-1]
    return line


class ReceiveBuffer(object):
def __init__(self):
self._data = bytearray()
# These are both absolute offsets into self._data:
self._start = 0
self._looked_at = 0
self._looked_for = b""

self._looked_for_regex = blank_line_delimiter_regex

def __bool__(self):
return bool(len(self))
Expand Down Expand Up @@ -79,21 +90,29 @@ def maybe_extract_at_most(self, count):
self._start += len(out)
return out

def maybe_extract_until_next(self, needle):
def maybe_extract_until_next(self, needle_regex, max_needle_length):
Copy link
Contributor

@tomchristie tomchristie Nov 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this actually be min_needle_length, or have I got that back-to-front?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ie. always resume searching from the first possible point where there could still be a match.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, I see — it's max_needle_length, because that pushes the restart point back as far as possible.

# Returns extracted bytes on success (advancing offset), or None on
# failure
if self._looked_for == needle:
search_start = max(self._start, self._looked_at - len(needle) + 1)
if self._looked_for_regex == needle_regex:
looked_at = max(self._start, self._looked_at - max_needle_length)
else:
search_start = self._start
offset = self._data.find(needle, search_start)
if offset == -1:
looked_at = self._start
self._looked_for_regex = needle_regex

delimiter_match = next(
self._looked_for_regex.finditer(self._data, looked_at), None
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you help explain why this is next(finditer(...)) rather than search(...) here?
What am I missing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, you are absolutely right — I missed that Pattern.search exists and used Pattern.finditer instead. I'm going to fix it to use search, as it's clearer.


if delimiter_match is None:
self._looked_at = len(self._data)
self._looked_for = needle
return None
new_start = offset + len(needle)
out = self._data[self._start : new_start]
self._start = new_start

_, end = delimiter_match.span(0)

out = self._data[self._start : end]

self._start = end

return out

# HTTP/1.1 has a number of constructs where you keep reading lines until
Expand All @@ -102,11 +121,14 @@ def maybe_extract_lines(self):
if self._data[self._start : self._start + 2] == b"\r\n":
self._start += 2
return []
elif self._data[self._start : self._start + 1] == b"\n":
self._start += 1
return []
else:
data = self.maybe_extract_until_next(b"\r\n\r\n")
data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3)
if data is None:
return None
lines = data.split(b"\r\n")
assert lines[-2] == lines[-1] == b""
del lines[-2:]

lines = list(map(rstrip_line, data.rstrip(b"\r\n").split(b"\n")))

return lines
65 changes: 60 additions & 5 deletions h11/tests/test_receivebuffer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import re

import pytest

from .._receivebuffer import ReceiveBuffer


Expand Down Expand Up @@ -35,23 +39,23 @@ def test_receivebuffer():

b += b"12345a6789aa"

assert b.maybe_extract_until_next(b"a") == b"12345a"
assert b.maybe_extract_until_next(re.compile(b"a"), 1) == b"12345a"
assert bytes(b) == b"6789aa"

assert b.maybe_extract_until_next(b"aaa") is None
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
assert bytes(b) == b"6789aa"

b += b"a12"
assert b.maybe_extract_until_next(b"aaa") == b"6789aaa"
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"6789aaa"
assert bytes(b) == b"12"

# check repeated searches for the same needle, triggering the
# pickup-where-we-left-off logic
b += b"345"
assert b.maybe_extract_until_next(b"aaa") is None
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None

b += b"6789aaa123"
assert b.maybe_extract_until_next(b"aaa") == b"123456789aaa"
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"123456789aaa"
assert bytes(b) == b"123"

################################################################
Expand All @@ -76,3 +80,54 @@ def test_receivebuffer():
b += b"\r\ntrailing"
assert b.maybe_extract_lines() == []
assert bytes(b) == b"trailing"


@pytest.mark.parametrize(
    "data",
    [
        pytest.param(
            (
                b"HTTP/1.1 200 OK\r\n",
                b"Content-type: text/plain\r\n",
                b"Connection: close\r\n",
                b"\r\n",
                b"Some body",
            ),
            id="with_crlf_delimiter",
        ),
        pytest.param(
            (
                b"HTTP/1.1 200 OK\n",
                b"Content-type: text/plain\n",
                b"Connection: close\n",
                b"\n",
                b"Some body",
            ),
            id="with_lf_only_delimiter",
        ),
        pytest.param(
            (
                b"HTTP/1.1 200 OK\n",
                b"Content-type: text/plain\r\n",
                b"Connection: close\n",
                b"\n",
                b"Some body",
            ),
            id="with_mixed_crlf_and_lf",
        ),
    ],
)
def test_receivebuffer_for_invalid_delimiter(data):
    # Feed the response in one chunk at a time, then extract the header
    # lines in a single call; line terminators must be stripped regardless
    # of whether CRLF or bare LF was used.
    buf = ReceiveBuffer()
    for chunk in data:
        buf += chunk

    extracted = buf.maybe_extract_lines()

    expected = [
        b"HTTP/1.1 200 OK",
        b"Content-type: text/plain",
        b"Connection: close",
    ]
    assert extracted == expected
    # The body is left untouched in the buffer.
    assert bytes(buf) == b"Some body"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add a few similar tests to test_readers_unusual in test_io.py? These tests are good for the basic ReceiveBuffer functionality, but the test harness there sets up a more "end-to-end" setup that runs our full http parsing pipeline, so it would give us more confidence that everything is wired up correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added similar tests to test_readers_unusual