From 47dd05b610f170542b3c84c9c95d63af6542122e Mon Sep 17 00:00:00 2001 From: hisanori Date: Mon, 24 Mar 2025 21:11:30 +0900 Subject: [PATCH 1/8] allow bare LF in cgi header --- lib/webrick/httpservlet/cgihandler.rb | 2 +- lib/webrick/httputils.rb | 11 ++++++++--- sig/httputils.rbs | 2 +- test/webrick/test_cgi.rb | 11 +++++++++++ test/webrick/webrick_bare_lf.cgi | 8 ++++++++ 5 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 test/webrick/webrick_bare_lf.cgi diff --git a/lib/webrick/httpservlet/cgihandler.rb b/lib/webrick/httpservlet/cgihandler.rb index 450aa380..efb76e0a 100644 --- a/lib/webrick/httpservlet/cgihandler.rb +++ b/lib/webrick/httpservlet/cgihandler.rb @@ -96,7 +96,7 @@ def do_GET(req, res) "Premature end of script headers: #{@script_filename}" if body.nil? begin - header = HTTPUtils::parse_header(raw_header) + header = HTTPUtils::parse_header(raw_header, true) if /^(\d+)/ =~ header['status'][0] res.status = $1.to_i header.delete('status') diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index e2c2a407..4ebcd5db 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -168,17 +168,22 @@ def join(separator = "; ") "cookie" => CookieHeader, }) - def parse_header(raw) + def parse_header(raw, cgi_mode = false) header = Hash.new([].freeze) field = nil + + line_break = cgi_mode ? "\\r?\\n" : "\\r\\n" + header_line = Regexp.new(/^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)#{line_break}\z/m) + continued_header_lines = Regexp.new(/^[ \t]+([^\r\n\0]*?)#{line_break}/m) + raw.each_line{|line| case line - when /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r\n\z/om + when header_line field, value = $1, $2 field.downcase! header[field] = HEADER_CLASSES[field].new unless header.has_key?(field) header[field] << value - when /^[ \t]+([^\r\n\0]*?)\r\n/om + when continued_header_lines unless field raise HTTPStatus::BadRequest, "bad header '#{line}'." end diff --git a/sig/httputils.rbs b/sig/httputils.rbs index a554cdfb..546675ae 100644 --- a/sig/httputils.rbs +++ b/sig/httputils.rbs @@ -26,7 +26,7 @@ module WEBrick HEADER_CLASSES: Hash[String, untyped] - def self?.parse_header: (String raw) -> Hash[String, Array[String]] + def self?.parse_header: (String raw, ?bool cgi_mode) -> Hash[String, Array[String]] def self?.split_header_value: (String str) -> Array[String] diff --git a/test/webrick/test_cgi.rb b/test/webrick/test_cgi.rb index a9be8f35..f8e1312e 100644 --- a/test/webrick/test_cgi.rb +++ b/test/webrick/test_cgi.rb @@ -145,4 +145,15 @@ def test_bad_header assert_not_match(CtrlPat, s) } end + + def test_bare_lf_in_cgi_header + TestWEBrick.start_cgi_server do |server, addr, port, log| + http = Net::HTTP.new(addr, port) + req = Net::HTTP::Get.new("/webrick_bare_lf.cgi") + assert_nothing_raised do + res = http.request(req) + assert_equal res['Content-Type'], 'text/plain' + end + end + end end diff --git a/test/webrick/webrick_bare_lf.cgi b/test/webrick/webrick_bare_lf.cgi new file mode 100644 index 00000000..add92fee --- /dev/null +++ b/test/webrick/webrick_bare_lf.cgi @@ -0,0 +1,8 @@ +#!ruby + +body = "test for bare LF in cgi header" + +print "Content-Type: text/plain\n" +print "Content-Length: #{body.size}\n" +print "\n" +print body From ee1d7ba58326071eabc5fb593f1cb805fd69c519 Mon Sep 17 00:00:00 2001 From: "hisanori takahashi (nyan)" Date: Tue, 25 Mar 2025 11:40:15 +0900 Subject: [PATCH 2/8] Update lib/webrick/httputils.rb use keyword argument Co-authored-by: Jeremy Evans --- lib/webrick/httputils.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index 4ebcd5db..d4b83ab6 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -168,7 +168,7 @@ def join(separator = "; ") "cookie" => CookieHeader, }) - def parse_header(raw, cgi_mode = false) + def parse_header(raw, cgi_mode: false) header = Hash.new([].freeze) field = nil From 84e865a42ea1a4a4a6a9ef5c0d136f2568333bbb Mon Sep 17 00:00:00 2001 From: "hisanori takahashi (nyan)" Date: Tue, 25 Mar 2025 11:40:27 +0900 Subject: [PATCH 3/8] Update lib/webrick/httpservlet/cgihandler.rb use keyword argument Co-authored-by: Jeremy Evans --- lib/webrick/httpservlet/cgihandler.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/webrick/httpservlet/cgihandler.rb b/lib/webrick/httpservlet/cgihandler.rb index efb76e0a..bb85512f 100644 --- a/lib/webrick/httpservlet/cgihandler.rb +++ b/lib/webrick/httpservlet/cgihandler.rb @@ -96,7 +96,7 @@ def do_GET(req, res) "Premature end of script headers: #{@script_filename}" if body.nil? begin - header = HTTPUtils::parse_header(raw_header, true) + header = HTTPUtils::parse_header(raw_header, cgi_mode: true) if /^(\d+)/ =~ header['status'][0] res.status = $1.to_i header.delete('status') From 4ab4c2d209c130bdca8d8aa9acaced4e62bbf370 Mon Sep 17 00:00:00 2001 From: hisanori Date: Tue, 25 Mar 2025 19:01:55 +0900 Subject: [PATCH 4/8] fix rbs --- sig/httputils.rbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sig/httputils.rbs b/sig/httputils.rbs index 546675ae..864f41da 100644 --- a/sig/httputils.rbs +++ b/sig/httputils.rbs @@ -26,7 +26,7 @@ module WEBrick HEADER_CLASSES: Hash[String, untyped] - def self?.parse_header: (String raw, ?bool cgi_mode) -> Hash[String, Array[String]] + def self?.parse_header: (String raw, ?cgi_mode: bool) -> Hash[String, Array[String]] def self?.split_header_value: (String str) -> Array[String] From efa1987e16e1e484daab8c880196fbf43f6b8779 Mon Sep 17 00:00:00 2001 From: hisanori Date: Tue, 25 Mar 2025 20:41:42 +0900 Subject: [PATCH 5/8] use constants for parsing headers --- lib/webrick/httputils.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index d4b83ab6..3972e26a 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -168,13 +168,17 @@ def join(separator = "; ") "cookie" => CookieHeader, }) + REGEXP_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r\n\z/m + REGEXP_CGI_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r?\n\z/m + REGEXP_CONTINUED_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r\n/m + REGEXP_CONTINUED_CGI_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r?\n/m + def parse_header(raw, cgi_mode: false) header = Hash.new([].freeze) field = nil - line_break = cgi_mode ? "\\r?\\n" : "\\r\\n" - header_line = Regexp.new(/^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)#{line_break}\z/m) - continued_header_lines = Regexp.new(/^[ \t]+([^\r\n\0]*?)#{line_break}/m) + header_line = cgi_mode ? REGEXP_CGI_HEADER_LINE : REGEXP_HEADER_LINE + continued_header_lines = cgi_mode ? REGEXP_CONTINUED_CGI_HEADER_LINE : REGEXP_CONTINUED_HEADER_LINE raw.each_line{|line| case line From 32887fd4645e14bab5d2f200a17aebd701c8cfc3 Mon Sep 17 00:00:00 2001 From: hisanori Date: Fri, 28 Mar 2025 11:23:11 +0900 Subject: [PATCH 6/8] Updated the comment for parse_header --- lib/webrick/httputils.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index 3972e26a..111dd397 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -148,10 +148,6 @@ def mime_type(filename, mime_tab) end module_function :mime_type - ## - # Parses an HTTP header +raw+ into a hash of header fields with an Array - # of values. - class SplitHeader < Array def join(separator = ", ") super @@ -173,6 +169,11 @@ def join(separator = "; ") REGEXP_CONTINUED_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r\n/m REGEXP_CONTINUED_CGI_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r?\n/m + ## + # Parses an HTTP header +raw+ into a hash of header fields with an Array + # of values. The header is expected to end with \r\n. If the header is in CGI + # format, set +cgi_mode+ to true to allow for a single \n line ending. + def parse_header(raw, cgi_mode: false) header = Hash.new([].freeze) field = nil From b6dd139cebed0606be9547cc05bb0f865f3d298a Mon Sep 17 00:00:00 2001 From: hisanori Date: Sun, 30 Mar 2025 12:18:36 +0900 Subject: [PATCH 7/8] move CGI-specific constants to CGIHandler --- lib/webrick/httpservlet/cgihandler.rb | 3 +++ lib/webrick/httputils.rb | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/webrick/httpservlet/cgihandler.rb b/lib/webrick/httpservlet/cgihandler.rb index bb85512f..97159503 100644 --- a/lib/webrick/httpservlet/cgihandler.rb +++ b/lib/webrick/httpservlet/cgihandler.rb @@ -119,6 +119,9 @@ def do_GET(req, res) end alias do_POST do_GET + REGEXP_CGI_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r?\n\z/m + REGEXP_CONTINUED_CGI_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r?\n/m + # :startdoc: end diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index 111dd397..d743b3b1 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -165,9 +165,7 @@ def join(separator = "; ") }) REGEXP_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r\n\z/m - REGEXP_CGI_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r?\n\z/m REGEXP_CONTINUED_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r\n/m - REGEXP_CONTINUED_CGI_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r?\n/m ## # Parses an HTTP header +raw+ into a hash of header fields with an Array @@ -178,8 +176,10 @@ def parse_header(raw, cgi_mode: false) header = Hash.new([].freeze) field = nil - header_line = cgi_mode ? REGEXP_CGI_HEADER_LINE : REGEXP_HEADER_LINE - continued_header_lines = cgi_mode ? REGEXP_CONTINUED_CGI_HEADER_LINE : REGEXP_CONTINUED_HEADER_LINE + header_line = cgi_mode ? WEBrick::HTTPServlet::CGIHandler::REGEXP_CGI_HEADER_LINE + : REGEXP_HEADER_LINE + continued_header_lines = cgi_mode ? WEBrick::HTTPServlet::CGIHandler::REGEXP_CONTINUED_CGI_HEADER_LINE + : REGEXP_CONTINUED_HEADER_LINE raw.each_line{|line| case line From b7b3d613e12ed9999bb379d1872d5871ef90220d Mon Sep 17 00:00:00 2001 From: hisanori Date: Sun, 30 Mar 2025 13:39:20 +0900 Subject: [PATCH 8/8] pass regexp to parse_header instead of cgi_mode --- lib/webrick/httpservlet/cgihandler.rb | 5 ++++- lib/webrick/httputils.rb | 15 ++++++--------- sig/httputils.rbs | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/webrick/httpservlet/cgihandler.rb b/lib/webrick/httpservlet/cgihandler.rb index 97159503..873ddd59 100644 --- a/lib/webrick/httpservlet/cgihandler.rb +++ b/lib/webrick/httpservlet/cgihandler.rb @@ -96,7 +96,9 @@ def do_GET(req, res) "Premature end of script headers: #{@script_filename}" if body.nil? begin - header = HTTPUtils::parse_header(raw_header, cgi_mode: true) + header = HTTPUtils::parse_header(raw_header, + header_line_regexp: REGEXP_CGI_HEADER_LINE , + continued_header_line_regexp: REGEXP_CONTINUED_CGI_HEADER_LINE) if /^(\d+)/ =~ header['status'][0] res.status = $1.to_i header.delete('status') @@ -121,6 +123,7 @@ def do_GET(req, res) REGEXP_CGI_HEADER_LINE = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):([^\r\n\0]*?)\r?\n\z/m REGEXP_CONTINUED_CGI_HEADER_LINE = /^[ \t]+([^\r\n\0]*?)\r?\n/m + private_constant :REGEXP_CGI_HEADER_LINE, :REGEXP_CONTINUED_CGI_HEADER_LINE # :startdoc: end diff --git a/lib/webrick/httputils.rb b/lib/webrick/httputils.rb index d743b3b1..53c9ba2a 100644 --- a/lib/webrick/httputils.rb +++ b/lib/webrick/httputils.rb @@ -170,25 +170,22 @@ def join(separator = "; ") ## # Parses an HTTP header +raw+ into a hash of header fields with an Array # of values. The header is expected to end with \r\n. If the header is in CGI - # format, set +cgi_mode+ to true to allow for a single \n line ending. + # format, use +header_line_regexp+ and +continued_header_line_regexp+ to + # parse the header lines. The default values are REGEXP_HEADER_LINE and + # REGEXP_CONTINUED_HEADER_LINE. - def parse_header(raw, cgi_mode: false) + def parse_header(raw, header_line_regexp: REGEXP_HEADER_LINE, continued_header_line_regexp: REGEXP_CONTINUED_HEADER_LINE) header = Hash.new([].freeze) field = nil - header_line = cgi_mode ? WEBrick::HTTPServlet::CGIHandler::REGEXP_CGI_HEADER_LINE - : REGEXP_HEADER_LINE - continued_header_lines = cgi_mode ? WEBrick::HTTPServlet::CGIHandler::REGEXP_CONTINUED_CGI_HEADER_LINE - : REGEXP_CONTINUED_HEADER_LINE - raw.each_line{|line| case line - when header_line + when header_line_regexp field, value = $1, $2 field.downcase! header[field] = HEADER_CLASSES[field].new unless header.has_key?(field) header[field] << value - when continued_header_lines + when continued_header_line_regexp unless field raise HTTPStatus::BadRequest, "bad header '#{line}'." end diff --git a/sig/httputils.rbs b/sig/httputils.rbs index 864f41da..1c69cd49 100644 --- a/sig/httputils.rbs +++ b/sig/httputils.rbs @@ -26,7 +26,7 @@ module WEBrick HEADER_CLASSES: Hash[String, untyped] - def self?.parse_header: (String raw, ?cgi_mode: bool) -> Hash[String, Array[String]] + def self?.parse_header: (String raw, ?header_line_regexp: Regexp, ?continued_header_line_regexp: Regexp) -> Hash[String, Array[String]] def self?.split_header_value: (String str) -> Array[String]