From b9620c26a0ceaa23d2d7876718586f969f01168f Mon Sep 17 00:00:00 2001 From: martinhsv <55407942+martinhsv@users.noreply.github.com> Date: Mon, 29 Jun 2020 06:13:45 -0700 Subject: [PATCH] rx:exit after full match; fix TX population after unused group --- CHANGES | 3 + src/operators/rx.cc | 23 ++-- src/utils/regex.cc | 24 +++- src/utils/regex.h | 12 ++ test/test-cases/regression/variable-TX.json | 138 ++++++++++++++++++++ 5 files changed, 184 insertions(+), 16 deletions(-) diff --git a/CHANGES b/CHANGES index 1ce3452f98..f99378629a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ v3.x.y - YYYY-MMM-DD (to be released) ------------------------------------- + - rx: exit after full match (remove /g emulation); ensure capture + groups occuring after unused groups still populate TX vars + [Issue #2336 - @martinhsv] - Correct CHANGES file entry for #2234 - Add support to test framework for audit log content verification and add regression tests for issues #2000, #2196 diff --git a/src/operators/rx.cc b/src/operators/rx.cc index 0ba983d73f..b4fc6ff4d7 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) { bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, const std::string& input, std::shared_ptr ruleMessage) { - std::list matches; Regex *re; if (m_param.empty() && !m_string->m_containsMacro) { @@ -52,29 +51,29 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, re = m_re; } - matches = re->searchAll(input); + std::vector captures; + re->searchOneMatch(input, captures); + if (rule && rule->hasCaptureAction() && transaction) { - int i = 0; - matches.reverse(); - for (const SMatch& a : matches) { + for (const Utils::SMatchCapture& capture : captures) { + const std::string capture_substring(input.substr(capture.m_offset,capture.m_length)); transaction->m_collections.m_tx_collection->storeOrUpdateFirst( - std::to_string(i), a.str()); + std::to_string(capture.m_group), capture_substring); ms_dbg_a(transaction, 7, "Added regex subexpression TX." + - std::to_string(i) + ": " + a.str()); - transaction->m_matched.push_back(a.str()); - i++; + std::to_string(capture.m_group) + ": " + capture_substring); + transaction->m_matched.push_back(capture_substring); } } - for (const auto & i : matches) { - logOffset(ruleMessage, i.offset(), i.str().size()); + for (const auto & capture : captures) { + logOffset(ruleMessage, capture.m_offset, capture.m_length); } if (m_string->m_containsMacro) { delete re; } - if (matches.size() > 0) { + if (captures.size() > 0) { return true; } diff --git a/src/utils/regex.cc b/src/utils/regex.cc index be56e378a0..0feb256cca 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -16,10 +16,6 @@ #include "src/utils/regex.h" #include -#include -#include -#include -#include #include #include @@ -99,6 +95,26 @@ std::list Regex::searchAll(const std::string& s) const { return retList; } +bool Regex::searchOneMatch(const std::string& s, std::vector& captures) const { + const char *subject = s.c_str(); + int ovector[OVECCOUNT]; + + int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT); + + for (int i = 0; i < rc; i++) { + size_t start = ovector[2*i]; + size_t end = ovector[2*i+1]; + size_t len = end - start; + if (end > s.size()) { + continue; + } + SMatchCapture capture(i, start, len); + captures.push_back(capture); + } + + return (rc > 0); +} + int Regex::search(const std::string& s, SMatch *match) const { int ovector[OVECCOUNT]; int ret = pcre_exec(m_pc, m_pce, s.c_str(), diff --git a/src/utils/regex.h b/src/utils/regex.h index 7dcc4dbf61..46dab6b83e 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifndef SRC_UTILS_REGEX_H_ #define SRC_UTILS_REGEX_H_ @@ -47,6 +48,16 @@ class SMatch { size_t m_offset; }; +struct SMatchCapture { + SMatchCapture(size_t group, size_t offset, size_t length) : + m_group(group), + m_offset(offset), + m_length(length) { } + + size_t m_group; // E.g. 0 = full match; 6 = capture group 6 + size_t m_offset; // offset of match within the analyzed string + size_t m_length; +}; class Regex { public: @@ -58,6 +69,7 @@ class Regex { Regex& operator=(const Regex&) = delete; std::list searchAll(const std::string& s) const; + bool searchOneMatch(const std::string& s, std::vector& captures) const; int search(const std::string &s, SMatch *match) const; int search(const std::string &s) const; diff --git a/test/test-cases/regression/variable-TX.json b/test/test-cases/regression/variable-TX.json index 0cd45381b4..904628e9b7 100644 --- a/test/test-cases/regression/variable-TX.json +++ b/test/test-cases/regression/variable-TX.json @@ -80,5 +80,143 @@ "SecRule REQUEST_HEADERS \"@rx ([A-z]+)\" \"id:1,log,pass,capture,id:14\"", "SecRule TX:0 \"@rx ([A-z]+)\" \"id:15\"" ] + }, + { + "enabled":1, + "version_min":300000, + "title":"Testing Variables :: capture group match after unused group", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "uri":"/?key=aadd", + "method":"GET" + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." + ] + }, + "expected":{ + "debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx (aa)(bb|cc)?(dd)\" \"id:1,log,pass,capture,id:16\"", + "SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\"" + ] + }, + { + "enabled":1, + "version_min":300000, + "title":"Testing Variables :: empty capture group match followed by nonempty capture group", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "uri":"/?key=aadd", + "method":"GET" + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." + ] + }, + "expected":{ + "debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx (aa)(bb|cc|)(dd)\" \"id:18,phase:1,log,pass,capture\"", + "SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\"" + ] + }, + { + "enabled":1, + "version_min":300000, + "title":"Testing Variables :: repeating capture group -- alternates", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "uri":"/?key=_abc123_", + "method":"GET" + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." + ] + }, + "expected":{ + "debug_log":"Added regex subexpression TX\\.2: abc[\\s\\S]*Added regex subexpression TX\\.3: 123" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx _((?:(abc)|(123))+)_\" \"id:18,phase:1,log,pass,capture\"" + ] + }, + { + "enabled":1, + "version_min":300000, + "title":"Testing Variables :: repeating capture group -- same (nested)", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "uri":"/?key=a:5a:8a:9", + "method":"GET" + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." + ] + }, + "expected":{ + "debug_log":"Added regex subexpression TX\\.1: 5[\\s\\S]*Added regex subexpression TX\\.2: 8[\\s\\S]*Added regex subexpression TX\\.3: 9" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx a:([0-9])(?:a:([0-9])(?:a:([0-9]))*)*\" \"id:18,phase:1,log,pass,capture\"" + ] } ]