Skip to content

rx:exit after full match; fix TX population after unused group #2348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
v3.x.y - YYYY-MMM-DD (to be released)
-------------------------------------

- rx: exit after full match (remove /g emulation); ensure capture
groups occurring after unused groups still populate TX vars
[Issue #2336 - @martinhsv]
- Correct CHANGES file entry for #2234
- Add support to test framework for audit log content verification
and add regression tests for issues #2000, #2196
Expand Down
23 changes: 11 additions & 12 deletions src/operators/rx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) {

bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
std::list<SMatch> matches;
Regex *re;

if (m_param.empty() && !m_string->m_containsMacro) {
Expand All @@ -52,29 +51,29 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule,
re = m_re;
}

matches = re->searchAll(input);
std::vector<Utils::SMatchCapture> captures;
re->searchOneMatch(input, captures);

if (rule && rule->hasCaptureAction() && transaction) {
int i = 0;
matches.reverse();
for (const SMatch& a : matches) {
for (const Utils::SMatchCapture& capture : captures) {
const std::string capture_substring(input.substr(capture.m_offset,capture.m_length));
transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
std::to_string(i), a.str());
std::to_string(capture.m_group), capture_substring);
ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
std::to_string(i) + ": " + a.str());
transaction->m_matched.push_back(a.str());
i++;
std::to_string(capture.m_group) + ": " + capture_substring);
transaction->m_matched.push_back(capture_substring);
}
}

for (const auto & i : matches) {
logOffset(ruleMessage, i.offset(), i.str().size());
for (const auto & capture : captures) {
logOffset(ruleMessage, capture.m_offset, capture.m_length);
}

if (m_string->m_containsMacro) {
delete re;
}

if (matches.size() > 0) {
if (captures.size() > 0) {
return true;
}

Expand Down
24 changes: 20 additions & 4 deletions src/utils/regex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#include "src/utils/regex.h"

#include <pcre.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string>
#include <list>

Expand Down Expand Up @@ -99,6 +95,26 @@ std::list<SMatch> Regex::searchAll(const std::string& s) const {
return retList;
}

bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
const char *subject = s.c_str();
int ovector[OVECCOUNT];

int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);

for (int i = 0; i < rc; i++) {
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
if (end > s.size()) {
continue;
}
SMatchCapture capture(i, start, len);
captures.push_back(capture);
}

return (rc > 0);
}

int Regex::search(const std::string& s, SMatch *match) const {
int ovector[OVECCOUNT];
int ret = pcre_exec(m_pc, m_pce, s.c_str(),
Expand Down
12 changes: 12 additions & 0 deletions src/utils/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <fstream>
#include <string>
#include <list>
#include <vector>

#ifndef SRC_UTILS_REGEX_H_
#define SRC_UTILS_REGEX_H_
Expand Down Expand Up @@ -47,6 +48,16 @@ class SMatch {
size_t m_offset;
};

/**
 * Describes where a single regex group matched inside the analyzed string.
 */
struct SMatchCapture {
    size_t m_group;   // group number: 0 is the full match, 6 is capture group 6
    size_t m_offset;  // position of the match within the analyzed string
    size_t m_length;  // length of the matched span

    SMatchCapture(size_t group, size_t offset, size_t length)
        : m_group{group}, m_offset{offset}, m_length{length} {}
};

class Regex {
public:
Expand All @@ -58,6 +69,7 @@ class Regex {
Regex& operator=(const Regex&) = delete;

std::list<SMatch> searchAll(const std::string& s) const;
bool searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const;
int search(const std::string &s, SMatch *match) const;
int search(const std::string &s) const;

Expand Down
138 changes: 138 additions & 0 deletions test/test-cases/regression/variable-TX.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,143 @@
"SecRule REQUEST_HEADERS \"@rx ([A-z]+)\" \"id:1,log,pass,capture,id:14\"",
"SecRule TX:0 \"@rx ([A-z]+)\" \"id:15\""
]
},
{
"enabled":1,
"version_min":300000,
"title":"Testing Variables :: capture group match after unused group",
"client":{
"ip":"200.249.12.31",
"port":123
},
"server":{
"ip":"200.249.12.31",
"port":80
},
"request":{
"uri":"/?key=aadd",
"method":"GET"
},
"response":{
"headers":{
"Date":"Mon, 13 Jul 2015 20:02:41 GMT",
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT",
"Content-Type":"text/html"
},
"body":[
"no need."
]
},
"expected":{
"debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1"
},
"rules":[
"SecRuleEngine On",
"SecRule ARGS \"@rx (aa)(bb|cc)?(dd)\" \"id:1,log,pass,capture,id:16\"",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a quick note: I assume this is just a typo, but it is surprising that a rule has two (different) ids, and that the engine allows that.

Also wanted to check the regression tests result, but there isn't any CI output. Did you merge this patch without that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes there should not be two instances of id. I believe that happened because the preexisting tests in the file likewise had that, and I cut-and-pasted from there as a starting point. Obviously an oversight on my part that I did not notice it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and I cut-and-pasted from there as a starting point - sure, it's no problem, we're humans :).

But why does libmodsecurity3 allow this? And where is the regression test output?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the regression tests are available on travis-ci. There is a build for every commit. The history is available here: https://travis-ci.org/github/SpiderLabs/ModSecurity. That link is also available on the project description.

The checking is disabled on v3/master as a consequence of the development towards 3.1 as described here:
v3.1 Project

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.

"SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\""
]
},
{
"enabled":1,
"version_min":300000,
"title":"Testing Variables :: empty capture group match followed by nonempty capture group",
"client":{
"ip":"200.249.12.31",
"port":123
},
"server":{
"ip":"200.249.12.31",
"port":80
},
"request":{
"uri":"/?key=aadd",
"method":"GET"
},
"response":{
"headers":{
"Date":"Mon, 13 Jul 2015 20:02:41 GMT",
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT",
"Content-Type":"text/html"
},
"body":[
"no need."
]
},
"expected":{
"debug_log":"Added regex subexpression TX\\.3: dd[\\s\\S]*Target value: \"dd\" \\(Variable\\: TX\\:3[\\s\\S]*Rule returned 1"
},
"rules":[
"SecRuleEngine On",
"SecRule ARGS \"@rx (aa)(bb|cc|)(dd)\" \"id:18,phase:1,log,pass,capture\"",
"SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\""
]
},
{
"enabled":1,
"version_min":300000,
"title":"Testing Variables :: repeating capture group -- alternates",
"client":{
"ip":"200.249.12.31",
"port":123
},
"server":{
"ip":"200.249.12.31",
"port":80
},
"request":{
"uri":"/?key=_abc123_",
"method":"GET"
},
"response":{
"headers":{
"Date":"Mon, 13 Jul 2015 20:02:41 GMT",
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT",
"Content-Type":"text/html"
},
"body":[
"no need."
]
},
"expected":{
"debug_log":"Added regex subexpression TX\\.2: abc[\\s\\S]*Added regex subexpression TX\\.3: 123"
},
"rules":[
"SecRuleEngine On",
"SecRule ARGS \"@rx _((?:(abc)|(123))+)_\" \"id:18,phase:1,log,pass,capture\""
]
},
{
"enabled":1,
"version_min":300000,
"title":"Testing Variables :: repeating capture group -- same (nested)",
"client":{
"ip":"200.249.12.31",
"port":123
},
"server":{
"ip":"200.249.12.31",
"port":80
},
"request":{
"uri":"/?key=a:5a:8a:9",
"method":"GET"
},
"response":{
"headers":{
"Date":"Mon, 13 Jul 2015 20:02:41 GMT",
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT",
"Content-Type":"text/html"
},
"body":[
"no need."
]
},
"expected":{
"debug_log":"Added regex subexpression TX\\.1: 5[\\s\\S]*Added regex subexpression TX\\.2: 8[\\s\\S]*Added regex subexpression TX\\.3: 9"
},
"rules":[
"SecRuleEngine On",
"SecRule ARGS \"@rx a:([0-9])(?:a:([0-9])(?:a:([0-9]))*)*\" \"id:18,phase:1,log,pass,capture\""
]
}
]