-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Add support for new operator rxGlobal #2396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* ModSecurity, http://www.modsecurity.org/ | ||
* Copyright (c) 2020 Trustwave Holdings, Inc. (http://www.trustwave.com/) | ||
* | ||
* You may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* If any of the files related to licensing are missing or if you have any | ||
* other questions related to licensing please contact Trustwave Holdings, Inc. | ||
* directly using the email address [email protected]. | ||
* | ||
*/ | ||
|
||
#include "src/operators/rx_global.h" | ||
|
||
#include <string> | ||
#include <list> | ||
#include <memory> | ||
|
||
#include "src/operators/operator.h" | ||
#include "modsecurity/rule.h" | ||
#include "modsecurity/rule_message.h" | ||
|
||
namespace modsecurity { | ||
namespace operators { | ||
|
||
|
||
bool RxGlobal::init(const std::string &arg, std::string *error) { | ||
if (m_string->m_containsMacro == false) { | ||
m_re = new Regex(m_param); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
|
||
bool RxGlobal::evaluate(Transaction *transaction, RuleWithActions *rule, | ||
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) { | ||
Regex *re; | ||
|
||
if (m_param.empty() && !m_string->m_containsMacro) { | ||
return true; | ||
} | ||
|
||
if (m_string->m_containsMacro) { | ||
std::string eparam(m_string->evaluate(transaction)); | ||
re = new Regex(eparam); | ||
} else { | ||
re = m_re; | ||
} | ||
|
||
std::vector<Utils::SMatchCapture> captures; | ||
re->searchGlobal(input, captures); | ||
|
||
if (rule && rule->hasCaptureAction() && transaction) { | ||
for (const Utils::SMatchCapture& capture : captures) { | ||
const std::string capture_substring(input.substr(capture.m_offset,capture.m_length)); | ||
transaction->m_collections.m_tx_collection->storeOrUpdateFirst( | ||
std::to_string(capture.m_group), capture_substring); | ||
ms_dbg_a(transaction, 7, "Added regex subexpression TX." + | ||
std::to_string(capture.m_group) + ": " + capture_substring); | ||
transaction->m_matched.push_back(capture_substring); | ||
} | ||
} | ||
|
||
for (const auto & capture : captures) { | ||
logOffset(ruleMessage, capture.m_offset, capture.m_length); | ||
} | ||
|
||
if (m_string->m_containsMacro) { | ||
delete re; | ||
} | ||
|
||
if (captures.size() > 0) { | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
|
||
} // namespace operators | ||
} // namespace modsecurity |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* ModSecurity, http://www.modsecurity.org/ | ||
* Copyright (c) 2020 Trustwave Holdings, Inc. (http://www.trustwave.com/) | ||
* | ||
* You may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* If any of the files related to licensing are missing or if you have any | ||
* other questions related to licensing please contact Trustwave Holdings, Inc. | ||
* directly using the email address [email protected]. | ||
* | ||
*/ | ||
|
||
#ifndef SRC_OPERATORS_RX_GLOBAL_H_ | ||
#define SRC_OPERATORS_RX_GLOBAL_H_ | ||
|
||
#include <string> | ||
//#include <list> | ||
#include <memory> | ||
#include <utility> | ||
|
||
#include "src/operators/operator.h" | ||
#include "src/utils/regex.h" | ||
|
||
|
||
namespace modsecurity { | ||
using Utils::SMatch; | ||
using Utils::regex_search; | ||
using Utils::Regex; | ||
|
||
namespace operators { | ||
|
||
|
||
class RxGlobal : public Operator { | ||
public: | ||
/** @ingroup ModSecurity_Operator */ | ||
explicit RxGlobal(std::unique_ptr<RunTimeString> param) | ||
: m_re(nullptr), | ||
Operator("RxGlobal", std::move(param)) { | ||
m_couldContainsMacro = true; | ||
} | ||
|
||
~RxGlobal() { | ||
if (m_string->m_containsMacro == false && m_re != NULL) { | ||
delete m_re; | ||
m_re = NULL; | ||
} | ||
} | ||
|
||
bool evaluate(Transaction *transaction, RuleWithActions *rule, | ||
const std::string& input, | ||
std::shared_ptr<RuleMessage> ruleMessage) override; | ||
|
||
bool init(const std::string &arg, std::string *error) override; | ||
|
||
private: | ||
Regex *m_re; | ||
}; | ||
|
||
|
||
} // namespace operators | ||
} // namespace modsecurity | ||
|
||
|
||
#endif // SRC_OPERATORS_RX_GLOBAL_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,24 @@ | |
namespace modsecurity { | ||
namespace Utils { | ||
|
||
// Helper function to tell us if the current config indicates CRLF is a valid newline sequence | ||
bool crlfIsNewline() { | ||
int d = 0; | ||
pcre_config(PCRE_CONFIG_NEWLINE, &d); | ||
|
||
unsigned int option_bits = (d == 13)? PCRE_NEWLINE_CR : | ||
(d == 10)? PCRE_NEWLINE_LF : | ||
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF : | ||
(d == -2)? PCRE_NEWLINE_ANYCRLF : | ||
(d == -1)? PCRE_NEWLINE_ANY : 0; | ||
|
||
bool crlf_is_newline = | ||
option_bits == PCRE_NEWLINE_ANY || | ||
option_bits == PCRE_NEWLINE_CRLF || | ||
option_bits == PCRE_NEWLINE_ANYCRLF; | ||
|
||
return crlf_is_newline; | ||
} | ||
|
||
Regex::Regex(const std::string& pattern_) | ||
: pattern(pattern_.empty() ? ".*" : pattern_) { | ||
|
@@ -115,6 +133,66 @@ bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& cap | |
return (rc > 0); | ||
} | ||
|
||
bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const { | ||
const char *subject = s.c_str(); | ||
|
||
bool prev_match_zero_length = false; | ||
int pcre_options = 0; | ||
int startOffset = 0; | ||
|
||
while (startOffset <= s.length()) { | ||
int ovector[OVECCOUNT]; | ||
if (prev_match_zero_length) { | ||
pcre_options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; | ||
} else { | ||
pcre_options = 0; // common case | ||
} | ||
int rc = pcre_exec(m_pc, m_pce, subject, s.length(), startOffset, pcre_options, ovector, OVECCOUNT); | ||
|
||
if (rc > 0) { | ||
size_t firstGroupForThisFullMatch = captures.size(); | ||
for (int i = 0; i < rc; i++) { | ||
size_t start = ovector[2*i]; | ||
size_t end = ovector[2*i+1]; | ||
size_t len = end - start; | ||
if (end > s.length()) { | ||
continue; | ||
} | ||
SMatchCapture capture(firstGroupForThisFullMatch + i, start, len); | ||
captures.push_back(capture); | ||
|
||
if (i == 0) { | ||
if (len > 0) { | ||
// normal case; next call to pcre_exec should start after the end of the last full match string | ||
startOffset = end; | ||
prev_match_zero_length = false; | ||
} else { | ||
// zero-length match; modify next match attempt to avoid infinite loop | ||
prev_match_zero_length = true; | ||
} | ||
} | ||
} | ||
} else { | ||
if (prev_match_zero_length) { | ||
// The n-1 search found a zero-length match, so we did a subsequent search | ||
// with the special flags. That subsequent exec did not find a match, so now advance | ||
// by one character (unless CRLF, then advance by two) | ||
startOffset++; | ||
if (crlfIsNewline() && (startOffset < s.length()) && (s[startOffset-1] == '\r') | ||
&& (s[startOffset] == '\n')) { | ||
startOffset++; | ||
} | ||
prev_match_zero_length = false; | ||
} else { | ||
// normal case; no match on most recent scan (with options=0). We are done. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just my 2 cents... Here you check the value of Referring to the PCRE API, the value what you're looking for is -1. I'm sure the chances are very-very-very small to get the value 0 (considering the value of the OVECTOR, which is 900), but may be here would be good to make some error handling. "no match on most recent" is only when the value of Just a few thoughts... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @airween , Thanks for the observation; it is at least something for us to have considered. As you note, the OVECTOR is enormous. It seems incredibly unlikely that anyone would compose a regex pattern with 300 capture groups (keeping in mind that repeating capture groups like '(sub-expression)*' only count as one). It is true that the functionality implication here is that if this use case occurs, it will be treated the same as 'no match'. Note, that this is also the case for the standard @rx operator in v3.x -- both before and after the changes made this past summer, for example. We may well want to make some adjustments for this edge case going forward. Whether we want to change main behaviour in this case might be debatable, but we should at least produce some kind of debug message. However, I think it would make more sense to adjust all relevant occurrences of this behaviour at the same time (including the much more important 'rx' operator), rather than adjusting this pull request in isolation. For those reasons, my inclination here is to leave the current pull request as is. Thanks again. |
||
break; | ||
} | ||
} | ||
} | ||
|
||
return (captures.size() > 0); | ||
} | ||
|
||
int Regex::search(const std::string& s, SMatch *match) const { | ||
int ovector[OVECCOUNT]; | ||
int ret = pcre_exec(m_pc, m_pce, s.c_str(), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
[ | ||
{ | ||
"enabled":1, | ||
"version_min":300000, | ||
"title":"Testing Operator :: @rxGlobal", | ||
"client":{ | ||
"ip":"200.249.12.31", | ||
"port":123 | ||
}, | ||
"server":{ | ||
"ip":"200.249.12.31", | ||
"port":80 | ||
}, | ||
"request":{ | ||
"headers":{ | ||
"Host":"localhost", | ||
"User-Agent":"curl/7.38.0", | ||
"Accept":"*/*", | ||
"Content-Length": "27", | ||
"Content-Type": "application/x-www-form-urlencoded" | ||
}, | ||
"uri":"/", | ||
"method":"POST", | ||
"body": [ | ||
"param1=value1¶m2=value2" | ||
] | ||
}, | ||
"response":{ | ||
"headers":{ | ||
"Date":"Mon, 13 Jul 2015 20:02:41 GMT", | ||
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", | ||
"Content-Type":"text/html" | ||
}, | ||
"body":[ | ||
"no need." | ||
] | ||
}, | ||
"expected":{ | ||
"debug_log":"Executing operator \"RxGlobal" | ||
}, | ||
"rules":[ | ||
"SecRuleEngine On", | ||
"SecRule ARGS \"@rxGlobal (value1)\" \"id:1,phase:2,pass,t:trim\"" | ||
] | ||
} | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
int pcre_options scope could be reduce. Having it inside the while loop -
https://github.com/SpiderLabs/ModSecurity/blob/2672db103e511cbf0866efc62dc7a3b4f43a8657/src/utils/regex.cc#L139-L147