diff --git a/CHANGES b/CHANGES index b2b051c910..09e36faaae 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,8 @@ v3.x.y - YYYY-MMM-DD (to be released) ------------------------------------- + - Add support for new operator rxGlobal + [@martinhsv] - Adds support to lua 5.4 [@zimmerle] - GeoIP: switch to GEOIP_MEMORY_CACHE from GEOIP_INDEX_CACHE diff --git a/Makefile.am b/Makefile.am index 734b6f3597..6583b252cd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -290,6 +290,7 @@ TESTS+=test/test-cases/secrules-language-tests/operators/noMatch.json TESTS+=test/test-cases/secrules-language-tests/operators/pmFromFile.json TESTS+=test/test-cases/secrules-language-tests/operators/pm.json TESTS+=test/test-cases/secrules-language-tests/operators/rx.json +TESTS+=test/test-cases/secrules-language-tests/operators/rxGlobal.json TESTS+=test/test-cases/secrules-language-tests/operators/streq.json TESTS+=test/test-cases/secrules-language-tests/operators/strmatch.json TESTS+=test/test-cases/secrules-language-tests/operators/unconditionalMatch.json diff --git a/src/Makefile.am b/src/Makefile.am index e6496ccc40..67dfc99f78 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -221,6 +221,7 @@ OPERATORS = \ operators/rbl.cc \ operators/rsub.cc \ operators/rx.cc \ + operators/rx_global.cc \ operators/str_eq.cc \ operators/str_match.cc \ operators/validate_byte_range.cc \ diff --git a/src/operators/operator.cc b/src/operators/operator.cc index 3bb12ce8ef..cf4144780f 100644 --- a/src/operators/operator.cc +++ b/src/operators/operator.cc @@ -47,6 +47,7 @@ #include "src/operators/rbl.h" #include "src/operators/rsub.h" #include "src/operators/rx.h" +#include "src/operators/rx_global.h" #include "src/operators/str_eq.h" #include "src/operators/str_match.h" #include "src/operators/validate_byte_range.h" @@ -169,6 +170,7 @@ Operator *Operator::instantiate(std::string op, std::string param_str) { IF_MATCH(rbl) { return new Rbl(std::move(param)); } IF_MATCH(rsub) { return new Rsub(std::move(param)); } 
IF_MATCH(rx) { return new Rx(std::move(param)); } + IF_MATCH(rxglobal) { return new RxGlobal(std::move(param)); } IF_MATCH(streq) { return new StrEq(std::move(param)); } IF_MATCH(strmatch) { return new StrMatch(std::move(param)); } IF_MATCH(validatebyterange) {
diff --git a/src/operators/rx_global.cc b/src/operators/rx_global.cc
new file mode 100644
index 0000000000..bd48c730b3
--- /dev/null
+++ b/src/operators/rx_global.cc
@@ -0,0 +1,100 @@
+/*
+ * ModSecurity, http://www.modsecurity.org/
+ * Copyright (c) 2020 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+ *
+ * You may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * If any of the files related to licensing are missing or if you have any
+ * other questions related to licensing please contact Trustwave Holdings, Inc.
+ * directly using the email address security@modsecurity.org.
+ *
+ */
+
+#include "src/operators/rx_global.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "src/operators/operator.h"
+#include "modsecurity/rule.h"
+#include "modsecurity/rule_message.h"
+
+namespace modsecurity {
+namespace operators {
+
+
+/**
+ * Compiles the operator's pattern ahead of time when it is a fixed string.
+ *
+ * A pattern that contains a macro is left uncompiled here and is compiled
+ * by evaluate() on each call instead.  Neither argument is consulted and
+ * the function always reports success.
+ */
+bool RxGlobal::init(const std::string &arg, std::string *error) {
+    if (m_string->m_containsMacro == false) {
+        m_re.reset(new Regex(m_param));
+    }
+
+    return true;
+}
+
+
+/**
+ * Collects every match of the pattern in `input`.
+ *
+ * When the rule carries a capture action, each captured substring is
+ * stored in the TX collection under its group number, appended to
+ * m_matched and reported through ms_dbg_a at level 7.  The offset of
+ * every capture is passed to logOffset() regardless of that action.
+ *
+ * @return true if at least one capture was produced, or if the pattern
+ *         is empty and contains no macro; false otherwise.
+ */
+bool RxGlobal::evaluate(Transaction *transaction, RuleWithActions *rule,
+    const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
+    const bool hasMacro = m_string->m_containsMacro;
+
+    if (m_param.empty() && !hasMacro) {
+        return true;
+    }
+
+    // A macro-bearing pattern is compiled per call.  Holding it in a
+    // unique_ptr releases it on every exit path, including an exception
+    // thrown while matching or while storing the captures.
+    std::unique_ptr<Regex> expanded;
+    const Regex *re = m_re.get();
+    if (hasMacro) {
+        const std::string eparam(m_string->evaluate(transaction));
+        expanded.reset(new Regex(eparam));
+        re = expanded.get();
+    }
+
+    std::vector<Utils::SMatchCapture> captures;
+    re->searchGlobal(input, captures);
+
+    if (rule && rule->hasCaptureAction() && transaction) {
+        for (const Utils::SMatchCapture& capture : captures) {
+            const std::string capture_substring(
+                input.substr(capture.m_offset, capture.m_length));
+            transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
+                std::to_string(capture.m_group), capture_substring);
+            ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
+                std::to_string(capture.m_group) + ": " + capture_substring);
+            transaction->m_matched.push_back(capture_substring);
+        }
+    }
+
+    for (const auto & capture : captures) {
+        logOffset(ruleMessage, capture.m_offset, capture.m_length);
+    }
+
+    return !captures.empty();
+}
+
+
+}  // namespace operators
+}  // namespace modsecurity
diff --git a/src/operators/rx_global.h b/src/operators/rx_global.h
new file mode 100644
index 0000000000..237079f4b9
--- /dev/null
+++ b/src/operators/rx_global.h
@@ -0,0 +1,64 @@
+/*
+ * ModSecurity, http://www.modsecurity.org/
+ * Copyright (c) 2020 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+ *
+ * You may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * If any of the files related to licensing are missing or if you have any
+ * other questions related to licensing please contact Trustwave Holdings, Inc.
+ * directly using the email address security@modsecurity.org.
+ *
+ */
+
+#ifndef SRC_OPERATORS_RX_GLOBAL_H_
+#define SRC_OPERATORS_RX_GLOBAL_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "src/operators/operator.h"
+#include "src/utils/regex.h"
+
+
+namespace modsecurity {
+using Utils::SMatch;
+using Utils::regex_search;
+using Utils::Regex;
+
+namespace operators {
+
+
+/**
+ * Operator "RxGlobal": a regular-expression operator whose evaluate()
+ * gathers its captures through Regex::searchGlobal.
+ */
+class RxGlobal : public Operator {
+ public:
+    /** @ingroup ModSecurity_Operator */
+    explicit RxGlobal(std::unique_ptr<RunTimeString> param)
+        : Operator("RxGlobal", std::move(param)) {
+        m_couldContainsMacro = true;
+    }
+
+    bool evaluate(Transaction *transaction, RuleWithActions *rule,
+        const std::string& input,
+        std::shared_ptr<RuleMessage> ruleMessage) override;
+
+    bool init(const std::string &arg, std::string *error) override;
+
+ private:
+    // Pattern compiled by init() when it contains no macro; empty
+    // otherwise.  Owned here, so no user-written destructor is needed.
+    std::unique_ptr<Regex> m_re;
+};
+
+
+}  // namespace operators
+}  // namespace modsecurity
+
+
+#endif  // SRC_OPERATORS_RX_GLOBAL_H_
diff --git a/src/parser/seclang-parser.yy b/src/parser/seclang-parser.yy index 02e8e9eab9..fdb2bb1113 100644 --- a/src/parser/seclang-parser.yy +++ b/src/parser/seclang-parser.yy @@ -132,6 +132,7 @@ class Driver; #include "src/operators/rbl.h" #include "src/operators/rsub.h" #include "src/operators/rx.h" +#include "src/operators/rx_global.h" #include "src/operators/str_eq.h" #include "src/operators/str_match.h" #include "src/operators/unconditional_match.h" @@ -455,6 +456,7 @@ using namespace modsecurity::operators; OPERATOR_RSUB "OPERATOR_RSUB" OPERATOR_RX_CONTENT_ONLY "Operator RX (content only)" OPERATOR_RX "OPERATOR_RX" + OPERATOR_RX_GLOBAL "OPERATOR_RX_GLOBAL" OPERATOR_STR_EQ "OPERATOR_STR_EQ" OPERATOR_STR_MATCH "OPERATOR_STR_MATCH" OPERATOR_UNCONDITIONAL_MATCH "OPERATOR_UNCONDITIONAL_MATCH" @@ -1037,6 +1039,10 @@ op_before_init: { OPERATOR_CONTAINER($$, new operators::Rx(std::move($2))); } + | OPERATOR_RX_GLOBAL run_time_string + { + OPERATOR_CONTAINER($$, new
operators::RxGlobal(std::move($2))); + } | OPERATOR_STR_EQ run_time_string { OPERATOR_CONTAINER($$, new operators::StrEq(std::move($2))); diff --git a/src/parser/seclang-scanner.ll b/src/parser/seclang-scanner.ll index 05794a32a3..9686027ba7 100755 --- a/src/parser/seclang-scanner.ll +++ b/src/parser/seclang-scanner.ll @@ -302,6 +302,7 @@ OPERATOR_PM (?i:@pm) OPERATOR_RBL (?i:@rbl) OPERATOR_RSUB (?i:@rsub) OPERATOR_RX (?i:@rx) +OPERATOR_RX_GLOBAL (?i:@rxGlobal) OPERATOR_STR_EQ (?i:@streq) OPERATOR_STR_MATCH (?i:@strmatch) OPERATOR_UNCONDITIONAL_MATCH (?i:@unconditionalMatch) @@ -1105,6 +1106,7 @@ EQUALS_MINUS (?i:=\-) {OPERATOR_PM} { BEGIN_PARAMETER(); return p::make_OPERATOR_PM(*driver.loc.back()); } {OPERATOR_RBL} { BEGIN_PARAMETER(); return p::make_OPERATOR_RBL( *driver.loc.back()); } {OPERATOR_RX} { BEGIN_PARAMETER(); return p::make_OPERATOR_RX(*driver.loc.back()); } +{OPERATOR_RX_GLOBAL} { BEGIN_PARAMETER(); return p::make_OPERATOR_RX_GLOBAL(*driver.loc.back()); } {OPERATOR_STR_EQ} { BEGIN_PARAMETER(); return p::make_OPERATOR_STR_EQ(*driver.loc.back()); } {OPERATOR_STR_MATCH} { BEGIN_PARAMETER(); return p::make_OPERATOR_STR_MATCH(*driver.loc.back()); } {OPERATOR_BEGINS_WITH} { BEGIN_PARAMETER(); return p::make_OPERATOR_BEGINS_WITH(*driver.loc.back()); }
diff --git a/src/utils/regex.cc b/src/utils/regex.cc
index 0feb256cca..4f5c73a0ee 100644
--- a/src/utils/regex.cc
+++ b/src/utils/regex.cc
@@ -33,6 +33,22 @@
 namespace modsecurity {
 namespace Utils {
 
+// Reports whether the linked PCRE library's default newline convention
+// recognises the two-character sequence CR LF as a single newline.
+//
+// PCRE_CONFIG_NEWLINE yields 13 (CR), 10 (LF), 3338 (CRLF), -2 (ANYCRLF)
+// or -1 (ANY); only the last three accept CRLF.  The value is fixed when
+// the library is built, so it is queried once and remembered.  Should the
+// query fail, `d` keeps its initial 0 and the answer is false.
+bool crlfIsNewline() {
+    static const bool crlf_is_newline = [] {
+        int d = 0;
+        pcre_config(PCRE_CONFIG_NEWLINE, &d);
+        return d == -1 || d == -2 || d == ((13 << 8) | 10);
+    }();
+
+    return crlf_is_newline;
+}
 
 Regex::Regex(const std::string& pattern_)
     : pattern(pattern_.empty() ? ".*" : pattern_) {
@@ -115,6 +131,85 @@ bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& cap
     return (rc > 0);
 }
 
+/**
+ * Finds every non-overlapping match of the pattern in `s`, scanning left
+ * to right, and appends one SMatchCapture per participating group (group
+ * 0, the full match, included) to `captures`.
+ *
+ * Group numbers continue across matches: each match numbers its groups
+ * from the size `captures` had when that match was found.  Groups that
+ * did not take part in a match are skipped.
+ *
+ * @return true if `captures` is non-empty on return.
+ */
+bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const {
+    const char *subject = s.c_str();
+    const size_t subjectLen = s.length();
+
+    bool prev_match_zero_length = false;
+    size_t startOffset = 0;
+
+    while (startOffset <= subjectLen) {
+        int ovector[OVECCOUNT];
+        // After an empty match, first retry at the same position asking
+        // for a non-empty match anchored there; otherwise scan normally.
+        const int pcre_options = prev_match_zero_length
+            ? (PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED) : 0;
+        const int rc = pcre_exec(m_pc, m_pce, subject,
+            static_cast<int>(subjectLen), static_cast<int>(startOffset),
+            pcre_options, ovector, OVECCOUNT);
+
+        if (rc > 0) {
+            const int matchStart = ovector[0];
+            const int matchEnd = ovector[1];
+            if (matchStart > matchEnd) {
+                // \K inside an assertion put the start of the match after
+                // its end.  Resuming from such an end would not advance,
+                // so stop scanning instead of looping forever.
+                break;
+            }
+
+            const size_t firstGroupForThisFullMatch = captures.size();
+            for (int i = 0; i < rc; i++) {
+                const int start = ovector[2*i];
+                const int end = ovector[2*i+1];
+                // Groups that did not participate are reported as -1/-1.
+                if (start < 0 || end < start
+                    || static_cast<size_t>(end) > subjectLen) {
+                    continue;
+                }
+                captures.push_back(SMatchCapture(
+                    firstGroupForThisFullMatch + i,
+                    static_cast<size_t>(start),
+                    static_cast<size_t>(end - start)));
+            }
+
+            // Always resume at the end of this match.  An empty match
+            // found ahead of startOffset would otherwise be found, and
+            // recorded, again by the following scans.
+            startOffset = static_cast<size_t>(matchEnd);
+            prev_match_zero_length = (matchEnd == matchStart);
+        } else {
+            // rc == 0 and negative return codes both count as "no match".
+            if (!prev_match_zero_length) {
+                // A normal scan found nothing more.  We are done.
+                break;
+            }
+            // The retry after an empty match found nothing, so step over
+            // one character (two when standing on a CRLF newline) and
+            // go back to normal scanning.
+            startOffset++;
+            if (crlfIsNewline() && (startOffset < subjectLen)
+                && (s[startOffset-1] == '\r') && (s[startOffset] == '\n')) {
+                startOffset++;
+            }
+            prev_match_zero_length = false;
+        }
+    }
+
+    return !captures.empty();
+}
+
 int Regex::search(const std::string& s, SMatch *match) const {
     int ovector[OVECCOUNT];
     int ret = pcre_exec(m_pc, m_pce, s.c_str(),
diff --git a/src/utils/regex.h b/src/utils/regex.h index 46dab6b83e..661a8767fe 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -70,6 +70,7 @@ class Regex { std::list searchAll(const std::string& s) const; bool searchOneMatch(const std::string& s, std::vector& captures) const; + bool searchGlobal(const std::string& s, std::vector& captures) const; int search(const std::string &s, SMatch *match) const; int search(const std::string &s) const; diff --git a/test/test-cases/regression/operator-rxGlobal.json b/test/test-cases/regression/operator-rxGlobal.json new file mode 100644 index 0000000000..d49f31d6a3 --- /dev/null +++ b/test/test-cases/regression/operator-rxGlobal.json @@ -0,0 +1,46 @@ +[ + { + "enabled":1, + "version_min":300000, + "title":"Testing Operator :: @rxGlobal", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "headers":{ + "Host":"localhost", + "User-Agent":"curl/7.38.0", + "Accept":"*/*", + "Content-Length": "27", + "Content-Type": "application/x-www-form-urlencoded" + }, + "uri":"/", + "method":"POST", + "body": [ + "param1=value1&param2=value2" + ] + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need."
+ ] + }, + "expected":{ + "debug_log":"Executing operator \"RxGlobal" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rxGlobal (value1)\" \"id:1,phase:2,pass,t:trim\"" + ] + } +] diff --git a/test/test-cases/regression/variable-TX.json b/test/test-cases/regression/variable-TX.json index 904628e9b7..199de4b8b1 100644 --- a/test/test-cases/regression/variable-TX.json +++ b/test/test-cases/regression/variable-TX.json @@ -39,7 +39,7 @@ "SecRuleEngine On", "SecResponseBodyAccess On", "SecRequestBodyAccess On", - "SecRule RESPONSE_BODY \"@rx ([0-9]+)\" \"id:1,phase:4,capture,id:105\"", + "SecRule RESPONSE_BODY \"@rx ([0-9]+)\" \"id:105,phase:4,capture\"", "SecRule TX \"@rx ([A-z]+)\" \"phase:4,id:106\"" ] }, @@ -77,7 +77,7 @@ }, "rules":[ "SecRuleEngine On", - "SecRule REQUEST_HEADERS \"@rx ([A-z]+)\" \"id:1,log,pass,capture,id:14\"", + "SecRule REQUEST_HEADERS \"@rx ([A-z]+)\" \"id:14,log,pass,capture\"", "SecRule TX:0 \"@rx ([A-z]+)\" \"id:15\"" ] }, @@ -112,7 +112,7 @@ }, "rules":[ "SecRuleEngine On", - "SecRule ARGS \"@rx (aa)(bb|cc)?(dd)\" \"id:1,log,pass,capture,id:16\"", + "SecRule ARGS \"@rx (aa)(bb|cc)?(dd)\" \"id:16,log,pass,capture\"", "SecRule TX:3 \"@streq dd\" \"id:19,phase:2,log,pass\"" ] }, @@ -218,5 +218,40 @@ "SecRuleEngine On", "SecRule ARGS \"@rx a:([0-9])(?:a:([0-9])(?:a:([0-9]))*)*\" \"id:18,phase:1,log,pass,capture\"" ] + }, + { + "enabled":1, + "version_min":310000, + "title":"Testing Variables :: rxGlobal capture groups for two full matches", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "uri":"/?key=aabbddxxaaccddyy", + "method":"GET" + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", + "Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." 
+ ] + }, + "expected":{ + "debug_log":"Added regex subexpression TX\\.0: aabbdd[\\s\\S]*TX\\.1: aa[\\s\\S]*TX\\.2: bb[\\s\\S]*TX\\.3: dd[\\s\\S]*TX\\.4: aaccdd[\\s\\S]*TX\\.5: aa[\\s\\S]*TX\\.6: cc[\\s\\S]*TX\\.7: dd" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rxGlobal (aa)(bb|cc)?(dd)\" \"id:19,log,pass,capture\"", + "SecRule TX:6 \"@streq cc\" \"id:20,phase:2,log,pass\"" + ] } ]