From 8b047fb974a43b2e1f591d7ec1b4fb71d5dc1dac Mon Sep 17 00:00:00 2001 From: WGH Date: Thu, 17 Jan 2019 01:55:17 +0300 Subject: [PATCH] Refactor regex code This commit fixes quite a few odd things in regex code: * Lack of encapsulation. * Non-method functions for matching without retrieving all groups. * Regex class being copyable without proper copy-constructor (potential UAF and double free due to pointer members m_pc and m_pce). * Redundant SMatch::m_length, which always equals to match.size() anyway. * Weird SMatch::size_ member which is initialized only by one of the three matching functions, and equals to the return value of that function anyways. * Several places in code having std::string value instead of reference. --- CHANGES | 2 + .../backend/in_memory-per_process.cc | 2 +- src/modsecurity.cc | 14 ++--- src/operators/rx.cc | 9 ++-- src/operators/verify_cpf.cc | 8 +-- src/operators/verify_ssn.cc | 8 +-- src/utils/regex.cc | 33 +++++------- src/utils/regex.h | 53 +++++++++++-------- test/regression/regression.cc | 2 +- test/unit/unit_test.cc | 6 +-- 10 files changed, 69 insertions(+), 68 deletions(-) diff --git a/CHANGES b/CHANGES index 525de6d241..4fe7d1552e 100644 --- a/CHANGES +++ b/CHANGES @@ -37,6 +37,8 @@ v3.0.4 - YYYY-MMM-DD (to be released) [Issue #1943 - @victorhora, @allanbomsft] - Fix buffer size for utf8toUnicode transformation [Issue #1208 - @katef, @victorhora] + - Refactoring on Regex and SMatch classes. + [@WGH-] v3.0.3 - 2018-Nov-05 diff --git a/src/collection/backend/in_memory-per_process.cc b/src/collection/backend/in_memory-per_process.cc index 1035977c58..9f13fa589f 100644 --- a/src/collection/backend/in_memory-per_process.cc +++ b/src/collection/backend/in_memory-per_process.cc @@ -134,7 +134,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var, //std::string name = std::string(var, var.find(":") + 2, // var.size() - var.find(":") - 3); //size_t keySize = col.size(); - Utils::Regex r = Utils::Regex(var); + Utils::Regex r(var); for (const auto& x : *this) { //if (x.first.size() <= keySize + 1) { diff --git a/src/modsecurity.cc b/src/modsecurity.cc index 7ccfdcb141..ab9bba2ef3 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -259,9 +259,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len, std::string value; yajl_gen_map_open(g); vars.pop_back(); - std::string startingAt = vars.back().match; + const std::string &startingAt = vars.back().str(); vars.pop_back(); - std::string size = vars.back().match; + const std::string &size = vars.back().str(); vars.pop_back(); yajl_gen_string(g, reinterpret_cast("startingAt"), @@ -311,11 +311,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len, strlen("transformation")); yajl_gen_string(g, - reinterpret_cast(trans.back().match.c_str()), - trans.back().match.size()); + reinterpret_cast(trans.back().str().c_str()), + trans.back().str().size()); t = modsecurity::actions::transformations::Transformation::instantiate( - trans.back().match.c_str()); + trans.back().str().c_str()); varValueRes = t->evaluate(varValue, NULL); varValue.assign(varValueRes); trans.pop_back(); @@ -343,9 +343,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len, strlen("highlight")); yajl_gen_map_open(g); ops.pop_back(); - std::string startingAt = ops.back().match; + std::string startingAt = ops.back().str(); ops.pop_back(); - std::string size = ops.back().match; + std::string size = ops.back().str(); ops.pop_back(); yajl_gen_string(g, reinterpret_cast("startingAt"), diff --git a/src/operators/rx.cc b/src/operators/rx.cc index 1891a8bbf8..43f6444b1d 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) { bool Rx::evaluate(Transaction *transaction, Rule *rule, const std::string& input, std::shared_ptr ruleMessage) { - SMatch match; std::list matches; Regex *re; @@ -59,16 +58,16 @@ bool Rx::evaluate(Transaction *transaction, Rule *rule, matches.reverse(); for (const SMatch& a : matches) { transaction->m_collections.m_tx_collection->storeOrUpdateFirst( - std::to_string(i), a.match); + std::to_string(i), a.str()); ms_dbg_a(transaction, 7, "Added regex subexpression TX." + - std::to_string(i) + ": " + a.match); - transaction->m_matched.push_back(a.match); + std::to_string(i) + ": " + a.str()); + transaction->m_matched.push_back(a.str()); i++; } } for (const auto & i : matches) { - logOffset(ruleMessage, i.m_offset, i.m_length); + logOffset(ruleMessage, i.offset(), i.str().size()); } if (m_string->m_containsMacro) { diff --git a/src/operators/verify_cpf.cc b/src/operators/verify_cpf.cc index 10b262918b..ac4874bfbc 100644 --- a/src/operators/verify_cpf.cc +++ b/src/operators/verify_cpf.cc @@ -130,14 +130,14 @@ bool VerifyCPF::evaluate(Transaction *t, Rule *rule, for (i = 0; i < input.size() - 1 && is_cpf == false; i++) { matches = m_re->searchAll(input.substr(i, input.size())); for (const auto & i : matches) { - is_cpf = verify(i.match.c_str(), i.match.size()); + is_cpf = verify(i.str().c_str(), i.str().size()); if (is_cpf) { - logOffset(ruleMessage, i.m_offset, i.m_length); + logOffset(ruleMessage, i.offset(), i.str().size()); if (rule && t && rule->m_containsCaptureAction) { t->m_collections.m_tx_collection->storeOrUpdateFirst( - "0", std::string(i.match)); + "0", i.str()); ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \ - std::string(i.match)); + i.str()); } goto out; diff --git a/src/operators/verify_ssn.cc b/src/operators/verify_ssn.cc index 150b2aa2ec..deb97e6f9f 100644 --- a/src/operators/verify_ssn.cc +++ b/src/operators/verify_ssn.cc @@ -121,14 +121,14 @@ bool VerifySSN::evaluate(Transaction *t, Rule *rule, for (i = 0; i < input.size() - 1 && is_ssn == false; i++) { matches = m_re->searchAll(input.substr(i, input.size())); for (const auto & i : matches) { - is_ssn = verify(i.match.c_str(), i.match.size()); + is_ssn = verify(i.str().c_str(), i.str().size()); if (is_ssn) { - logOffset(ruleMessage, i.m_offset, i.m_length); + logOffset(ruleMessage, i.offset(), i.str().size()); if (rule && t && rule->m_containsCaptureAction) { t->m_collections.m_tx_collection->storeOrUpdateFirst( - "0", std::string(i.match)); + "0", i.str()); ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \ - std::string(i.match)); + i.str()); } goto out; diff --git a/src/utils/regex.cc b/src/utils/regex.cc index 52934aba6b..13a0661772 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -39,15 +39,11 @@ namespace Utils { Regex::Regex(const std::string& pattern_) - : pattern(pattern_), - m_ovector {0} { + : pattern(pattern_.empty() ? ".*" : pattern_) +{ const char *errptr = NULL; int erroffset; - if (pattern.empty() == true) { - pattern.assign(".*"); - } - m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE, &errptr, &erroffset, NULL); @@ -71,7 +67,7 @@ Regex::~Regex() { } -std::list Regex::searchAll(const std::string& s) { +std::list Regex::searchAll(const std::string& s) const { const char *subject = s.c_str(); const std::string tmpString = std::string(s.c_str(), s.size()); int ovector[OVECCOUNT]; @@ -83,7 +79,6 @@ std::list Regex::searchAll(const std::string& s) { s.size(), offset, 0, ovector, OVECCOUNT); for (i = 0; i < rc; i++) { - SMatch match; size_t start = ovector[2*i]; size_t end = ovector[2*i+1]; size_t len = end - start; @@ -91,11 +86,9 @@ std::list Regex::searchAll(const std::string& s) { rc = 0; break; } - match.match = std::string(tmpString, start, len); - match.m_offset = start; - match.m_length = len; + std::string match = std::string(tmpString, start, len); offset = start + len; - retList.push_front(match); + retList.push_front(SMatch(match, start)); if (len == 0) { rc = 0; @@ -107,24 +100,24 @@ std::list Regex::searchAll(const std::string& s) { return retList; } -int regex_search(const std::string& s, SMatch *match, - const Regex& regex) { +int Regex::search(const std::string& s, SMatch *match) const { int ovector[OVECCOUNT]; - int ret = pcre_exec(regex.m_pc, regex.m_pce, s.c_str(), + int ret = pcre_exec(m_pc, m_pce, s.c_str(), s.size(), 0, 0, ovector, OVECCOUNT) > 0; if (ret > 0) { - match->match = std::string(s, ovector[ret-1], - ovector[ret] - ovector[ret-1]); - match->size_ = ret; + *match = SMatch( + std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]), + 0 + ); } return ret; } -int regex_search(const std::string& s, const Regex& regex) { +int Regex::search(const std::string& s) const { int ovector[OVECCOUNT]; - return pcre_exec(regex.m_pc, regex.m_pce, s.c_str(), + return pcre_exec(m_pc, m_pce, s.c_str(), s.size(), 0, 0, ovector, OVECCOUNT) > 0; } diff --git a/src/utils/regex.h b/src/utils/regex.h index d60407dc30..da0cca73e0 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -31,39 +31,48 @@ namespace Utils { class SMatch { public: - SMatch() : size_(0), - m_offset(0), - m_length(0), - match("") { } - size_t size() const { return size_; } - std::string str() const { return match; } - - int size_; - int m_offset; - int m_length; - std::string match; -}; + SMatch() + : m_match(), m_offset(0) + {} + + SMatch(const std::string &match, size_t offset) + : m_match(match), m_offset(offset) + {} + + const std::string& str() const { return m_match; } + size_t offset() const { return m_offset; } + private: + std::string m_match; + size_t m_offset; +}; class Regex { public: explicit Regex(const std::string& pattern_); ~Regex(); - std::string pattern; - pcre *m_pc = NULL; - pcre_extra *m_pce = NULL; - int m_ovector[OVECCOUNT]; - - std::list searchAll(const std::string& s); -}; + // m_pc and m_pce can't be easily copied + Regex(const Regex&) = delete; + Regex& operator=(const Regex&) = delete; -int regex_search(const std::string& s, SMatch *m, - const Regex& regex); + std::list searchAll(const std::string& s) const; + int search(const std::string &s, SMatch *m) const; + int search(const std::string &s) const; -int regex_search(const std::string& s, const Regex& r); + const std::string pattern; + private: + pcre *m_pc = NULL; + pcre_extra *m_pce = NULL; +}; +static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) { + return regex.search(s, match); +} +static inline int regex_search(const std::string& s, const Regex& regex) { + return regex.search(s); +} } // namespace Utils } // namespace modsecurity diff --git a/test/regression/regression.cc b/test/regression/regression.cc index 6f5b3a44f3..f84f414c04 100644 --- a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -202,7 +202,7 @@ void perform_unit_test(ModSecurityTest *test, SMatch match; std::string s = modsec_rules->getParserError(); - if (regex_search(s, &match, re) && match.size() >= 1) { + if (regex_search(s, &match, re)) { if (test->m_automake_output) { std::cout << ":test-result: PASS " << filename \ << ":" << t->name << std::endl; diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index a57cb84e4c..23bbcb2342 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -62,8 +62,7 @@ void json2bin(std::string *str) { modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})"); modsecurity::Utils::SMatch match; - while (modsecurity::Utils::regex_search(*str, &match, re) - && match.size() > 0) { + while (modsecurity::Utils::regex_search(*str, &match, re)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2); @@ -71,8 +70,7 @@ void json2bin(std::string *str) { replaceAll(str, match.str(), p); } - while (modsecurity::Utils::regex_search(*str, &match, re2) - && match.size() > 0) { + while (modsecurity::Utils::regex_search(*str, &match, re2)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2);