From b8e31d19ca82188b3ee957779ce9ed21ded9d103 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Fri, 18 Jan 2019 17:52:42 -0300 Subject: [PATCH 01/12] Moving regex from utils to its own namespace --- headers/modsecurity/anchored_set_variable.h | 6 ++-- src/Makefile.am | 7 +++- src/anchored_set_variable.cc | 10 +++--- src/anchored_variable.cc | 2 +- src/audit_log/audit_log.cc | 6 ++-- .../backend/in_memory-per_process.cc | 6 ++-- src/collection/backend/lmdb.cc | 6 ++-- src/modsecurity.cc | 14 ++++---- src/operators/rx.cc | 4 +-- src/operators/rx.h | 8 ++--- src/operators/verify_cpf.h | 8 ++--- src/operators/verify_ssn.h | 8 ++--- src/operators/verify_svnr.h | 8 ++--- src/regex/backend/pcre.cc | 35 +++++++++++++++++++ src/regex/backend/pcre.h | 35 +++++++++++++++++++ src/{utils => regex}/regex.cc | 6 ++-- src/{utils => regex}/regex.h | 11 +++--- src/variables/rule.h | 10 +++--- src/variables/variable.h | 8 ++--- test/regression/custom_debug_log.cc | 6 ++-- test/regression/regression.cc | 12 +++---- test/unit/unit_test.cc | 12 +++---- 22 files changed, 152 insertions(+), 76 deletions(-) create mode 100644 src/regex/backend/pcre.cc create mode 100644 src/regex/backend/pcre.h rename src/{utils => regex}/regex.cc (97%) rename src/{utils => regex}/regex.h (94%) diff --git a/headers/modsecurity/anchored_set_variable.h b/headers/modsecurity/anchored_set_variable.h index 24d80cdac0..f5d3c53502 100644 --- a/headers/modsecurity/anchored_set_variable.h +++ b/headers/modsecurity/anchored_set_variable.h @@ -38,7 +38,7 @@ namespace modsecurity { class Transaction; -namespace Utils { +namespace regex { class Regex; } namespace variables { @@ -91,10 +91,10 @@ class AnchoredSetVariable : public std::unordered_multimap *l); - void resolveRegularExpression(Utils::Regex *r, + void resolveRegularExpression(regex::Regex *r, std::vector *l); - void resolveRegularExpression(Utils::Regex *r, + void resolveRegularExpression(regex::Regex *r, std::vector *l, variables::KeyExclusions &ke); 
diff --git a/src/Makefile.am b/src/Makefile.am index e6496ccc40..b442fdba2b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -247,13 +247,17 @@ UTILS = \ utils/md5.cc \ utils/msc_tree.cc \ utils/random.cc \ - utils/regex.cc \ utils/sha1.cc \ utils/string.cc \ utils/system.cc \ utils/shared_files.cc +REGEX = \ + regex/regex.cc \ + regex/backend/pcre.cc + + COLLECTION = \ collection/collections.cc \ collection/backend/in_memory-per_process.cc \ @@ -299,6 +303,7 @@ libmodsecurity_la_SOURCES = \ ${COLLECTION} \ ${OPERATORS} \ ${UTILS} \ + ${REGEX} \ ${VARIABLES} diff --git a/src/anchored_set_variable.cc b/src/anchored_set_variable.cc index 1282af6213..e7e738e6ce 100644 --- a/src/anchored_set_variable.cc +++ b/src/anchored_set_variable.cc @@ -22,7 +22,7 @@ #include "modsecurity/anchored_set_variable.h" #include "modsecurity/modsecurity.h" #include "modsecurity/transaction.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include "src/variables/variable.h" namespace modsecurity { @@ -123,10 +123,10 @@ std::unique_ptr AnchoredSetVariable::resolveFirst( } -void AnchoredSetVariable::resolveRegularExpression(Utils::Regex *r, +void AnchoredSetVariable::resolveRegularExpression(regex::Regex *r, std::vector *l) { for (const auto& x : *this) { - int ret = Utils::regex_search(x.first, *r); + int ret = regex::regex_search(x.first, *r); if (ret <= 0) { continue; } @@ -135,11 +135,11 @@ void AnchoredSetVariable::resolveRegularExpression(Utils::Regex *r, } -void AnchoredSetVariable::resolveRegularExpression(Utils::Regex *r, +void AnchoredSetVariable::resolveRegularExpression(regex::Regex *r, std::vector *l, variables::KeyExclusions &ke) { for (const auto& x : *this) { - int ret = Utils::regex_search(x.first, *r); + int ret = regex::regex_search(x.first, *r); if (ret <= 0) { continue; } diff --git a/src/anchored_variable.cc b/src/anchored_variable.cc index 35be6d38fb..f3e807a419 100644 --- a/src/anchored_variable.cc +++ b/src/anchored_variable.cc @@ -22,7 +22,7 @@ 
#include "../headers/modsecurity/anchored_variable.h" #include "modsecurity/modsecurity.h" #include "modsecurity/transaction.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity { diff --git a/src/audit_log/audit_log.cc b/src/audit_log/audit_log.cc index f65d38b6f6..a77a6d0bbd 100644 --- a/src/audit_log/audit_log.cc +++ b/src/audit_log/audit_log.cc @@ -26,7 +26,7 @@ #include "src/audit_log/writer/parallel.h" #include "src/audit_log/writer/serial.h" #include "src/audit_log/writer/writer.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #define PARTS_CONSTAINS(a, c) \ if (new_parts.find(toupper(a)) != std::string::npos \ @@ -278,8 +278,8 @@ bool AuditLog::isRelevant(int status) { return true; } - return Utils::regex_search(sstatus, - Utils::Regex(m_relevant)) != 0; + return regex::regex_search(sstatus, + regex::Regex(m_relevant)) != 0; } diff --git a/src/collection/backend/in_memory-per_process.cc b/src/collection/backend/in_memory-per_process.cc index 717f998216..2a83d4d25f 100644 --- a/src/collection/backend/in_memory-per_process.cc +++ b/src/collection/backend/in_memory-per_process.cc @@ -27,7 +27,7 @@ #include #include "modsecurity/variable_value.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include "src/utils/string.h" @@ -134,7 +134,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var, //std::string name = std::string(var, var.find(":") + 2, // var.size() - var.find(":") - 3); //size_t keySize = col.size(); - Utils::Regex r(var); + regex::Regex r(var); for (const auto& x : *this) { //if (x.first.size() <= keySize + 1) { @@ -148,7 +148,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var, //} //std::string content = std::string(x.first, keySize + 1, // x.first.size() - keySize - 1); - int ret = Utils::regex_search(x.first, r); + int ret = regex::regex_search(x.first, r); if (ret <= 0) { continue; } diff --git a/src/collection/backend/lmdb.cc 
b/src/collection/backend/lmdb.cc index 55afb1c548..d44592007a 100644 --- a/src/collection/backend/lmdb.cc +++ b/src/collection/backend/lmdb.cc @@ -23,7 +23,7 @@ #include #include "modsecurity/variable_value.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include "src/variables/variable.h" #undef LMDB_STDOUT_COUT @@ -537,7 +537,7 @@ void LMDB::resolveRegularExpression(const std::string& var, MDB_stat mst; MDB_cursor *cursor; - Utils::Regex r(var); + regex::Regex r(var); rc = mdb_txn_begin(m_env, NULL, 0, &txn); lmdb_debug(rc, "txn", "resolveRegularExpression"); @@ -559,7 +559,7 @@ void LMDB::resolveRegularExpression(const std::string& var, while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { char *a = reinterpret_cast(key.mv_data); - int ret = Utils::regex_search(a, r); + int ret = regex::regex_search(a, r); if (ret <= 0) { continue; } diff --git a/src/modsecurity.cc b/src/modsecurity.cc index ac8e52563a..41ad0c3249 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -38,7 +38,7 @@ #include "src/collection/backend/in_memory-per_process.h" #include "src/collection/backend/lmdb.h" #include "src/unique_id.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include "src/utils/geo_lookup.h" #include "src/actions/transformations/transformation.h" @@ -221,17 +221,17 @@ void ModSecurity::serverLog(void *data, std::shared_ptr rm) { int ModSecurity::processContentOffset(const char *content, size_t len, const char *matchString, std::string *json, const char **err) { #ifdef WITH_YAJL - Utils::Regex variables("v([0-9]+),([0-9]+)"); - Utils::Regex operators("o([0-9]+),([0-9]+)"); - Utils::Regex transformations("t:(?:(?!t:).)+"); + regex::Regex variables("v([0-9]+),([0-9]+)"); + regex::Regex operators("o([0-9]+),([0-9]+)"); + regex::Regex transformations("t:(?:(?!t:).)+"); yajl_gen g; std::string varValue; const unsigned char *buf; size_t jsonSize; - std::list vars = variables.searchAll(matchString); - std::list ops = 
operators.searchAll(matchString); - std::list trans = transformations.searchAll(matchString); + std::list vars = variables.searchAll(matchString); + std::list ops = operators.searchAll(matchString); + std::list trans = transformations.searchAll(matchString); g = yajl_gen_alloc(NULL); if (g == NULL) { diff --git a/src/operators/rx.cc b/src/operators/rx.cc index b4fc6ff4d7..ea677d61a3 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -51,11 +51,11 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, re = m_re; } - std::vector captures; + std::vector captures; re->searchOneMatch(input, captures); if (rule && rule->hasCaptureAction() && transaction) { - for (const Utils::SMatchCapture& capture : captures) { + for (const regex::SMatchCapture& capture : captures) { const std::string capture_substring(input.substr(capture.m_offset,capture.m_length)); transaction->m_collections.m_tx_collection->storeOrUpdateFirst( std::to_string(capture.m_group), capture_substring); diff --git a/src/operators/rx.h b/src/operators/rx.h index 97cc12a3aa..b91b6f20a6 100644 --- a/src/operators/rx.h +++ b/src/operators/rx.h @@ -22,13 +22,13 @@ #include #include "src/operators/operator.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity { -using Utils::SMatch; -using Utils::regex_search; -using Utils::Regex; +using regex::SMatch; +using regex::regex_search; +using regex::Regex; namespace operators { diff --git a/src/operators/verify_cpf.h b/src/operators/verify_cpf.h index c5b0dfa593..f4f72178e9 100644 --- a/src/operators/verify_cpf.h +++ b/src/operators/verify_cpf.h @@ -21,13 +21,13 @@ #include #include "src/operators/operator.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity { -using Utils::SMatch; -using Utils::regex_search; -using Utils::Regex; +using regex::SMatch; +using regex::regex_search; +using regex::Regex; namespace operators { diff --git a/src/operators/verify_ssn.h 
b/src/operators/verify_ssn.h index 86f3341344..01f60c478e 100644 --- a/src/operators/verify_ssn.h +++ b/src/operators/verify_ssn.h @@ -21,13 +21,13 @@ #include #include "src/operators/operator.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity { -using Utils::SMatch; -using Utils::regex_search; -using Utils::Regex; +using regex::SMatch; +using regex::regex_search; +using regex::Regex; namespace operators { diff --git a/src/operators/verify_svnr.h b/src/operators/verify_svnr.h index 6fe9df9afb..ac2bbd30b8 100644 --- a/src/operators/verify_svnr.h +++ b/src/operators/verify_svnr.h @@ -7,13 +7,13 @@ #include #include "src/operators/operator.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity { -using Utils::SMatch; -using Utils::regex_search; -using Utils::Regex; +using regex::SMatch; +using regex::regex_search; +using regex::Regex; namespace operators { diff --git a/src/regex/backend/pcre.cc b/src/regex/backend/pcre.cc new file mode 100644 index 0000000000..78dd8ecdb5 --- /dev/null +++ b/src/regex/backend/pcre.cc @@ -0,0 +1,35 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + +#include + +#include +#include +#include +#include + +#ifndef SRC_REGEX_BACKEND_PCRE_H_ +#define SRC_REGEX_BACKEND_PCRE_H_ + +namespace modsecurity { +namespace regex { + + + +} // namespace regex +} // namespace modsecurity + + +#endif // SRC_REGEX_BACKEND_PCRE_H_ diff --git a/src/regex/backend/pcre.h b/src/regex/backend/pcre.h new file mode 100644 index 0000000000..78dd8ecdb5 --- /dev/null +++ b/src/regex/backend/pcre.h @@ -0,0 +1,35 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#include + +#include +#include +#include +#include + +#ifndef SRC_REGEX_BACKEND_PCRE_H_ +#define SRC_REGEX_BACKEND_PCRE_H_ + +namespace modsecurity { +namespace regex { + + + +} // namespace regex +} // namespace modsecurity + + +#endif // SRC_REGEX_BACKEND_PCRE_H_ diff --git a/src/utils/regex.cc b/src/regex/regex.cc similarity index 97% rename from src/utils/regex.cc rename to src/regex/regex.cc index 0feb256cca..0923661f71 100644 --- a/src/utils/regex.cc +++ b/src/regex/regex.cc @@ -13,7 +13,7 @@ * */ -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include #include @@ -31,7 +31,7 @@ #endif namespace modsecurity { -namespace Utils { +namespace regex { Regex::Regex(const std::string& pattern_) @@ -135,5 +135,5 @@ int Regex::search(const std::string& s) const { s.size(), 0, 0, ovector, OVECCOUNT) > 0; } -} // namespace Utils +} // namespace regex } // namespace modsecurity diff --git a/src/utils/regex.h b/src/regex/regex.h similarity index 94% rename from src/utils/regex.h rename to 
src/regex/regex.h index 46dab6b83e..0776c18bcd 100644 --- a/src/utils/regex.h +++ b/src/regex/regex.h @@ -21,12 +21,13 @@ #include #include -#ifndef SRC_UTILS_REGEX_H_ -#define SRC_UTILS_REGEX_H_ +#ifndef SRC_REGEX_REGEX_H_ +#define SRC_REGEX_REGEX_H_ namespace modsecurity { -namespace Utils { +namespace regex { + #define OVECCOUNT 900 @@ -90,7 +91,7 @@ static inline int regex_search(const std::string& s, const Regex& regex) { } -} // namespace Utils +} // namespace regex } // namespace modsecurity -#endif // SRC_UTILS_REGEX_H_ +#endif // SRC_REGEX_REGEX_H_ diff --git a/src/variables/rule.h b/src/variables/rule.h index 969fd030ec..607b76042d 100644 --- a/src/variables/rule.h +++ b/src/variables/rule.h @@ -200,23 +200,23 @@ class Rule_DictElementRegexp : public VariableRegex { void evaluate(Transaction *t, RuleWithActions *rule, std::vector *l) override { - if (Utils::regex_search("id", m_r) > 0) { + if (regex::regex_search("id", m_r) > 0) { Rule_DictElement::id(t, rule, l); return; } - if (Utils::regex_search("rev", m_r) > 0) { + if (regex::regex_search("rev", m_r) > 0) { Rule_DictElement::rev(t, rule, l); return; } - if (Utils::regex_search("severity", m_r) > 0) { + if (regex::regex_search("severity", m_r) > 0) { Rule_DictElement::severity(t, rule, l); return; } - if (Utils::regex_search("logdata", m_r) > 0) { + if (regex::regex_search("logdata", m_r) > 0) { Rule_DictElement::logData(t, rule, l); return; } - if (Utils::regex_search("msg", m_r) > 0) { + if (regex::regex_search("msg", m_r) > 0) { Rule_DictElement::msg(t, rule, l); return; } diff --git a/src/variables/variable.h b/src/variables/variable.h index 09dff6b8ce..2257a303f8 100644 --- a/src/variables/variable.h +++ b/src/variables/variable.h @@ -25,7 +25,7 @@ #include "modsecurity/transaction.h" #include "modsecurity/rule.h" #include "src/utils/string.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #ifndef SRC_VARIABLES_VARIABLE_H_ #define SRC_VARIABLES_VARIABLE_H_ @@ -115,7 +115,7 @@ class 
KeyExclusion { // FIXME: use pre built regex. class KeyExclusionRegex : public KeyExclusion { public: - explicit KeyExclusionRegex(const Utils::Regex &re) + explicit KeyExclusionRegex(const regex::Regex &re) : m_re(re.pattern) { } explicit KeyExclusionRegex(const std::string &re) : m_re(re) { } @@ -126,7 +126,7 @@ class KeyExclusionRegex : public KeyExclusion { return m_re.searchAll(a).size() > 0; } - Utils::Regex m_re; + regex::Regex m_re; }; @@ -599,9 +599,9 @@ class VariableRegex : public Variable { m_regex(regex), Variable(name + ":" + "regex(" + regex + ")") { } - Utils::Regex m_r; // FIXME: no need for that. std::string m_regex; + regex::Regex m_r; }; class Variables : public std::vector { diff --git a/test/regression/custom_debug_log.cc b/test/regression/custom_debug_log.cc index a59fe08714..942084fc0a 100644 --- a/test/regression/custom_debug_log.cc +++ b/test/regression/custom_debug_log.cc @@ -19,7 +19,7 @@ #include #include "modsecurity/debug_log.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" namespace modsecurity_test { @@ -37,9 +37,9 @@ void CustomDebugLog::write(int level, const std::string &id, } bool const CustomDebugLog::contains(const std::string& pattern) const { - modsecurity::Utils::Regex re(pattern); + modsecurity::regex::Regex re(pattern); std::string s = m_log.str(); - return modsecurity::Utils::regex_search(s, re); + return modsecurity::regex::regex_search(s, re); } std::string const CustomDebugLog::log_messages() const { diff --git a/test/regression/regression.cc b/test/regression/regression.cc index c4600fae4c..d88561083f 100644 --- a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -30,7 +30,7 @@ #include "test/regression/regression_test.h" #include "test/common/modsecurity_test_results.h" #include "test/regression/custom_debug_log.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" using modsecurity_test::CustomDebugLog; using modsecurity_test::ModSecurityTest; @@ -38,9 +38,9 @@ using 
modsecurity_test::ModSecurityTestResults; using modsecurity_test::RegressionTest; using modsecurity_test::RegressionTestResult; -using modsecurity::Utils::regex_search; -using modsecurity::Utils::SMatch; -using modsecurity::Utils::Regex; +using modsecurity::regex::regex_search; +using modsecurity::regex::SMatch; +using modsecurity::regex::Regex; std::string default_test_path = "test-cases/regression"; std::list resources; @@ -53,8 +53,8 @@ void print_help() { bool contains(const std::string &s, const std::string &pattern) { bool ret; - modsecurity::Utils::Regex re(pattern); - ret = modsecurity::Utils::regex_search(s, re); + modsecurity::regex::Regex re(pattern); + ret = modsecurity::regex::regex_search(s, re); return ret; } diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index c85a687390..9a02764747 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -23,7 +23,7 @@ #include #include "test/common/colors.h" -#include "src/utils/regex.h" +#include "src/regex/regex.h" #include "src/utils/string.h" @@ -58,11 +58,11 @@ void replaceAll(std::string *s, const std::string &search, void json2bin(std::string *str) { - modsecurity::Utils::Regex re("\\\\x([a-z0-9A-Z]{2})"); - modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})"); - modsecurity::Utils::SMatch match; + modsecurity::regex::Regex re("\\\\x([a-z0-9A-Z]{2})"); + modsecurity::regex::Regex re2("\\\\u([a-z0-9A-Z]{4})"); + modsecurity::regex::SMatch match; - while (modsecurity::Utils::regex_search(*str, &match, re)) { + while (modsecurity::regex::regex_search(*str, &match, re)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2); @@ -70,7 +70,7 @@ void json2bin(std::string *str) { replaceAll(str, match.str(), p); } - while (modsecurity::Utils::regex_search(*str, &match, re2)) { + while (modsecurity::regex::regex_search(*str, &match, re2)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2); From 
bf983828d0ec80fcebfebd00618225d7c2278b93 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Fri, 18 Jan 2019 18:05:31 -0300 Subject: [PATCH 02/12] Renames SMatch to RegexMatch --- src/modsecurity.cc | 6 +++--- src/operators/rx.cc | 4 ++-- src/operators/rx.h | 2 +- src/operators/verify_cpf.cc | 2 +- src/operators/verify_cpf.h | 2 +- src/operators/verify_ssn.cc | 2 +- src/operators/verify_ssn.h | 2 +- src/operators/verify_svnr.cc | 2 +- src/operators/verify_svnr.h | 2 +- src/regex/regex.cc | 14 +++++++------- src/regex/regex.h | 18 +++++++++--------- test/regression/regression.cc | 4 ++-- test/unit/unit_test.cc | 2 +- 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/modsecurity.cc b/src/modsecurity.cc index 41ad0c3249..e0b85164eb 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -229,9 +229,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len, const unsigned char *buf; size_t jsonSize; - std::list vars = variables.searchAll(matchString); - std::list ops = operators.searchAll(matchString); - std::list trans = transformations.searchAll(matchString); + std::list vars = variables.searchAll(matchString); + std::list ops = operators.searchAll(matchString); + std::list trans = transformations.searchAll(matchString); g = yajl_gen_alloc(NULL); if (g == NULL) { diff --git a/src/operators/rx.cc b/src/operators/rx.cc index ea677d61a3..33f31b5f4f 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -51,11 +51,11 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, re = m_re; } - std::vector captures; + std::vector captures; re->searchOneMatch(input, captures); if (rule && rule->hasCaptureAction() && transaction) { - for (const regex::SMatchCapture& capture : captures) { + for (const regex::RegexMatchCapture& capture : captures) { const std::string capture_substring(input.substr(capture.m_offset,capture.m_length)); transaction->m_collections.m_tx_collection->storeOrUpdateFirst( 
std::to_string(capture.m_group), capture_substring); diff --git a/src/operators/rx.h b/src/operators/rx.h index b91b6f20a6..b628b564a0 100644 --- a/src/operators/rx.h +++ b/src/operators/rx.h @@ -26,7 +26,7 @@ namespace modsecurity { -using regex::SMatch; +using regex::RegexMatch; using regex::regex_search; using regex::Regex; diff --git a/src/operators/verify_cpf.cc b/src/operators/verify_cpf.cc index 0ec49ac479..e3837ab1b9 100644 --- a/src/operators/verify_cpf.cc +++ b/src/operators/verify_cpf.cc @@ -110,7 +110,7 @@ bool VerifyCPF::verify(const char *cpfnumber, int len) { bool VerifyCPF::evaluate(Transaction *t, RuleWithActions *rule, const std::string& input, std::shared_ptr ruleMessage) { - std::list matches; + std::list matches; bool is_cpf = false; int i; diff --git a/src/operators/verify_cpf.h b/src/operators/verify_cpf.h index f4f72178e9..fbeaa3d0bf 100644 --- a/src/operators/verify_cpf.h +++ b/src/operators/verify_cpf.h @@ -25,7 +25,7 @@ namespace modsecurity { -using regex::SMatch; +using regex::RegexMatch; using regex::regex_search; using regex::Regex; diff --git a/src/operators/verify_ssn.cc b/src/operators/verify_ssn.cc index 00b0c5c201..958b462906 100644 --- a/src/operators/verify_ssn.cc +++ b/src/operators/verify_ssn.cc @@ -112,7 +112,7 @@ bool VerifySSN::verify(const char *ssnumber, int len) { bool VerifySSN::evaluate(Transaction *t, RuleWithActions *rule, const std::string& input, std::shared_ptr ruleMessage) { - std::list matches; + std::list matches; bool is_ssn = false; int i; diff --git a/src/operators/verify_ssn.h b/src/operators/verify_ssn.h index 01f60c478e..15760c3d00 100644 --- a/src/operators/verify_ssn.h +++ b/src/operators/verify_ssn.h @@ -25,7 +25,7 @@ namespace modsecurity { -using regex::SMatch; +using regex::RegexMatch; using regex::regex_search; using regex::Regex; diff --git a/src/operators/verify_svnr.cc b/src/operators/verify_svnr.cc index 248e6b4ec1..37c5a866c1 100644 --- a/src/operators/verify_svnr.cc +++ 
b/src/operators/verify_svnr.cc @@ -79,7 +79,7 @@ bool VerifySVNR::verify(const char *svnrnumber, int len) { bool VerifySVNR::evaluate(Transaction *t, RuleWithActions *rule, const std::string& input, std::shared_ptr ruleMessage) { - std::list matches; + std::list matches; bool is_svnr = false; int i; diff --git a/src/operators/verify_svnr.h b/src/operators/verify_svnr.h index ac2bbd30b8..3c59803ba5 100644 --- a/src/operators/verify_svnr.h +++ b/src/operators/verify_svnr.h @@ -11,7 +11,7 @@ namespace modsecurity { -using regex::SMatch; +using regex::RegexMatch; using regex::regex_search; using regex::Regex; diff --git a/src/regex/regex.cc b/src/regex/regex.cc index 0923661f71..f0ade54188 100644 --- a/src/regex/regex.cc +++ b/src/regex/regex.cc @@ -62,12 +62,12 @@ Regex::~Regex() { } -std::list Regex::searchAll(const std::string& s) const { +std::list Regex::searchAll(const std::string& s) const { const char *subject = s.c_str(); const std::string tmpString = std::string(s.c_str(), s.size()); int ovector[OVECCOUNT]; int rc, i, offset = 0; - std::list retList; + std::list retList; do { rc = pcre_exec(m_pc, m_pce, subject, @@ -83,7 +83,7 @@ std::list Regex::searchAll(const std::string& s) const { } std::string match = std::string(tmpString, start, len); offset = start + len; - retList.push_front(SMatch(match, start)); + retList.push_front(RegexMatch(match, start)); if (len == 0) { rc = 0; @@ -95,7 +95,7 @@ std::list Regex::searchAll(const std::string& s) const { return retList; } -bool Regex::searchOneMatch(const std::string& s, std::vector& captures) const { +bool Regex::searchOneMatch(const std::string& s, std::vector& captures) const { const char *subject = s.c_str(); int ovector[OVECCOUNT]; @@ -108,20 +108,20 @@ bool Regex::searchOneMatch(const std::string& s, std::vector& cap if (end > s.size()) { continue; } - SMatchCapture capture(i, start, len); + RegexMatchCapture capture(i, start, len); captures.push_back(capture); } return (rc > 0); } -int Regex::search(const 
std::string& s, SMatch *match) const { +int Regex::search(const std::string& s, RegexMatch *match) const { int ovector[OVECCOUNT]; int ret = pcre_exec(m_pc, m_pce, s.c_str(), s.size(), 0, 0, ovector, OVECCOUNT) > 0; if (ret > 0) { - *match = SMatch( + *match = RegexMatch( std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]), 0); } diff --git a/src/regex/regex.h b/src/regex/regex.h index 0776c18bcd..5a45e3485d 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -31,13 +31,13 @@ namespace regex { #define OVECCOUNT 900 -class SMatch { +class RegexMatch { public: - SMatch() : + RegexMatch() : m_match(), m_offset(0) { } - SMatch(const std::string &match, size_t offset) : + RegexMatch(const std::string &match, size_t offset) : m_match(match), m_offset(offset) { } @@ -49,8 +49,8 @@ class SMatch { size_t m_offset; }; -struct SMatchCapture { - SMatchCapture(size_t group, size_t offset, size_t length) : +struct RegexMatchCapture { + RegexMatchCapture(size_t group, size_t offset, size_t length) : m_group(group), m_offset(offset), m_length(length) { } @@ -69,9 +69,9 @@ class Regex { Regex(const Regex&) = delete; Regex& operator=(const Regex&) = delete; - std::list searchAll(const std::string& s) const; - bool searchOneMatch(const std::string& s, std::vector& captures) const; - int search(const std::string &s, SMatch *match) const; + std::list searchAll(const std::string& s) const; + bool searchOneMatch(const std::string& s, std::vector& captures) const; + int search(const std::string &s, RegexMatch *m) const; int search(const std::string &s) const; const std::string pattern; @@ -81,7 +81,7 @@ class Regex { }; -static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) { +static inline int regex_search(const std::string& s, RegexMatch *match, const Regex& regex) { return regex.search(s, match); } diff --git a/test/regression/regression.cc b/test/regression/regression.cc index d88561083f..d0851534b6 100644 --- 
a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -39,7 +39,7 @@ using modsecurity_test::RegressionTest; using modsecurity_test::RegressionTestResult; using modsecurity::regex::regex_search; -using modsecurity::regex::SMatch; +using modsecurity::regex::RegexMatch; using modsecurity::regex::Regex; std::string default_test_path = "test-cases/regression"; @@ -230,7 +230,7 @@ void perform_unit_test(ModSecurityTest *test, } Regex re(t->parser_error); - SMatch match; + RegexMatch match; std::string s = modsec_rules->getParserError(); if (regex_search(s, &match, re)) { diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index 9a02764747..c12161d1ff 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -60,7 +60,7 @@ void replaceAll(std::string *s, const std::string &search, void json2bin(std::string *str) { modsecurity::regex::Regex re("\\\\x([a-z0-9A-Z]{2})"); modsecurity::regex::Regex re2("\\\\u([a-z0-9A-Z]{4})"); - modsecurity::regex::SMatch match; + modsecurity::regex::RegexMatch match; while (modsecurity::regex::regex_search(*str, &match, re)) { unsigned int p; From d03a293646cdfa199d047c6428d7990bd536e75d Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Fri, 18 Jan 2019 18:24:26 -0300 Subject: [PATCH 03/12] Removes unecessary static methods from regex class --- src/anchored_set_variable.cc | 4 ++-- src/audit_log/audit_log.cc | 3 +-- src/collection/backend/in_memory-per_process.cc | 2 +- src/collection/backend/lmdb.cc | 2 +- src/operators/rx.h | 1 - src/operators/verify_cpf.h | 1 - src/operators/verify_ssn.h | 1 - src/operators/verify_svnr.h | 1 - src/regex/regex.h | 10 ---------- src/variables/rule.h | 10 +++++----- test/regression/custom_debug_log.cc | 2 +- test/regression/regression.cc | 5 ++--- test/unit/unit_test.cc | 4 ++-- 13 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/anchored_set_variable.cc b/src/anchored_set_variable.cc index e7e738e6ce..4de82d6cd2 100644 --- 
a/src/anchored_set_variable.cc +++ b/src/anchored_set_variable.cc @@ -126,7 +126,7 @@ std::unique_ptr AnchoredSetVariable::resolveFirst( void AnchoredSetVariable::resolveRegularExpression(regex::Regex *r, std::vector *l) { for (const auto& x : *this) { - int ret = regex::regex_search(x.first, *r); + int ret = r->search(x.first); if (ret <= 0) { continue; } @@ -139,7 +139,7 @@ void AnchoredSetVariable::resolveRegularExpression(regex::Regex *r, std::vector *l, variables::KeyExclusions &ke) { for (const auto& x : *this) { - int ret = regex::regex_search(x.first, *r); + int ret = r->search(x.first); if (ret <= 0) { continue; } diff --git a/src/audit_log/audit_log.cc b/src/audit_log/audit_log.cc index a77a6d0bbd..645c4a2f8f 100644 --- a/src/audit_log/audit_log.cc +++ b/src/audit_log/audit_log.cc @@ -278,8 +278,7 @@ bool AuditLog::isRelevant(int status) { return true; } - return regex::regex_search(sstatus, - regex::Regex(m_relevant)) != 0; + return regex::Regex(m_relevant).search(sstatus) != 0; } diff --git a/src/collection/backend/in_memory-per_process.cc b/src/collection/backend/in_memory-per_process.cc index 2a83d4d25f..4f6059451e 100644 --- a/src/collection/backend/in_memory-per_process.cc +++ b/src/collection/backend/in_memory-per_process.cc @@ -148,7 +148,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var, //} //std::string content = std::string(x.first, keySize + 1, // x.first.size() - keySize - 1); - int ret = regex::regex_search(x.first, r); + int ret = r.search(x.first); if (ret <= 0) { continue; } diff --git a/src/collection/backend/lmdb.cc b/src/collection/backend/lmdb.cc index d44592007a..d9891e5685 100644 --- a/src/collection/backend/lmdb.cc +++ b/src/collection/backend/lmdb.cc @@ -559,7 +559,7 @@ void LMDB::resolveRegularExpression(const std::string& var, while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { char *a = reinterpret_cast(key.mv_data); - int ret = regex::regex_search(a, r); + int ret = r.search(a); if 
(ret <= 0) { continue; } diff --git a/src/operators/rx.h b/src/operators/rx.h index b628b564a0..18d5d320c1 100644 --- a/src/operators/rx.h +++ b/src/operators/rx.h @@ -27,7 +27,6 @@ namespace modsecurity { using regex::RegexMatch; -using regex::regex_search; using regex::Regex; namespace operators { diff --git a/src/operators/verify_cpf.h b/src/operators/verify_cpf.h index fbeaa3d0bf..a69bf4710f 100644 --- a/src/operators/verify_cpf.h +++ b/src/operators/verify_cpf.h @@ -26,7 +26,6 @@ namespace modsecurity { using regex::RegexMatch; -using regex::regex_search; using regex::Regex; namespace operators { diff --git a/src/operators/verify_ssn.h b/src/operators/verify_ssn.h index 15760c3d00..ca7ecff90e 100644 --- a/src/operators/verify_ssn.h +++ b/src/operators/verify_ssn.h @@ -26,7 +26,6 @@ namespace modsecurity { using regex::RegexMatch; -using regex::regex_search; using regex::Regex; namespace operators { diff --git a/src/operators/verify_svnr.h b/src/operators/verify_svnr.h index 3c59803ba5..5ee167e7df 100644 --- a/src/operators/verify_svnr.h +++ b/src/operators/verify_svnr.h @@ -12,7 +12,6 @@ namespace modsecurity { using regex::RegexMatch; -using regex::regex_search; using regex::Regex; namespace operators { diff --git a/src/regex/regex.h b/src/regex/regex.h index 5a45e3485d..032fc747a3 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -81,16 +81,6 @@ class Regex { }; -static inline int regex_search(const std::string& s, RegexMatch *match, const Regex& regex) { - return regex.search(s, match); -} - - -static inline int regex_search(const std::string& s, const Regex& regex) { - return regex.search(s); -} - - } // namespace regex } // namespace modsecurity diff --git a/src/variables/rule.h b/src/variables/rule.h index 607b76042d..e2c539f7f9 100644 --- a/src/variables/rule.h +++ b/src/variables/rule.h @@ -200,23 +200,23 @@ class Rule_DictElementRegexp : public VariableRegex { void evaluate(Transaction *t, RuleWithActions *rule, std::vector *l) override { - if 
(regex::regex_search("id", m_r) > 0) { + if (m_r.search("id") > 0) { Rule_DictElement::id(t, rule, l); return; } - if (regex::regex_search("rev", m_r) > 0) { + if (m_r.search("rev") > 0) { Rule_DictElement::rev(t, rule, l); return; } - if (regex::regex_search("severity", m_r) > 0) { + if (m_r.search("severity") > 0) { Rule_DictElement::severity(t, rule, l); return; } - if (regex::regex_search("logdata", m_r) > 0) { + if (m_r.search("logdata") > 0) { Rule_DictElement::logData(t, rule, l); return; } - if (regex::regex_search("msg", m_r) > 0) { + if (m_r.search("msg") > 0) { Rule_DictElement::msg(t, rule, l); return; } diff --git a/test/regression/custom_debug_log.cc b/test/regression/custom_debug_log.cc index 942084fc0a..c2360dcb3d 100644 --- a/test/regression/custom_debug_log.cc +++ b/test/regression/custom_debug_log.cc @@ -39,7 +39,7 @@ void CustomDebugLog::write(int level, const std::string &id, bool const CustomDebugLog::contains(const std::string& pattern) const { modsecurity::regex::Regex re(pattern); std::string s = m_log.str(); - return modsecurity::regex::regex_search(s, re); + return re.search(s); } std::string const CustomDebugLog::log_messages() const { diff --git a/test/regression/regression.cc b/test/regression/regression.cc index d0851534b6..33a9eec4b1 100644 --- a/test/regression/regression.cc +++ b/test/regression/regression.cc @@ -38,7 +38,6 @@ using modsecurity_test::ModSecurityTestResults; using modsecurity_test::RegressionTest; using modsecurity_test::RegressionTestResult; -using modsecurity::regex::regex_search; using modsecurity::regex::RegexMatch; using modsecurity::regex::Regex; @@ -54,7 +53,7 @@ void print_help() { bool contains(const std::string &s, const std::string &pattern) { bool ret; modsecurity::regex::Regex re(pattern); - ret = modsecurity::regex::regex_search(s, re); + ret = re.search(s); return ret; } @@ -233,7 +232,7 @@ void perform_unit_test(ModSecurityTest *test, RegexMatch match; std::string s = modsec_rules->getParserError(); 
- if (regex_search(s, &match, re)) { + if (re.search(s, &match)) { if (test->m_automake_output) { std::cout << ":test-result: PASS " << filename \ << ":" << t->name << std::endl; diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index c12161d1ff..803955a147 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -62,7 +62,7 @@ void json2bin(std::string *str) { modsecurity::regex::Regex re2("\\\\u([a-z0-9A-Z]{4})"); modsecurity::regex::RegexMatch match; - while (modsecurity::regex::regex_search(*str, &match, re)) { + while (re.search(*str, &match)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2); @@ -70,7 +70,7 @@ void json2bin(std::string *str) { replaceAll(str, match.str(), p); } - while (modsecurity::regex::regex_search(*str, &match, re2)) { + while (re2.search(*str, &match)) { unsigned int p; std::string toBeReplaced = match.str(); toBeReplaced.erase(0, 2); From 18f0e6d3cc309c44b6900e5f73ccbe721132867c Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Fri, 18 Jan 2019 18:42:15 -0300 Subject: [PATCH 04/12] regex: Organizes the classes per file moves Pcre to backends/pcre.cc moves PegexMatch to regex_match.h --- build/pcre.m4 | 51 +++++++++++----- src/regex/backend/pcre.cc | 122 ++++++++++++++++++++++++++++++++++++-- src/regex/backend/pcre.h | 28 +++++++++ src/regex/regex.cc | 108 --------------------------------- src/regex/regex.h | 61 ++++--------------- src/regex/regex_match.h | 59 ++++++++++++++++++ 6 files changed, 250 insertions(+), 179 deletions(-) create mode 100644 src/regex/regex_match.h diff --git a/build/pcre.m4 b/build/pcre.m4 index 0de3c9c994..0bff17555b 100644 --- a/build/pcre.m4 +++ b/build/pcre.m4 @@ -23,6 +23,16 @@ AC_ARG_WITH( AC_MSG_CHECKING([for libpcre config script]) +if test "x${with_pcre}" == "xno"; then + AC_MSG_NOTICE([Support for PCRE was disabled by the utilization of --without-pcre or --with-pcre=no]) + PCRE_DISABLED=yes +else + if test "x${with_pcre}" == "xyes"; then + 
PCRE_MANDATORY=yes + AC_MSG_NOTICE([PCRE support was marked as mandatory by the utilization of --with-pcre=yes]) + test_paths="/usr/local/libpcre /usr/local/pcre /usr/local /opt/libpcre /opt/pcre /opt /usr /opt/local" + fi + for x in ${test_paths}; do dnl # Determine if the script was specified and use it directly if test ! -d "$x" -a -e "$x"; then @@ -55,7 +65,7 @@ if test -n "${pcre_path}"; then AC_MSG_RESULT([${PCRE_CONFIG}]) PCRE_VERSION="`${PCRE_CONFIG} --version`" if test ! -z "${PCRE_VERSION}"; then AC_MSG_NOTICE(pcre VERSION: $PCRE_VERSION); fi - PCRE_CFLAGS="`${PCRE_CONFIG} --cflags`" + PCRE_CFLAGS="`${PCRE_CONFIG} --cflags` -DWITH_PCRE" if test ! -z "${PCRE_CFLAGS}"; then AC_MSG_NOTICE(pcre CFLAGS: $PCRE_CFLAGS); fi PCRE_LDADD="`${PCRE_CONFIG} --libs`" if test ! -z "${PCRE_LDADD}"; then AC_MSG_NOTICE(pcre LDADD: $PCRE_LDADD); fi @@ -90,20 +100,31 @@ if test -n "${PCRE_VERSION}"; then LDFLAGS=$save_$LDFLAGS fi -AC_SUBST(PCRE_CONFIG) -AC_SUBST(PCRE_VERSION) -AC_SUBST(PCRE_CPPFLAGS) -AC_SUBST(PCRE_CFLAGS) -AC_SUBST(PCRE_LDFLAGS) -AC_SUBST(PCRE_LDADD) -AC_SUBST(PCRE_LD_PATH) +fi + -if test -z "${PCRE_VERSION}"; then - AC_MSG_NOTICE([*** pcre library not found.]) - ifelse([$2], , AC_MSG_ERROR([pcre library is required]), $2) +if test -z "${PCRE_LDADD}"; then + if test -z "${PCRE_MANDATORY}"; then + if test -z "${PCRE_DISABLED}"; then + PCRE_FOUND=0 + else + PCRE_FOUND=2 + fi + else + AC_MSG_ERROR([PCRE was explicitly referenced but it was not found]) + PCRE_FOUND=-1 + fi else - AC_MSG_NOTICE([using pcre v${PCRE_VERSION}]) - ifelse([$1], , , $1) - PCRE_LDADD="${PCRE_LDADD} -lpcre" -fi + PCRE_FOUND=1 + AC_SUBST(PCRE_CONFIG) + AC_SUBST(PCRE_VERSION) + AC_SUBST(PCRE_CPPFLAGS) + AC_SUBST(PCRE_CFLAGS) + AC_SUBST(PCRE_LDFLAGS) + AC_SUBST(PCRE_LDADD) + AC_SUBST(PCRE_LD_PATH) + PCRE_DISPLAY="${PCRE_LDADD}, ${PCRE_CFLAGS}" + AC_SUBST(PCRE_DISPLAY) +fi + ]) diff --git a/src/regex/backend/pcre.cc b/src/regex/backend/pcre.cc index 78dd8ecdb5..452ab91ea2 100644 --- 
a/src/regex/backend/pcre.cc +++ b/src/regex/backend/pcre.cc @@ -13,23 +13,135 @@ * */ -#include #include #include #include #include +#include + +#include "src/regex/backend/pcre.h" +#include "src/regex/regex_match.h" -#ifndef SRC_REGEX_BACKEND_PCRE_H_ -#define SRC_REGEX_BACKEND_PCRE_H_ namespace modsecurity { namespace regex { +namespace backend { + + +#if PCRE_HAVE_JIT +#define pcre_study_opt PCRE_STUDY_JIT_COMPILE +#else +#define pcre_study_opt 0 +#endif + + +Pcre::Pcre(const std::string& pattern_) + : pattern(pattern_.empty() ? ".*" : pattern_) { + const char *errptr = NULL; + int erroffset; + + m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE, + &errptr, &erroffset, NULL); + + m_pce = pcre_study(m_pc, pcre_study_opt, &errptr); +} + + +Pcre::~Pcre() { + if (m_pc != NULL) { + pcre_free(m_pc); + m_pc = NULL; + } + if (m_pce != NULL) { +#if PCRE_HAVE_JIT + pcre_free_study(m_pce); +#else + pcre_free(m_pce); +#endif + m_pce = NULL; + } +} + + +std::list Pcre::searchAll(const std::string& s) const { + const char *subject = s.c_str(); + const std::string tmpString = std::string(s.c_str(), s.size()); + int ovector[OVECCOUNT]; + int rc, i, offset = 0; + std::list retList; + + do { + rc = pcre_exec(m_pc, m_pce, subject, + s.size(), offset, 0, ovector, OVECCOUNT); + for (i = 0; i < rc; i++) { + size_t start = ovector[2*i]; + size_t end = ovector[2*i+1]; + size_t len = end - start; + if (end > s.size()) { + rc = 0; + break; + } + std::string match = std::string(tmpString, start, len); + offset = start + len; + retList.push_front(RegexMatch(match, start)); + if (len == 0) { + rc = 0; + break; + } + } + } while (rc > 0); + return retList; +} + + +bool Pcre::searchOneMatch(const std::string& s, std::vector& captures) const { + const char *subject = s.c_str(); + int ovector[OVECCOUNT]; + + int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT); + + for (int i = 0; i < rc; i++) { + size_t start = ovector[2*i]; + size_t end = ovector[2*i+1]; + 
size_t len = end - start; + if (end > s.size()) { + continue; + } + RegexMatchCapture capture(i, start, len); + captures.push_back(capture); + } + + return (rc > 0); +} + + +int Pcre::search(const std::string& s, RegexMatch *match) const { + int ovector[OVECCOUNT]; + int ret = pcre_exec(m_pc, m_pce, s.c_str(), + s.size(), 0, 0, ovector, OVECCOUNT) > 0; + + if (ret > 0) { + *match = RegexMatch( + std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]), + 0); + } + + return ret; +} + + +int Pcre::search(const std::string& s) const { + int ovector[OVECCOUNT]; + return pcre_exec(m_pc, m_pce, s.c_str(), + s.size(), 0, 0, ovector, OVECCOUNT) > 0; +} + + +} // namespace backend } // namespace regex } // namespace modsecurity - -#endif // SRC_REGEX_BACKEND_PCRE_H_ diff --git a/src/regex/backend/pcre.h b/src/regex/backend/pcre.h index 78dd8ecdb5..5ba6615406 100644 --- a/src/regex/backend/pcre.h +++ b/src/regex/backend/pcre.h @@ -19,15 +19,43 @@ #include #include #include +#include + +#include "src/regex/regex_match.h" #ifndef SRC_REGEX_BACKEND_PCRE_H_ #define SRC_REGEX_BACKEND_PCRE_H_ namespace modsecurity { namespace regex { +namespace backend { + + +#define OVECCOUNT 900 + + +class Pcre { + public: + explicit Pcre(const std::string& pattern_); + ~Pcre(); + + // m_pc and m_pce can't be easily copied + Pcre(const Pcre&) = delete; + Pcre& operator=(const Pcre&) = delete; + + std::list searchAll(const std::string& s) const; + bool searchOneMatch(const std::string& s, std::vector& captures) const; + int search(const std::string &s, RegexMatch *m) const; + int search(const std::string &s) const; + const std::string pattern; + private: + pcre *m_pc = NULL; + pcre_extra *m_pce = NULL; +}; +} // namespace backend } // namespace regex } // namespace modsecurity diff --git a/src/regex/regex.cc b/src/regex/regex.cc index f0ade54188..d31fc1dfe1 100644 --- a/src/regex/regex.cc +++ b/src/regex/regex.cc @@ -22,118 +22,10 @@ #include #include -#include "src/utils/geo_lookup.h" - 
-#if PCRE_HAVE_JIT -#define pcre_study_opt PCRE_STUDY_JIT_COMPILE -#else -#define pcre_study_opt 0 -#endif namespace modsecurity { namespace regex { -Regex::Regex(const std::string& pattern_) - : pattern(pattern_.empty() ? ".*" : pattern_) { - const char *errptr = NULL; - int erroffset; - - m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE, - &errptr, &erroffset, NULL); - - m_pce = pcre_study(m_pc, pcre_study_opt, &errptr); -} - - -Regex::~Regex() { - if (m_pc != NULL) { - pcre_free(m_pc); - m_pc = NULL; - } - if (m_pce != NULL) { -#if PCRE_HAVE_JIT - pcre_free_study(m_pce); -#else - pcre_free(m_pce); -#endif - m_pce = NULL; - } -} - - -std::list Regex::searchAll(const std::string& s) const { - const char *subject = s.c_str(); - const std::string tmpString = std::string(s.c_str(), s.size()); - int ovector[OVECCOUNT]; - int rc, i, offset = 0; - std::list retList; - - do { - rc = pcre_exec(m_pc, m_pce, subject, - s.size(), offset, 0, ovector, OVECCOUNT); - - for (i = 0; i < rc; i++) { - size_t start = ovector[2*i]; - size_t end = ovector[2*i+1]; - size_t len = end - start; - if (end > s.size()) { - rc = 0; - break; - } - std::string match = std::string(tmpString, start, len); - offset = start + len; - retList.push_front(RegexMatch(match, start)); - - if (len == 0) { - rc = 0; - break; - } - } - } while (rc > 0); - - return retList; -} - -bool Regex::searchOneMatch(const std::string& s, std::vector& captures) const { - const char *subject = s.c_str(); - int ovector[OVECCOUNT]; - - int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT); - - for (int i = 0; i < rc; i++) { - size_t start = ovector[2*i]; - size_t end = ovector[2*i+1]; - size_t len = end - start; - if (end > s.size()) { - continue; - } - RegexMatchCapture capture(i, start, len); - captures.push_back(capture); - } - - return (rc > 0); -} - -int Regex::search(const std::string& s, RegexMatch *match) const { - int ovector[OVECCOUNT]; - int ret = pcre_exec(m_pc, m_pce, 
s.c_str(), - s.size(), 0, 0, ovector, OVECCOUNT) > 0; - - if (ret > 0) { - *match = RegexMatch( - std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]), - 0); - } - - return ret; -} - -int Regex::search(const std::string& s) const { - int ovector[OVECCOUNT]; - return pcre_exec(m_pc, m_pce, s.c_str(), - s.size(), 0, 0, ovector, OVECCOUNT) > 0; -} - } // namespace regex } // namespace modsecurity diff --git a/src/regex/regex.h b/src/regex/regex.h index 032fc747a3..1f08f8b3d8 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -13,7 +13,7 @@ * */ -#include + #include #include @@ -21,6 +21,9 @@ #include #include +#include "src/regex/backend/pcre.h" +#include "src/regex/regex_match.h" + #ifndef SRC_REGEX_REGEX_H_ #define SRC_REGEX_REGEX_H_ @@ -28,59 +31,15 @@ namespace modsecurity { namespace regex { +using selectedBackend = backend::Pcre; -#define OVECCOUNT 900 - -class RegexMatch { - public: - RegexMatch() : - m_match(), - m_offset(0) { } - - RegexMatch(const std::string &match, size_t offset) : - m_match(match), - m_offset(offset) { } - - const std::string& str() const { return m_match; } - size_t offset() const { return m_offset; } - - private: - std::string m_match; - size_t m_offset; +class Regex : public selectedBackend { + public: + explicit Regex(const std::string& pattern) : + selectedBackend(pattern) { }; + ~Regex() { }; }; -struct RegexMatchCapture { - RegexMatchCapture(size_t group, size_t offset, size_t length) : - m_group(group), - m_offset(offset), - m_length(length) { } - - size_t m_group; // E.g. 
0 = full match; 6 = capture group 6 - size_t m_offset; // offset of match within the analyzed string - size_t m_length; -}; - -class Regex { - public: - explicit Regex(const std::string& pattern_); - ~Regex(); - - // m_pc and m_pce can't be easily copied - Regex(const Regex&) = delete; - Regex& operator=(const Regex&) = delete; - - std::list searchAll(const std::string& s) const; - bool searchOneMatch(const std::string& s, std::vector& captures) const; - int search(const std::string &s, RegexMatch *m) const; - int search(const std::string &s) const; - - const std::string pattern; - private: - pcre *m_pc = NULL; - pcre_extra *m_pce = NULL; -}; - - } // namespace regex } // namespace modsecurity diff --git a/src/regex/regex_match.h b/src/regex/regex_match.h new file mode 100644 index 0000000000..7b338eb9f7 --- /dev/null +++ b/src/regex/regex_match.h @@ -0,0 +1,59 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2015 Trustwave Holdings, Inc. (http://www.trustwave.com/) + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ + + +#ifndef SRC_REGEX_REGEX_MATCH_H_ +#define SRC_REGEX_REGEX_MATCH_H_ + +#include + +namespace modsecurity { +namespace regex { + + +class RegexMatch { + public: + RegexMatch() : + m_match(), + m_offset(0) { } + + RegexMatch(const std::string &match, size_t offset) : + m_match(match), + m_offset(offset) { } + + const std::string& str() const { return m_match; } + size_t offset() const { return m_offset; } + + private: + std::string m_match; + size_t m_offset; +}; + +struct RegexMatchCapture { + RegexMatchCapture(size_t group, size_t offset, size_t length) : + m_group(group), + m_offset(offset), + m_length(length) { } + + size_t m_group; // E.g. 0 = full match; 6 = capture group 6 + size_t m_offset; // offset of match within the analyzed string + size_t m_length; +}; + + +} // namespace regex +} // namespace modsecurity + +#endif // SRC_REGEX_REGEX_MATCH_H_ From 61427f9c4bfaf0b4c3a9ad3b7f17de8438478ff9 Mon Sep 17 00:00:00 2001 From: Felipe Zimmerle Date: Fri, 18 Jan 2019 18:59:04 -0300 Subject: [PATCH 05/12] Makes re2 detectable by the build scripts --- build/re2.m4 | 166 ++++++++++++++++++ configure.ac | 65 +++++++ .../reading_logs_via_rule_message/Makefile.am | 9 +- examples/reading_logs_with_offset/Makefile.am | 9 +- examples/using_bodies_in_chunks/Makefile.am | 9 +- src/Makefile.am | 7 +- src/parser/Makefile.am | 3 +- src/regex/backend/pcre.cc | 4 +- src/regex/backend/pcre.h | 4 + src/regex/regex.h | 6 + test/Makefile.am | 27 ++- test/benchmark/Makefile.am | 9 +- tools/rules-check/Makefile.am | 9 +- 13 files changed, 299 insertions(+), 28 deletions(-) create mode 100644 build/re2.m4 diff --git a/build/re2.m4 b/build/re2.m4 new file mode 100644 index 0000000000..3c4af9e90b --- /dev/null +++ b/build/re2.m4 @@ -0,0 +1,166 @@ +dnl Check for RE2 Libraries +dnl CHECK_RE2(ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]) + +AC_DEFUN([PROG_RE2], [ + +# Needed if pkg-config will be used. 
+AC_REQUIRE([PKG_PROG_PKG_CONFIG]) + +# Possible names for the re2 library/package (pkg-config) +RE2_POSSIBLE_LIB_NAMES="re2" + +# Possible extensions for the library +RE2_POSSIBLE_EXTENSIONS="so la sl dll dylib" + +# Possible paths (if pkg-config was not found, proceed with the file lookup) +RE2_POSSIBLE_PATHS="/usr/lib /usr/local/lib /usr/local/libre2 /usr/local/re2 /usr/local /opt/libre2 /opt/re2 /opt /usr /usr/lib64" + +# Variables to be set by this very own script. +RE2_VERSION="" +RE2_CFLAGS="" +RE2_CPPFLAGS="" +RE2_LDADD="" +RE2_LDFLAGS="" + +AC_ARG_WITH( + re2, + AC_HELP_STRING( + [--with-re2=PATH], + [Path to re2 prefix or config script] + ) +) + +if test "x${with_re2}" == "xno"; then + AC_DEFINE(HAVE_RE2, 0, [Support for RE2 was disabled by the utilization of --without-re2 or --with-re2=no]) + AC_MSG_NOTICE([Support for RE2 was disabled by the utilization of --without-re2 or --with-re2=no]) + RE2_DISABLED=yes +else + if test "x${with_re2}" == "xyes"; then + RE2_MANDATORY=yes + AC_MSG_NOTICE([RE2 support was marked as mandatory by the utilization of --with-re2=yes]) + fi + if test "x${with_re2}" == "x" || test "x${with_re2}" == "xyes"; then + # Nothing about RE2 was informed, using the pkg-config to figure things out. + if test -n "${PKG_CONFIG}"; then + RE2_PKG_NAME="" + for x in ${RE2_POSSIBLE_LIB_NAMES}; do + if ${PKG_CONFIG} --exists ${x}; then + RE2_PKG_NAME="$x" + break + fi + done + fi + AC_MSG_NOTICE([Nothing about RE2 was informed during the configure phase. 
Trying to detect it on the platform...]) + if test -n "${RE2_PKG_NAME}"; then + # Package was found using the pkg-config scripts + RE2_VERSION="`${PKG_CONFIG} ${RE2_PKG_NAME} --modversion`" + RE2_CFLAGS="`${PKG_CONFIG} ${RE2_PKG_NAME} --cflags`" + RE2_LDADD="`${PKG_CONFIG} ${RE2_PKG_NAME} --libs-only-l`" + RE2_LDFLAGS="`${PKG_CONFIG} ${RE2_PKG_NAME} --libs-only-L --libs-only-other`" + RE2_DISPLAY="${RE2_LDADD}, ${RE2_CFLAGS}" + else + # If pkg-config did not find anything useful, go over file lookup. + for x in ${RE2_POSSIBLE_PATHS}; do + CHECK_FOR_RE2_AT(${x}) + if test -n "${RE2_VERSION}"; then + break + fi + done + fi + fi + if test "x${with_re2}" != "x"; then + # An specific path was informed, lets check. + RE2_MANDATORY=yes + CHECK_FOR_RE2_AT(${with_re2}) + fi +fi + +if test -z "${RE2_LDADD}"; then + if test -z "${RE2_MANDATORY}"; then + if test -z "${RE2_DISABLED}"; then + AC_MSG_NOTICE([RE2 library was not found]) + RE2_FOUND=0 + else + RE2_FOUND=2 + fi + else + AC_MSG_ERROR([RE2 was explicitly referenced but it was not found]) + RE2_FOUND=-1 + fi +else + RE2_FOUND=1 + AC_MSG_NOTICE([using RE2 v${RE2_VERSION}]) + RE2_CFLAGS="-DWITH_RE2 ${RE2_CFLAGS}" + RE2_DISPLAY="${RE2_LDADD}, ${RE2_CFLAGS}" + AC_SUBST(RE2_VERSION) + AC_SUBST(RE2_LDADD) + AC_SUBST(RE2_LIBS) + AC_SUBST(RE2_LDFLAGS) + AC_SUBST(RE2_CFLAGS) + AC_SUBST(RE2_DISPLAY) +fi + + + +AC_SUBST(RE2_FOUND) + +]) # AC_DEFUN [PROG_RE2] + + +AC_DEFUN([CHECK_FOR_RE2_AT], [ + path=$1 + for y in ${RE2_POSSIBLE_EXTENSIONS}; do + for z in ${RE2_POSSIBLE_LIB_NAMES}; do + if test -e "${path}/${z}.${y}"; then + re2_lib_path="${path}/" + re2_lib_name="${z}" + re2_lib_file="${re2_lib_path}/${z}.${y}" + break + fi + if test -e "${path}/lib${z}.${y}"; then + re2_lib_path="${path}/" + re2_lib_name="${z}" + re2_lib_file="${re2_lib_path}/lib${z}.${y}" + break + fi + if test -e "${path}/lib/lib${z}.${y}"; then + re2_lib_path="${path}/lib/" + re2_lib_name="${z}" + re2_lib_file="${re2_lib_path}/lib${z}.${y}" + break + fi + if 
test -e "${path}/lib/x86_64-linux-gnu/lib${z}.${y}"; then + re2_lib_path="${path}/lib/x86_64-linux-gnu/" + re2_lib_name="${z}" + re2_lib_file="${re2_lib_path}/lib${z}.${y}" + break + fi + done + if test -n "$re2_lib_path"; then + break + fi + done + if test -e "${path}/include/re2.h"; then + re2_inc_path="${path}/include" + elif test -e "${path}/re2.h"; then + re2_inc_path="${path}" + elif test -e "${path}/include/re2/re2.h"; then + re2_inc_path="${path}/include" + fi + + if test -n "${re2_lib_path}"; then + AC_MSG_NOTICE([RE2 library found at: ${re2_lib_file}]) + fi + + if test -n "${re2_inc_path}"; then + AC_MSG_NOTICE([RE2 headers found at: ${re2_inc_path}]) + fi + + if test -n "${re2_lib_path}" -a -n "${re2_inc_path}"; then + # TODO: Compile a piece of code to check the version. + RE2_CFLAGS="-I${re2_inc_path}" + RE2_LDADD="-l${re2_lib_name}" + RE2_LDFLAGS="-L${re2_lib_path}" + RE2_DISPLAY="${re2_lib_file}, ${re2_inc_path}" + fi +]) # AC_DEFUN [CHECK_FOR_RE2_AT] diff --git a/configure.ac b/configure.ac index 33afa4c610..f6de77972e 100644 --- a/configure.ac +++ b/configure.ac @@ -105,6 +105,9 @@ AM_CONDITIONAL([SSDEEP_CFLAGS], [test "SSDEEP_CFLAGS" != ""]) CHECK_LUA AM_CONDITIONAL([LUA_CFLAGS], [test "LUA_CFLAGS" != ""]) +# Check for RE2 +PROG_RE2 +AM_CONDITIONAL([RE2_CFLAGS], [test "RE2_CFLAGS" != ""]) # # Check for curl @@ -549,6 +552,68 @@ if test "x$LUA_FOUND" = "x2"; then fi +## REGEX +echo " + Regular expression engine" + +REGEX_SELECTED_RE2=" " +REGEX_SELECTED_PCRE=" " + +if test "x$RE2_FOUND" = "x1"; then + MSC_REGEX_ENG="re2" +fi +if test "x$PCRE_FOUND" = "x1"; then + MSC_REGEX_ENG="pcre" +fi + +AC_SUBST(MSC_REGEX_ENG) + +if test "x$MSC_REGEX_ENG" = "xpcre"; then + MSC_REGEX_ENG_IS_PCRE=1 + REGEX_SELECTED_PCRE="[[selected]]" + AC_SUBST(MSC_REGEX_ENG_IS_PCRE) +fi +if test "x$MSC_REGEX_ENG" = "xre2"; then + MSC_REGEX_ENG_IS_RE2=1 + REGEX_SELECTED_PCRE="[[selected]]" + AC_SUBST(MSC_REGEX_ENG_IS_RE2) +fi + + +#### RE2 +if test "x$RE2_FOUND" = "x0"; then + 
echo " * RE2 (experimental) ....not found" +fi +if test "x$RE2_FOUND" = "x1"; then + echo -n " * RE2 (experimental) ${REGEX_SELECTED_RE2} ....found " + if ! test "x$RE2_VERSION" = "x"; then + echo "v${RE2_VERSION}" + else + echo "" + fi + echo " ${RE2_DISPLAY}" +fi +if test "x$RE2_FOUND" = "x2"; then + echo " * RE2 (experimental) ....disabled" +fi + +#### PCRE +if test "x$PCRE_FOUND" = "x0"; then + echo " * PCRE ....not found" +fi +if test "x$PCRE_FOUND" = "x1"; then + echo -n " * PCRE ${REGEX_SELECTED_PCRE} ....found " + if ! test "x$PCRE_VERSION" = "x"; then + echo "v${PCRE_VERSION}" + else + echo "" + fi + echo " ${PCRE_DISPLAY}" +fi +if test "x$PCRE_FOUND" = "x2"; then + echo " * PCRE ....disabled" +fi + + echo " " echo " Other Options" if test $buildTestUtilities = true; then diff --git a/examples/reading_logs_via_rule_message/Makefile.am b/examples/reading_logs_via_rule_message/Makefile.am index a9ec0853b1..e36f6f7584 100644 --- a/examples/reading_logs_via_rule_message/Makefile.am +++ b/examples/reading_logs_via_rule_message/Makefile.am @@ -15,7 +15,8 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,7 +29,8 @@ simple_request_LDFLAGS = \ $(LUA_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ @@ -46,7 +48,8 @@ simple_request_CPPFLAGS = \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) MAINTAINERCLEANFILES = \ diff --git a/examples/reading_logs_with_offset/Makefile.am b/examples/reading_logs_with_offset/Makefile.am index 6e27786721..2d345a61b6 100644 --- a/examples/reading_logs_with_offset/Makefile.am +++ b/examples/reading_logs_with_offset/Makefile.am @@ -15,7 +15,8 @@ read_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + 
$(YAJL_LDADD) \ + $(RE2_LDADD) read_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -28,7 +29,8 @@ read_LDFLAGS = \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) read_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ @@ -47,7 +49,8 @@ read_CPPFLAGS = \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) MAINTAINERCLEANFILES = \ diff --git a/examples/using_bodies_in_chunks/Makefile.am b/examples/using_bodies_in_chunks/Makefile.am index 0ddc06cbb8..ca7e7d9873 100644 --- a/examples/using_bodies_in_chunks/Makefile.am +++ b/examples/using_bodies_in_chunks/Makefile.am @@ -15,7 +15,8 @@ simple_request_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) simple_request_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -29,7 +30,8 @@ simple_request_LDFLAGS = \ -lpthread \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) simple_request_CPPFLAGS = \ $(GLOBAL_CFLAGS) \ @@ -48,7 +50,8 @@ simple_request_CPPFLAGS = \ $(LMDB_CFLAGS) \ $(LUA_CFLAGS) \ $(PCRE_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in diff --git a/src/Makefile.am b/src/Makefile.am index b442fdba2b..8b404b04a9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -328,7 +328,8 @@ libmodsecurity_la_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(MAXMIND_CFLAGS) \ $(LUA_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) libmodsecurity_la_LDFLAGS = \ @@ -343,6 +344,7 @@ libmodsecurity_la_LDFLAGS = \ $(SSDEEP_LDFLAGS) \ $(MAXMIND_LDFLAGS) \ $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) \ -version-info @MSC_VERSION_INFO@ @@ -358,5 +360,6 @@ libmodsecurity_la_LIBADD = \ $(PCRE_LDADD) \ $(MAXMIND_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 74dbb8b1b5..c77790fa62 100644 --- 
a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -26,7 +26,8 @@ libmodsec_parser_la_CPPFLAGS = \ $(YAJL_CFLAGS) \ $(LMDB_CFLAGS) \ $(PCRE_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) test.cc: seclang-parser.hh cat seclang-parser.hh | sed "s/return \*new (yyas_ ()) T (t)/return *new (yyas_ ()) T (std::move((T\&)t))/g" > seclang-parser.hh.fix && mv seclang-parser.hh.fix seclang-parser.hh diff --git a/src/regex/backend/pcre.cc b/src/regex/backend/pcre.cc index 452ab91ea2..2bc610462c 100644 --- a/src/regex/backend/pcre.cc +++ b/src/regex/backend/pcre.cc @@ -18,7 +18,7 @@ #include #include #include -#include + #include "src/regex/backend/pcre.h" #include "src/regex/regex_match.h" @@ -28,6 +28,7 @@ namespace modsecurity { namespace regex { namespace backend { +#ifdef WITH_PCRE #if PCRE_HAVE_JIT #define pcre_study_opt PCRE_STUDY_JIT_COMPILE @@ -140,6 +141,7 @@ int Pcre::search(const std::string& s) const { s.size(), 0, 0, ovector, OVECCOUNT) > 0; } +#endif } // namespace backend } // namespace regex diff --git a/src/regex/backend/pcre.h b/src/regex/backend/pcre.h index 5ba6615406..0559b2f6ba 100644 --- a/src/regex/backend/pcre.h +++ b/src/regex/backend/pcre.h @@ -13,7 +13,9 @@ * */ +#ifdef WITH_PCRE #include +#endif #include #include @@ -30,6 +32,7 @@ namespace modsecurity { namespace regex { namespace backend { +#ifdef WITH_PCRE #define OVECCOUNT 900 @@ -54,6 +57,7 @@ class Pcre { pcre_extra *m_pce = NULL; }; +#endif } // namespace backend } // namespace regex diff --git a/src/regex/regex.h b/src/regex/regex.h index 1f08f8b3d8..bde0803982 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -31,7 +31,13 @@ namespace modsecurity { namespace regex { +#ifdef WITH_PCRE using selectedBackend = backend::Pcre; +#elif WITH_RE2 +//using selectedBackend = backend::Re2; +#else +#error "no regex backend selected" +#endif class Regex : public selectedBackend { public: diff --git a/test/Makefile.am b/test/Makefile.am index 9a719342dd..d7c6df3e3e 
100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -51,7 +51,8 @@ unit_tests_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) unit_tests_LDFLAGS = \ @@ -65,7 +66,8 @@ unit_tests_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) unit_tests_CPPFLAGS = \ @@ -84,7 +86,8 @@ unit_tests_CPPFLAGS = \ $(YAJL_CFLAGS) \ $(LUA_CFLAGS) \ $(SSDEEP_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) # regression @@ -105,7 +108,8 @@ regression_tests_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) regression_tests_LDFLAGS = \ @@ -119,7 +123,8 @@ regression_tests_LDFLAGS = \ $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(LUA_LDFLAGS) + $(LUA_LDFLAGS) \ + $(RE2_LDFLAGS) regression_tests_CPPFLAGS = \ @@ -138,7 +143,8 @@ regression_tests_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(YAJL_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) # optimization @@ -158,7 +164,8 @@ rules_optimization_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) rules_optimization_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -171,7 +178,8 @@ rules_optimization_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) rules_optimization_CPPFLAGS = \ -std=c++11 \ @@ -189,5 +197,6 @@ rules_optimization_CPPFLAGS = \ $(SSDEEP_CFLAGS) \ $(PCRE_CFLAGS) \ $(YAJL_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) diff --git a/test/benchmark/Makefile.am b/test/benchmark/Makefile.am index 73a975b33a..fee134ac64 100644 --- a/test/benchmark/Makefile.am +++ b/test/benchmark/Makefile.am @@ -15,7 +15,8 @@ benchmark_LDADD = \ $(SSDEEP_LDADD) \ $(LUA_LDADD) \ $(LIBXML2_LDADD) \ - $(GLOBAL_LDADD) + $(GLOBAL_LDADD) \ + $(RE2_LDADD) 
benchmark_LDFLAGS = \ -L$(top_builddir)/src/.libs/ \ @@ -29,7 +30,8 @@ benchmark_LDFLAGS = \ $(YAJL_LDFLAGS) \ $(LMDB_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(LUA_LDFLAGS) + $(LUA_LDFLAGS) \ + $(RE2_LDFLAGS) benchmark_CPPFLAGS = \ -std=c++11 \ @@ -37,7 +39,8 @@ benchmark_CPPFLAGS = \ $(GLOBAL_CPPFLAGS) \ $(PCRE_CFLAGS) \ $(LMDB_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in diff --git a/tools/rules-check/Makefile.am b/tools/rules-check/Makefile.am index 615d9598ed..dee9ae5cbc 100644 --- a/tools/rules-check/Makefile.am +++ b/tools/rules-check/Makefile.am @@ -16,7 +16,8 @@ modsec_rules_check_LDADD = \ $(LUA_LDADD) \ $(PCRE_LDADD) \ $(SSDEEP_LDADD) \ - $(YAJL_LDADD) + $(YAJL_LDADD) \ + $(RE2_LDADD) modsec_rules_check_LDFLAGS = \ $(GEOIP_LDFLAGS) \ @@ -25,7 +26,8 @@ modsec_rules_check_LDFLAGS = \ $(LMDB_LDFLAGS) \ $(LUA_LDFLAGS) \ $(SSDEEP_LDFLAGS) \ - $(YAJL_LDFLAGS) + $(YAJL_LDFLAGS) \ + $(RE2_LDFLAGS) modsec_rules_check_CPPFLAGS = \ -std=c++11 \ @@ -34,7 +36,8 @@ modsec_rules_check_CPPFLAGS = \ $(PCRE_CFLAGS) \ $(LMDB_CFLAGS) \ $(MAXMIND_CFLAGS) \ - $(LIBXML2_CFLAGS) + $(LIBXML2_CFLAGS) \ + $(RE2_CFLAGS) MAINTAINERCLEANFILES = \ Makefile.in From d742dd29e5291773875f9afb3489ca86213f0645 Mon Sep 17 00:00:00 2001 From: WGH Date: Tue, 22 Jan 2019 22:48:31 +0300 Subject: [PATCH 06/12] Add support for capturing group test cases This enables unit tests to compare the matching groups as well, not just binary match-no match. 
--- test/unit/unit.cc | 52 ++++++++++++++++++++++++++++++++++++++---- test/unit/unit_test.cc | 37 ++++++++++++++++++++++++++++++ test/unit/unit_test.h | 4 ++++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/test/unit/unit.cc b/test/unit/unit.cc index f610a21ebf..80d1d019fa 100644 --- a/test/unit/unit.cc +++ b/test/unit/unit.cc @@ -19,11 +19,13 @@ #include #include #include +#include #include "modsecurity/rules_set.h" #include "modsecurity/modsecurity.h" #include "src/operators/operator.h" #include "src/actions/transformations/transformation.h" +#include "src/actions/capture.h" #include "modsecurity/transaction.h" #include "modsecurity/actions/action.h" @@ -40,6 +42,9 @@ using modsecurity_test::UnitTest; using modsecurity_test::ModSecurityTest; using modsecurity_test::ModSecurityTestResults; +using modsecurity::ModSecurity; +using modsecurity::RuleWithActions; +using modsecurity::Transaction; using modsecurity::actions::transformations::Transformation; using modsecurity::operators::Operator; @@ -52,8 +57,30 @@ void print_help() { std::cout << std::endl; } +static std::vector get_capturing_groups(Transaction &transaction) { + // capturing groups are stored in the TX collection as "0", "1", and so on + std::vector res; + for (int i = 0;; i++) { + const std::string key = std::to_string(i); + auto s = transaction.m_collections.m_tx_collection->resolveFirst(key); + if (s == NULL) break; + res.push_back(*s); + } + return res; +} + +static std::unique_ptr create_fake_capture_rule() { + auto actions = new modsecurity::Actions; + actions->push_back(new modsecurity::actions::Capture("")); + auto rule = std::unique_ptr(new RuleWithActions{actions, nullptr, std::unique_ptr(new std::string("")), 1}); + + assert(rule->hasCaptureAction()); -void perform_unit_test(ModSecurityTest *test, UnitTest *t, + return rule; +} + +static void perform_unit_test(ModSecurity *modsec, + ModSecurityTest *test, UnitTest *t, ModSecurityTestResults* res) { std::string error; bool 
found = true; @@ -76,11 +103,26 @@ void perform_unit_test(ModSecurityTest *test, UnitTest *t, } if (t->type == "op") { + modsecurity::RulesSet rules{}; + Transaction transaction{modsec, &rules, NULL}; Operator *op = Operator::instantiate(t->name, t->param); + + // Rx operator won't capture groups otherwise + auto rule = create_fake_capture_rule(); + op->init(t->filename, &error); - int ret = op->evaluate(NULL, NULL, t->input, NULL); + int ret = op->evaluate(&transaction, rule.get(), t->input, NULL); t->obtained = ret; - if (ret != t->ret) { + + bool pass = (ret == t->ret); + if (t->re_groups.size() > 0) { + t->obtained_re_groups = get_capturing_groups(transaction); + if (t->re_groups != t->obtained_re_groups) { + pass = false; + } + } + + if (!pass) { res->push_back(t); if (test->m_automake_output) { std::cout << "FAIL "; @@ -151,6 +193,8 @@ int main(int argc, char **argv) { test.load_tests("test-cases/secrules-language-tests/transformations"); } + ModSecurity modsec{}; + for (std::pair *> a : test) { std::vector *tests = a.second; @@ -161,7 +205,7 @@ int main(int argc, char **argv) { if (!test.m_automake_output) { std::cout << " " << a.first << "...\t"; } - perform_unit_test(&test, t, &r); + perform_unit_test(&modsec, &test, t, &r); if (!test.m_automake_output) { int skp = 0; diff --git a/test/unit/unit_test.cc b/test/unit/unit_test.cc index 803955a147..d28f1d6cde 100644 --- a/test/unit/unit_test.cc +++ b/test/unit/unit_test.cc @@ -88,6 +88,20 @@ void json2bin(std::string *str) { // replaceAll(str, "\\f", '\f'); } +static void print_array(std::stringstream &i, + const std::vector array) { + i << "["; + bool first = true; + for (const auto &s : array) { + if (first) { + first = false; + } else { + i << ", "; + } + i << "\"" << modsecurity::utils::string::toHexIfNeeded(s) << "\""; + } + i << "]"; +} std::string UnitTest::print() { std::stringstream i; @@ -101,6 +115,12 @@ std::string UnitTest::print() { i << " \"input\": \"" << this->input << "\"" << std::endl; i << 
" \"param\": \"" << this->param << "\"" << std::endl; i << " \"output\": \"" << this->output << "\"" << std::endl; + if (this->re_groups.size() != 0) { + i << " \"re_groups\": "; + print_array(i, this->re_groups); + i << std::endl; + + } i << "}" << std::endl; if (this->ret != this->obtained) { i << "Expecting: \"" << this->ret << "\" - returned: \""; @@ -114,6 +134,13 @@ std::string UnitTest::print() { i << "\""; i << std::endl; } + if (this->re_groups.size() && this->re_groups != this->obtained_re_groups) { + i << "Expecting:\n "; + print_array(i, this->re_groups); + i << "\nObtained:\n "; + print_array(i, this->obtained_re_groups); + i << std::endl; + } return i.str(); } @@ -149,6 +176,16 @@ UnitTest *UnitTest::from_yajl_node(const yajl_val &node) { * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53690 * */ + } else if (strcmp(key, "re_groups") == 0) { + auto arr = YAJL_GET_ARRAY(val); + if (arr == NULL) { + continue; + } + for (int i = 0; i < arr->len; i++) { + const char *s = YAJL_GET_STRING(arr->values[i]); + if (s == NULL) continue; + u->re_groups.push_back(s); + } } } diff --git a/test/unit/unit_test.h b/test/unit/unit_test.h index 494d61415b..4d116ea824 100644 --- a/test/unit/unit_test.h +++ b/test/unit/unit_test.h @@ -42,6 +42,10 @@ class UnitTest { int obtained; int skipped; std::string obtainedOutput; + + // for regular expression operator tests + std::vector re_groups; + std::vector obtained_re_groups; }; } // namespace modsecurity_test From 7b31d4c1bc1061a90910b1ec8660c09c97e956f8 Mon Sep 17 00:00:00 2001 From: WGH Date: Thu, 24 Jan 2019 03:33:18 +0300 Subject: [PATCH 07/12] Fix Pcre::searchAll behaviour wrt empty capturing groups Previously, searchAll would stop search when it encountered an empty matching group in any position. This means that, for example, regular expression "(a)(b?)(c)" would match string "ac", but the resulting group list would be ["ac", "a"]. 
After this change, the resulting list for the aforementioned regular expression becomes ["ac", "a", "", "c"] like it should've been. Additionally, this also changes behaviour for multiple matches. For example, when "aaa00bbb" is matched by "[a-z]*", previously only "aaa" would be returned. Now the matching list is ["aaa", "", "", "bbb", ""]. The old behaviour was confusing and almost certainly a bug. The new behaviour is the same as in Python's re.findall. For reference, though, Go does it somewhat differently: empty matches at the end of non-empty matches are ignored, so in Go above example is ["aaa", "", "bbb"] instead. --- src/regex/backend/pcre.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/regex/backend/pcre.cc b/src/regex/backend/pcre.cc index 2bc610462c..7c9eb54d84 100644 --- a/src/regex/backend/pcre.cc +++ b/src/regex/backend/pcre.cc @@ -87,11 +87,11 @@ std::list Pcre::searchAll(const std::string& s) const { std::string match = std::string(tmpString, start, len); offset = start + len; retList.push_front(RegexMatch(match, start)); + } - if (len == 0) { - rc = 0; - break; - } + offset = ovector[1]; // end + if (offset == ovector[0]) { // start == end (size == 0) + offset++; } } while (rc > 0); From 70a6833b7531c89a6f6ef39c25dd86ff827d5829 Mon Sep 17 00:00:00 2001 From: WGH Date: Thu, 24 Jan 2019 18:03:07 +0300 Subject: [PATCH 08/12] Add RE2 regex backend --- src/Makefile.am | 3 +- src/regex/backend/re2.cc | 115 +++++++++++++++++++++++++++++++++++++++ src/regex/backend/re2.h | 59 ++++++++++++++++++++ src/regex/regex.h | 3 +- 4 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 src/regex/backend/re2.cc create mode 100644 src/regex/backend/re2.h diff --git a/src/Makefile.am b/src/Makefile.am index 8b404b04a9..63f06df322 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -255,7 +255,8 @@ UTILS = \ REGEX = \ regex/regex.cc \ - regex/backend/pcre.cc + regex/backend/pcre.cc \ + regex/backend/re2.cc COLLECTION = \ 
diff --git a/src/regex/backend/re2.cc b/src/regex/backend/re2.cc new file mode 100644 index 0000000000..e9e3d6efac --- /dev/null +++ b/src/regex/backend/re2.cc @@ -0,0 +1,115 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2019 + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ +#include +#include +#include +#include + +#include "src/regex/backend/re2.h" +#include "src/regex/regex_match.h" + +namespace modsecurity { +namespace regex { +namespace backend { + +#ifdef WITH_RE2 + +static RE2::Options get_re2_options() { + RE2::Options res; + + res.set_dot_nl(true); + + return res; +} + + +Re2::Re2(const std::string& pattern_) + : pattern(pattern_.empty() ? ".*" : pattern_), + re(pattern, get_re2_options()) +{ +} + +std::list Re2::searchAll(const std::string& s) const { + std::list retList; + + re2::StringPiece subject(s); + + size_t offset = 0; + while (offset <= s.size()) { + int ngroups = re.NumberOfCapturingGroups() + 1; + re2::StringPiece submatches[ngroups]; + + if (!re.Match(subject, offset, s.size(), RE2::UNANCHORED, + &submatches[0], ngroups)) { + break; + } + + for (int i = 0; i < ngroups; i++) { + // N.B. 
StringPiece::as_string returns value, not reference + auto match_string = submatches[i].as_string(); + auto start = &submatches[i][0] - &subject[0]; + retList.push_front(RegexMatch(std::move(match_string), start)); + } + + offset = (&submatches[0][0] - &subject[0]) + submatches[0].length(); + if (submatches[0].size() == 0) { + offset++; + } + } + + return retList; +} + +bool Re2::searchOneMatch(const std::string& s, std::vector& captures) const { + re2::StringPiece subject(s); + int ngroups = re.NumberOfCapturingGroups() + 1; + re2::StringPiece submatches[ngroups]; + + if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], ngroups)) { + for (int i = 0; i < ngroups; i++) { + auto len = submatches[i].length(); + auto start = len != 0 ? &submatches[i][0] - &subject[0] : 0; + captures.push_back(RegexMatchCapture(i, start, len)); + } + return true; + } else { + return false; + } +} + +int Re2::search(const std::string& s, RegexMatch *match) const { + re2::StringPiece subject(s); + re2::StringPiece submatches[1]; + if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], 1)) { + // N.B. StringPiece::as_string returns value, not reference + auto match_string = submatches[0].as_string(); + auto start = &submatches[0][0] - &subject[0]; + *match = RegexMatch(std::move(match_string), start); + return 1; + } else { + return 0; + } +} + +int Re2::search(const std::string& s) const { + re2::StringPiece subject(s); + return re.Match(subject, 0, s.size(), RE2::UNANCHORED, NULL, 0); +} +#endif + +} // namespace backend +} // namespace regex +} // namespace modsecurity + diff --git a/src/regex/backend/re2.h b/src/regex/backend/re2.h new file mode 100644 index 0000000000..07906314e3 --- /dev/null +++ b/src/regex/backend/re2.h @@ -0,0 +1,59 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2019 + * + * You may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ + +#ifdef WITH_RE2 +#include +#endif + +#include +#include +#include + +#include "src/regex/regex_match.h" + +#ifndef SRC_REGEX_BACKEND_RE2_H_ +#define SRC_REGEX_BACKEND_RE2_H_ + +namespace modsecurity { +namespace regex { +namespace backend { + +#ifdef WITH_RE2 + +class Re2 { + public: + explicit Re2(const std::string& pattern_); + + // RE2 class is not copyable, so neither is this + Re2(const Re2&) = delete; + Re2& operator=(const Re2&) = delete; + + std::list searchAll(const std::string& s) const; + bool searchOneMatch(const std::string& s, std::vector& captures) const; + int search(const std::string &s, RegexMatch *m) const; + int search(const std::string &s) const; + + const std::string pattern; + private: + const RE2 re; +}; + +#endif + +} // namespace backend +} // namespace regex +} // namespace modsecurity + +#endif // SRC_REGEX_BACKEND_RE2_H_ diff --git a/src/regex/regex.h b/src/regex/regex.h index bde0803982..f08a554d19 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -22,6 +22,7 @@ #include #include "src/regex/backend/pcre.h" +#include "src/regex/backend/re2.h" #include "src/regex/regex_match.h" #ifndef SRC_REGEX_REGEX_H_ @@ -34,7 +35,7 @@ namespace regex { #ifdef WITH_PCRE using selectedBackend = backend::Pcre; #elif WITH_RE2 -//using selectedBackend = backend::Re2; +using selectedBackend = backend::Re2; #else #error "no regex backend selected" #endif From bf1699d39a7976f185085da4f985714ca6335d90 Mon Sep 17 00:00:00 2001 From: WGH Date: Sat, 5 Sep 2020 03:34:11 +0300 Subject: [PATCH 09/12] Implement RE2 fallback to libpcre RE2 doesn't support certain features, like negative lookaround, so when a regular expression cannot be
compiled with RE2, it's compiled with libpcre instead. This has some runtime cost, as this fallback is implemented with an extra heap object and virtual function calls. When RE2 is not enabled, however, everything works as it did before. --- src/regex/backend/backend.h | 46 ++++++++++++++++++++++ src/regex/backend/pcre.h | 21 +++++++--- src/regex/backend/re2.cc | 9 +++-- src/regex/backend/re2.h | 18 ++++++--- src/regex/backend_fallback.h | 75 ++++++++++++++++++++++++++++++++++++ src/regex/regex.h | 14 +++++-- src/variables/variable.h | 6 +-- 7 files changed, 167 insertions(+), 22 deletions(-) create mode 100644 src/regex/backend/backend.h create mode 100644 src/regex/backend_fallback.h diff --git a/src/regex/backend/backend.h b/src/regex/backend/backend.h new file mode 100644 index 0000000000..aa7ef63082 --- /dev/null +++ b/src/regex/backend/backend.h @@ -0,0 +1,46 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2019 + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. 
+ * + */ +#ifndef SRC_REGEX_BACKEND_BACKEND_H_ +#define SRC_REGEX_BACKEND_BACKEND_H_ + +#include +#include +#include + +#include "src/regex/regex_match.h" + +namespace modsecurity { +namespace regex { +namespace backend { + +class Backend { +public: + virtual ~Backend() {} + + virtual bool ok() const = 0; + + virtual std::list searchAll(const std::string& s) const = 0; + virtual bool searchOneMatch(const std::string& s, std::vector& captures) const = 0; + virtual int search(const std::string &s, RegexMatch *m) const = 0; + virtual int search(const std::string &s) const = 0; + + virtual const std::string& getPattern() const = 0; +}; + +} // namespace backend +} // namespace regex +} // namespace modsecurity + +#endif // SRC_REGEX_BACKEND_BACKEND_H_ diff --git a/src/regex/backend/pcre.h b/src/regex/backend/pcre.h index 0559b2f6ba..539745e3c9 100644 --- a/src/regex/backend/pcre.h +++ b/src/regex/backend/pcre.h @@ -23,6 +23,7 @@ #include #include +#include "src/regex/backend/backend.h" #include "src/regex/regex_match.h" #ifndef SRC_REGEX_BACKEND_PCRE_H_ @@ -37,7 +38,7 @@ namespace backend { #define OVECCOUNT 900 -class Pcre { +class Pcre : public Backend { public: explicit Pcre(const std::string& pattern_); ~Pcre(); @@ -46,13 +47,21 @@ class Pcre { Pcre(const Pcre&) = delete; Pcre& operator=(const Pcre&) = delete; - std::list searchAll(const std::string& s) const; - bool searchOneMatch(const std::string& s, std::vector& captures) const; - int search(const std::string &s, RegexMatch *m) const; - int search(const std::string &s) const; + std::list searchAll(const std::string& s) const override; + bool searchOneMatch(const std::string& s, std::vector& captures) const override; + int search(const std::string &s, RegexMatch *m) const override; + int search(const std::string &s) const override; - const std::string pattern; + virtual bool ok() const override { + return m_pc != NULL; + } + + virtual const std::string& getPattern() const override { + return pattern; + }; 
private: + const std::string pattern; + pcre *m_pc = NULL; pcre_extra *m_pce = NULL; }; diff --git a/src/regex/backend/re2.cc b/src/regex/backend/re2.cc index e9e3d6efac..5c43cb7200 100644 --- a/src/regex/backend/re2.cc +++ b/src/regex/backend/re2.cc @@ -29,15 +29,18 @@ namespace backend { static RE2::Options get_re2_options() { RE2::Options res; + // Re2 is usually used with fallback to libpcre, + // so disable unnecessary stderr noise + res.set_log_errors(false); + res.set_dot_nl(true); return res; } -Re2::Re2(const std::string& pattern_) - : pattern(pattern_.empty() ? ".*" : pattern_), - re(pattern, get_re2_options()) +Re2::Re2(const std::string& pattern) + : re(pattern.empty() ? ".*" : pattern, get_re2_options()) { } diff --git a/src/regex/backend/re2.h b/src/regex/backend/re2.h index 07906314e3..7fb36ce89b 100644 --- a/src/regex/backend/re2.h +++ b/src/regex/backend/re2.h @@ -21,6 +21,7 @@ #include #include +#include "src/regex/backend/backend.h" #include "src/regex/regex_match.h" #ifndef SRC_REGEX_BACKEND_RE2_H_ @@ -32,7 +33,7 @@ namespace backend { #ifdef WITH_RE2 -class Re2 { +class Re2 : public Backend { public: explicit Re2(const std::string& pattern_); @@ -40,12 +41,17 @@ class Re2 { Re2(const Re2&) = delete; Re2& operator=(const Re2&) = delete; - std::list searchAll(const std::string& s) const; - bool searchOneMatch(const std::string& s, std::vector& captures) const; - int search(const std::string &s, RegexMatch *m) const; - int search(const std::string &s) const; + std::list searchAll(const std::string& s) const override; + bool searchOneMatch(const std::string& s, std::vector& captures) const override; + int search(const std::string &s, RegexMatch *m) const override; + int search(const std::string &s) const override; + virtual bool ok() const override { + return re.ok(); + } - const std::string pattern; + virtual const std::string& getPattern() const override { + return re.pattern(); + }; private: const RE2 re; }; diff --git 
a/src/regex/backend_fallback.h b/src/regex/backend_fallback.h new file mode 100644 index 0000000000..00f759dd15 --- /dev/null +++ b/src/regex/backend_fallback.h @@ -0,0 +1,75 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2019 + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact Trustwave Holdings, Inc. + * directly using the email address security@modsecurity.org. + * + */ +#ifndef SRC_REGEX_BACKEND_FALLBACK_H_ +#define SRC_REGEX_BACKEND_FALLBACK_H_ + +#include + +#include "src/regex/backend/backend.h" + +namespace modsecurity { +namespace regex { + +template +static backend::Backend* compile_regex_fallback(const std::string& pattern) { + return new T(pattern); +} + +template +static backend::Backend* compile_regex_fallback(const std::string& pattern) { + T *regex = new T{pattern}; + if (regex->ok()) { + return regex; + } else { + delete regex; + return compile_regex_fallback(pattern); + } +} + +template +class BackendFallback : public backend::Backend { +public: + BackendFallback(const std::string& pattern) + : backend(compile_regex_fallback(pattern)) + {} + + virtual bool ok() const override { + return backend->ok(); + } + + std::list searchAll(const std::string& s) const override { + return backend->searchAll(s); + } + bool searchOneMatch(const std::string& s, std::vector& captures) const override { + return backend->searchOneMatch(s, captures); + } + int search(const std::string &s, RegexMatch *m) const override { + return backend->search(s, m); + } + int search(const std::string &s) const override { + return backend->search(s); + } + + const std::string& getPattern() const override { + return backend->getPattern(); + } +private: + std::unique_ptr backend; +}; + +} // namespace regex +} // 
namespace modsecurity + +#endif // SRC_REGEX_BACKEND_FALLBACK_H_ diff --git a/src/regex/regex.h b/src/regex/regex.h index f08a554d19..695a372f0c 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -21,9 +21,11 @@ #include #include +#include "src/regex/backend/backend.h" #include "src/regex/backend/pcre.h" #include "src/regex/backend/re2.h" #include "src/regex/regex_match.h" +#include "src/regex/backend_fallback.h" #ifndef SRC_REGEX_REGEX_H_ #define SRC_REGEX_REGEX_H_ @@ -33,11 +35,15 @@ namespace modsecurity { namespace regex { #ifdef WITH_PCRE -using selectedBackend = backend::Pcre; -#elif WITH_RE2 -using selectedBackend = backend::Re2; +# ifdef WITH_RE2 + using selectedBackend = BackendFallback< + backend::Re2, backend::Pcre + >; +# else + using selectedBackend = backend::Pcre; +# endif #else -#error "no regex backend selected" +# error "PCRE is not available" #endif class Regex : public selectedBackend { diff --git a/src/variables/variable.h b/src/variables/variable.h index 2257a303f8..8c3fd71ef9 100644 --- a/src/variables/variable.h +++ b/src/variables/variable.h @@ -115,9 +115,9 @@ class KeyExclusion { // FIXME: use pre built regex. class KeyExclusionRegex : public KeyExclusion { public: - explicit KeyExclusionRegex(const regex::Regex &re) - : m_re(re.pattern) { } - explicit KeyExclusionRegex(const std::string &re) + explicit KeyExclusionRegex(regex::Regex re) + : m_re(re.getPattern()) { } + explicit KeyExclusionRegex(std::string re) : m_re(re) { } ~KeyExclusionRegex() override { } From 5437b44e3b9b47263ad05b7d3576265449474194 Mon Sep 17 00:00:00 2001 From: WGH Date: Fri, 25 Jan 2019 17:30:14 +0300 Subject: [PATCH 10/12] Enable RE2 testing with Travis Ubuntu 14.04 doesn't have RE2 package altogether, and Ubuntu 16.04 RE2 package is too old. Ubuntu 18.04 RE2 package might work, but this Ubuntu version is not supported by Travis yet. So build RE2 from source.
--- .travis.yml | 2 ++ .travis/install_re2.sh | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100755 .travis/install_re2.sh diff --git a/.travis.yml b/.travis.yml index 9e78797c12..add0899724 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,7 @@ env: - OPTS="--without-ssdeep $OPTS" - OPTS="--without-lua $OPTS" - OPTS="--without-maxmind $OPTS" + - OPTS="--without-re2 $OPTS" before_script: - echo $TRAVIS_OS_NAME @@ -41,6 +42,7 @@ before_script: - '[ "$TRAVIS_OS_NAME" != linux ] || sudo apt-get update' - '[ "$TRAVIS_OS_NAME" != linux ] || sudo apt-cache search maxmind' - '[ "$TRAVIS_OS_NAME" != linux ] || sudo apt-get install -y libmaxminddb-dev' + - '[ "$OPTS" == "*--without-re2*" ] || ./.travis/install_re2.sh' script: - ./build.sh diff --git a/.travis/install_re2.sh b/.travis/install_re2.sh new file mode 100755 index 0000000000..90d0a9e9e4 --- /dev/null +++ b/.travis/install_re2.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +# Ubuntu 14.04 doesn't have RE2 package altogether, +# and Ubuntu 16.04 RE2 package is too old. +# +# Ubuntu 18.04 RE2 package might work, but this Ubuntu version is not +# supported by Travis yet.
+ +RELEASE=2019-01-01 + +set -ex + +cd /tmp +wget https://github.com/google/re2/archive/$RELEASE.tar.gz -O re2.tar.gz +tar -xf re2.tar.gz +(cd re2-$RELEASE && make && sudo make install) From 95f16ea86d12bd9016203772407131df9f9516e6 Mon Sep 17 00:00:00 2001 From: WGH Date: Mon, 4 Feb 2019 21:04:36 +0300 Subject: [PATCH 11/12] Add proper error handling to @rx operator --- src/operators/rx.cc | 13 +++++ src/regex/backend/backend.h | 2 +- src/regex/backend/pcre.cc | 10 ++++ src/regex/backend/pcre.h | 13 ++++- src/regex/backend/re2.h | 10 +++- src/regex/backend_fallback.h | 4 +- test/test-cases/regression/operator-rx.json | 57 +++++++++++++++++++++ 7 files changed, 102 insertions(+), 7 deletions(-) diff --git a/src/operators/rx.cc b/src/operators/rx.cc index 33f31b5f4f..16eb620524 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -29,7 +29,12 @@ namespace operators { bool Rx::init(const std::string &arg, std::string *error) { if (m_string->m_containsMacro == false) { + std::string regex_error; m_re = new Regex(m_param); + if (!m_re->ok(&regex_error)) { + *error = "Failed to compile regular expression " + m_re->getPattern() + ": " + regex_error; + return false; + } } return true; @@ -47,6 +52,14 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, if (m_string->m_containsMacro) { std::string eparam(m_string->evaluate(transaction)); re = new Regex(eparam); + std::string regex_error; + if (!re->ok(&regex_error)) { + ms_dbg_a(transaction, 2, + "Failed to compile regular expression with macro " + + re->getPattern() + ": " + regex_error); + delete re; + return false; + } } else { re = m_re; } diff --git a/src/regex/backend/backend.h b/src/regex/backend/backend.h index aa7ef63082..4cb2b4fce9 100644 --- a/src/regex/backend/backend.h +++ b/src/regex/backend/backend.h @@ -29,7 +29,7 @@ class Backend { public: virtual ~Backend() {} - virtual bool ok() const = 0; + virtual bool ok(std::string *error = nullptr) const = 0; virtual std::list searchAll(const
std::string& s) const = 0; virtual bool searchOneMatch(const std::string& s, std::vector& captures) const = 0; diff --git a/src/regex/backend/pcre.cc b/src/regex/backend/pcre.cc index 7c9eb54d84..355bdea731 100644 --- a/src/regex/backend/pcre.cc +++ b/src/regex/backend/pcre.cc @@ -44,8 +44,18 @@ Pcre::Pcre(const std::string& pattern_) m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE, &errptr, &erroffset, NULL); + if (m_pc == NULL) { + m_error = "pcre_compile error at offset " + std::to_string(erroffset) + ": " + std::string(errptr); + return; + } m_pce = pcre_study(m_pc, pcre_study_opt, &errptr); + if (m_pce == NULL) { + m_error = "pcre_study error: " + std::string(errptr); + pcre_free(m_pc); + m_pc = nullptr; + return; + } } diff --git a/src/regex/backend/pcre.h b/src/regex/backend/pcre.h index 539745e3c9..5f1d4fe31b 100644 --- a/src/regex/backend/pcre.h +++ b/src/regex/backend/pcre.h @@ -52,8 +52,15 @@ class Pcre : public Backend { int search(const std::string &s, RegexMatch *m) const override; int search(const std::string &s) const override; - virtual bool ok() const override { - return m_pc != NULL; + virtual bool ok(std::string *error = nullptr) const override { + if (m_pc != NULL) { + return true; + } + if (error != nullptr) { + *error= m_error; + } + + return false; } virtual const std::string& getPattern() const override { @@ -64,6 +71,8 @@ class Pcre : public Backend { pcre *m_pc = NULL; pcre_extra *m_pce = NULL; + + std::string m_error; }; #endif diff --git a/src/regex/backend/re2.h b/src/regex/backend/re2.h index 7fb36ce89b..e6b2bf631d 100644 --- a/src/regex/backend/re2.h +++ b/src/regex/backend/re2.h @@ -45,8 +45,14 @@ class Re2 : public Backend { bool searchOneMatch(const std::string& s, std::vector& captures) const override; int search(const std::string &s, RegexMatch *m) const override; int search(const std::string &s) const override; - virtual bool ok() const override { - return re.ok(); + virtual bool ok(std::string *error = nullptr) 
const override { + if (re.ok()) { + return true; + } + if (error != nullptr) { + *error = re.error(); + } + return false; } virtual const std::string& getPattern() const override { diff --git a/src/regex/backend_fallback.h b/src/regex/backend_fallback.h index 00f759dd15..751c199e2d 100644 --- a/src/regex/backend_fallback.h +++ b/src/regex/backend_fallback.h @@ -45,8 +45,8 @@ class BackendFallback : public backend::Backend { : backend(compile_regex_fallback(pattern)) {} - virtual bool ok() const override { - return backend->ok(); + virtual bool ok(std::string *error = nullptr) const override { + return backend->ok(error); } std::list searchAll(const std::string& s) const override { diff --git a/test/test-cases/regression/operator-rx.json b/test/test-cases/regression/operator-rx.json index d6b9839fe5..cebba8e68c 100644 --- a/test/test-cases/regression/operator-rx.json +++ b/test/test-cases/regression/operator-rx.json @@ -85,5 +85,62 @@ "SecRuleEngine On", "SecRule REQUEST_HEADERS:Content-Length \"!^0$\" \"id:1,phase:2,pass,t:trim,block\"" ] + }, + { + "enabled":1, + "version_min":300000, + "version_max":0, + "title":"Testing Operator :: @rx with invalid regular expression", + "expected":{ + "parser_error":"Rules error.*Failed to compile regular expression \\(\\(value1\\):" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx ((value1)\" \"id:1,phase:2,pass,t:trim\"" + ] + }, + { + "enabled":1, + "version_min":300000, + "title":"Testing Operator :: @rx with invalid regular expression after macro expansion", + "client":{ + "ip":"200.249.12.31", + "port":123 + }, + "server":{ + "ip":"200.249.12.31", + "port":80 + }, + "request":{ + "headers":{ + "Host":"localhost", + "User-Agent":"curl/7.38.0", + "Accept":"*/*", + "Content-Length": "27", + "Content-Type": "application/x-www-form-urlencoded" + }, + "uri":"/", + "method":"POST", + "body": [ + "param1=value1&param2=value2" + ] + }, + "response":{ + "headers":{ + "Date":"Mon, 13 Jul 2015 20:02:41 GMT", +
"Last-Modified":"Sun, 26 Oct 2014 22:33:37 GMT", + "Content-Type":"text/html" + }, + "body":[ + "no need." + ] + }, + "expected":{ + "debug_log":"Failed to compile regular expression with macro \\(\\(\\):" + }, + "rules":[ + "SecRuleEngine On", + "SecRule ARGS \"@rx ((%{TX.DOESNT_NEED_TO_EXIST_IT_WILL_BE_AN_EMPTY_STRING})\" \"id:1,phase:2,pass,t:trim\"" + ] } ] From af3fde8c4e808662e9ac1759d543c3351ef5775c Mon Sep 17 00:00:00 2001 From: WGH Date: Thu, 19 Nov 2020 16:41:51 +0000 Subject: [PATCH 12/12] Fix pkg-config Libs.private wrt RE2 --- modsecurity.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modsecurity.pc.in b/modsecurity.pc.in index 96cdf5ca79..3d7953dc6a 100644 --- a/modsecurity.pc.in +++ b/modsecurity.pc.in @@ -8,4 +8,4 @@ Description: ModSecurity API Version: @MSC_VERSION_WITH_PATCHLEVEL@ Cflags: -I@includedir@ Libs: -L@libdir@ -lmodsecurity -Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @SSDEEP_LDADD@ @YAJL_LDADD@ +Libs.private: @CURL_LDADD@ @GEOIP_LDADD@ @MAXMIND_LDADD@ @GLOBAL_LDADD@ @LIBXML2_LDADD@ @LMDB_LDADD@ @LUA_LDADD@ @PCRE_LDADD@ @SSDEEP_LDADD@ @YAJL_LDADD@ @RE2_LDADD@