Skip to content

Refactor regex code #2003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ v3.0.4 - YYYY-MMM-DD (to be released)
[Issue #1943 - @victorhora, @allanbomsft]
- Fix buffer size for utf8toUnicode transformation
[Issue #1208 - @katef, @victorhora]
- Refactoring on Regex and SMatch classes.
[@WGH-]


v3.0.3 - 2018-Nov-05
Expand Down
2 changes: 1 addition & 1 deletion src/collection/backend/in_memory-per_process.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var,
//std::string name = std::string(var, var.find(":") + 2,
// var.size() - var.find(":") - 3);
//size_t keySize = col.size();
Utils::Regex r = Utils::Regex(var);
Utils::Regex r(var);

for (const auto& x : *this) {
//if (x.first.size() <= keySize + 1) {
Expand Down
14 changes: 7 additions & 7 deletions src/modsecurity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
std::string value;
yajl_gen_map_open(g);
vars.pop_back();
std::string startingAt = vars.back().match;
const std::string &startingAt = vars.back().str();
vars.pop_back();
std::string size = vars.back().match;
const std::string &size = vars.back().str();
vars.pop_back();
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
Expand Down Expand Up @@ -311,11 +311,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("transformation"));

yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(trans.back().match.c_str()),
trans.back().match.size());
reinterpret_cast<const unsigned char*>(trans.back().str().c_str()),
trans.back().str().size());

t = modsecurity::actions::transformations::Transformation::instantiate(
trans.back().match.c_str());
trans.back().str().c_str());
varValueRes = t->evaluate(varValue, NULL);
varValue.assign(varValueRes);
trans.pop_back();
Expand Down Expand Up @@ -343,9 +343,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("highlight"));
yajl_gen_map_open(g);
ops.pop_back();
std::string startingAt = ops.back().match;
std::string startingAt = ops.back().str();
ops.pop_back();
std::string size = ops.back().match;
std::string size = ops.back().str();
ops.pop_back();
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
Expand Down
9 changes: 4 additions & 5 deletions src/operators/rx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) {

bool Rx::evaluate(Transaction *transaction, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
SMatch match;
std::list<SMatch> matches;
Regex *re;

Expand All @@ -59,16 +58,16 @@ bool Rx::evaluate(Transaction *transaction, Rule *rule,
matches.reverse();
for (const SMatch& a : matches) {
transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
std::to_string(i), a.match);
std::to_string(i), a.str());
ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
std::to_string(i) + ": " + a.match);
transaction->m_matched.push_back(a.match);
std::to_string(i) + ": " + a.str());
transaction->m_matched.push_back(a.str());
i++;
}
}

for (const auto & i : matches) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
}

if (m_string->m_containsMacro) {
Expand Down
8 changes: 4 additions & 4 deletions src/operators/verify_cpf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,14 @@ bool VerifyCPF::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_cpf == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_cpf = verify(i.match.c_str(), i.match.size());
is_cpf = verify(i.str().c_str(), i.str().size());
if (is_cpf) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match));
"0", i.str());
ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \
std::string(i.match));
i.str());
}

goto out;
Expand Down
8 changes: 4 additions & 4 deletions src/operators/verify_ssn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ bool VerifySSN::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_ssn == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_ssn = verify(i.match.c_str(), i.match.size());
is_ssn = verify(i.str().c_str(), i.str().size());
if (is_ssn) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match));
"0", i.str());
ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \
std::string(i.match));
i.str());
}

goto out;
Expand Down
33 changes: 13 additions & 20 deletions src/utils/regex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,11 @@ namespace Utils {


Regex::Regex(const std::string& pattern_)
: pattern(pattern_),
m_ovector {0} {
: pattern(pattern_.empty() ? ".*" : pattern_)
{
const char *errptr = NULL;
int erroffset;

if (pattern.empty() == true) {
pattern.assign(".*");
}

m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE,
&errptr, &erroffset, NULL);

Expand All @@ -71,7 +67,7 @@ Regex::~Regex() {
}


std::list<SMatch> Regex::searchAll(const std::string& s) {
std::list<SMatch> Regex::searchAll(const std::string& s) const {
const char *subject = s.c_str();
const std::string tmpString = std::string(s.c_str(), s.size());
int ovector[OVECCOUNT];
Expand All @@ -83,19 +79,16 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
s.size(), offset, 0, ovector, OVECCOUNT);

for (i = 0; i < rc; i++) {
SMatch match;
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
if (end > s.size()) {
rc = 0;
break;
}
match.match = std::string(tmpString, start, len);
match.m_offset = start;
match.m_length = len;
std::string match = std::string(tmpString, start, len);
offset = start + len;
retList.push_front(match);
retList.push_front(SMatch(match, start));

if (len == 0) {
rc = 0;
Expand All @@ -107,24 +100,24 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
return retList;
}

int regex_search(const std::string& s, SMatch *match,
const Regex& regex) {
int Regex::search(const std::string& s, SMatch *match) const {
int ovector[OVECCOUNT];
int ret = pcre_exec(regex.m_pc, regex.m_pce, s.c_str(),
int ret = pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;

if (ret > 0) {
match->match = std::string(s, ovector[ret-1],
ovector[ret] - ovector[ret-1]);
match->size_ = ret;
*match = SMatch(
std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
0
);
}

return ret;
}

int regex_search(const std::string& s, const Regex& regex) {
int Regex::search(const std::string& s) const {
int ovector[OVECCOUNT];
return pcre_exec(regex.m_pc, regex.m_pce, s.c_str(),
return pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
}

Expand Down
53 changes: 31 additions & 22 deletions src/utils/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,39 +31,48 @@ namespace Utils {

class SMatch {
public:
SMatch() : size_(0),
m_offset(0),
m_length(0),
match("") { }
size_t size() const { return size_; }
std::string str() const { return match; }

int size_;
int m_offset;
int m_length;
std::string match;
};
SMatch()
: m_match(), m_offset(0)
{}

SMatch(const std::string &match, size_t offset)
: m_match(match), m_offset(offset)
{}

const std::string& str() const { return m_match; }
size_t offset() const { return m_offset; }

private:
std::string m_match;
size_t m_offset;
};

class Regex {
public:
explicit Regex(const std::string& pattern_);
~Regex();
std::string pattern;
pcre *m_pc = NULL;
pcre_extra *m_pce = NULL;
int m_ovector[OVECCOUNT];

std::list<SMatch> searchAll(const std::string& s);
};

// m_pc and m_pce can't be easily copied
Regex(const Regex&) = delete;
Regex& operator=(const Regex&) = delete;

int regex_search(const std::string& s, SMatch *m,
const Regex& regex);
std::list<SMatch> searchAll(const std::string& s) const;
int search(const std::string &s, SMatch *m) const;
int search(const std::string &s) const;

int regex_search(const std::string& s, const Regex& r);
const std::string pattern;
private:
pcre *m_pc = NULL;
pcre_extra *m_pce = NULL;
};

static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) {
return regex.search(s, match);
}

static inline int regex_search(const std::string& s, const Regex& regex) {
return regex.search(s);
}

} // namespace Utils
} // namespace modsecurity
Expand Down
2 changes: 1 addition & 1 deletion test/regression/regression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ void perform_unit_test(ModSecurityTest<RegressionTest> *test,
SMatch match;
std::string s = modsec_rules->getParserError();

if (regex_search(s, &match, re) && match.size() >= 1) {
if (regex_search(s, &match, re)) {
if (test->m_automake_output) {
std::cout << ":test-result: PASS " << filename \
<< ":" << t->name << std::endl;
Expand Down
6 changes: 2 additions & 4 deletions test/unit/unit_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,15 @@ void json2bin(std::string *str) {
modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})");
modsecurity::Utils::SMatch match;

while (modsecurity::Utils::regex_search(*str, &match, re)
&& match.size() > 0) {
while (modsecurity::Utils::regex_search(*str, &match, re)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);
sscanf(toBeReplaced.c_str(), "%x", &p);
replaceAll(str, match.str(), p);
}

while (modsecurity::Utils::regex_search(*str, &match, re2)
&& match.size() > 0) {
while (modsecurity::Utils::regex_search(*str, &match, re2)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);
Expand Down