Skip to content

Commit 5b3fe25

Browse files
committed
Improving comparison engine (removing shared prelude part to further sharpen if pages are identical - especially noticeable in small test pages)
1 parent 6f7f9dd commit 5b3fe25

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

lib/request/comparison.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from lib.core.settings import HTML_TITLE_REGEX
2323
from lib.core.settings import MIN_RATIO
2424
from lib.core.settings import MAX_RATIO
25+
from lib.core.settings import REFLECTED_VALUE_MARKER
2526
from lib.core.settings import LOWER_RATIO_BOUND
2627
from lib.core.settings import UPPER_RATIO_BOUND
2728
from lib.core.threads import getCurrentThreadData
@@ -114,16 +115,26 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
114115
seq1 = getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a
115116
seq2 = getFilteredPageContent(page, True) if conf.textOnly else page
116117

117-
if seq1 is not None:
118-
seqMatcher.set_seq1(seq1)
119-
120-
if seq2 is not None:
121-
seqMatcher.set_seq2(seq2)
122-
123118
if seq1 is None or seq2 is None:
124119
return None
125-
else:
126-
ratio = round(seqMatcher.quick_ratio(), 3)
120+
121+
seq1 = seq1.replace(REFLECTED_VALUE_MARKER, "")
122+
seq2 = seq2.replace(REFLECTED_VALUE_MARKER, "")
123+
124+
count = 0
125+
while count < min(len(seq1), len(seq2)):
126+
if seq1[count] == seq2[count]:
127+
count += 1
128+
else:
129+
break
130+
if count:
131+
seq1 = seq1[count:]
132+
seq2 = seq2[count:]
133+
134+
seqMatcher.set_seq1(seq1)
135+
seqMatcher.set_seq2(seq2)
136+
137+
ratio = round(seqMatcher.quick_ratio(), 3)
127138

128139
# If the url is stable and we did not set yet the match ratio and the
129140
# current injected value changes the url page content

0 commit comments

Comments
 (0)