Skip to content

Commit c57b5af

Browse files
committed
feat: Modernize LCS Internals
Simplified string enumerable sequence handling. Signed-off-by: Austin Ziegler <austin@zieglers.ca>
1 parent d4b760c commit c57b5af

1 file changed

Lines changed: 91 additions & 95 deletions

File tree

lib/diff/lcs/internals.rb

Lines changed: 91 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
11
# frozen_string_literal: true
22

33
class << Diff::LCS
4+
# Resolves the callbacks argument to the object that will receive traversal events:
# the result of `callbacks.new` when that call succeeds, otherwise `callbacks` itself
# (any StandardError raised by `new`, such as NoMethodError, selects the fallback).
def callbacks_for(callbacks) # :nodoc:
  begin
    callbacks.new
  rescue StandardError
    callbacks
  end
end
9+
private :callbacks_for
10+
411
def diff_traversal(method, seq1, seq2, callbacks, &block)
512
callbacks = callbacks_for(callbacks)
13+
614
case method
715
when :diff
816
traverse_sequences(seq1, seq2, callbacks)
917
when :sdiff
1018
traverse_balanced(seq1, seq2, callbacks)
1119
end
20+
1221
callbacks.finish if callbacks.respond_to? :finish
1322

1423
if block
1524
callbacks.diffs.map do |hunk|
1625
if hunk.is_a? Array
17-
hunk.map { |hunk_block| block[hunk_block] }
26+
hunk.map { block.call(_1) }
1827
else
19-
block[hunk]
28+
block.call(hunk)
2029
end
2130
end
2231
else
@@ -27,18 +36,16 @@ def diff_traversal(method, seq1, seq2, callbacks, &block)
2736
end
2837

2938
module Diff::LCS::Internals # :nodoc:
30-
end
31-
32-
class << Diff::LCS::Internals
33-
# Compute the longest common subsequence between the sequenced
34-
# Enumerables +a+ and +b+. The result is an array whose contents is such
35-
# that
39+
# Compute the longest common subsequence between the sequenced enumerable values `a` and
40+
# `b`. The result is an array whose contents are such that
3641
#
37-
# result = Diff::LCS::Internals.lcs(a, b)
38-
# result.each_with_index do |e, i|
39-
# assert_equal(a[i], b[e]) unless e.nil?
40-
# end
41-
def lcs(a, b)
42+
# ```ruby
43+
# result = Diff::LCS::Internals.lcs(a, b)
44+
# result.each_with_index do |e, i|
45+
# assert_equal(a[i], b[e]) unless e.nil?
46+
# end
47+
# ```
48+
def self.lcs(a, b)
4249
a_start = b_start = 0
4350
a_finish = a.size - 1
4451
b_finish = b.size - 1
@@ -58,21 +65,20 @@ def lcs(a, b)
5865
b_finish -= 1
5966
end
6067

61-
# Now, compute the equivalence classes of positions of elements.
62-
# An explanation for how this works: https://codeforces.com/topic/92191
68+
# Now, compute the equivalence classes of positions of elements. An explanation for
69+
# how this works: https://codeforces.com/topic/92191
6370
b_matches = position_hash(b, b_start..b_finish)
6471

6572
thresh = []
6673
links = []
67-
string = a.is_a?(String)
6874

6975
(a_start..a_finish).each do |i|
70-
ai = string ? a[i, 1] : a[i]
76+
ai = a[i]
7177
bm = b_matches[ai]
7278
k = nil
7379
bm.reverse_each do |j|
74-
# Although the threshold check is not mandatory for this to work,
75-
# it may have an optimization purpose
80+
# Although the threshold check is not mandatory for this to work, it may have an
81+
# optimization purpose.
7682
# An attempt to remove it: https://github.com/halostatue/diff-lcs/pull/72
7783
# Why it is reintroduced: https://github.com/halostatue/diff-lcs/issues/78
7884
if k && (thresh[k] > j) && (thresh[k - 1] < j)
@@ -95,11 +101,10 @@ def lcs(a, b)
95101
vector
96102
end
97103

98-
# This method will analyze the provided patchset to provide a single-pass
99-
# normalization (conversion of the array form of Diff::LCS::Change objects to
100-
# the object form of same) and detection of whether the patchset represents
101-
# changes to be made.
102-
def analyze_patchset(patchset, depth = 0)
104+
# This method will analyze the provided patchset to provide a single-pass normalization
105+
# (conversion of the array form of Diff::LCS::Change objects to the object form of same)
106+
# and detection of whether the patchset represents changes to be made.
107+
def self.analyze_patchset(patchset, depth = 0)
103108
fail "Patchset too complex" if depth > 1
104109

105110
has_changes = false
@@ -137,24 +142,22 @@ def analyze_patchset(patchset, depth = 0)
137142
[has_changes, new_patchset]
138143
end
139144

140-
# Examine the patchset and the source to see in which direction the
141-
# patch should be applied.
145+
# Examine the patchset and the source to see in which direction the patch should be
146+
# applied.
142147
#
143-
# WARNING: By default, this examines the whole patch, so this could take
144-
# some time. This also works better with Diff::LCS::ContextChange or
145-
# Diff::LCS::Change as its source, as an array will cause the creation
146-
# of one of the above.
147-
def intuit_diff_direction(src, patchset, limit = nil)
148-
string = src.is_a?(String)
148+
# WARNING: By default, this examines the whole patch, so this could take some time. This
149+
# also works better with Diff::LCS::ContextChange or Diff::LCS::Change as its source, as
150+
# an array will cause the creation of one of the above.
151+
def self.intuit_diff_direction(src, patchset, limit = nil)
149152
count = left_match = left_miss = right_match = right_miss = 0
150153

151154
patchset.each do |change|
152155
count += 1
153156

154157
case change
155158
when Diff::LCS::ContextChange
156-
le = string ? src[change.old_position, 1] : src[change.old_position]
157-
re = string ? src[change.new_position, 1] : src[change.new_position]
159+
le = src[change.old_position]
160+
re = src[change.new_position]
158161

159162
case change.action
160163
when "-" # Remove details from the old string
@@ -183,10 +186,10 @@ def intuit_diff_direction(src, patchset, limit = nil)
183186
end
184187
end
185188
when Diff::LCS::Change
186-
# With a simplistic change, we can't tell the difference between
187-
# the left and right on '!' actions, so we ignore those. On '='
188-
# actions, if there's a miss, we miss both left and right.
189-
element = string ? src[change.position, 1] : src[change.position]
189+
# With a simplistic change, we can't tell the difference between the left and
190+
# right on '!' actions, so we ignore those. On '=' actions, if there's a miss, we
191+
# miss both left and right.
192+
element = src[change.position]
190193

191194
case change.action
192195
when "-"
@@ -235,74 +238,67 @@ def intuit_diff_direction(src, patchset, limit = nil)
235238
:patch
236239
end
237240
else
238-
fail "The provided patchset does not appear to apply to the provided \
239-
enumerable as either source or destination value."
241+
fail "The provided patchset does not appear to apply to the provided enumerable as either source or destination value."
240242
end
241243
end
242244
end
243245

244-
# Find the place at which +value+ would normally be inserted into the
245-
# Enumerable. If that place is already occupied by +value+, do nothing
246-
# and return +nil+. If the place does not exist (i.e., it is off the end
247-
# of the Enumerable), add it to the end. Otherwise, replace the element
248-
# at that point with +value+. It is assumed that the Enumerable's values
249-
# are numeric.
250-
#
251-
# This operation preserves the sort order.
252-
def replace_next_larger(enum, value, last_index = nil)
253-
# Off the end?
254-
if enum.empty? || (value > enum[-1])
255-
enum << value
256-
return enum.size - 1
257-
end
246+
class << self
247+
# Find the place at which `value` would normally be inserted into the Enumerable. If
248+
# that place is already occupied by `value`, do nothing and return `nil`. If the place
249+
# does not exist (i.e., it is off the end of the Enumerable), add it to the end.
250+
# Otherwise, replace the element at that point with `value`. It is assumed that the
251+
# Enumerable's values are numeric.
252+
#
253+
# This operation preserves the sort order.
254+
def replace_next_larger(enum, value, last_index = nil)
255+
# Off the end?
256+
if enum.empty? || (value > enum[-1])
257+
enum << value
258+
return enum.size - 1
259+
end
258260

259-
# Binary search for the insertion point
260-
last_index ||= enum.size - 1
261-
first_index = 0
262-
while first_index <= last_index
263-
i = (first_index + last_index) >> 1
261+
# Binary search for the insertion point
262+
last_index ||= enum.size - 1
263+
first_index = 0
264+
while first_index <= last_index
265+
i = (first_index + last_index) >> 1
264266

265-
found = enum[i]
267+
found = enum[i]
266268

267-
return nil if value == found
269+
return nil if value == found
268270

269-
if value > found
270-
first_index = i + 1
271-
else
272-
last_index = i - 1
271+
if value > found
272+
first_index = i + 1
273+
else
274+
last_index = i - 1
275+
end
273276
end
274-
end
275277

276-
# The insertion point is in first_index; overwrite the next larger
277-
# value.
278-
enum[first_index] = value
279-
first_index
280-
end
281-
private :replace_next_larger
282-
283-
# If +vector+ maps the matching elements of another collection onto this
284-
# Enumerable, compute the inverse of +vector+ that maps this Enumerable
285-
# onto the collection. (Currently unused.)
286-
def inverse_vector(a, vector)
287-
inverse = a.dup
288-
(0...vector.size).each do |i|
289-
inverse[vector[i]] = i unless vector[i].nil?
278+
# The insertion point is in first_index; overwrite the next larger value.
279+
enum[first_index] = value
280+
first_index
290281
end
291-
inverse
292-
end
293-
private :inverse_vector
294-
295-
# Returns a hash mapping each element of an Enumerable to the set of
296-
# positions it occupies in the Enumerable, optionally restricted to the
297-
# elements specified in the range of indexes specified by +interval+.
298-
def position_hash(enum, interval)
299-
string = enum.is_a?(String)
300-
hash = Hash.new { |h, k| h[k] = [] }
301-
interval.each do |i|
302-
k = string ? enum[i, 1] : enum[i]
303-
hash[k] << i
282+
private :replace_next_larger
283+
284+
# If `vector` maps the matching elements of another collection onto this Enumerable,
# compute the inverse of `vector` that maps this Enumerable onto the collection.
# (Currently unused.)
#
# The result starts as a shallow copy of `a` (so `a` itself is not modified). For every
# index whose entry in `vector` is not `nil`, the slot named by that entry is set to
# the index. The modified copy is returned.
def inverse_vector(a, vector)
  inverse = a.dup
  # A named index is used on purpose: the numbered-parameter form assigned an undefined
  # local `i`, which raised NameError as soon as a non-nil entry was encountered.
  vector.each_with_index do |position, index|
    inverse[position] = index unless position.nil?
  end
  inverse
end
294+
private :inverse_vector
295+
296+
# Builds a lookup table from element to the list of indexes at which that element
# appears in `enum`, visiting only the indexes yielded by `interval` (in the order
# `interval` yields them). Keys that are looked up but absent are created with an
# empty list by the hash's default block.
def position_hash(enum, interval)
  positions = Hash.new { |table, key| table[key] = [] }
  interval.each do |index|
    positions[enum[index]] << index
  end
  positions
end
305-
hash
302+
private :position_hash
306303
end
307-
private :position_hash
308304
end

0 commit comments

Comments
 (0)