Skip to content

Commit a5db3d3

Browse files
committed
io, dif: gitpython-developers#519: FIX DIFF freeze when reading from GIL
+ CAUSE: In Windows, Diffs freeze while reading Popen streams, probably buffers smaller; good-thin(TM) in this case because reading a Popen-proc from the launching-thread freezes GIL. The alternative to use `proc.communicate()` also relies on big buffers. + SOLUTION: Use `cmd.handle_process_output()` to consume Diff-proc streams. + Retroffited `handle_process_output()` code to support also byte-streams, both Threading(Windows) and Select/Poll (Posix) paths updated. - TODO: Unfortunately, `Diff._index_from_patch_format()` still slurps input; need to re-phrase header-regexes linewise to resolve it.
1 parent 4674163 commit a5db3d3

File tree

3 files changed

+105
-88
lines changed

3 files changed

+105
-88
lines changed

git/cmd.py

+74-67
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
is_win,
4545
)
4646
import io
47+
from _io import UnsupportedOperation
4748

4849
execute_kwargs = set(('istream', 'with_keep_cwd', 'with_extended_output',
4950
'with_exceptions', 'as_process', 'stdout_as_string',
@@ -56,7 +57,7 @@
5657
__all__ = ('Git',)
5758

5859
if is_win:
59-
WindowsError = OSError
60+
WindowsError = OSError # @ReservedAssignment
6061

6162
if PY3:
6263
_bchr = bchr
@@ -72,7 +73,8 @@ def _bchr(c):
7273
# Documentation
7374
## @{
7475

75-
def handle_process_output(process, stdout_handler, stderr_handler, finalizer):
76+
def handle_process_output(process, stdout_handler, stderr_handler, finalizer,
77+
decode_stdout=True, decode_stderr=True):
7678
"""Registers for notifications to lean that process output is ready to read, and dispatches lines to
7779
the respective line handlers. We are able to handle carriage returns in case progress is sent by that
7880
mean. For performance reasons, we only apply this to stderr.
@@ -82,8 +84,6 @@ def handle_process_output(process, stdout_handler, stderr_handler, finalizer):
8284
:param stdout_handler: f(stdout_line_string), or None
8385
:param stderr_hanlder: f(stderr_line_string), or None
8486
:param finalizer: f(proc) - wait for proc to finish"""
85-
fdmap = {process.stdout.fileno(): (stdout_handler, [b'']),
86-
process.stderr.fileno(): (stderr_handler, [b''])}
8787

8888
def _parse_lines_from_buffer(buf):
8989
line = b''
@@ -94,7 +94,7 @@ def _parse_lines_from_buffer(buf):
9494
bi += 1
9595

9696
if char in (b'\r', b'\n') and line:
97-
yield bi, line
97+
yield bi, line + b'\n'
9898
line = b''
9999
else:
100100
line += char
@@ -114,105 +114,111 @@ def _read_lines_from_fno(fno, last_buf_list):
114114
# keep remainder
115115
last_buf_list[0] = buf[bi:]
116116

117-
def _dispatch_single_line(line, handler):
118-
line = line.decode(defenc)
117+
def _dispatch_single_line(line, handler, decode):
118+
if decode:
119+
line = line.decode(defenc)
119120
if line and handler:
120121
handler(line)
121122
# end dispatch helper
122123
# end single line helper
123124

124-
def _dispatch_lines(fno, handler, buf_list):
125+
def _dispatch_lines(fno, handler, buf_list, decode):
125126
lc = 0
126127
for line in _read_lines_from_fno(fno, buf_list):
127-
_dispatch_single_line(line, handler)
128+
_dispatch_single_line(line, handler, decode)
128129
lc += 1
129130
# for each line
130131
return lc
131132
# end
132133

133-
def _deplete_buffer(fno, handler, buf_list, wg=None):
134+
def _deplete_buffer(fno, handler, buf_list, decode):
134135
lc = 0
135136
while True:
136-
line_count = _dispatch_lines(fno, handler, buf_list)
137+
line_count = _dispatch_lines(fno, handler, buf_list, decode)
137138
lc += line_count
138139
if line_count == 0:
139140
break
140141
# end deplete buffer
141142

142143
if buf_list[0]:
143-
_dispatch_single_line(buf_list[0], handler)
144+
_dispatch_single_line(buf_list[0], handler, decode)
144145
lc += 1
145146
# end
146147

147-
if wg:
148-
wg.done()
149-
150148
return lc
151149
# end
152150

153-
if hasattr(select, 'poll'):
154-
# poll is preferred, as select is limited to file handles up to 1024 ... . This could otherwise be
155-
# an issue for us, as it matters how many handles our own process has
156-
poll = select.poll()
157-
READ_ONLY = select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLERR
158-
CLOSED = select.POLLHUP | select.POLLERR
159-
160-
poll.register(process.stdout, READ_ONLY)
161-
poll.register(process.stderr, READ_ONLY)
162-
163-
closed_streams = set()
164-
while True:
165-
# no timeout
166-
167-
try:
168-
poll_result = poll.poll()
169-
except select.error as e:
170-
if e.args[0] == errno.EINTR:
171-
continue
172-
raise
173-
# end handle poll exception
174-
175-
for fd, result in poll_result:
176-
if result & CLOSED:
177-
closed_streams.add(fd)
178-
else:
179-
_dispatch_lines(fd, *fdmap[fd])
180-
# end handle closed stream
181-
# end for each poll-result tuple
182-
183-
if len(closed_streams) == len(fdmap):
184-
break
185-
# end its all done
186-
# end endless loop
187-
188-
# Depelete all remaining buffers
189-
for fno, (handler, buf_list) in fdmap.items():
190-
_deplete_buffer(fno, handler, buf_list)
191-
# end for each file handle
192-
193-
for fno in fdmap.keys():
194-
poll.unregister(fno)
195-
# end don't forget to unregister !
196-
else:
197-
# Oh ... probably we are on windows. select.select() can only handle sockets, we have files
151+
try:
152+
outfn = process.stdout.fileno()
153+
errfn = process.stderr.fileno()
154+
poll = select.poll() # @UndefinedVariable
155+
except (UnsupportedOperation, AttributeError):
156+
# Oh ... probably we are on windows. or TC mockap provided for streams.
157+
# Anyhow, select.select() can only handle sockets, we have files
198158
# The only reliable way to do this now is to use threads and wait for both to finish
199-
def _handle_lines(fd, handler):
159+
def _handle_lines(fd, handler, decode):
200160
for line in fd:
201-
line = line.decode(defenc)
202-
if line and handler:
161+
if handler:
162+
if decode:
163+
line = line.decode(defenc)
203164
handler(line)
204165

205166
threads = []
206-
for fd, handler in zip((process.stdout, process.stderr),
207-
(stdout_handler, stderr_handler)):
208-
t = threading.Thread(target=_handle_lines, args=(fd, handler))
167+
for fd, handler, decode in zip((process.stdout, process.stderr),
168+
(stdout_handler, stderr_handler),
169+
(decode_stdout, decode_stderr),):
170+
t = threading.Thread(target=_handle_lines, args=(fd, handler, decode))
209171
t.setDaemon(True)
210172
t.start()
211173
threads.append(t)
212174

213175
for t in threads:
214176
t.join()
215-
# end
177+
else:
178+
# poll is preferred, as select is limited to file handles up to 1024 ... . This could otherwise be
179+
# an issue for us, as it matters how many handles our own process has
180+
fdmap = {outfn: (stdout_handler, [b''], decode_stdout),
181+
errfn: (stderr_handler, [b''], decode_stderr)}
182+
183+
READ_ONLY = select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLERR # @UndefinedVariable
184+
CLOSED = select.POLLHUP | select.POLLERR # @UndefinedVariable
185+
186+
poll.register(process.stdout, READ_ONLY)
187+
poll.register(process.stderr, READ_ONLY)
188+
189+
closed_streams = set()
190+
while True:
191+
# no timeout
192+
193+
try:
194+
poll_result = poll.poll()
195+
except select.error as e:
196+
if e.args[0] == errno.EINTR:
197+
continue
198+
raise
199+
# end handle poll exception
200+
201+
for fd, result in poll_result:
202+
if result & CLOSED:
203+
closed_streams.add(fd)
204+
else:
205+
_dispatch_lines(fd, *fdmap[fd])
206+
# end handle closed stream
207+
# end for each poll-result tuple
208+
209+
if len(closed_streams) == len(fdmap):
210+
break
211+
# end its all done
212+
# end endless loop
213+
214+
# Depelete all remaining buffers
215+
for fno, (handler, buf_list, decode) in fdmap.items():
216+
_deplete_buffer(fno, handler, buf_list, decode)
217+
# end for each file handle
218+
219+
for fno in fdmap.keys():
220+
poll.unregister(fno)
221+
# end don't forget to unregister !
216222

217223
return finalizer(process)
218224

@@ -458,6 +464,7 @@ def next(self):
458464
line = self.readline()
459465
if not line:
460466
raise StopIteration
467+
461468
return line
462469

463470
def __del__(self):

git/diff.py

+21-11
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
defenc,
1616
PY3
1717
)
18+
from git.cmd import handle_process_output
19+
from git.util import finalize_process
1820

1921
__all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
2022

@@ -145,10 +147,10 @@ def diff(self, other=Index, paths=None, create_patch=False, **kwargs):
145147
kwargs['as_process'] = True
146148
proc = diff_cmd(*self._process_diff_args(args), **kwargs)
147149

148-
diff_method = Diff._index_from_raw_format
149-
if create_patch:
150-
diff_method = Diff._index_from_patch_format
151-
index = diff_method(self.repo, proc.stdout)
150+
diff_method = (Diff._index_from_patch_format
151+
if create_patch
152+
else Diff._index_from_raw_format)
153+
index = diff_method(self.repo, proc)
152154

153155
proc.wait()
154156
return index
@@ -397,13 +399,18 @@ def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
397399
return None
398400

399401
@classmethod
400-
def _index_from_patch_format(cls, repo, stream):
402+
def _index_from_patch_format(cls, repo, proc):
401403
"""Create a new DiffIndex from the given text which must be in patch format
402404
:param repo: is the repository we are operating on - it is required
403405
:param stream: result of 'git diff' as a stream (supporting file protocol)
404406
:return: git.DiffIndex """
407+
408+
## FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise.
409+
text = []
410+
handle_process_output(proc, text.append, None, finalize_process, decode_stdout=False)
411+
405412
# for now, we have to bake the stream
406-
text = stream.read()
413+
text = b''.join(text)
407414
index = DiffIndex()
408415
previous_header = None
409416
for header in cls.re_header.finditer(text):
@@ -450,17 +457,19 @@ def _index_from_patch_format(cls, repo, stream):
450457
return index
451458

452459
@classmethod
453-
def _index_from_raw_format(cls, repo, stream):
460+
def _index_from_raw_format(cls, repo, proc):
454461
"""Create a new DiffIndex from the given stream which must be in raw format.
455462
:return: git.DiffIndex"""
456463
# handles
457464
# :100644 100644 687099101... 37c5e30c8... M .gitignore
465+
458466
index = DiffIndex()
459-
for line in stream.readlines():
467+
468+
def handle_diff_line(line):
460469
line = line.decode(defenc)
461470
if not line.startswith(":"):
462-
continue
463-
# END its not a valid diff line
471+
return
472+
464473
meta, _, path = line[1:].partition('\t')
465474
old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4)
466475
path = path.strip()
@@ -489,6 +498,7 @@ def _index_from_raw_format(cls, repo, stream):
489498
diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
490499
new_file, deleted_file, rename_from, rename_to, '', change_type)
491500
index.append(diff)
492-
# END for each line
501+
502+
handle_process_output(proc, handle_diff_line, None, finalize_process, decode_stdout=False)
493503

494504
return index

git/test/test_diff.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ def test_diff_with_staged_file(self, rw_dir):
7979

8080
def test_list_from_string_new_mode(self):
8181
output = StringProcessAdapter(fixture('diff_new_mode'))
82-
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
82+
diffs = Diff._index_from_patch_format(self.rorepo, output)
8383
self._assert_diff_format(diffs)
8484

8585
assert_equal(1, len(diffs))
8686
assert_equal(8, len(diffs[0].diff.splitlines()))
8787

8888
def test_diff_with_rename(self):
8989
output = StringProcessAdapter(fixture('diff_rename'))
90-
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
90+
diffs = Diff._index_from_patch_format(self.rorepo, output)
9191
self._assert_diff_format(diffs)
9292

9393
assert_equal(1, len(diffs))
@@ -102,7 +102,7 @@ def test_diff_with_rename(self):
102102
assert isinstance(str(diff), str)
103103

104104
output = StringProcessAdapter(fixture('diff_rename_raw'))
105-
diffs = Diff._index_from_raw_format(self.rorepo, output.stdout)
105+
diffs = Diff._index_from_raw_format(self.rorepo, output)
106106
self.assertEqual(len(diffs), 1)
107107
diff = diffs[0]
108108
self.assertIsNotNone(diff.renamed_file)
@@ -113,7 +113,7 @@ def test_diff_with_rename(self):
113113

114114
def test_diff_of_modified_files_not_added_to_the_index(self):
115115
output = StringProcessAdapter(fixture('diff_abbrev-40_full-index_M_raw_no-color'))
116-
diffs = Diff._index_from_raw_format(self.rorepo, output.stdout)
116+
diffs = Diff._index_from_raw_format(self.rorepo, output)
117117

118118
self.assertEqual(len(diffs), 1, 'one modification')
119119
self.assertEqual(len(list(diffs.iter_change_type('M'))), 1, 'one modification')
@@ -126,7 +126,7 @@ def test_diff_of_modified_files_not_added_to_the_index(self):
126126
)
127127
def test_binary_diff(self, case):
128128
method, file_name = case
129-
res = method(None, StringProcessAdapter(fixture(file_name)).stdout)
129+
res = method(None, StringProcessAdapter(fixture(file_name)))
130130
self.assertEqual(len(res), 1)
131131
self.assertEqual(len(list(res.iter_change_type('M'))), 1)
132132
if res[0].diff:
@@ -137,7 +137,7 @@ def test_binary_diff(self, case):
137137

138138
def test_diff_index(self):
139139
output = StringProcessAdapter(fixture('diff_index_patch'))
140-
res = Diff._index_from_patch_format(None, output.stdout)
140+
res = Diff._index_from_patch_format(None, output)
141141
self.assertEqual(len(res), 6)
142142
for dr in res:
143143
self.assertTrue(dr.diff.startswith(b'@@'), dr)
@@ -149,7 +149,7 @@ def test_diff_index(self):
149149

150150
def test_diff_index_raw_format(self):
151151
output = StringProcessAdapter(fixture('diff_index_raw'))
152-
res = Diff._index_from_raw_format(None, output.stdout)
152+
res = Diff._index_from_raw_format(None, output)
153153
self.assertIsNotNone(res[0].deleted_file)
154154
self.assertIsNone(res[0].b_path,)
155155

@@ -171,7 +171,7 @@ def test_diff_initial_commit(self):
171171

172172
def test_diff_unsafe_paths(self):
173173
output = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
174-
res = Diff._index_from_patch_format(None, output.stdout)
174+
res = Diff._index_from_patch_format(None, output)
175175

176176
# The "Additions"
177177
self.assertEqual(res[0].b_path, u'path/ starting with a space')
@@ -207,12 +207,12 @@ def test_diff_patch_format(self):
207207

208208
for fixture_name in fixtures:
209209
diff_proc = StringProcessAdapter(fixture(fixture_name))
210-
Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
210+
Diff._index_from_patch_format(self.rorepo, diff_proc)
211211
# END for each fixture
212212

213213
def test_diff_with_spaces(self):
214214
data = StringProcessAdapter(fixture('diff_file_with_spaces'))
215-
diff_index = Diff._index_from_patch_format(self.rorepo, data.stdout)
215+
diff_index = Diff._index_from_patch_format(self.rorepo, data)
216216
self.assertIsNone(diff_index[0].a_path, repr(diff_index[0].a_path))
217217
self.assertEqual(diff_index[0].b_path, u'file with spaces', repr(diff_index[0].b_path))
218218

0 commit comments

Comments
 (0)