Skip to content

Commit 3fd20f1

Browse files
authored
Merge pull request #390 from reef-technologies/circular-links
Fixing circular symlinks (issue 513)
2 parents df4a1db + 09a8a3d commit 3fd20f1

File tree

8 files changed

+593
-46
lines changed

8 files changed

+593
-46
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88

9+
### Fixed
10+
* Circular symlinks no longer cause infinite loops when syncing a folder
11+
912
## [1.21.0] - 2023-04-17
1013

1114
### Added

b2sdk/scan/folder.py

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010

1111
import logging
1212
import os
13+
from pathlib import Path
1314
import platform
1415
import re
1516
import sys
1617

1718
from abc import ABCMeta, abstractmethod
18-
from typing import Iterator, Optional
19+
from typing import Iterator, Optional, Set
1920

2021
from ..utils import fix_windows_path_limit, get_file_mtime, is_file_readable
2122
from .exception import EmptyDirectory, EnvironmentEncodingError, NotADirectory, UnableToCreateDirectory, UnsupportedFilename
@@ -132,7 +133,8 @@ def all_files(self, reporter: Optional[ProgressReport],
132133
:param reporter: a place to report errors
133134
:param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
134135
"""
135-
yield from self._walk_relative_paths(self.root, '', reporter, policies_manager)
136+
root_path = Path(self.root)
137+
yield from self._walk_relative_paths(root_path, Path(''), reporter, policies_manager)
136138

137139
def make_full_path(self, file_name):
138140
"""
@@ -178,17 +180,23 @@ def ensure_non_empty(self):
178180
raise EmptyDirectory(self.root)
179181

180182
def _walk_relative_paths(
181-
self, local_dir: str, relative_dir_path: str, reporter,
182-
policies_manager: ScanPoliciesManager
183+
self,
184+
local_dir: Path,
185+
relative_dir_path: Path,
186+
reporter: ProgressReport,
187+
policies_manager: ScanPoliciesManager,
188+
visited_symlinks: Optional[Set[int]] = None,
183189
):
184190
"""
185191
Yield a File object for each of the files anywhere under this folder, in the
186192
order they would appear in B2, unless the path is excluded by policies manager.
187193
188-
:param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
194+
:param local_dir: the path to the local directory that we are currently inspecting
195+
:param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
196+
:param reporter: a reporter object to report errors and warnings
197+
:param policies_manager: a policies manager object
198+
:param visited_symlinks: a set of paths to symlinks that have already been visited. Using inode numbers to reduce memory usage
189199
"""
190-
if not isinstance(local_dir, str):
191-
raise ValueError('folder path should be unicode: %s' % repr(local_dir))
192200

193201
# Collect the names. We do this before returning any results, because
194202
# directories need to sort as if their names end in '/'.
@@ -204,39 +212,59 @@ def _walk_relative_paths(
204212
#
205213
# This is because in Unicode '.' comes before '/', which comes before '0'.
206214
names = [] # list of (name, local_path, relative_file_path)
207-
for name in os.listdir(local_dir):
208-
# We expect listdir() to return unicode if dir_path is unicode.
209-
# If the file name is not valid, based on the file system
210-
# encoding, then listdir() will return un-decoded str/bytes.
211-
if not isinstance(name, str):
212-
name = self._handle_non_unicode_file_name(name)
215+
216+
visited_symlinks = visited_symlinks or set()
217+
218+
if local_dir.is_symlink():
219+
real_path = local_dir.resolve()
220+
inode_number = real_path.stat().st_ino
221+
222+
visited_symlinks_count = len(visited_symlinks)
223+
224+
# Add symlink to visited_symlinks to prevent infinite symlink loops
225+
visited_symlinks.add(inode_number)
226+
227+
# Check if set size has changed, if not, symlink has already been visited
228+
if len(visited_symlinks) == visited_symlinks_count:
229+
# Infinite symlink loop detected, report warning and skip symlink
230+
if reporter is not None:
231+
reporter.circular_symlink_skipped(str(local_dir))
232+
return
233+
234+
visited_symlinks.add(inode_number)
235+
236+
for name in (x.name for x in local_dir.iterdir()):
213237

214238
if '/' in name:
215239
raise UnsupportedFilename(
216240
"scan does not support file names that include '/'",
217241
"%s in dir %s" % (name, local_dir)
218242
)
219243

220-
local_path = os.path.join(local_dir, name)
244+
local_path = local_dir / name
221245
relative_file_path = join_b2_path(
222-
relative_dir_path, name
246+
str(relative_dir_path), name
223247
) # file path relative to the scan point
224248

225249
# Skip broken symlinks or other inaccessible files
226-
if not is_file_readable(local_path, reporter):
250+
if not is_file_readable(str(local_path), reporter):
227251
continue
228252

229-
if policies_manager.exclude_all_symlinks and os.path.islink(local_path):
253+
if policies_manager.exclude_all_symlinks and local_path.is_symlink():
230254
if reporter is not None:
231-
reporter.symlink_skipped(local_path)
255+
reporter.symlink_skipped(str(local_path))
232256
continue
233257

234-
if os.path.isdir(local_path):
258+
if local_path.is_dir():
235259
name += '/'
236-
if policies_manager.should_exclude_local_directory(relative_file_path):
260+
if policies_manager.should_exclude_local_directory(str(relative_file_path)):
237261
continue
238262

239-
names.append((name, local_path, relative_file_path))
263+
# remove the leading './' from the relative path to ensure backward compatibility
264+
relative_file_path_str = str(relative_file_path)
265+
if relative_file_path_str.startswith("./"):
266+
relative_file_path_str = relative_file_path_str[2:]
267+
names.append((name, local_path, relative_file_path_str))
240268

241269
# Yield all of the answers.
242270
#
@@ -245,19 +273,23 @@ def _walk_relative_paths(
245273
for (name, local_path, relative_file_path) in sorted(names):
246274
if name.endswith('/'):
247275
for subdir_file in self._walk_relative_paths(
248-
local_path, relative_file_path, reporter, policies_manager
276+
local_path,
277+
relative_file_path,
278+
reporter,
279+
policies_manager,
280+
visited_symlinks,
249281
):
250282
yield subdir_file
251283
else:
252284
# Check that the file still exists and is accessible, since it can take a long time
253285
# to iterate through large folders
254-
if is_file_readable(local_path, reporter):
255-
file_mod_time = get_file_mtime(local_path)
256-
file_size = os.path.getsize(local_path)
286+
if is_file_readable(str(local_path), reporter):
287+
file_mod_time = get_file_mtime(str(local_path))
288+
file_size = local_path.stat().st_size
257289

258290
local_scan_path = LocalPath(
259-
absolute_path=self.make_full_path(relative_file_path),
260-
relative_path=relative_file_path,
291+
absolute_path=self.make_full_path(str(relative_file_path)),
292+
relative_path=str(relative_file_path),
261293
mod_time=file_mod_time,
262294
size=file_size,
263295
)

b2sdk/scan/folder_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,4 @@ def _parse_bucket_and_folder(bucket_and_path, api, b2_folder_class):
5151
(bucket_name, folder_name) = bucket_and_path.split('/', 1)
5252
if folder_name.endswith('/'):
5353
folder_name = folder_name[:-1]
54-
return b2_folder_class(bucket_name, folder_name, api)
54+
return b2_folder_class(bucket_name, folder_name, api)

b2sdk/scan/path.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,4 @@ def __eq__(self, other):
8989
self.relative_path == other.relative_path and
9090
self.selected_version == other.selected_version and
9191
self.all_versions == other.all_versions
92-
)
92+
)

b2sdk/scan/report.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,29 +66,27 @@ def __enter__(self):
6666
def __exit__(self, exc_type, exc_val, exc_tb):
6767
self.close()
6868

69-
def error(self, message):
69+
def error(self, message: str) -> None:
7070
"""
7171
Print an error, gracefully interleaving it with a progress bar.
7272
7373
:param message: an error message
74-
:type message: str
7574
"""
7675
self.print_completion(message)
7776

78-
def print_completion(self, message):
77+
def print_completion(self, message: str) -> None:
7978
"""
8079
Remove the progress bar, prints a message, and puts the progress
8180
bar back.
8281
8382
:param message: an error message
84-
:type message: str
8583
"""
8684
with self.lock:
8785
self._print_line(message, True)
8886
self._last_update_time = 0
8987
self._update_progress()
9088

91-
def update_count(self, delta: int):
89+
def update_count(self, delta: int) -> None:
9290
"""
9391
Report that items have been processed.
9492
"""
@@ -117,14 +115,12 @@ def _update_progress(self):
117115

118116
self._print_line(message, False)
119117

120-
def _print_line(self, line, newline):
118+
def _print_line(self, line: str, newline: bool) -> None:
121119
"""
122120
Print a line to stdout.
123121
124122
:param line: a string without a \r or \n in it.
125-
:type line: str
126123
:param newline: True if the output should move to a new line after this one.
127-
:type newline: bool
128124
"""
129125
if len(line) < len(self.current_line):
130126
line += ' ' * (len(self.current_line) - len(line))
@@ -150,48 +146,55 @@ def _print_line(self, line, newline):
150146
self.current_line = line
151147
self.stdout.flush()
152148

153-
def update_total(self, delta):
149+
def update_total(self, delta: int) -> None:
154150
"""
155151
Report that more files have been found for comparison.
156152
157153
:param delta: number of files found since the last check
158-
:type delta: int
159154
"""
160155
with self.lock:
161156
self.total_count += delta
162157
self._update_progress()
163158

164-
def end_total(self):
159+
def end_total(self) -> None:
165160
"""
166161
Total files count is done. Can proceed to step 2.
167162
"""
168163
with self.lock:
169164
self.total_done = True
170165
self._update_progress()
171166

172-
def local_access_error(self, path):
167+
def local_access_error(self, path: str) -> None:
173168
"""
174169
Add a file access error message to the list of warnings.
175170
176171
:param path: file path
177-
:type path: str
178172
"""
179173
self.warnings.append('WARNING: %s could not be accessed (broken symlink?)' % (path,))
180174

181-
def local_permission_error(self, path):
175+
def local_permission_error(self, path: str) -> None:
182176
"""
183177
Add a permission error message to the list of warnings.
184178
185179
:param path: file path
186-
:type path: str
187180
"""
188181
self.warnings.append(
189182
'WARNING: %s could not be accessed (no permissions to read?)' % (path,)
190183
)
191184

192-
def symlink_skipped(self, path):
185+
def symlink_skipped(self, path: str) -> None:
193186
pass
194187

188+
def circular_symlink_skipped(self, path: str) -> None:
189+
"""
190+
Add a circular symlink error message to the list of warnings.
191+
192+
:param path: file path
193+
"""
194+
self.warnings.append(
195+
'WARNING: %s is a circular symlink, which was already visited. Skipping.' % (path,)
196+
)
197+
195198

196199
def sample_report_run():
197200
"""

noxfile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
'pytest-lazy-fixture==0.6.3',
4040
'pyfakefs==4.5.6',
4141
'pytest-xdist==2.5.0',
42+
'pytest-timeout==2.1.0',
4243
]
4344
REQUIREMENTS_BUILD = ['setuptools>=20.2']
4445

0 commit comments

Comments
 (0)