10
10
11
11
import logging
12
12
import os
13
+ from pathlib import Path
13
14
import platform
14
15
import re
15
16
import sys
16
17
17
18
from abc import ABCMeta , abstractmethod
18
- from typing import Iterator , Optional
19
+ from typing import Iterator , Optional , Set
19
20
20
21
from ..utils import fix_windows_path_limit , get_file_mtime , is_file_readable
21
22
from .exception import EmptyDirectory , EnvironmentEncodingError , NotADirectory , UnableToCreateDirectory , UnsupportedFilename
@@ -132,7 +133,8 @@ def all_files(self, reporter: Optional[ProgressReport],
132
133
:param reporter: a place to report errors
133
134
:param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
134
135
"""
135
- yield from self ._walk_relative_paths (self .root , '' , reporter , policies_manager )
136
+ root_path = Path (self .root )
137
+ yield from self ._walk_relative_paths (root_path , Path ('' ), reporter , policies_manager )
136
138
137
139
def make_full_path (self , file_name ):
138
140
"""
@@ -178,17 +180,23 @@ def ensure_non_empty(self):
178
180
raise EmptyDirectory (self .root )
179
181
180
182
def _walk_relative_paths (
181
- self , local_dir : str , relative_dir_path : str , reporter ,
182
- policies_manager : ScanPoliciesManager
183
+ self ,
184
+ local_dir : Path ,
185
+ relative_dir_path : Path ,
186
+ reporter : ProgressReport ,
187
+ policies_manager : ScanPoliciesManager ,
188
+ visited_symlinks : Optional [Set [int ]] = None ,
183
189
):
184
190
"""
185
191
Yield a File object for each of the files anywhere under this folder, in the
186
192
order they would appear in B2, unless the path is excluded by policies manager.
187
193
188
- :param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
194
+ :param local_dir: the path to the local directory that we are currently inspecting
195
+ :param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
196
+ :param reporter: a reporter object to report errors and warnings
197
+ :param policies_manager: a policies manager object
198
+ :param visited_symlinks: a set of inode numbers of the resolved targets of symlinked directories that have already been visited; inode numbers are stored instead of paths to reduce memory usage
189
199
"""
190
- if not isinstance (local_dir , str ):
191
- raise ValueError ('folder path should be unicode: %s' % repr (local_dir ))
192
200
193
201
# Collect the names. We do this before returning any results, because
194
202
# directories need to sort as if their names end in '/'.
@@ -204,39 +212,59 @@ def _walk_relative_paths(
204
212
#
205
213
# This is because in Unicode '.' comes before '/', which comes before '0'.
206
214
names = [] # list of (name, local_path, relative_file_path)
207
- for name in os .listdir (local_dir ):
208
- # We expect listdir() to return unicode if dir_path is unicode.
209
- # If the file name is not valid, based on the file system
210
- # encoding, then listdir() will return un-decoded str/bytes.
211
- if not isinstance (name , str ):
212
- name = self ._handle_non_unicode_file_name (name )
215
+
216
+ visited_symlinks = visited_symlinks or set ()
217
+
218
+ if local_dir .is_symlink ():
219
+ real_path = local_dir .resolve ()
220
+ inode_number = real_path .stat ().st_ino
221
+
222
+ visited_symlinks_count = len (visited_symlinks )
223
+
224
+ # Add symlink to visited_symlinks to prevent infinite symlink loops
225
+ visited_symlinks .add (inode_number )
226
+
227
+ # If the set size did not grow after the add, this symlink's target has already been visited
228
+ if len (visited_symlinks ) == visited_symlinks_count :
229
+ # Infinite symlink loop detected, report warning and skip symlink
230
+ if reporter is not None :
231
+ reporter .circular_symlink_skipped (str (local_dir ))
232
+ return
233
+
234
+ visited_symlinks .add (inode_number )
235
+
236
+ for name in (x .name for x in local_dir .iterdir ()):
213
237
214
238
if '/' in name :
215
239
raise UnsupportedFilename (
216
240
"scan does not support file names that include '/'" ,
217
241
"%s in dir %s" % (name , local_dir )
218
242
)
219
243
220
- local_path = os . path . join ( local_dir , name )
244
+ local_path = local_dir / name
221
245
relative_file_path = join_b2_path (
222
- relative_dir_path , name
246
+ str ( relative_dir_path ) , name
223
247
) # file path relative to the scan point
224
248
225
249
# Skip broken symlinks or other inaccessible files
226
- if not is_file_readable (local_path , reporter ):
250
+ if not is_file_readable (str ( local_path ) , reporter ):
227
251
continue
228
252
229
- if policies_manager .exclude_all_symlinks and os . path . islink ( local_path ):
253
+ if policies_manager .exclude_all_symlinks and local_path . is_symlink ( ):
230
254
if reporter is not None :
231
- reporter .symlink_skipped (local_path )
255
+ reporter .symlink_skipped (str ( local_path ) )
232
256
continue
233
257
234
- if os . path . isdir ( local_path ):
258
+ if local_path . is_dir ( ):
235
259
name += '/'
236
- if policies_manager .should_exclude_local_directory (relative_file_path ):
260
+ if policies_manager .should_exclude_local_directory (str ( relative_file_path ) ):
237
261
continue
238
262
239
- names .append ((name , local_path , relative_file_path ))
263
+ # remove the leading './' from the relative path to ensure backward compatibility
264
+ relative_file_path_str = str (relative_file_path )
265
+ if relative_file_path_str .startswith ("./" ):
266
+ relative_file_path_str = relative_file_path_str [2 :]
267
+ names .append ((name , local_path , relative_file_path_str ))
240
268
241
269
# Yield all of the answers.
242
270
#
@@ -245,19 +273,23 @@ def _walk_relative_paths(
245
273
for (name , local_path , relative_file_path ) in sorted (names ):
246
274
if name .endswith ('/' ):
247
275
for subdir_file in self ._walk_relative_paths (
248
- local_path , relative_file_path , reporter , policies_manager
276
+ local_path ,
277
+ relative_file_path ,
278
+ reporter ,
279
+ policies_manager ,
280
+ visited_symlinks ,
249
281
):
250
282
yield subdir_file
251
283
else :
252
284
# Check that the file still exists and is accessible, since it can take a long time
253
285
# to iterate through large folders
254
- if is_file_readable (local_path , reporter ):
255
- file_mod_time = get_file_mtime (local_path )
256
- file_size = os . path . getsize ( local_path )
286
+ if is_file_readable (str ( local_path ) , reporter ):
287
+ file_mod_time = get_file_mtime (str ( local_path ) )
288
+ file_size = local_path . stat (). st_size
257
289
258
290
local_scan_path = LocalPath (
259
- absolute_path = self .make_full_path (relative_file_path ),
260
- relative_path = relative_file_path ,
291
+ absolute_path = self .make_full_path (str ( relative_file_path ) ),
292
+ relative_path = str ( relative_file_path ) ,
261
293
mod_time = file_mod_time ,
262
294
size = file_size ,
263
295
)
0 commit comments