Skip to content

Commit cd6189c

Browse files
committed
GH-104996: Implement path joining algorithm in pathlib
Copy the `ntpath.join()` algorithm into pathlib and adjust it to remove string concatenation. The resulting drive, root and tail are stored on the path object without creating an intermediate joined path.
1 parent ffeaec7 commit cd6189c

File tree

2 files changed

+39
-16
lines changed

2 files changed

+39
-16
lines changed

Lib/pathlib.py

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -332,34 +332,55 @@ def with_segments(self, *pathsegments):
332332
return type(self)(*pathsegments)
333333

334334
@classmethod
335-
def _parse_path(cls, path):
336-
if not path:
335+
def _parse_paths(cls, paths):
336+
if not paths:
337337
return '', '', []
338338
sep = cls._flavour.sep
339339
altsep = cls._flavour.altsep
340-
if altsep:
341-
path = path.replace(altsep, sep)
342-
drv, root, rel = cls._flavour.splitroot(path)
343-
if not root and drv.startswith(sep) and not drv.endswith(sep):
340+
splitroot = cls._flavour.splitroot
341+
342+
# Join paths like ntpath.join(), but without concatenating strings.
343+
drv, root, tail = '', '', []
344+
for path in paths:
345+
if altsep:
346+
path = path.replace(altsep, sep)
347+
p_drv, p_root, p_rel = splitroot(path)
348+
p_tail = p_rel.split(sep)
349+
if p_root:
350+
if p_drv:
351+
drv = p_drv
352+
root = p_root
353+
tail = p_tail
354+
elif p_drv and p_drv != drv:
355+
if p_drv.lower() != drv.lower():
356+
drv = p_drv
357+
root = p_root
358+
tail = p_tail
359+
else:
360+
drv = p_drv
361+
tail.extend(p_tail)
362+
else:
363+
tail.extend(p_tail)
364+
365+
# Normalize UNC path.
366+
if drv and not root and drv[-1] not in ':\\':
367+
if any(tail):
368+
# Join onto partial UNC drive - must join and re-split.
369+
parts = [drv] + [x for x in tail if x]
370+
drv, root, rel = splitroot(sep.join(parts))
371+
tail = rel.split(sep)
344372
drv_parts = drv.split(sep)
345373
if len(drv_parts) == 4 and drv_parts[2] not in '?.':
346374
# e.g. //server/share
347375
root = sep
348376
elif len(drv_parts) == 6:
349377
# e.g. //?/unc/server/share
350378
root = sep
351-
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
379+
parsed = [sys.intern(str(x)) for x in tail if x and x != '.']
352380
return drv, root, parsed
353381

354382
def _load_parts(self):
355-
paths = self._raw_paths
356-
if len(paths) == 0:
357-
path = ''
358-
elif len(paths) == 1:
359-
path = paths[0]
360-
else:
361-
path = self._flavour.join(*paths)
362-
drv, root, tail = self._parse_path(path)
383+
drv, root, tail = self._parse_paths(self._raw_paths)
363384
self._drv = drv
364385
self._root = root
365386
self._tail_cached = tail
@@ -1385,7 +1406,7 @@ def expanduser(self):
13851406
homedir = self._flavour.expanduser(self._tail[0])
13861407
if homedir[:1] == "~":
13871408
raise RuntimeError("Could not determine home directory.")
1388-
drv, root, tail = self._parse_path(homedir)
1409+
drv, root, tail = self._parse_paths((homedir,))
13891410
return self._from_parsed_parts(drv, root, tail + self._tail[1:])
13901411

13911412
return self
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve performance of path joining in :class:`pathlib.PurePath` by
2+
implementing a variant of :func:`os.path.join`.

0 commit comments

Comments
 (0)