Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit c08dc38

Browse files
CTrandoMikhail Arkhipov
authored and
Mikhail Arkhipov
committed
Adding support for egg and zip files (#1477)
* Preliminary zip file * Refactoring and adding .egg support * refactoring * More refactoring * removing irrelevant classes * Preliminary working in memory * More things to make it work * Getting rid of comment * Refactoring * More refactoring * more changes * refactoring * Removing unneeded changes * adding back * PR feedback * Test fix
1 parent d6260fe commit c08dc38

18 files changed

+295
-18
lines changed

src/Analysis/Ast/Impl/Modules/Resolution/ModuleResolutionBase.cs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public IPythonModule GetOrLoadModule(string name) {
8787
moduleRef = Modules.GetOrAdd(name, new ModuleRef());
8888
return moduleRef.GetOrCreate(name, this);
8989
}
90-
90+
9191
public ModulePath FindModule(string filePath) {
9292
var bestLibraryPath = string.Empty;
9393

@@ -102,11 +102,24 @@ public ModulePath FindModule(string filePath) {
102102
}
103103

104104
/// <summary>
/// Re-registers module paths with <c>PathResolver</c> for every given root.
/// For each root, all files reported by <c>PathUtils.EnumerateFiles</c> are added.
/// When the root additionally resolves to a zip/egg archive that exists on disk,
/// every archive entry whose name does not start with <c>EGG-INFO</c> is added
/// under a path formed by combining the archive path with the normalized entry name.
/// </summary>
/// <param name="rootPaths">Search roots; may be directories or paths pointing at/into zip or egg archives.</param>
protected void ReloadModulePaths(in IEnumerable<string> rootPaths) {
    foreach (var root in rootPaths) {
        // Files found directly on the file system under this root.
        foreach (var file in PathUtils.EnumerateFiles(FileSystem, root)) {
            PathResolver.TryAddModulePath(file.FullName, file.Length, false, out _);
        }

        // Nothing more to do unless the root refers to an existing archive.
        var refersToArchive = PathUtils.TryGetZipFilePath(root, out var archivePath, out _);
        if (!refersToArchive || !File.Exists(archivePath)) {
            continue;
        }

        foreach (var entry in PathUtils.EnumerateZip(archivePath)) {
            // Egg metadata is not importable code.
            if (PathUtils.PathStartsWith(entry.FullName, "EGG-INFO")) {
                continue;
            }

            var entryPath = Path.Combine(archivePath, PathUtils.NormalizePath(entry.FullName));
            PathResolver.TryAddModulePath(entryPath, entry.Length, false, out _);
        }
    }
}
109-
110123
protected class ModuleRef {
111124
private readonly object _syncObj = new object();
112125
private IPythonModule _module;

src/Analysis/Ast/Impl/get_search_paths.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,18 @@ def clean(path):
6868
BEFORE_SITE.discard(None)
6969
AFTER_SITE.discard(None)
7070

71+
import zipfile

# Report every sys.path entry that is either a directory or a zip archive,
# tagged by where it sits relative to site initialisation.
for p in sys.path:
    p = clean(p)

    # Keep directories and real zip archives (eggs are zip files too); drop the rest.
    if os.path.isdir(p) or (os.path.isfile(p) and zipfile.is_zipfile(p)):
        if p in BEFORE_SITE:
            print("%s|stdlib|" % p)
        elif p in AFTER_SITE and p in SITE_PKGS:
            print("%s|site|" % p)
        elif p in AFTER_SITE:
            print("%s|pth|" % p)

src/Analysis/Ast/Test/ImportTests.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// See the Apache Version 2.0 License for specific language governing
1414
// permissions and limitations under the License.
1515

16-
using System.IO;
1716
using System.Linq;
1817
using System.Threading.Tasks;
1918
using FluentAssertions;
@@ -23,7 +22,6 @@
2322
using Microsoft.Python.Analysis.Types;
2423
using Microsoft.Python.Core;
2524
using Microsoft.Python.Parsing.Tests;
26-
using Microsoft.Python.Tests.Utilities.FluentAssertions;
2725
using Microsoft.VisualStudio.TestTools.UnitTesting;
2826
using TestUtilities;
2927

src/Core/Impl/IO/FileSystem.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ public long FileSize(string path) {
2424
return fileInfo.Length;
2525
}
2626

27-
public string ReadAllText(string path) => File.ReadAllText(path);
27+
/// <summary>
/// Reads the whole text at <paramref name="filePath"/>.
/// When <c>PathUtils.TryGetZipFilePath</c> recognizes the path as pointing into a
/// zip/egg archive, the text is taken from the archive entry via
/// <c>PathUtils.GetZipContent</c>; otherwise the file is read from disk.
/// </summary>
/// <param name="filePath">Path of a regular file, or a path of the form archive + entry.</param>
/// <returns>The text content; the archive branch returns whatever <c>GetZipContent</c> yields.</returns>
public string ReadAllText(string filePath) {
    var isInsideArchive = PathUtils.TryGetZipFilePath(filePath, out var archivePath, out var entryPath);
    return isInsideArchive
        ? PathUtils.GetZipContent(archivePath, entryPath)
        : File.ReadAllText(filePath);
}
33+
2834
/// <summary>
/// Writes <paramref name="content"/> to the file at <paramref name="path"/>
/// by delegating to <see cref="File.WriteAllText(string, string)"/>.
/// </summary>
public void WriteAllText(string path, string content) {
    File.WriteAllText(path, content);
}
2935
/// <summary>
/// Returns the lines of the file at <paramref name="path"/>
/// by delegating to <see cref="File.ReadLines(string)"/>.
/// </summary>
public IEnumerable<string> FileReadAllLines(string path) {
    return File.ReadLines(path);
}
3036
/// <summary>
/// Writes <paramref name="contents"/> to the file at <paramref name="path"/>
/// by delegating to <see cref="File.WriteAllLines(string, IEnumerable{string})"/>.
/// </summary>
public void FileWriteAllLines(string path, IEnumerable<string> contents) {
    File.WriteAllLines(path, contents);
}

src/Core/Impl/IO/PathUtils.cs

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
using System;
1717
using System.Collections.Generic;
1818
using System.IO;
19+
using System.IO.Compression;
1920
using System.Linq;
2021
using System.Runtime.InteropServices;
2122
using System.Threading;
@@ -47,7 +48,6 @@ public static bool IsValidFileNameCharacter(char character)
4748
/// <summary>
/// Tells whether <paramref name="path"/> ends with a character that
/// <c>IsDirectorySeparator</c> accepts. Null or empty input yields false.
/// </summary>
public static bool HasEndSeparator(string path) {
    if (string.IsNullOrEmpty(path)) {
        return false;
    }

    var lastChar = path[path.Length - 1];
    return IsDirectorySeparator(lastChar);
}
4950

50-
5151
/// <summary>
/// Tells whether <paramref name="c"/> is one of the characters listed in <c>DirectorySeparators</c>.
/// </summary>
public static bool IsDirectorySeparator(char c) {
    var position = Array.IndexOf(DirectorySeparators, c);
    return position != -1;
}
5252

5353
public static bool PathStartsWith(string s, string prefix)
@@ -117,7 +117,7 @@ public static string FindFile(IFileSystem fileSystem,
117117
int depthLimit = 2,
118118
IEnumerable<string> firstCheck = null
119119
) {
120-
if (!Directory.Exists(root)) {
120+
if (!fileSystem.DirectoryExists(root)) {
121121
return null;
122122
}
123123

@@ -185,12 +185,17 @@ public static IEnumerable<string> EnumerateDirectories(IFileSystem fileSystem, s
185185
var path = queue.Dequeue();
186186
path = EnsureEndSeparator(path);
187187

188+
if (!fileSystem.DirectoryExists(path)) {
189+
continue;
190+
}
191+
188192
IEnumerable<string> dirs = null;
189193
try {
190194
dirs = fileSystem.GetDirectories(path);
191195
} catch (UnauthorizedAccessException) {
192196
} catch (IOException) {
193197
}
198+
194199
if (dirs == null) {
195200
continue;
196201
}
@@ -308,6 +313,87 @@ public static IEnumerable<IFileInfo> EnumerateFiles(IFileSystem fileSystem, stri
308313
}
309314
}
310315

316+
/// <summary>
/// Splits a path that points at, or inside, a zip/egg archive into the archive
/// path and the entry path within the archive.
/// </summary>
/// <remarks>
/// The path is walked upwards with <c>GetParent</c> until <c>IsZipFile</c> accepts
/// a prefix. Whatever follows that prefix in <paramref name="filePath"/> becomes the
/// entry path, with every character from <c>DirectorySeparators</c> replaced by '/'.
/// Example: '..\test\test.zip\test\a.py' gives archive '..\test\test.zip' and entry 'test/a.py'
/// (this presumes <c>GetParent</c> keeps the trailing separator, e.g. '\test\test.zip' => '\test\').
/// </remarks>
/// <param name="filePath">Path to examine; may be null or empty.</param>
/// <param name="zipPath">Archive path as reported by <c>IsZipFile</c>, or empty when the result is false.</param>
/// <param name="relativeZipPath">Forward-slash entry path inside the archive; empty when the path is the archive itself or the result is false.</param>
/// <returns>True when some prefix of the path has a .zip or .egg extension.</returns>
public static bool TryGetZipFilePath(string filePath, out string zipPath, out string relativeZipPath) {
    zipPath = string.Empty;
    relativeZipPath = string.Empty;

    // Cheap rejection: nothing to examine, or no '.zip' / '.egg' anywhere in the text.
    var mayReferenceArchive = !string.IsNullOrEmpty(filePath)
        && (filePath.Contains(".zip") || filePath.Contains(".egg"));
    if (!mayReferenceArchive) {
        return false;
    }

    for (var candidate = filePath; !string.IsNullOrEmpty(candidate); candidate = GetParent(candidate)) {
        if (!IsZipFile(candidate, out zipPath)) {
            continue;
        }

        // Everything past the archive prefix is the entry name.
        var entryPath = filePath.Substring(candidate.Length);

        // Zip entry names use forward slashes
        // (see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT).
        foreach (var separator in DirectorySeparators) {
            entryPath = entryPath.Replace(separator, '/');
        }

        relativeZipPath = entryPath;
        return true;
    }

    // The text contained '.zip' or '.egg' but no path segment is an archive,
    // e.g. /tmp/tmp.zip.txt
    return false;
}
350+
351+
/// <summary>
/// Checks whether the given path names a zip or egg archive, judged purely by its
/// extension (exactly ".zip" or ".egg") after <c>NormalizePathAndTrim</c> is applied.
/// The path can be of the form ..\\test.zip or ..\\test.zip\\
/// </summary>
/// <param name="rawZipPath">Path to test.</param>
/// <param name="zipPath">The normalized, trimmed path on success; empty string otherwise.</param>
/// <returns>True when the extension is ".zip" or ".egg".</returns>
public static bool IsZipFile(string rawZipPath, out string zipPath) {
    var normalized = NormalizePathAndTrim(rawZipPath);
    var extension = Path.GetExtension(normalized);

    var isArchive = extension == ".zip" || extension == ".egg";
    zipPath = isArchive ? normalized : string.Empty;
    return isArchive;
}
368+
369+
/// <summary>
/// Reads the text of a single entry from a zip (or egg) archive.
/// e.g. for an archive test.zip containing a.py and b.py, the contents of a.py
/// are obtained by passing in "test.zip" and "a.py".
/// </summary>
/// <param name="zipPath">Path of the archive on disk.</param>
/// <param name="relativeZipPath">Entry name inside the archive.</param>
/// <returns>The entry text, or null when the archive has no entry with that name.</returns>
public static string GetZipContent(string zipPath, string relativeZipPath) {
    using (var archive = ZipFile.OpenRead(zipPath)) {
        var entry = archive.GetEntry(relativeZipPath);
        if (entry != null) {
            using (var reader = new StreamReader(entry.Open())) {
                return reader.ReadToEnd();
            }
        }

        // The archive holds no entry under that name.
        return null;
    }
}
390+
391+
/// <summary>
/// Returns a snapshot list of all entries of the archive at <paramref name="root"/>.
/// </summary>
/// <remarks>
/// The archive is disposed before this method returns, so the result is intended
/// for reading entry metadata such as FullName and Length.
/// NOTE(review): opening entry content from the returned objects is expected to fail
/// because the owning archive is already closed; confirm before relying on it.
/// </remarks>
/// <param name="root">Path of the zip (or egg) archive on disk.</param>
public static IEnumerable<ZipArchiveEntry> EnumerateZip(string root) {
    var entries = new List<ZipArchiveEntry>();
    using (var archive = ZipFile.OpenRead(root)) {
        entries.AddRange(archive.Entries);
    }
    return entries;
}
396+
311397
/// <summary>
312398
/// Deletes a file, making multiple attempts and suppressing any
313399
/// IO-related errors.

src/Core/Test/PathUtilsTests.cs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright(c) Microsoft Corporation
2+
// All rights reserved.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the License); you may not use
5+
// this file except in compliance with the License. You may obtain a copy of the
6+
// License at http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
9+
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
10+
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
11+
// MERCHANTABILITY OR NON-INFRINGEMENT.
12+
//
13+
// See the Apache Version 2.0 License for specific language governing
14+
// permissions and limitations under the License.
15+
16+
using FluentAssertions;
17+
using Microsoft.Python.Core.IO;
18+
using Microsoft.VisualStudio.TestTools.UnitTesting;
19+
20+
namespace Microsoft.Python.Core.Tests {
    /// <summary>
    /// Tests for <c>PathUtils.TryGetZipFilePath</c>: splitting a path into the
    /// archive path and the forward-slash entry path inside the archive.
    /// </summary>
    [TestClass]
    public class PathUtilsTests {
        /// <summary>UNC-style paths: the archive itself and an entry below it.</summary>
        [TestMethod, Priority(0)]
        public void ZipFileUNCPath() {
            // The boolean result is asserted too, so a regression that fills the
            // out parameters but reports failure cannot go unnoticed.
            PathUtils.TryGetZipFilePath(@"\\server\home\share\test.zip", out var zipPath, out var relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be(@"\\server\home\share\test.zip");
            relativeZipPath.Should().BeEmpty();

            PathUtils.TryGetZipFilePath(@"\\server\home\share\test.zip\test\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be(@"\\server\home\share\test.zip");
            relativeZipPath.Should().Be("test/a.py");

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/a.py");
        }

        /// <summary>Rooted paths: the archive itself and entries one and two levels deep.</summary>
        [TestMethod, Priority(0)]
        public void ZipFilePath() {
            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip", out var zipPath, out var relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().BeEmpty();

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\a.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/a.py");

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\test.zip\\test\\foo\\baz.py", out zipPath, out relativeZipPath)
                .Should().BeTrue();
            zipPath.Should().Be("\\path\\foo\\baz\\test.zip");
            relativeZipPath.Should().Be("test/foo/baz.py");
        }

        /// <summary>
        /// Inputs rejected before any path walking happens: null, empty, and a path
        /// that contains neither ".zip" nor ".egg". Both out values must be empty.
        /// </summary>
        [TestMethod, Priority(0)]
        public void NotZipFilePath() {
            PathUtils.TryGetZipFilePath(null, out var zipPath, out var relativeZipPath)
                .Should().BeFalse();
            zipPath.Should().BeEmpty();
            relativeZipPath.Should().BeEmpty();

            PathUtils.TryGetZipFilePath(string.Empty, out zipPath, out relativeZipPath)
                .Should().BeFalse();
            zipPath.Should().BeEmpty();
            relativeZipPath.Should().BeEmpty();

            PathUtils.TryGetZipFilePath("\\path\\foo\\baz\\a.py", out zipPath, out relativeZipPath)
                .Should().BeFalse();
            zipPath.Should().BeEmpty();
            relativeZipPath.Should().BeEmpty();
        }
    }
}

src/LanguageServer/Test/ImportsTests.cs

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
using System;
1717
using System.IO;
1818
using System.Threading.Tasks;
19-
using Microsoft.Python.Analysis;
2019
using Microsoft.Python.Analysis.Analyzer;
2120
using Microsoft.Python.Analysis.Documents;
21+
using Microsoft.Python.Analysis.Tests.FluentAssertions;
22+
using Microsoft.Python.Analysis.Types;
2223
using Microsoft.Python.Core.Text;
2324
using Microsoft.Python.LanguageServer.Completion;
2425
using Microsoft.Python.LanguageServer.Sources;
@@ -830,5 +831,100 @@ import module2
830831
comps = cs.GetCompletions(analysis, new SourceLocation(4, 9));
831832
comps.Should().HaveLabels("Y");
832833
}
834+
835+
/// <summary>
/// Opens BasicEggZip.py with the archive itself on the search path and
/// expects variable 'i' to be analyzed as int.
/// </summary>
[DataRow("Basic.egg")]
[DataRow("Basic.zip")]
[DataTestMethod, Priority(0)]
public async Task BasicEggZip(string eggZipFilePath) {
    var dataRoot = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var searchRoots = new[] { dataRoot, Path.Combine(dataRoot, eggZipFilePath) };
    await CreateServicesAsync(dataRoot, PythonVersions.LatestAvailable3X, searchPaths: searchRoots);

    var documents = Services.GetService<IRunningDocumentTable>();
    var pythonAnalyzer = Services.GetService<IPythonAnalyzer>();

    var sourcePath = Path.Combine(dataRoot, "BasicEggZip.py");
    var sourceText = await File.ReadAllTextAsync(sourcePath);
    var document = documents.OpenDocument(TestData.GetTestSpecificUri(sourcePath), sourceText);

    await pythonAnalyzer.WaitForCompleteAnalysisAsync();
    var result = await document.GetAnalysisAsync(-1);

    result.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
}
853+
854+
/// <summary>
/// Opens EggZipImports.py with the 'test' folder inside the archive on the
/// search path and checks the types of the variables h, y, b and i.
/// </summary>
[DataRow("ZipImports.zip")]
[DataRow("EggImports.egg")]
[DataTestMethod, Priority(0)]
public async Task EggZipImports(string eggZipFilePath) {
    var dataRoot = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var searchRoots = new[] { dataRoot, Path.Combine(dataRoot, eggZipFilePath, "test") };
    await CreateServicesAsync(dataRoot, PythonVersions.LatestAvailable3X, searchPaths: searchRoots);

    var documents = Services.GetService<IRunningDocumentTable>();
    var pythonAnalyzer = Services.GetService<IPythonAnalyzer>();

    var sourcePath = Path.Combine(dataRoot, "EggZipImports.py");
    var sourceText = await File.ReadAllTextAsync(sourcePath);
    var document = documents.OpenDocument(TestData.GetTestSpecificUri(sourcePath), sourceText);

    await pythonAnalyzer.WaitForCompleteAnalysisAsync();
    var result = await document.GetAnalysisAsync(-1);

    result.Should().HaveVariable("h").OfType("X");
    result.Should().HaveVariable("y").OfType(BuiltinTypeId.Int);
    result.Should().HaveVariable("b").OfType("A");
    result.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
}
875+
876+
/// <summary>
/// Opens EggZipRelativeImports.py with the 'test' folder inside the archive on
/// the search path and checks the types of the variables h, i and s.
/// </summary>
[DataRow("ZipRelativeImports.zip")]
[DataRow("EggRelativeImports.egg")]
[DataTestMethod, Priority(0)]
public async Task EggZipRelativeImports(string eggZipFilePath) {
    var dataRoot = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var searchRoots = new[] { dataRoot, Path.Combine(dataRoot, eggZipFilePath, "test") };
    await CreateServicesAsync(dataRoot, PythonVersions.LatestAvailable3X, searchPaths: searchRoots);

    var documents = Services.GetService<IRunningDocumentTable>();
    var pythonAnalyzer = Services.GetService<IPythonAnalyzer>();

    var sourcePath = Path.Combine(dataRoot, "EggZipRelativeImports.py");
    var sourceText = await File.ReadAllTextAsync(sourcePath);
    var document = documents.OpenDocument(TestData.GetTestSpecificUri(sourcePath), sourceText);

    await pythonAnalyzer.WaitForCompleteAnalysisAsync();
    var result = await document.GetAnalysisAsync(-1);

    result.Should().HaveVariable("h").OfType(BuiltinTypeId.Float);
    result.Should().HaveVariable("i").OfType(BuiltinTypeId.Int);
    result.Should().HaveVariable("s").OfType(BuiltinTypeId.Str);
}
896+
897+
/// <summary>
/// Analyzes an in-memory document containing 'import simplejson' with the
/// simplejson archive on the search path and expects the listed members on the module.
/// </summary>
[DataRow("simplejson.egg")]
[DataRow("simplejson.zip")]
[DataTestMethod, Priority(0)]
public async Task SimpleJsonEggZip(string eggZipFilePath) {
    var dataRoot = Path.Combine(GetAnalysisTestDataFilesPath(), "EggZip");
    var searchRoots = new[] { dataRoot, Path.Combine(dataRoot, eggZipFilePath) };
    await CreateServicesAsync(dataRoot, PythonVersions.LatestAvailable3X, searchPaths: searchRoots);

    var documents = Services.GetService<IRunningDocumentTable>();
    var pythonAnalyzer = Services.GetService<IPythonAnalyzer>();

    // The document text is supplied inline; test.py is only used to build the URI.
    const string sourceText = "import simplejson";
    var sourcePath = Path.Combine(dataRoot, "test.py");
    var document = documents.OpenDocument(TestData.GetTestSpecificUri(sourcePath), sourceText);

    await pythonAnalyzer.WaitForCompleteAnalysisAsync();
    var result = await document.GetAnalysisAsync(-1);

    var expectedMembers = new[] {
        "Decimal",
        "JSONDecodeError",
        "JSONDecoder",
        "JSONEncoder",
        "JSONEncoderForHTML",
        "OrderedDict",
        "RawJSON",
        "dump",
        "dumps",
        "load",
        "loads",
        "simple_first"
    };
    result.Should().HaveVariable("simplejson").Which.Should().HaveMembers(expectedMembers);
}
928+
833929
}
834930
}
Binary file not shown.
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Analysis fixture: imports module 'a' from package 'test' and exercises class A.
# The top-level names 'a' and 'i' are kept as-is; presumably the language-server
# tests look them up by name (verify against the test that opens this file).
import sys
import test.a

# Instantiate A, then keep the result of its test() method.
a = test.a.A()
i = a.test()
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)