10
10
11
11
ZIMFILES = [
12
12
{
13
- ' filename' : str (DATA_DIR / "wikipedia_es_physics_mini.zim" ),
14
- ' checksum' : u "99ea7a5598c6040c4f50b8ac0653b703" ,
15
- ' namespaces' : u "-AIMX" ,
16
- ' article_count' : 22027 ,
17
- ' main_page_url' : u "A/index" ,
13
+ " filename" : str (DATA_DIR / "wikipedia_es_physics_mini.zim" ),
14
+ " checksum" : "99ea7a5598c6040c4f50b8ac0653b703" ,
15
+ " namespaces" : "-AIMX" ,
16
+ " article_count" : 22027 ,
17
+ " main_page_url" : "A/index" ,
18
18
}
19
19
]
20
20
21
21
22
-
23
-
24
22
@pytest .fixture (params = ZIMFILES )
25
23
def zimdata (request ):
26
24
return request .param
27
25
26
+
28
27
@pytest .fixture
29
28
def reader (zimdata ):
30
- return File (zimdata [' filename' ])
29
+ return File (zimdata [" filename" ])
31
30
32
31
33
32
@pytest .fixture
34
33
def article_data ():
35
34
return {
36
- ' url' : u "A/Albert_Einstein" ,
37
- ' title' : u "Albert Einstein" ,
38
- ' mimetype' : u "text/html" ,
39
- ' article_id' : 663 ,
40
- ' size' : 17343
35
+ " url" : "A/Albert_Einstein" ,
36
+ " title" : "Albert Einstein" ,
37
+ " mimetype" : "text/html" ,
38
+ " article_id" : 663 ,
39
+ " size" : 17343 ,
41
40
}
42
41
43
42
44
43
def test_zim_filename (reader , zimdata ):
45
44
for k , v in zimdata .items ():
46
45
assert getattr (reader , k ) == v
47
46
47
+
48
48
def test_zim_read (reader , article_data ):
49
- article = reader .get_article (article_data [' url' ])
49
+ article = reader .get_article (article_data [" url" ])
50
50
51
- assert article .longurl == article_data [' url' ]
52
- assert article .title == article_data [' title' ]
53
- assert article .url == article_data [' url' ][2 :]
54
- assert article .mimetype == article_data [' mimetype' ]
51
+ assert article .longurl == article_data [" url" ]
52
+ assert article .title == article_data [" title" ]
53
+ assert article .url == article_data [" url" ][2 :]
54
+ assert article .mimetype == article_data [" mimetype" ]
55
55
assert isinstance (article .content , memoryview )
56
- assert len (article .content ) == article_data ['size' ]
56
+ assert len (article .content ) == article_data ["size" ]
57
+
57
58
58
59
def test_content_ref_keep (reader ):
59
60
"""Get the memoryview on a content and lose the reference on the article.
60
61
We try to load a lot of other articles to detect possible use of a dangling pointer
61
62
"""
62
- content = None
63
+ content = None
64
+
63
65
def get_content ():
64
66
nonlocal content
65
- article = reader .get_article (u "A/Albert_Einstein" )
67
+ article = reader .get_article ("A/Albert_Einstein" )
66
68
assert isinstance (article .content , memoryview )
67
69
content = article .content
68
- get_content () # Now we have a content but no reference to the article.
70
+
71
+ get_content () # Now we have a content but no reference to the article.
69
72
gc .collect ()
70
73
# Load a lot of content
71
74
for i in range (0 , reader .article_count , 2 ):
@@ -74,28 +77,35 @@ def get_content():
74
77
c = article .content
75
78
# Check everything is ok
76
79
assert len (content ) == 17343
77
- assert bytes (content [:100 ]) == b'<!DOCTYPE html>\n <html class="client-js"><head>\n <meta charset="UTF-8">\n <title>Albert Einstein</ti'
80
+ assert (
81
+ bytes (content [:100 ])
82
+ == b'<!DOCTYPE html>\n <html class="client-js"><head>\n <meta charset="UTF-8">\n <title>Albert Einstein</ti'
83
+ )
84
+
78
85
79
86
def test_get_article_by_id (reader , article_data ):
80
87
return
81
- article = reader .get_article_by_id (article_data ['article_id' ])
88
+ article = reader .get_article_by_id (article_data ["article_id" ])
89
+
90
+ assert article .longurl == article_data ["url" ]
91
+ assert article .title == article_data ["title" ]
92
+ assert article .url == article_data ["url" ][2 :]
93
+ assert article .mimetype == article_data ["mimetype" ]
82
94
83
- assert article .longurl == article_data ['url' ]
84
- assert article .title == article_data ['title' ]
85
- assert article .url == article_data ['url' ][2 :]
86
- assert article .mimetype == article_data ['mimetype' ]
87
95
88
96
def test_namespace_count (reader ):
89
97
namespaces = reader .namespaces
90
98
num_articles = sum (reader .get_namespaces_count (ns ) for ns in namespaces )
91
99
assert reader .article_count == num_articles
92
100
101
+
93
102
def test_suggest (reader ):
94
- results = reader .suggest (u"Einstein" )
95
- assert u"A/Albert_Einstein" in list (results )
103
+ results = reader .suggest ("Einstein" )
104
+ assert "A/Albert_Einstein" in list (results )
105
+
96
106
97
107
def test_search (reader ):
98
- results = reader .search (u "Einstein" )
108
+ results = reader .search ("Einstein" )
99
109
assert len (list (results )) == 10
100
110
101
111
0 commit comments