Skip to content

Commit 9c5cd26

Browse files
committed
Fix PHPStan error.
1 parent 0639180 commit 9c5cd26

File tree

7 files changed

+78
-70
lines changed

7 files changed

+78
-70
lines changed

phpstan.neon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ parameters:
66
checkGenericClassInNonGenericObjectType: false
77
ignoreErrors:
88
- '#Regex pattern is invalid: Compilation failed: invalid range in character class at offset 122 in pattern: .*#'
9-
- '#Offset .*? does not exist on array\(\).*#'
9+
- '#Construct empty\(\) is not allowed\. Use more strict comparison\.#'

src/Crawler.php

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@ final class Crawler
2020

2121
private \Nette\Http\Url $startingUrl;
2222

23-
/** @var string[] */
23+
/** @var array<int, string> */
2424
private array $urlList = [];
2525

26-
/** @var string[] */
26+
/** @var array<int, string> */
2727
private array $allUrls = [];
2828

29-
/** @var string[][] */
29+
/** @var array<string, array<int, string>> */
3030
private array $urlReferences = [];
3131

32-
/** @var mixed[][] */
32+
/** @var array<int, array{url: string, message: string, trace: array<int, mixed>}> */
3333
private array $errors = [];
3434

3535

@@ -161,9 +161,14 @@ public function setTextSeparator(ITextSeparator $textSeparator): void
161161

162162
private function processBasicConfig(string $url): void
163163
{
164-
$this->startingUrl = $startingUrl = new \Nette\Http\Url($url);
164+
$startingUrl = new \Nette\Http\Url($url);
165+
$this->startingUrl = $startingUrl;
165166
$this->addUrl($url);
166-
$this->addUrl(($startingUrl->getScheme() === 'https' ? 'http' : 'https') . '://' . $startingUrl->getAuthority());
167+
$this->addUrl(sprintf(
168+
'%s://%s',
169+
$startingUrl->getScheme() === 'https' ? 'http' : 'https',
170+
$startingUrl->getAuthority(),
171+
));
167172
}
168173

169174

@@ -181,7 +186,7 @@ private function addUrl(string $url): void
181186
if ($canAdd === true) { // Is allowed?
182187
$isAllowed = false;
183188
foreach ($this->config->getAllowedUrls() as $allow) {
184-
if (preg_match('/^' . $allow . '$/', $url)) {
189+
if (preg_match('/^' . $allow . '$/', $url) === 1) {
185190
$isAllowed = true;
186191
break;
187192
}
@@ -193,7 +198,7 @@ private function addUrl(string $url): void
193198
if ($canAdd === true) { // Is forbidden?
194199
$isForbidden = false;
195200
foreach ($this->config->getForbiddenUrls() as $forbidden) {
196-
if (preg_match('/^' . $forbidden . '$/', $url)) {
201+
if (preg_match('/^' . $forbidden . '$/', $url) === 1) {
197202
$isForbidden = true;
198203
break;
199204
}
@@ -236,16 +241,16 @@ private function loadUrl(string $url): HttpResponse
236241
$header = substr($response, 0, $headerSize);
237242
$contentType = '';
238243

239-
if (preg_match('/Content-Type:\s+(\S+)/', $response, $contentTypeParser)) {
244+
if (preg_match('/Content-Type:\s+(\S+)/', $response, $contentTypeParser) === 1) {
240245
$contentType = $contentTypeParser[1];
241246
}
242247
if ($contentType === 'application/xml' || strncmp($contentType, 'text/', 5) === 0) {
243-
$html = Strings::normalize((string) substr($response, $headerSize));
248+
$html = Strings::normalize(substr($response, $headerSize));
244249
$size = strlen($html);
245250

246251
if (
247-
strpos($html, '<?xml') !== false
248-
&& preg_match_all('/<loc>(https?\:\/\/[^\s\<]+)\<\/loc>/', $html, $sitemapUrls)
252+
str_contains($html, '<?xml')
253+
&& preg_match_all('/<loc>(https?\:\/\/[^\s\<]+)\<\/loc>/', $html, $sitemapUrls) === 1
249254
) {
250255
foreach ($sitemapUrls[1] ?? [] as $sitemapUrl) {
251256
if (Validators::isUrl($sitemapUrl)) {
@@ -255,7 +260,7 @@ private function loadUrl(string $url): HttpResponse
255260
}
256261
} else {
257262
$html = '<!-- FILE ' . $url . ' -->';
258-
if (preg_match('/Content-Length:\s+(\d+)/', $response, $contentLength)) {
263+
if (preg_match('/Content-Length:\s+(\d+)/', $response, $contentLength) === 1) {
259264
$size = (int) $contentLength[1];
260265
} else {
261266
$size = strlen($response) - $headerSize;
@@ -271,7 +276,7 @@ private function loadUrl(string $url): HttpResponse
271276
$this->formatHeaders($header),
272277
self::timer($url) * 1_000,
273278
(int) ($httpCodeParser['httpCode'] ?? 500),
274-
$size < 0 ? 0 : $size,
279+
max($size, 0),
275280
);
276281
}
277282

@@ -294,7 +299,7 @@ private function formatHeaders(string $header): array
294299
{
295300
$return = [];
296301
foreach (explode("\n", Strings::normalize($header)) as $_header) {
297-
if (preg_match('/^(?<name>[^:]+):\s*(?<value>.*)$/', $_header, $headerParser)) {
302+
if (preg_match('/^(?<name>[^:]+):\s*(?<value>.*)$/', $_header, $headerParser) === 1) {
298303
$return[$headerParser['name']] = $headerParser['value'];
299304
}
300305
}
@@ -309,10 +314,10 @@ private function formatHeaders(string $header): array
309314
private function getLinksFromHTML(string $url, string $html): array
310315
{
311316
$return = [];
312-
if (preg_match_all('/<a[^>]+>/', $html, $aLinks)) {
317+
if (preg_match_all('/<a[^>]+>/', $html, $aLinks) > 0) {
313318
foreach ($aLinks[0] as $aLink) {
314-
if (preg_match('/href=[\'"](?<url>[^\'"]+)[\'"]/', $aLink, $link)
315-
&& !preg_match('/^(?:mailto|tel|phone)\:/', $link['url'])
319+
if (preg_match('/href=[\'"](?<url>[^\'"]+)[\'"]/', $aLink, $link) === 1
320+
&& preg_match('/^(?:mailto|tel|phone)\:/', $link['url']) !== 1
316321
) {
317322
$formattedLink = RelativeUrlToAbsoluteUrl::process($url, $link['url']);
318323
if ($formattedLink !== null && !in_array($formattedLink, $return, true)) {
@@ -351,9 +356,10 @@ private function processRobots(string $url): ?string
351356
$response = $this->loadUrl($url);
352357
if ($response->getHttpCode() === 200) {
353358
$this->addUrl($url);
354-
foreach (explode("\n", $return = Strings::normalize($response->getHtml())) as $line) {
359+
$return = Strings::normalize($response->getHtml());
360+
foreach (explode("\n", $return) as $line) {
355361
$line = trim($line);
356-
if (preg_match('/^[Ss]itemap:\s+(https?\:\/\/\S+)/', $line, $robots)) {
362+
if (preg_match('/^[Ss]itemap:\s+(https?\:\/\/\S+)/', $line, $robots) === 1) {
357363
$this->addUrl($robots[1]);
358364
}
359365
}

src/RelativeUrlToAbsoluteUrl.php

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public static function process(string $baseUrl, string $relativeUrl): ?string
1515
return null;
1616
}
1717
if (!empty($r['scheme'])) {
18-
if (!empty($r['path']) && strpos($r['path'], '/') === 0) {
18+
if (!empty($r['path']) && str_starts_with($r['path'], '/')) {
1919
$r['path'] = self::urlRemoveDotSegments($r['path']);
2020
}
2121

@@ -93,7 +93,7 @@ private static function splitUrl(string $url, bool $decode = true): ?array
9393
$xaPath = '(' . $xPathAuthAbs . '|' . $xPathAbs . '|' . $xPathRel . ')';
9494
$xQueryFrag = '([' . $xpChar . '/?' . ']*)';
9595
$xUrl = '^(' . $xScheme . ':)?' . $xaPath . '?(\?' . $xQueryFrag . ')?(#' . $xQueryFrag . ')?$';
96-
if (!preg_match('!' . $xUrl . '!', $url, $m)) {
96+
if (preg_match('!' . $xUrl . '!', $url, $m) !== 1) {
9797
return null;
9898
}
9999

@@ -173,7 +173,7 @@ private static function urlRemoveDotSegments(string $path): string
173173
}
174174

175175
$outPath = implode('/', $outputSegments);
176-
if (strpos($path, '/') === 0) {
176+
if (str_starts_with($path, '/')) {
177177
$outPath = '/' . $outPath;
178178
}
179179
if ($outPath !== '/' && (mb_strlen($path) - 1) === mb_strrpos($path, '/', 0, 'UTF-8')) {
@@ -186,29 +186,26 @@ private static function urlRemoveDotSegments(string $path): string
186186

187187
/**
188188
* @param string[] $parts
189-
* @return string
190189
*/
191-
private static function joinUrl(array $parts, bool $encode = true): string
190+
private static function joinUrl(array $parts): string
192191
{
193-
if ($encode) {
194-
if (isset($parts['user'])) {
195-
$parts['user'] = rawurlencode($parts['user']);
196-
}
197-
if (isset($parts['pass'])) {
198-
$parts['pass'] = rawurlencode($parts['pass']);
199-
}
200-
if (isset($parts['host']) && !preg_match('!^(\[[\da-f.:]+\]])|([\da-f.:]+)$!ui', $parts['host'])) {
201-
$parts['host'] = rawurlencode($parts['host']);
202-
}
203-
if (!empty($parts['path'])) {
204-
$parts['path'] = preg_replace('!%2F!ui', '/', rawurlencode($parts['path']));
205-
}
206-
if (isset($parts['query'])) {
207-
$parts['query'] = rawurlencode($parts['query']);
208-
}
209-
if (isset($parts['fragment'])) {
210-
$parts['fragment'] = rawurlencode($parts['fragment']);
211-
}
192+
if (isset($parts['user'])) {
193+
$parts['user'] = rawurlencode($parts['user']);
194+
}
195+
if (isset($parts['pass'])) {
196+
$parts['pass'] = rawurlencode($parts['pass']);
197+
}
198+
if (isset($parts['host']) && preg_match('!^(\[[\da-f.:]+\]])|([\da-f.:]+)$!ui', $parts['host']) !== 1) {
199+
$parts['host'] = rawurlencode($parts['host']);
200+
}
201+
if (!empty($parts['path'])) {
202+
$parts['path'] = preg_replace('!%2F!ui', '/', rawurlencode($parts['path']));
203+
}
204+
if (isset($parts['query'])) {
205+
$parts['query'] = rawurlencode($parts['query']);
206+
}
207+
if (isset($parts['fragment'])) {
208+
$parts['fragment'] = rawurlencode($parts['fragment']);
212209
}
213210

214211
$url = '';
@@ -227,7 +224,7 @@ private static function joinUrl(array $parts, bool $encode = true): string
227224
$url .= '@';
228225
}
229226

230-
if (preg_match('!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'])) {
227+
if (preg_match('!^[\da-f]*:[\da-f.:]+$!ui', $parts['host']) === 1) {
231228
$url .= '[' . $parts['host'] . ']';
232229
} else {
233230
$url .= $parts['host'];

src/TextSeparator/TextSeparator.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public function getTexts(string $html): TextSeparatorEntity
2525
$uniqueTexts = [];
2626
foreach ($texts[0] ?? [] as $text) {
2727
$canAdd = true;
28-
if (preg_match('/^\-\-\s*.+(\s*\-\-)?$/', $text = trim($text))) {
28+
if (preg_match('/^\-\-\s*.+(\s*\-\-)?$/', $text = trim($text)) === 1) {
2929
$canAdd = false;
3030
}
3131

src/entity/Config.php

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,29 @@ final class Config
1515

1616
private int $maxCrawlTimeInSeconds;
1717

18-
/** @var string[] */
18+
/** @var array<int, string> */
1919
private array $allowedUrls;
2020

21-
/** @var string[] */
21+
/** @var array<int, string> */
2222
private array $forbiddenUrls;
2323

2424

2525
/**
26-
* @param mixed[] $config
26+
* @param array{
27+
* followExternalLinks?: bool,
28+
* sleepBetweenRequests?: int,
29+
* maxHttpRequests?: int,
30+
* maxCrawlTimeInSeconds?: int,
31+
* allowedUrls?: array<int, string>,
32+
* forbiddenUrls?: array<int, string>
33+
* } $config
2734
*/
2835
public function __construct(array $config = [])
2936
{
30-
$this->followExternalLinks = (bool) ($config['followExternalLinks'] ?? false);
31-
$this->sleepBetweenRequests = (int) ($config['sleepBetweenRequests'] ?? 1_000);
32-
$this->maxHttpRequests = (int) ($config['maxHttpRequests'] ?? 1_000_000);
33-
$this->maxCrawlTimeInSeconds = (int) ($config['maxCrawlTimeInSeconds'] ?? 30);
37+
$this->followExternalLinks = $config['followExternalLinks'] ?? false;
38+
$this->sleepBetweenRequests = $config['sleepBetweenRequests'] ?? 1_000;
39+
$this->maxHttpRequests = $config['maxHttpRequests'] ?? 1_000_000;
40+
$this->maxCrawlTimeInSeconds = $config['maxCrawlTimeInSeconds'] ?? 30;
3441
$this->allowedUrls = $config['allowedUrls'] ?? ['.+'];
3542
$this->forbiddenUrls = $config['forbiddenUrls'] ?? [''];
3643
}
@@ -62,7 +69,7 @@ public function getMaxCrawlTimeInSeconds(): int
6269

6370

6471
/**
65-
* @return string[]
72+
* @return array<int, string>
6673
*/
6774
public function getAllowedUrls(): array
6875
{
@@ -71,7 +78,7 @@ public function getAllowedUrls(): array
7178

7279

7380
/**
74-
* @return string[]
81+
* @return array<int, string>
7582
*/
7683
public function getForbiddenUrls(): array
7784
{

src/entity/CrawledResult.php

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
final class CrawledResult
99
{
10-
1110
/** @var string[] */
1211
private array $allUrls;
1312

@@ -17,13 +16,13 @@ final class CrawledResult
1716
/** @var string[] */
1817
private array $openedUrls;
1918

20-
/** @var string[][] */
19+
/** @var array<string, array<int, string>> */
2120
private array $urlReferences;
2221

2322
/** @var Url[] */
2423
private array $urls;
2524

26-
/** @var mixed[][] */
25+
/** @var array<int, array{url: string, message: string, trace: array<int, mixed>}> */
2726
private array $errors;
2827

2928
/** Content of robots.txt file if exist. */
@@ -34,9 +33,9 @@ final class CrawledResult
3433
* @param string[] $allUrls
3534
* @param string[] $followedUrls
3635
* @param string[] $openedUrls
37-
* @param string[][] $urlReferences
36+
* @param array<string, array<int, string>> $urlReferences
3837
* @param Url[] $urls
39-
* @param mixed[][] $errors
38+
* @param array<int, array{url: string, message: string, trace: array<int, mixed>}> $errors
4039
*/
4140
public function __construct(
4241
array $allUrls,
@@ -85,7 +84,7 @@ public function getOpenedUrls(): array
8584

8685

8786
/**
88-
* @return string[][]
87+
* @return array<string, array<int, string>>
8988
*/
9089
public function getUrlReferences(): array
9190
{
@@ -103,7 +102,7 @@ public function getUrls(): array
103102

104103

105104
/**
106-
* @return mixed[][]
105+
* @return array<int, array{url: string, message: string, trace: array<int, mixed>}>
107106
*/
108107
public function getErrors(): array
109108
{

src/entity/TextSeparatorEntity.php

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,16 @@
77

88
final class TextSeparatorEntity
99
{
10-
11-
/** @var string[] */
10+
/** @var array<int, string> */
1211
private array $regularTexts;
1312

14-
/** @var string[] */
13+
/** @var array<int, string> */
1514
private array $uniqueTexts;
1615

1716

1817
/**
19-
* @param string[] $regularTexts
20-
* @param string[] $uniqueTexts
18+
* @param array<int, string> $regularTexts
19+
* @param array<int, string> $uniqueTexts
2120
*/
2221
public function __construct(array $regularTexts, array $uniqueTexts)
2322
{
@@ -27,7 +26,7 @@ public function __construct(array $regularTexts, array $uniqueTexts)
2726

2827

2928
/**
30-
* @return string[]
29+
* @return array<int, string>
3130
*/
3231
public function getRegularTexts(): array
3332
{
@@ -36,7 +35,7 @@ public function getRegularTexts(): array
3635

3736

3837
/**
39-
* @return string[]
38+
* @return array<int, string>
4039
*/
4140
public function getUniqueTexts(): array
4241
{

0 commit comments

Comments
 (0)