Skip to content
This repository was archived by the owner on Jan 10, 2023. It is now read-only.

Commit 25a2e44

Browse files
aboks authored and sebastianbergmann committed
Fix #85
The PMD logger now replaces all characters that are invalid in XML by the Unicode replacement character (U+FFFD).
1 parent 3e4cf13 commit 25a2e44

File tree

5 files changed

+118
-5
lines changed

5 files changed

+118
-5
lines changed

src/Log/AbstractXmlLogger.php

+24
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,30 @@ protected function isUtf8($string)
9696
return true;
9797
}
9898

99+
/**
100+
* Escapes a string for inclusion inside an XML tag.
101+
*
102+
* Converts the string to UTF-8, substitutes the unicode replacement
103+
* character for every character disallowed in XML, and escapes
104+
* special characters.
105+
*
106+
* @param string $string
107+
* @return string
108+
*/
109+
protected function escapeForXml($string)
110+
{
111+
$string = $this->convertToUtf8($string);
112+
113+
// Substitute the unicode replacement character for disallowed chars
114+
$string = preg_replace(
115+
'/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]/u',
116+
"\xEF\xBF\xBD",
117+
$string
118+
);
119+
120+
return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
121+
}
122+
99123
/**
100124
* Processes a list of clones.
101125
*

src/Log/PMD.php

+1-5
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,7 @@ public function processClones(CodeCloneMap $clones)
4949
$duplication->appendChild(
5050
$this->document->createElement(
5151
'codefragment',
52-
htmlspecialchars(
53-
$this->convertToUtf8($clone->getLines()),
54-
ENT_COMPAT,
55-
'UTF-8'
56-
)
52+
$this->escapeForXml($clone->getLines())
5753
)
5854
);
5955
}

tests/Log/PMDTest.php

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?php
2+
use SebastianBergmann\PHPCPD\CodeClone;
3+
use SebastianBergmann\PHPCPD\CodeCloneFile;
4+
use SebastianBergmann\PHPCPD\CodeCloneMap;
5+
use SebastianBergmann\PHPCPD\Log\PMD;
6+
7+
class PHPCPD_Log_PMDTest extends PHPUnit_Framework_TestCase
8+
{
9+
/** @var string */
10+
private $testFile1;
11+
/** @var string */
12+
private $testFile2;
13+
/** @var string */
14+
private $pmdLogFile;
15+
/** @var string */
16+
private $expectedPmdLogFile;
17+
/** @var \SebastianBergmann\PHPCPD\Log\PMD */
18+
private $pmdLogger;
19+
20+
protected function setUp()
21+
{
22+
$this->testFile1 = __DIR__ . '/_files/with_ascii_escape.php';
23+
$this->testFile2 = __DIR__ . '/_files/with_ascii_escape2.php';
24+
25+
$this->pmdLogFile = tempnam(sys_get_temp_dir(), 'pmd');
26+
27+
$this->expectedPmdLogFile = tempnam(sys_get_temp_dir(), 'pmd');
28+
$expectedPmdLogTemplate = __DIR__ . '/_files/pmd_expected.xml';
29+
$expectedPmdLogContents = strtr(
30+
file_get_contents($expectedPmdLogTemplate),
31+
array(
32+
'%file1%' => $this->testFile1,
33+
'%file2%' => $this->testFile2
34+
)
35+
);
36+
file_put_contents($this->expectedPmdLogFile, $expectedPmdLogContents);
37+
38+
$this->pmdLogger = new PMD($this->pmdLogFile);
39+
}
40+
41+
protected function tearDown()
42+
{
43+
if (file_exists($this->pmdLogFile)) {
44+
unlink($this->pmdLogFile);
45+
}
46+
if (file_exists($this->expectedPmdLogFile)) {
47+
unlink($this->expectedPmdLogFile);
48+
}
49+
}
50+
51+
/**
52+
* @covers SebastianBergmann\PHPCPD\Log\PMD
53+
* @covers SebastianBergmann\PHPCPD\Log\AbstractXmlLogger
54+
*/
55+
public function testSubstitutesDisallowedCharacters()
56+
{
57+
$file1 = new CodeCloneFile($this->testFile1, 8);
58+
$file2 = new CodeCloneFile($this->testFile2, 8);
59+
$clone = new CodeClone($file1, $file2, 4, 4);
60+
$cloneMap = new CodeCloneMap();
61+
$cloneMap->addClone($clone);
62+
63+
$this->pmdLogger->processClones($cloneMap);
64+
65+
$this->assertXmlFileEqualsXmlFile(
66+
$this->expectedPmdLogFile,
67+
$this->pmdLogFile
68+
);
69+
}
70+
}

tests/Log/_files/pmd_expected.xml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<pmd-cpd>
3+
<duplication lines="4" tokens="4">
4+
<file path="%file1%" line="8"/>
5+
<file path="%file2%" line="8"/>
6+
<codefragment>function getAsciiEscapeChar()
7+
{
8+
return "�";
9+
}
10+
</codefragment>
11+
</duplication>
12+
</pmd-cpd>
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php
2+
3+
/**
4+
* This function returns an ASCII escape character:
5+
*
6+
* @return string
7+
*/
8+
function getAsciiEscapeChar()
9+
{
10+
return "";
11+
}

0 commit comments

Comments
 (0)