Skip to content

Commit 678b5de

Browse files
committed
The PMD logger now replaces all characters that are invalid in XML by the Unicode replacement character (U+FFFD).
1 parent 5351836 commit 678b5de

File tree

5 files changed

+118
-5
lines changed

5 files changed

+118
-5
lines changed

src/Log/AbstractXmlLogger.php

+24
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,30 @@ protected function isUtf8($string)
131131
return true;
132132
}
133133

134+
/**
 * Escapes a string for inclusion inside an XML tag.
 *
 * The string is first passed through convertToUtf8(), then every
 * character that falls outside the XML 1.0 "Char" production is replaced
 * by the Unicode replacement character (U+FFFD), and finally the XML
 * special characters are escaped with htmlspecialchars().
 *
 * Characters allowed by XML 1.0:
 *   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * @param string $string
 * @return string
 */
protected function escapeForXml($string)
{
    $string = $this->convertToUtf8($string);

    // Substitute the unicode replacement character for disallowed chars.
    // The supplementary planes (U+10000-U+10FFFF) are valid XML characters
    // and must be part of the allowed set, otherwise legitimate characters
    // such as emoji would be replaced as well.
    $string = preg_replace(
        '/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u',
        "\xEF\xBF\xBD",
        $string
    );

    return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
}
157+
134158
/**
135159
* Processes a list of clones.
136160
*

src/Log/PMD.php

+1-5
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,7 @@ public function processClones(CodeCloneMap $clones)
8787
$duplication->appendChild(
8888
$this->document->createElement(
8989
'codefragment',
90-
htmlspecialchars(
91-
$this->convertToUtf8($clone->getLines()),
92-
ENT_COMPAT,
93-
'UTF-8'
94-
)
90+
$this->escapeForXml($clone->getLines())
9591
)
9692
);
9793
}

tests/Log/PMDTest.php

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?php
2+
use SebastianBergmann\PHPCPD\CodeClone;
3+
use SebastianBergmann\PHPCPD\CodeCloneFile;
4+
use SebastianBergmann\PHPCPD\CodeCloneMap;
5+
use SebastianBergmann\PHPCPD\Log\PMD;
6+
7+
class PHPCPD_Log_PMDTest extends PHPUnit_Framework_TestCase
{
    /**
     * Path of the first fixture source file referenced by the clone.
     *
     * @var string
     */
    private $testFile1;

    /**
     * Path of the second fixture source file referenced by the clone.
     *
     * @var string
     */
    private $testFile2;

    /**
     * Temporary file path handed to the PMD logger under test.
     *
     * @var string
     */
    private $pmdLogFile;

    /**
     * Temporary file holding the expected log, rendered from the template.
     *
     * @var string
     */
    private $expectedPmdLogFile;

    /**
     * Logger under test.
     *
     * @var \SebastianBergmann\PHPCPD\Log\PMD
     */
    private $pmdLogger;

    /**
     * Prepares the fixture paths, creates both temporary files, renders the
     * expected log from the template and instantiates the logger.
     */
    protected function setUp()
    {
        $this->testFile1 = __DIR__ . '/_files/with_ascii_escape.php';
        $this->testFile2 = __DIR__ . '/_files/with_ascii_escape2.php';

        $tempDir = sys_get_temp_dir();

        $this->pmdLogFile         = tempnam($tempDir, 'pmd');
        $this->expectedPmdLogFile = tempnam($tempDir, 'pmd');

        // The template carries placeholders for the fixture paths, which
        // are only known at run time.
        $placeholders = array(
            '%file1%' => $this->testFile1,
            '%file2%' => $this->testFile2
        );
        $template = file_get_contents(__DIR__ . '/_files/pmd_expected.xml');

        file_put_contents(
            $this->expectedPmdLogFile,
            strtr($template, $placeholders)
        );

        $this->pmdLogger = new PMD($this->pmdLogFile);
    }

    /**
     * Deletes the temporary files created in setUp(), if they still exist.
     */
    protected function tearDown()
    {
        $tempFiles = array($this->pmdLogFile, $this->expectedPmdLogFile);

        foreach ($tempFiles as $tempFile) {
            if (file_exists($tempFile)) {
                unlink($tempFile);
            }
        }
    }

    /**
     * Feeds a single clone spanning both fixture files to the logger and
     * compares the written log with the expected XML document.
     *
     * @covers SebastianBergmann\PHPCPD\Log\PMD
     * @covers SebastianBergmann\PHPCPD\Log\AbstractXmlLogger
     */
    public function testSubstitutesDisallowedCharacters()
    {
        $clone = new CodeClone(
            new CodeCloneFile($this->testFile1, 8),
            new CodeCloneFile($this->testFile2, 8),
            4,
            4
        );

        $cloneMap = new CodeCloneMap();
        $cloneMap->addClone($clone);

        $this->pmdLogger->processClones($cloneMap);

        $this->assertXmlFileEqualsXmlFile(
            $this->expectedPmdLogFile,
            $this->pmdLogFile
        );
    }
}

tests/Log/_files/pmd_expected.xml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<pmd-cpd>
3+
<duplication lines="4" tokens="4">
4+
<file path="%file1%" line="8"/>
5+
<file path="%file2%" line="8"/>
6+
<codefragment>function getAsciiEscapeChar()
7+
{
8+
return "�";
9+
}
10+
</codefragment>
11+
</duplication>
12+
</pmd-cpd>
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php
2+
3+
/**
4+
* This function returns an ASCII escape character:
5+
*
6+
* @return string
7+
*/
8+
function getAsciiEscapeChar()
9+
{
10+
return "";
11+
}

0 commit comments

Comments
 (0)