Skip to content

Commit 59c2c9e

Browse files
author
Igor Melnikov
committed
MAGETWO-57271: Modify escapeHtml function to filter not allowed attributes and tags
Modifying function to filter not allowed tags and attributes
1 parent 1630c06 commit 59c2c9e

File tree

2 files changed

+236
-19
lines changed

2 files changed

+236
-19
lines changed

lib/internal/Magento/Framework/Escaper.php

Lines changed: 160 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,81 @@ class Escaper
1616
private $escaper;
1717

1818
/**
19-
* Escape HTML entities
19+
* @var \Psr\Log\LoggerInterface
20+
*/
21+
private $logger;
22+
23+
/**
24+
* @var string[]
25+
*/
26+
private $notAllowedTags = ['script', 'img'];
27+
28+
/**
29+
* @var string[]
30+
*/
31+
private $allowedAttributes = ['id', 'class', 'href', 'target', 'title'];
32+
33+
/**
34+
* @var string[]
35+
*/
36+
private $escapeAsUrlAttributes = ['href'];
37+
38+
/**
39+
* Escape string for HTML context, allowedTags will not be escaped
2040
*
2141
* @param string|array $data
2242
* @param array $allowedTags
2343
* @return string|array
2444
*/
25-
public function escapeHtml($data, $allowedTags = null)
45+
public function escapeHtml($data, $allowedTags = [])
2646
{
2747
if (is_array($data)) {
2848
$result = [];
2949
foreach ($data as $item) {
30-
$result[] = $this->escapeHtml($item);
50+
$result[] = $this->escapeHtml($item, $allowedTags);
3151
}
3252
} elseif (strlen($data)) {
3353
if (is_array($allowedTags) && !empty($allowedTags)) {
34-
$allowed = implode('|', $allowedTags);
35-
$result = preg_replace('/<([\/\s\r\n]*)(' . $allowed . ')([\/\s\r\n]*)>/si', '##$1$2$3##', $data);
36-
$result = htmlspecialchars($result, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
37-
$result = preg_replace('/##([\/\s\r\n]*)(' . $allowed . ')([\/\s\r\n]*)##/si', '<$1$2$3>', $result);
54+
$notAllowedTags = array_intersect(
55+
array_map('strtolower', $allowedTags),
56+
$this->notAllowedTags
57+
);
58+
if (!empty($notAllowedTags)) {
59+
$this->getLogger()->critical(
60+
'The following tag(s) are not allowed: ' . implode(', ', $notAllowedTags)
61+
);
62+
return '';
63+
}
64+
$wrapperElementId = uniqid();
65+
$domDocument = new \DOMDocument('1.0', 'UTF-8');
66+
set_error_handler(
67+
/**
68+
* @SuppressWarnings(PHPMD.UnusedFormalParameter)
69+
*/
70+
function ($errorNumber, $errorString, $errorFile, $errorLine) {
71+
throw new \Exception($errorString, $errorNumber);
72+
}
73+
);
74+
$string = mb_convert_encoding($data, 'HTML-ENTITIES', 'UTF-8');
75+
try {
76+
$domDocument->loadHTML(
77+
'<html><body id="' . $wrapperElementId . '">' . $string . '</body></html>'
78+
);
79+
} catch (\Exception $e) {
80+
restore_error_handler();
81+
$this->getLogger()->critical($e);
82+
return '';
83+
}
84+
restore_error_handler();
85+
86+
$this->removeNotAllowedTags($domDocument, $allowedTags);
87+
$this->removeNotAllowedAttributes($domDocument);
88+
$this->escapeText($domDocument);
89+
$this->escapeAttributeValues($domDocument);
90+
91+
$result = mb_convert_encoding($domDocument->saveHTML(), 'UTF-8', 'HTML-ENTITIES');
92+
preg_match('/<body id="' . $wrapperElementId . '">(.+)<\/body><\/html>$/si', $result, $matches);
93+
return $matches[1];
3894
} else {
3995
$result = htmlspecialchars($data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
4096
}
@@ -44,6 +100,88 @@ public function escapeHtml($data, $allowedTags = null)
44100
return $result;
45101
}
46102

103+
/**
104+
* Remove not allowed tags
105+
*
106+
* @param \DOMDocument $domDocument
107+
* @param string[] $allowedTags
108+
* @return void
109+
*/
110+
private function removeNotAllowedTags(\DOMDocument $domDocument, array $allowedTags)
111+
{
112+
$xpath = new \DOMXPath($domDocument);
113+
$nodes = $xpath->query('//node()[name() != \''
114+
. implode('\' and name() != \'', array_merge($allowedTags, ['html', 'body'])) . '\']');
115+
foreach ($nodes as $node) {
116+
if ($node->nodeName != '#text' && $node->nodeName != '#comment') {
117+
$node->parentNode->replaceChild($domDocument->createTextNode($node->textContent), $node);
118+
}
119+
}
120+
}
121+
122+
/**
123+
* Remove not allowed attributes
124+
*
125+
* @param \DOMDocument $domDocument
126+
* @return void
127+
*/
128+
private function removeNotAllowedAttributes(\DOMDocument $domDocument)
129+
{
130+
$xpath = new \DOMXPath($domDocument);
131+
$nodes = $xpath->query(
132+
'//@*[name() != \'' . implode('\' and name() != \'', $this->allowedAttributes) . '\']'
133+
);
134+
foreach ($nodes as $node) {
135+
$node->parentNode->removeAttribute($node->nodeName);
136+
}
137+
}
138+
139+
/**
140+
* Escape text
141+
*
142+
* @param \DOMDocument $domDocument
143+
* @return void
144+
*/
145+
private function escapeText(\DOMDocument $domDocument)
146+
{
147+
$xpath = new \DOMXPath($domDocument);
148+
$nodes = $xpath->query('//text()');
149+
foreach ($nodes as $node) {
150+
$node->textContent = $this->escapeHtml($node->textContent);
151+
}
152+
}
153+
154+
/**
155+
* Escape attribute values
156+
*
157+
* @param \DOMDocument $domDocument
158+
* @return void
159+
*/
160+
private function escapeAttributeValues(\DOMDocument $domDocument)
161+
{
162+
$xpath = new \DOMXPath($domDocument);
163+
$nodes = $xpath->query('//@*');
164+
foreach ($nodes as $node) {
165+
$value = $this->escapeAttributeValue(
166+
$node->nodeName,
167+
$node->parentNode->getAttribute($node->nodeName)
168+
);
169+
$node->parentNode->setAttribute($node->nodeName, $value);
170+
}
171+
}
172+
173+
/**
174+
* Escape attribute value using escapeHtml or escapeUrl
175+
*
176+
* @param string $name
177+
* @param string $value
178+
* @return string
179+
*/
180+
private function escapeAttributeValue($name, $value)
181+
{
182+
return in_array($name, $this->escapeAsUrlAttributes) ? $this->escapeUrl($value) : $this->escapeHtml($value);
183+
}
184+
47185
/**
48186
* Escape a string for the HTML attribute context
49187
*
@@ -172,4 +310,19 @@ private function getEscaper()
172310
}
173311
return $this->escaper;
174312
}
313+
314+
/**
315+
* Get logger
316+
*
317+
* @return \Psr\Log\LoggerInterface
318+
* @deprecated
319+
*/
320+
private function getLogger()
321+
{
322+
if ($this->logger == null) {
323+
$this->logger = \Magento\Framework\App\ObjectManager::getInstance()
324+
->get(\Psr\Log\LoggerInterface::class);
325+
}
326+
return $this->logger;
327+
}
175328
}

lib/internal/Magento/Framework/Test/Unit/EscaperTest.php

Lines changed: 76 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,42 @@ class EscaperTest extends \PHPUnit_Framework_TestCase
2323
*/
2424
private $zendEscaper;
2525

26+
/**
27+
* @var \Psr\Log\LoggerInterface
28+
*/
29+
private $loggerMock;
30+
2631
protected function setUp()
2732
{
2833
$this->_escaper = new Escaper();
2934
$this->zendEscaper = new \Magento\Framework\ZendEscaper();
35+
$this->loggerMock = $this->getMockForAbstractClass(\Psr\Log\LoggerInterface::class);
3036
$objectManagerHelper = new ObjectManager($this);
3137
$objectManagerHelper->setBackwardCompatibleProperty($this->_escaper, 'escaper', $this->zendEscaper);
38+
$objectManagerHelper->setBackwardCompatibleProperty($this->_escaper, 'logger', $this->loggerMock);
3239
}
3340

3441
/**
3542
* @covers \Magento\Framework\Escaper::escapeHtml
3643
* @dataProvider escapeHtmlDataProvider
3744
*/
38-
public function testEscapeHtml($data, $expected, $allowedTags = null)
45+
public function testEscapeHtml($data, $expected, $allowedTags = [])
46+
{
47+
$actual = $this->_escaper->escapeHtml($data, $allowedTags);
48+
$this->assertEquals($expected, $actual);
49+
}
50+
51+
/**
52+
* @covers \Magento\Framework\Escaper::escapeHtml
53+
*/
54+
public function testEscapeHtmlWithInvalidData()
3955
{
56+
$data = '<span><script>some text in tags</script></span>';
57+
$expected = '';
58+
$allowedTags = ['script', 'span'];
59+
$this->loggerMock->expects($this->once())
60+
->method('critical')
61+
->with('The following tag(s) can not be used in the translation: script');
4062
$actual = $this->_escaper->escapeHtml($data, $allowedTags);
4163
$this->assertEquals($expected, $actual);
4264
}
@@ -47,22 +69,64 @@ public function testEscapeHtml($data, $expected, $allowedTags = null)
4769
public function escapeHtmlDataProvider()
4870
{
4971
return [
50-
'array data' => [
72+
'array -> [text with no tags, text with no allowed tags]' => [
5173
'data' => ['one', '<two>three</two>'],
5274
'expected' => ['one', '&lt;two&gt;three&lt;/two&gt;'],
53-
null,
5475
],
55-
'string data conversion' => [
56-
'data' => '<two>three</two>',
57-
'expected' => '&lt;two&gt;three&lt;/two&gt;',
58-
null,
76+
'text with special characters' => [
77+
'data' => '&<>"\'&amp;&lt;&gt;&quot;&#039;&#9;',
78+
'expected' => '&amp;&lt;&gt;&quot;&#039;&amp;&lt;&gt;&quot;&#039;&#9;'
79+
],
80+
'text with multiple allowed tags, includes self closing tag' => [
81+
'data' => '<span>some text in tags<br /></span>',
82+
'expected' => '<span>some text in tags<br></span>',
83+
'allowedTags' => ['span', 'br'],
5984
],
60-
'string data no conversion' => ['data' => 'one', 'expected' => 'one'],
61-
'string data with allowed tags' => [
62-
'data' => '<span><b>some text in tags</b></span>',
63-
'expected' => '<span><b>some text in tags</b></span>',
85+
'text with multiple allowed tags and allowed attribute in double quotes' => [
86+
'data' => 'Only <span id="sku_max_allowed"><b>2</b></span> in stock',
87+
'expected' => 'Only <span id="sku_max_allowed"><b>2</b></span> in stock',
6488
'allowedTags' => ['span', 'b'],
65-
]
89+
],
90+
'text with multiple allowed tags and allowed attribute in single quotes' => [
91+
'data' => 'Only <span id=\'sku_max_allowed\'><b>2</b></span> in stock',
92+
'expected' => 'Only <span id="sku_max_allowed"><b>2</b></span> in stock',
93+
'allowedTags' => ['span', 'b'],
94+
],
95+
'text with multiple allowed tags with allowed attribute' => [
96+
'data' => 'Only registered users can write reviews. Please <a href="%1">Sign in</a> or <a href="%2">create an account</a>',
97+
'expected' => 'Only registered users can write reviews. Please <a href="%1">Sign in</a> or <a href="%2">create an account</a>',
98+
'allowedTags' => ['a'],
99+
],
100+
'text with not allowed attribute in single quotes' => [
101+
'data' => 'Only <span type=\'1\'><b>2</b></span> in stock',
102+
'expected' => 'Only <span><b>2</b></span> in stock',
103+
'allowedTags' => ['span', 'b'],
104+
],
105+
'text with allowed and not allowed tags' => [
106+
'data' => 'Only registered users can write reviews. Please <a href="%1">Sign in<span>three</span></a> or <a href="%2"><span id="action">create an account</span></a>',
107+
'expected' => 'Only registered users can write reviews. Please <a href="%1">Sign inthree</a> or <a href="%2">create an account</a>',
108+
'allowedTags' => ['a'],
109+
],
110+
'text with allowed and not allowed tags, with allowed and not allowed attributes' => [
111+
'data' => 'Some test <span>text in span tag</span> <strong>text in strong tag</strong> <a type="some-type" href="http://domain.com/" onclick="alert(1)">Click here</a><script>alert(1)</script>',
112+
'expected' => 'Some test <span>text in span tag</span> text in strong tag <a href="http://domain.com/">Click here</a>alert(1)',
113+
'allowedTags' => ['a', 'span'],
114+
],
115+
'text with html comment' => [
116+
'data' => 'Only <span><b>2</b></span> in stock <!-- HTML COMMENT -->',
117+
'expected' => 'Only <span><b>2</b></span> in stock <!-- HTML COMMENT -->',
118+
'allowedTags' => ['span', 'b'],
119+
],
120+
'text with non ascii characters' => [
121+
'data' => ['абвгд', 'مثال'],
122+
'expected' => ['абвгд', 'مثال'],
123+
'allowedTags' => [],
124+
],
125+
'html and body tags' => [
126+
'data' => '<html><body><span>String</span></body></html>',
127+
'expected' => '',
128+
'allowedTags' => ['span'],
129+
],
66130
];
67131
}
68132

0 commit comments

Comments
 (0)