@@ -16,25 +16,81 @@ class Escaper
16
16
private $ escaper ;
17
17
18
18
/**
19
- * Escape HTML entities
19
+ * @var \Psr\Log\LoggerInterface
20
+ */
21
+ private $ logger ;
22
+
23
+ /**
24
+ * @var string[]
25
+ */
26
+ private $ notAllowedTags = ['script ' , 'img ' ];
27
+
28
+ /**
29
+ * @var string[]
30
+ */
31
+ private $ allowedAttributes = ['id ' , 'class ' , 'href ' , 'target ' , 'title ' ];
32
+
33
+ /**
34
+ * @var string[]
35
+ */
36
+ private $ escapeAsUrlAttributes = ['href ' ];
37
+
38
+ /**
39
+ * Escape string for HTML context, allowedTags will not be escaped
20
40
*
21
41
* @param string|array $data
22
42
* @param array $allowedTags
23
43
* @return string|array
24
44
*/
25
- public function escapeHtml ($ data , $ allowedTags = null )
45
+ public function escapeHtml ($ data , $ allowedTags = [] )
26
46
{
27
47
if (is_array ($ data )) {
28
48
$ result = [];
29
49
foreach ($ data as $ item ) {
30
- $ result [] = $ this ->escapeHtml ($ item );
50
+ $ result [] = $ this ->escapeHtml ($ item, $ allowedTags );
31
51
}
32
52
} elseif (strlen ($ data )) {
33
53
if (is_array ($ allowedTags ) && !empty ($ allowedTags )) {
34
- $ allowed = implode ('| ' , $ allowedTags );
35
- $ result = preg_replace ('/<([\/\s\r\n]*)( ' . $ allowed . ')([\/\s\r\n]*)>/si ' , '##$1$2$3## ' , $ data );
36
- $ result = htmlspecialchars ($ result , ENT_QUOTES | ENT_SUBSTITUTE , 'UTF-8 ' , false );
37
- $ result = preg_replace ('/##([\/\s\r\n]*)( ' . $ allowed . ')([\/\s\r\n]*)##/si ' , '<$1$2$3> ' , $ result );
54
+ $ notAllowedTags = array_intersect (
55
+ array_map ('strtolower ' , $ allowedTags ),
56
+ $ this ->notAllowedTags
57
+ );
58
+ if (!empty ($ notAllowedTags )) {
59
+ $ this ->getLogger ()->critical (
60
+ 'The following tag(s) are not allowed: ' . implode (', ' , $ notAllowedTags )
61
+ );
62
+ return '' ;
63
+ }
64
+ $ wrapperElementId = uniqid ();
65
+ $ domDocument = new \DOMDocument ('1.0 ' , 'UTF-8 ' );
66
+ set_error_handler (
67
+ /**
68
+ * @SuppressWarnings(PHPMD.UnusedFormalParameter)
69
+ */
70
+ function ($ errorNumber , $ errorString , $ errorFile , $ errorLine ) {
71
+ throw new \Exception ($ errorString , $ errorNumber );
72
+ }
73
+ );
74
+ $ string = mb_convert_encoding ($ data , 'HTML-ENTITIES ' , 'UTF-8 ' );
75
+ try {
76
+ $ domDocument ->loadHTML (
77
+ '<html><body id=" ' . $ wrapperElementId . '"> ' . $ string . '</body></html> '
78
+ );
79
+ } catch (\Exception $ e ) {
80
+ restore_error_handler ();
81
+ $ this ->getLogger ()->critical ($ e );
82
+ return '' ;
83
+ }
84
+ restore_error_handler ();
85
+
86
+ $ this ->removeNotAllowedTags ($ domDocument , $ allowedTags );
87
+ $ this ->removeNotAllowedAttributes ($ domDocument );
88
+ $ this ->escapeText ($ domDocument );
89
+ $ this ->escapeAttributeValues ($ domDocument );
90
+
91
+ $ result = mb_convert_encoding ($ domDocument ->saveHTML (), 'UTF-8 ' , 'HTML-ENTITIES ' );
92
+ preg_match ('/<body id=" ' . $ wrapperElementId . '">(.+)<\/body><\/html>$/si ' , $ result , $ matches );
93
+ return $ matches [1 ];
38
94
} else {
39
95
$ result = htmlspecialchars ($ data , ENT_QUOTES | ENT_SUBSTITUTE , 'UTF-8 ' , false );
40
96
}
@@ -44,6 +100,88 @@ public function escapeHtml($data, $allowedTags = null)
44
100
return $ result ;
45
101
}
46
102
103
/**
 * Replace every element whose tag is not allowed with a plain text node
 *
 * The replacement text node carries the removed element's text content, so the
 * markup is dropped while the readable text stays in place. The "html" and
 * "body" elements are always kept; text and comment nodes are left untouched.
 *
 * @param \DOMDocument $domDocument
 * @param string[] $allowedTags
 * @return void
 */
private function removeNotAllowedTags(\DOMDocument $domDocument, array $allowedTags)
{
    // XPath name() comparisons are case-sensitive. escapeHtml() lower-cases the
    // allowed tags for its forbidden-tag check, so normalise them the same way
    // here; otherwise an allowed tag passed as 'SPAN' would still be stripped.
    $keptTags = array_merge(array_map('strtolower', $allowedTags), ['html', 'body']);

    $xpath = new \DOMXPath($domDocument);
    $nodes = $xpath->query(
        '//node()[name() != \'' . implode('\' and name() != \'', $keptTags) . '\']'
    );
    foreach ($nodes as $node) {
        // The query also matches text and comment nodes (their name() never
        // equals a tag name); only element-like nodes are replaced.
        if ($node->nodeName != '#text' && $node->nodeName != '#comment') {
            $node->parentNode->replaceChild($domDocument->createTextNode($node->textContent), $node);
        }
    }
}
121
+
122
/**
 * Strip every attribute whose name is not listed in $this->allowedAttributes
 *
 * @param \DOMDocument $domDocument
 * @return void
 */
private function removeNotAllowedAttributes(\DOMDocument $domDocument)
{
    // Build "name() != 'a' and name() != 'b' ..." from the allowed attribute names.
    $condition = 'name() != \'' . implode('\' and name() != \'', $this->allowedAttributes) . '\'';
    $attributes = (new \DOMXPath($domDocument))->query('//@*[' . $condition . ']');

    foreach ($attributes as $attribute) {
        $owner = $attribute->parentNode;
        $owner->removeAttribute($attribute->nodeName);
    }
}
138
+
139
/**
 * Run the content of every text node in the document through escapeHtml()
 *
 * @param \DOMDocument $domDocument
 * @return void
 */
private function escapeText(\DOMDocument $domDocument)
{
    $textNodes = (new \DOMXPath($domDocument))->query('//text()');

    foreach ($textNodes as $textNode) {
        $escaped = $this->escapeHtml($textNode->textContent);
        $textNode->textContent = $escaped;
    }
}
153
+
154
/**
 * Rewrite the value of every attribute in the document with its escaped form
 *
 * Each value is read from the owning element, passed through
 * escapeAttributeValue() together with the attribute name, and written back.
 *
 * @param \DOMDocument $domDocument
 * @return void
 */
private function escapeAttributeValues(\DOMDocument $domDocument)
{
    $attributes = (new \DOMXPath($domDocument))->query('//@*');

    foreach ($attributes as $attribute) {
        $name = $attribute->nodeName;
        $element = $attribute->parentNode;
        $element->setAttribute(
            $name,
            $this->escapeAttributeValue($name, $element->getAttribute($name))
        );
    }
}
172
+
173
/**
 * Escape a single attribute value
 *
 * Attributes listed in $this->escapeAsUrlAttributes are escaped with
 * escapeUrl(); every other attribute is escaped with escapeHtml().
 *
 * @param string $name
 * @param string $value
 * @return string
 */
private function escapeAttributeValue($name, $value)
{
    // Strict comparison: attribute names are strings and must match exactly,
    // without PHP's loose type juggling.
    if (in_array($name, $this->escapeAsUrlAttributes, true)) {
        return $this->escapeUrl($value);
    }

    return $this->escapeHtml($value);
}
184
+
47
185
/**
48
186
* Escape a string for the HTML attribute context
49
187
*
@@ -172,4 +310,19 @@ private function getEscaper()
172
310
}
173
311
return $ this ->escaper ;
174
312
}
313
+
314
/**
 * Get logger
 *
 * Returns the logger held in $this->logger; when none is set yet, one is
 * fetched from the ObjectManager, stored on the instance and returned.
 *
 * @return \Psr\Log\LoggerInterface
 * @deprecated
 */
private function getLogger()
{
    if ($this->logger != null) {
        return $this->logger;
    }

    $objectManager = \Magento\Framework\App\ObjectManager::getInstance();
    $this->logger = $objectManager->get(\Psr\Log\LoggerInterface::class);

    return $this->logger;
}
175
328
}
0 commit comments