Skip to content

Commit bcbac42

Browse files
OlivierJaquemet and jhy authored
Fix #2422 - Cloned Parser does not reuse TagSet from original (#2423)
--------- Co-authored-by: Jonathan Hedley <[email protected]>
1 parent cc90f72 commit bcbac42

File tree

4 files changed

+27
-0
lines changed

4 files changed

+27
-0
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Null characters in the HTML body were not consistently removed; and in foreign content were not correctly replaced. [#2395](https://github.com/jhy/jsoup/issues/2395)
2626
* An IndexOutOfBoundsException could be thrown when parsing a body fragment with crafted input. Now logged as a parse error. [#2397](https://github.com/jhy/jsoup/issues/2397), [#2406](https://github.com/jhy/jsoup/issues/2406)
2727
* When using StructuralEvaluators (e.g., a `parent child` selector) across many retained threads, their memoized results could also be retained, increasing memory use. These results are now cleared immediately after use, reducing overall memory consumption. [#2411](https://github.com/jhy/jsoup/issues/2411)
28+
* Cloning a `Parser` now preserves any custom `TagSet` applied to the parser. [#2422](https://github.com/jhy/jsoup/issues/2422), [#2423](https://github.com/jhy/jsoup/pull/2423)
2829
* Custom tags marked as `Tag.Void` now parse and serialize like the built-in void elements: they no longer consume following content, and the XML serializer emits the expected self-closing form. [#2425](https://github.com/jhy/jsoup/issues/2425)
2930

3031
### Internal Changes

src/main/java/org/jsoup/parser/Parser.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ private Parser(Parser copy) {
6060
errors = new ParseErrorList(copy.errors); // only copies size, not contents
6161
settings = new ParseSettings(copy.settings);
6262
trackPosition = copy.trackPosition;
63+
tagSet = new TagSet(copy.tagSet());
6364
}
6465

6566
/**

src/main/java/org/jsoup/parser/TagSet.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ public TagSet() {
4141

4242
public TagSet(TagSet original) {
4343
this.source = original;
44+
if (original.customizers != null)
45+
this.customizers = new ArrayList<>(original.customizers);
4446
}
4547

4648
/**

src/test/java/org/jsoup/parser/ParserTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
import java.nio.charset.StandardCharsets;
1010

1111
import static org.junit.jupiter.api.Assertions.assertEquals;
12+
import static org.junit.jupiter.api.Assertions.assertNotNull;
1213
import static org.junit.jupiter.api.Assertions.assertNotSame;
14+
import static org.junit.jupiter.api.Assertions.assertNull;
15+
import static org.junit.jupiter.api.Assertions.assertTrue;
1316

1417
public class ParserTest {
1518

@@ -76,4 +79,24 @@ public void testClone() {
7679
assertEquals(xmlParser.settings().preserveTagCase(), xmlClone.settings().preserveTagCase());
7780
assertEquals(xmlParser.settings().preserveAttributeCase(), xmlClone.settings().preserveAttributeCase());
7881
}
82+
83+
@Test
84+
public void testCloneCopyTagSet() {
85+
Parser parser = Parser.htmlParser();
86+
parser.tagSet().add(new Tag("foo"));
87+
parser.tagSet().onNewTag(tag -> tag.set(Tag.SelfClose));
88+
Parser clone = parser.clone();
89+
90+
// Ensure the tagsets are different instances
91+
assertNotSame(clone.tagSet(), parser.tagSet());
92+
// Check that the cloned tagset contains the same tag
93+
assertNotNull(clone.tagSet().get("foo", Parser.NamespaceHtml));
94+
// Ensure onNewTag customizers are retained
95+
Tag custom = clone.tagSet().valueOf("qux", Parser.NamespaceHtml);
96+
assertTrue(custom.isSelfClosing());
97+
// Check that the cloned tagset uses the original tagset as its source: tags added to the original after cloning are visible in the clone
98+
assertNull(clone.tagSet().get("bar", Parser.NamespaceHtml));
99+
parser.tagSet().add(new Tag("bar"));
100+
assertNotNull(clone.tagSet().get("bar", Parser.NamespaceHtml));
101+
}
79102
}

0 commit comments

Comments
 (0)