Skip to content

Commit d8000be

Browse files
committed
SOLR-11477: Disallow resolving of external entities in Lucene queryparser/xml/CoreParser and SolrCoreParser (defType=xmlparser or {!xmlparser ...}) by default.
(Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke) This closes #263.
1 parent 2f5ecbc commit d8000be

7 files changed

Lines changed: 160 additions & 16 deletions

File tree

lucene/CHANGES.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,19 @@ http://s.apache.org/luceneversions
55

66
======================= Lucene 5.5.5 =======================
77

8+
Changes in Runtime Behavior
9+
10+
* Resolving of external entities in queryparser/xml/CoreParser is disallowed
11+
by default. See SOLR-11477 for details.
12+
813
Bug Fixes
914

1015
* LUCENE-7419: Fix performance bug with TokenStream.end(), where it would lookup
1116
PositionIncrementAttribute every time. (Mike McCandless, Robert Muir)
1217

18+
* SOLR-11477: Disallow resolving of external entities in queryparser/xml/CoreParser
19+
by default. (Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke)
20+
1321
======================= Lucene 5.5.4 =======================
1422

1523
Bug Fixes

lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,19 @@
2222
import org.apache.lucene.search.Query;
2323
import org.w3c.dom.Document;
2424
import org.w3c.dom.Element;
25+
import org.xml.sax.EntityResolver;
26+
import org.xml.sax.ErrorHandler;
27+
import org.xml.sax.InputSource;
28+
import org.xml.sax.SAXException;
2529

30+
import javax.xml.XMLConstants;
2631
import javax.xml.parsers.DocumentBuilder;
2732
import javax.xml.parsers.DocumentBuilderFactory;
33+
import javax.xml.parsers.ParserConfigurationException;
2834

35+
import java.io.IOException;
2936
import java.io.InputStream;
37+
import java.util.Locale;
3038

3139
/**
3240
* Assembles a QueryBuilder which uses only core Lucene Query objects
@@ -118,6 +126,10 @@ protected CoreParser(String defaultField, Analyzer analyzer, QueryParser parser)
118126
queryFactory.addBuilder("SpanNot", snot);
119127
}
120128

129+
/**
130+
* Parses the given stream as XML file and returns a {@link Query}.
131+
* By default this disallows external entities for security reasons.
132+
*/
121133
public Query parse(InputStream xmlStream) throws ParserException {
122134
return getQuery(parseXML(xmlStream).getDocumentElement());
123135
}
@@ -130,28 +142,63 @@ public void addFilterBuilder(String nodeName, FilterBuilder builder) {
130142
filterFactory.addBuilder(nodeName, builder);
131143
}
132144

133-
private static Document parseXML(InputStream pXmlFile) throws ParserException {
134-
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
135-
DocumentBuilder db = null;
145+
/**
146+
* Returns a SAX {@link EntityResolver} to be used by {@link DocumentBuilder}.
147+
* By default this returns {@link #DISALLOW_EXTERNAL_ENTITY_RESOLVER}, which disallows the
148+
* expansion of external entities (for security reasons). To restore legacy behavior,
149+
* override this method to return {@code null}.
150+
*/
151+
protected EntityResolver getEntityResolver() {
152+
return DISALLOW_EXTERNAL_ENTITY_RESOLVER;
153+
}
154+
155+
/**
156+
* Subclass and override to return a SAX {@link ErrorHandler} to be used by {@link DocumentBuilder}.
157+
* By default this returns {@code null} so no error handler is used.
158+
* This method can be used to redirect XML parse errors/warnings to a custom logger.
159+
*/
160+
protected ErrorHandler getErrorHandler() {
161+
return null;
162+
}
163+
164+
private Document parseXML(InputStream pXmlFile) throws ParserException {
165+
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
166+
dbf.setValidating(false);
136167
try {
137-
db = dbf.newDocumentBuilder();
138-
}
139-
catch (Exception se) {
140-
throw new ParserException("XML Parser configuration error", se);
168+
dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
169+
} catch (ParserConfigurationException e) {
170+
// ignore since all implementations are required to support the
171+
// {@link javax.xml.XMLConstants#FEATURE_SECURE_PROCESSING} feature
141172
}
142-
org.w3c.dom.Document doc = null;
173+
final DocumentBuilder db;
143174
try {
144-
doc = db.parse(pXmlFile);
175+
db = dbf.newDocumentBuilder();
176+
} catch (Exception se) {
177+
throw new ParserException("XML Parser configuration error.", se);
145178
}
146-
catch (Exception se) {
147-
throw new ParserException("Error parsing XML stream:" + se, se);
179+
try {
180+
db.setEntityResolver(getEntityResolver());
181+
db.setErrorHandler(getErrorHandler());
182+
return db.parse(pXmlFile);
183+
} catch (Exception se) {
184+
throw new ParserException("Error parsing XML stream: " + se, se);
148185
}
149-
return doc;
150186
}
151187

152188

153189
@Override
154190
public Query getQuery(Element e) throws ParserException {
155191
return queryFactory.getQuery(e);
156192
}
193+
194+
public static final EntityResolver DISALLOW_EXTERNAL_ENTITY_RESOLVER =
195+
new EntityResolver() {
196+
@Override
197+
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
198+
throw new SAXException(String.format(Locale.ENGLISH,
199+
"External Entity resolving unsupported: publicId=\"%s\" systemId=\"%s\"",
200+
publicId, systemId));
201+
}
202+
};
203+
157204
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE TermQuery SYSTEM "foo://bar.xyz/mydtd">
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one or more
5+
contributor license agreements. See the NOTICE file distributed with
6+
this work for additional information regarding copyright ownership.
7+
The ASF licenses this file to You under the Apache License, Version 2.0
8+
(the "License"); you may not use this file except in compliance with
9+
the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
<TermQuery fieldName="contents">sumitomo</TermQuery>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE TermQuery [
3+
<!ENTITY internalTerm "sumitomo">
4+
<!ENTITY externalTerm SYSTEM "foo://bar.xyz/external">
5+
<!ENTITY % myParameterEntity "foo://bar.xyz/param">
6+
]>
7+
<!--
8+
Licensed to the Apache Software Foundation (ASF) under one or more
9+
contributor license agreements. See the NOTICE file distributed with
10+
this work for additional information regarding copyright ownership.
11+
The ASF licenses this file to You under the Apache License, Version 2.0
12+
(the "License"); you may not use this file except in compliance with
13+
the License. You may obtain a copy of the License at
14+
15+
http://www.apache.org/licenses/LICENSE-2.0
16+
17+
Unless required by applicable law or agreed to in writing, software
18+
distributed under the License is distributed on an "AS IS" BASIS,
19+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20+
See the License for the specific language governing permissions and
21+
limitations under the License.
22+
-->
23+
<TermQuery fieldName="contents">&internalTerm;&externalTerm;</TermQuery>

lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.apache.lucene.document.Document;
2424
import org.apache.lucene.document.Field;
2525
import org.apache.lucene.document.IntField;
26-
import org.apache.lucene.index.LeafReaderContext;
2726
import org.apache.lucene.index.DirectoryReader;
2827
import org.apache.lucene.index.IndexReader;
2928
import org.apache.lucene.index.IndexWriter;
@@ -35,15 +34,13 @@
3534
import org.apache.lucene.store.Directory;
3635
import org.apache.lucene.util.LuceneTestCase;
3736
import org.junit.AfterClass;
38-
import org.junit.Assume;
3937
import org.junit.BeforeClass;
4038

4139
import java.io.BufferedReader;
4240
import java.io.IOException;
4341
import java.io.InputStream;
4442
import java.io.InputStreamReader;
4543
import java.nio.charset.StandardCharsets;
46-
import java.util.List;
4744

4845

4946
public class TestCoreParser extends LuceneTestCase {
@@ -101,6 +98,32 @@ public void testSimpleXML() throws ParserException, IOException {
10198
dumpResults("TermQuery", q, 5);
10299
}
103100

101+
public void test_DOCTYPE_TermQueryXML() throws ParserException, IOException {
102+
try {
103+
parse("DOCTYPE_TermQuery.xml");
104+
fail("should have thrown a ParserException!");
105+
}
106+
catch (ParserException saxe) {
107+
assertTrue(saxe.getMessage().equals(
108+
"Error parsing XML stream: org.xml.sax.SAXException: External Entity resolving unsupported: publicId=\"null\" systemId=\"foo://bar.xyz/mydtd\""));
109+
return;
110+
}
111+
fail("should have thrown a ParserException!");
112+
}
113+
114+
public void test_ENTITY_TermQueryXML() throws ParserException, IOException {
115+
try {
116+
parse("ENTITY_TermQuery.xml");
117+
fail("should have thrown a ParserException!");
118+
}
119+
catch (ParserException saxe) {
120+
assertTrue(saxe.getMessage().equals(
121+
"Error parsing XML stream: org.xml.sax.SAXException: External Entity resolving unsupported: publicId=\"null\" systemId=\"foo://bar.xyz/external\""));
122+
return;
123+
}
124+
fail("should have thrown a ParserException!");
125+
}
126+
104127
public void testSimpleTermsQueryXML() throws ParserException, IOException {
105128
Query q = parse("TermsQuery.xml");
106129
dumpResults("TermsQuery", q, 5);

solr/CHANGES.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,20 @@ Apache UIMA 2.3.1
2121
Apache ZooKeeper 3.4.6
2222
Jetty 9.2.13.v20150730
2323

24+
Upgrade Notes
25+
----------------------
26+
27+
* SOLR-11477: in the XML query parser (defType=xmlparser or {!xmlparser ... })
28+
the resolving of external entities is now disallowed by default.
29+
2430
Bug Fixes
2531
----------------------
2632

27-
* SOLR-10420: Solr 6.x leaking one SolrZkClient instance per second (Scott Blum, Cao Manh Dat, Markus Jelsma, Steve Rowe)
33+
* SOLR-10420: Leaking one SolrZkClient instance per second (Scott Blum, Cao Manh Dat, Markus Jelsma, Steve Rowe)
34+
35+
* SOLR-11477: Disallow resolving of external entities in the XML query parser (defType=xmlparser).
36+
(Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke)
37+
2838

2939
======================= 5.5.4 =======================
3040

solr/core/src/java/org/apache/solr/search/SolrCoreParser.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,26 @@
1616
*/
1717
package org.apache.solr.search;
1818

19+
import java.lang.invoke.MethodHandles;
20+
1921
import org.apache.lucene.analysis.Analyzer;
2022
import org.apache.lucene.queryparser.xml.CoreParser;
2123

2224
import org.apache.solr.request.SolrQueryRequest;
25+
import org.apache.solr.common.util.XMLErrorLogger;
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
import org.xml.sax.ErrorHandler;
2329

2430
/**
2531
* Assembles a QueryBuilder which uses Query objects from Solr's <code>search</code> module
2632
* in addition to Query objects supported by the Lucene <code>CoreParser</code>.
2733
*/
2834
public class SolrCoreParser extends CoreParser {
2935

36+
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
37+
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
38+
3039
public SolrCoreParser(String defaultField, Analyzer analyzer,
3140
SolrQueryRequest req) {
3241
super(defaultField, analyzer);
@@ -35,4 +44,9 @@ public SolrCoreParser(String defaultField, Analyzer analyzer,
3544
// lucene_parser.addQueryBuilder("SomeOtherQuery", new SomeOtherQueryBuilder(schema));
3645
}
3746

47+
@Override
48+
protected ErrorHandler getErrorHandler() {
49+
return xmllog;
50+
}
51+
3852
}

0 commit comments

Comments
 (0)