Skip to content

Commit a856d1e

Browse files
authored
fix: JRuby NONET bypass in XML::Schema (v1.19.x) (#3639)
**What problem is this PR intended to solve?** [JRuby] `XML::Schema` now enforces the `NONET` parse option, which Nokogiri enables by default. It was not enforced on JRuby, so a schema parsed with default options could still fetch external resources over the network, potentially enabling SSRF or XXE attacks and bypassing the mitigation for CVE-2020-26247. See [GHSA-8678-w3jw-xfc2](GHSA-8678-w3jw-xfc2) for more information. **Have you included adequate test coverage?** Yes. **Does this change affect the behavior of either the C or the Java implementations?** Java only — this fixes the JRuby (Java) implementation. CRuby is not affected, because libxml2's `xmlNoNetExternalEntityLoader` already blocks all network schemes regardless of scheme or case.
2 parents 6a0aa1e + f658a54 commit a856d1e

8 files changed

Lines changed: 401 additions & 175 deletions

File tree

ext/java/nokogiri/XmlSchema.java

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
import java.io.Reader;
99
import java.io.StringReader;
1010

11+
import java.net.URI;
12+
import java.net.URISyntaxException;
13+
1114
import javax.xml.XMLConstants;
1215
import javax.xml.transform.Source;
1316
import javax.xml.transform.dom.DOMSource;
@@ -285,24 +288,103 @@ private class SchemaResourceResolver implements LSResourceResolver
285288
String systemId,
286289
String baseURI)
287290
{
288-
if (noNet && systemId != null && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) {
289-
if (systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) {
290-
return null; // use default resolver
291-
}
291+
if (noNet && !effectiveResourceIsLocal(systemId, baseURI)) {
292292
try {
293293
this.errorHandler.warning(new SAXParseException(String.format("Attempt to load network entity '%s'", systemId), null));
294294
} catch (SAXException ex) {
295295
}
296-
} else {
297-
String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
298-
lsInput.setPublicId(publicId);
299-
lsInput.setSystemId(adjusted != null ? adjusted : systemId);
300-
lsInput.setBaseURI(baseURI);
296+
return new SchemaLSInput(); // an empty input blocks the fetch
301297
}
298+
299+
String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId);
300+
lsInput.setPublicId(publicId);
301+
lsInput.setSystemId(adjusted != null ? adjusted : systemId);
302+
lsInput.setBaseURI(baseURI);
302303
return lsInput;
303304
}
304305
}
305306

307+
// We enforce NONET for schema resolution by hand because Xerces-J (the JAXP implementation
308+
// backing XML::Schema on JRuby) does not implement the standard JAXP property
309+
// XMLConstants.ACCESS_EXTERNAL_SCHEMA — so we cannot simply restrict external access on the
310+
// SchemaFactory and must classify each resolved resource in the LSResourceResolver instead.
311+
//
312+
// Decides whether a schema-import resource may be resolved while NONET is on: true means
313+
// local (allowed), false means a network resource (blocked). A relative systemId inherits
314+
// its document's base, so it is resolved against baseURI before classification — a relative
315+
// import under a remote base is a network fetch even though the systemId alone looks local.
316+
private static boolean
317+
effectiveResourceIsLocal(String systemId, String baseURI)
318+
{
319+
// a null systemId means there is nothing external to resolve
320+
if (systemId == null) {
321+
return true;
322+
}
323+
try {
324+
URI uri = new URI(systemId);
325+
if (baseURI != null && !baseURI.isEmpty()) {
326+
uri = new URI(baseURI).resolve(uri);
327+
}
328+
return isLocalResource(uri);
329+
} catch (URISyntaxException | IllegalArgumentException e) {
330+
// fail closed: an unparseable base or systemId (e.g. a raw UNC path "\\host\share") is
331+
// not provably local, and the JVM's file/URL handling may still reach the network
332+
return false;
333+
}
334+
}
335+
336+
// Test seam for the Ruby suite: local_resource?(systemId, baseURI = nil).
337+
@JRubyMethod(meta = true, name = "local_resource?", required = 1, optional = 1, visibility = Visibility.PRIVATE)
338+
public static IRubyObject
339+
local_resource_eh(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
340+
{
341+
String systemId = args[0].isNil() ? null : args[0].asJavaString();
342+
String baseURI = (args.length > 1 && !args[1].isNil()) ? args[1].asJavaString() : null;
343+
return context.runtime.newBoolean(effectiveResourceIsLocal(systemId, baseURI));
344+
}
345+
346+
// Classifies an already-parsed URI. Local is a missing scheme, or the "file" scheme, with
347+
// no remote authority and no UNC-shaped path. This is intentionally stricter than libxml2's
348+
// xmlNoNetExternalEntityLoader, which folds a remote host (file://host/...) into a local
349+
// path rather than rejecting it.
350+
//
351+
// TODO: a Windows drive-letter path like "C:\path" parses as scheme "c" and would be
352+
// blocked; support those if we need it later.
353+
private static boolean
354+
isLocalResource(URI uri)
355+
{
356+
// only a missing scheme (a relative or absolute path) or file: can be local; any
357+
// other scheme is a network resource
358+
String scheme = uri.getScheme();
359+
if (scheme != null && !scheme.equalsIgnoreCase("file")) {
360+
return false;
361+
}
362+
363+
// an opaque "file:" URI (e.g. file:foo, with no "//") is not a usable local path; reject
364+
// it, matching libxml2, which does not resolve that form as a local file either
365+
if (uri.isOpaque()) {
366+
return false;
367+
}
368+
369+
// a non-empty, non-localhost authority is a remote host — file://host/path, or the
370+
// schemeless network-path form //host/path. Stricter than libxml2, which folds such a
371+
// host into a (failing) local path.
372+
String authority = uri.getRawAuthority();
373+
if (authority != null && !authority.isEmpty() && !authority.equalsIgnoreCase("localhost")) {
374+
return false;
375+
}
376+
377+
// reject UNC-shaped paths even under an allowed authority: file:////host/share,
378+
// file://localhost//host/share, and %2f/%5c-encoded variants. getPath() is decoded, so
379+
// the encoded forms are normalized before this check.
380+
String path = uri.getPath();
381+
if (path != null && (path.startsWith("//") || path.indexOf('\\') >= 0)) {
382+
return false;
383+
}
384+
385+
return true;
386+
}
387+
306388
private class SchemaLSInput implements LSInput
307389
{
308390
protected String fPublicId;

ext/nokogiri/nokogiri.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,6 @@ Init_nokogiri(void)
203203
rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
204204
rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
205205

206-
rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
207-
xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
208-
209206
#ifdef NOKOGIRI_PACKAGED_LIBRARIES
210207
rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
211208
# ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
@@ -228,6 +225,12 @@ Init_nokogiri(void)
228225
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
229226
#endif
230227

228+
rb_const_set(mNokogiri, rb_intern("LIBXML_ZLIB_ENABLED"),
229+
xmlHasFeature(XML_WITH_ZLIB) == 1 ? Qtrue : Qfalse);
230+
231+
rb_const_set(mNokogiri, rb_intern("LIBXML_HTTP_ENABLED"),
232+
xmlHasFeature(XML_WITH_HTTP) == 1 ? Qtrue : Qfalse);
233+
231234
#ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
232235
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
233236
#endif

lib/nokogiri/version/info.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ def libxml2_has_iconv?
5353
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
5454
end
5555

56+
def libxml2_has_zlib?
57+
defined?(Nokogiri::LIBXML_ZLIB_ENABLED) && Nokogiri::LIBXML_ZLIB_ENABLED
58+
end
59+
60+
def libxml2_has_http?
61+
defined?(Nokogiri::LIBXML_HTTP_ENABLED) && Nokogiri::LIBXML_HTTP_ENABLED
62+
end
63+
5664
def libxslt_has_datetime?
5765
defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
5866
end
@@ -145,6 +153,8 @@ def to_hash
145153
end
146154
libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
147155
libxml["iconv_enabled"] = libxml2_has_iconv?
156+
libxml["zlib_enabled"] = libxml2_has_zlib?
157+
libxml["http_enabled"] = libxml2_has_http?
148158
libxml["compiled"] = compiled_libxml_version.to_s
149159
libxml["loaded"] = loaded_libxml_version.to_s
150160
end

test/helper.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
require "nokogiri"
2323

2424
require_relative "helpers/memory_debugger"
25+
require_relative "helpers/mock_server"
2526

2627
if ENV["TEST_NOKOGIRI_WITH_LIBXML_RUBY"]
2728
#
@@ -103,6 +104,31 @@ def file_url_for(path)
103104
path_with_leading_slash = path.start_with?("/") ? path : "/#{path}"
104105
"file://#{path_with_leading_slash}"
105106
end
107+
108+
def assert_network_connection(scheme: "http", path: "/making-a-request", &block)
109+
assert tcp_connection_attempted?(scheme:, path:, &block), "should open a network connection"
110+
end
111+
112+
def refute_network_connection(scheme: "http", path: "/making-a-request", &block)
113+
refute tcp_connection_attempted?(scheme:, path:, &block), "should not open a network connection"
114+
end
115+
116+
def ignoring_syntax_errors
117+
yield
118+
rescue Nokogiri::XML::SyntaxError
119+
end
120+
121+
def tcp_connection_attempted?(scheme: "http", path: "/making-a-request")
122+
flunk("MockServer is not supported on this platform; the calling test must skip explicitly") unless MockServer.supported?
123+
124+
listener = MockServer.listener_class.new
125+
begin
126+
yield("#{scheme}://127.0.0.1:#{listener.port}#{path}")
127+
ensure
128+
connections = listener.stop
129+
end
130+
connections.positive?
131+
end
106132
end
107133

108134
class TestBenchmark < Minitest::BenchSpec

test/helpers/mock_server.rb

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# frozen_string_literal: true
2+
3+
require "socket"
4+
5+
require_relative "memory_debugger"
6+
7+
module Nokogiri
8+
module MockServer
9+
def self.listener_class
10+
Nokogiri.jruby? ? ThreadListener : ProcessListener
11+
end
12+
13+
def self.supported?
14+
listener_class.supported?
15+
end
16+
17+
# Accepts and immediately closes TCP connections on an ephemeral port,
18+
# counting them. Runs in a forked child process because on CRuby a parse
19+
# that accesses the network blocks in a C call that holds the GVL, so an
20+
# in-process listener thread would be starved and could never accept the
21+
# connection. The child writes one byte per connection to a pipe, and the
22+
# parent counts the bytes after reaping the child.
23+
class ProcessListener
24+
# Process.fork rules out platforms like Windows, and we also avoid
25+
# forking under memory debuggers, where children are traced too.
26+
def self.supported?
27+
Process.respond_to?(:fork) && !MemoryDebugger.active?
28+
end
29+
30+
attr_reader :port
31+
32+
def initialize
33+
server = TCPServer.new("127.0.0.1", 0)
34+
@port = server.addr[1]
35+
@reader, writer = IO.pipe
36+
@pid = Process.fork do
37+
@reader.close
38+
writer.sync = true
39+
loop do
40+
client = server.accept
41+
writer.write(".")
42+
client.close
43+
end
44+
end
45+
writer.close
46+
server.close
47+
end
48+
49+
def stop
50+
Process.kill(:TERM, @pid)
51+
Process.wait(@pid)
52+
@reader.read.length
53+
ensure
54+
@reader.close
55+
end
56+
end
57+
58+
# Same API as ProcessListener, but runs in an in-process thread. JRuby has
59+
# no GVL, so the listener thread can accept connections while a parse is
60+
# blocked, and avoiding process spawn keeps the tests fast on the JVM.
61+
class ThreadListener
62+
def self.supported?
63+
true
64+
end
65+
66+
attr_reader :port
67+
68+
def initialize
69+
@server = TCPServer.new("127.0.0.1", 0)
70+
@port = @server.addr[1]
71+
@connections = 0
72+
@thread = Thread.new do
73+
loop do
74+
client = @server.accept
75+
@connections += 1
76+
client.close
77+
end
78+
rescue IOError, Errno::EBADF
79+
end
80+
end
81+
82+
def stop
83+
@server.close
84+
@thread.join
85+
@connections
86+
end
87+
end
88+
end
89+
end

test/test_version.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ def test_version_info_for_libxml
6969
assert_equal("#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO["libxml"]["loaded"])
7070

7171
assert(version_info["libxml"].key?("iconv_enabled"))
72+
assert(version_info["libxml"].key?("zlib_enabled"))
73+
assert(version_info["libxml"].key?("http_enabled"))
7274
assert(version_info["libxslt"].key?("datetime_enabled"))
7375
end
7476

0 commit comments

Comments
 (0)