|
8 | 8 | import java.io.Reader; |
9 | 9 | import java.io.StringReader; |
10 | 10 |
|
| 11 | +import java.net.URI; |
| 12 | +import java.net.URISyntaxException; |
| 13 | + |
11 | 14 | import javax.xml.XMLConstants; |
12 | 15 | import javax.xml.transform.Source; |
13 | 16 | import javax.xml.transform.dom.DOMSource; |
@@ -285,24 +288,103 @@ private class SchemaResourceResolver implements LSResourceResolver |
285 | 288 | String systemId, |
286 | 289 | String baseURI) |
287 | 290 | { |
288 | | - if (noNet && systemId != null && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) { |
289 | | - if (systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) { |
290 | | - return null; // use default resolver |
291 | | - } |
| 291 | + if (noNet && !effectiveResourceIsLocal(systemId, baseURI)) { |
292 | 292 | try { |
293 | 293 | this.errorHandler.warning(new SAXParseException(String.format("Attempt to load network entity '%s'", systemId), null)); |
294 | 294 | } catch (SAXException ex) { |
295 | 295 | } |
296 | | - } else { |
297 | | - String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId); |
298 | | - lsInput.setPublicId(publicId); |
299 | | - lsInput.setSystemId(adjusted != null ? adjusted : systemId); |
300 | | - lsInput.setBaseURI(baseURI); |
| 296 | + return new SchemaLSInput(); // an empty input blocks the fetch |
301 | 297 | } |
| 298 | + |
| 299 | + String adjusted = adjustSystemIdIfNecessary(currentDir, scriptFileName, baseURI, systemId); |
| 300 | + lsInput.setPublicId(publicId); |
| 301 | + lsInput.setSystemId(adjusted != null ? adjusted : systemId); |
| 302 | + lsInput.setBaseURI(baseURI); |
302 | 303 | return lsInput; |
303 | 304 | } |
304 | 305 | } |
305 | 306 |
|
| 307 | + // We enforce NONET for schema resolution by hand because Xerces-J (the JAXP implementation |
| 308 | + // backing XML::Schema on JRuby) does not implement the standard JAXP property |
| 309 | + // XMLConstants.ACCESS_EXTERNAL_SCHEMA — so we cannot simply restrict external access on the |
| 310 | + // SchemaFactory and must classify each resolved resource in the LSResourceResolver instead. |
| 311 | + // |
| 312 | + // Decides whether a schema-import resource may be resolved while NONET is on: true means |
| 313 | + // local (allowed), false means a network resource (blocked). A relative systemId inherits |
| 314 | + // its document's base, so it is resolved against baseURI before classification — a relative |
| 315 | + // import under a remote base is a network fetch even though the systemId alone looks local. An absolute systemId is classified as-is. NOTE(review): URI.resolve also returns the systemId unchanged when baseURI is opaque (e.g. "jar:..."), so a relative import under such a base is classified on its own — confirm that is acceptable. |
| 316 | + private static boolean |
| 317 | + effectiveResourceIsLocal(String systemId, String baseURI) |
| 318 | + { |
| 319 | + // a null systemId means there is nothing external to resolve, so it is treated as local |
| 320 | + if (systemId == null) { |
| 321 | + return true; |
| 322 | + } |
| 323 | + try { |
| 324 | + URI uri = new URI(systemId); |
| 325 | + if (baseURI != null && !baseURI.isEmpty()) { |
| 326 | + uri = new URI(baseURI).resolve(uri); |
| 327 | + } |
| 328 | + return isLocalResource(uri); |
| 329 | + } catch (URISyntaxException | IllegalArgumentException e) { |
| 330 | + // fail closed: an unparseable base or systemId (e.g. a raw UNC path "\\host\share", or any other value containing a backslash such as "C:\dir") is |
| 331 | + // not provably local, and the JVM's file/URL handling may still reach the network |
| 332 | + return false; |
| 333 | + } |
| 334 | + } |
| 335 | + |
| 336 | + // Test seam for the Ruby suite: local_resource?(systemId, baseURI = nil). A nil systemId, or a nil or omitted baseURI, is passed to effectiveResourceIsLocal as Java null; the boolean result is wrapped via context.runtime.newBoolean. |
| 337 | + @JRubyMethod(meta = true, name = "local_resource?", required = 1, optional = 1, visibility = Visibility.PRIVATE) |
| 338 | + public static IRubyObject |
| 339 | + local_resource_eh(ThreadContext context, IRubyObject klazz, IRubyObject[] args) |
| 340 | + { |
| 341 | + String systemId = args[0].isNil() ? null : args[0].asJavaString(); |
| 342 | + String baseURI = (args.length > 1 && !args[1].isNil()) ? args[1].asJavaString() : null; |
| 343 | + return context.runtime.newBoolean(effectiveResourceIsLocal(systemId, baseURI)); |
| 344 | + } |
| 345 | + |
| 346 | + // Classifies an already-parsed URI. Local is a non-opaque URI with a missing scheme or the |
| 347 | + // "file" scheme, no remote authority, and no UNC-shaped path. This is intentionally stricter than libxml2's |
| 348 | + // xmlNoNetExternalEntityLoader, which folds a remote host (file://host/...) into a local |
| 349 | + // path rather than rejecting it. |
| 350 | + // |
| 351 | + // TODO: Windows drive-letter paths are blocked: "C:/path" parses as a URI whose scheme is the drive letter, |
| 352 | + // and "C:\path" never gets here because a backslash is illegal in a URI, so parsing fails and the caller fails closed; support those if we need it later. |
| 353 | + private static boolean |
| 354 | + isLocalResource(URI uri) |
| 355 | + { |
| 356 | + // only a missing scheme (a relative or absolute path) or file: can be local; any |
| 357 | + // other scheme is a network resource |
| 358 | + String scheme = uri.getScheme(); |
| 359 | + if (scheme != null && !scheme.equalsIgnoreCase("file")) { |
| 360 | + return false; |
| 361 | + } |
| 362 | + |
| 363 | + // an opaque "file:" URI (e.g. file:foo, with no "//") is not a usable local path; reject |
| 364 | + // it, matching libxml2, which does not resolve that form as a local file either |
| 365 | + if (uri.isOpaque()) { |
| 366 | + return false; |
| 367 | + } |
| 368 | + |
| 369 | + // a non-empty, non-localhost authority is a remote host — file://host/path, or the |
| 370 | + // schemeless network-path form //host/path. Stricter than libxml2, which folds such a |
| 371 | + // host into a (failing) local path. |
| 372 | + String authority = uri.getRawAuthority(); |
| 373 | + if (authority != null && !authority.isEmpty() && !authority.equalsIgnoreCase("localhost")) { |
| 374 | + return false; |
| 375 | + } |
| 376 | + |
| 377 | + // reject UNC-shaped paths even under an allowed authority: file:////host/share, |
| 378 | + // file://localhost//host/share, and %2f/%5c-encoded variants. getPath() is decoded, so |
| 379 | + // the encoded forms are normalized before this check. |
| 380 | + String path = uri.getPath(); |
| 381 | + if (path != null && (path.startsWith("//") || path.indexOf('\\') >= 0)) { |
| 382 | + return false; |
| 383 | + } |
| 384 | + |
| 385 | + return true; |
| 386 | + } |
| 387 | + |
306 | 388 | private class SchemaLSInput implements LSInput |
307 | 389 | { |
308 | 390 | protected String fPublicId; |
|
0 commit comments