Skip to content

Commit 74a05a9

Browse files
committed
Allow contexts to be HTML documents, with preference towards script elements of type `application/ld+json;profile=http://www.w3.org/ns/json-ld#context`.
For w3c/json-ld-syntax#66.
1 parent 1e99447 commit 74a05a9

File tree

3 files changed

+153
-21
lines changed

3 files changed

+153
-21
lines changed

lib/json/ld/api.rb

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,12 @@ def self.frame(input, frame, expanded: false, **options)
384384
when IO, StringIO then MultiJson.load(frame.read)
385385
when String
386386
remote_doc = options[:documentLoader].call(frame)
387-
case remote_doc.document
388-
when String then MultiJson.load(remote_doc.document)
389-
else remote_doc.document
387+
if remote_doc.content_type == 'text/html'
388+
load_html(remote_doc.document, url: context.to_s, profile: 'http://www.w3.org/ns/json-ld#frame')
389+
elsif remote_doc.document.is_a?(String)
390+
MultiJson.load(remote_doc.document)
391+
else
392+
remote_doc.document
390393
end
391394
end
392395

@@ -587,22 +590,22 @@ class << self
587590
alias :fromRDF :fromRdf
588591
end
589592

590-
private
591-
def validate_input(input, url:)
592-
return unless defined?(JsonLint)
593-
jsonlint = JsonLint::Linter.new
594-
input = StringIO.new(input) unless input.respond_to?(:read)
595-
unless jsonlint.check_stream(input)
596-
raise JsonLdError::LoadingDocumentFailed, "url: #{url}\n" + jsonlint.errors[''].join("\n")
597-
end
598-
input.rewind
599-
end
600-
601593
##
602594
# Load one or more script tags from an HTML source.
603595
# Unescapes and uncomments input, returns the internal representation
604596
# Yields document base
605-
def load_html(input, url:, library: nil, extractAllScripts: false, **options)
597+
# @param [String] input
598+
# @param [String] url Original URL
599+
# @param [:nokogiri, :rexml] library (nil)
600+
# @param [Boolean] extractAllScripts (false)
601+
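# @param [String] profile (nil) Optional prioritized profile IRI; when loading a single script, a script element whose type carries this profile is preferred over the first plain `application/ld+json` script.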
602+
# @param [Hash{Symbol => Object}] options
603+
def load_html(input, url:,
604+
library: nil,
605+
extractAllScripts: false,
606+
profile: nil,
607+
**options)
608+
606609
if input.is_a?(String)
607610
library ||= begin
608611
require 'nokogiri'
@@ -647,7 +650,8 @@ def load_html(input, url:, library: nil, extractAllScripts: false, **options)
647650
MultiJson.load(content, options)
648651
elsif extractAllScripts
649652
res = []
650-
input.xpath("//script[starts-with(@type, 'application/ld+json')]").each do |element|
653+
elements = input.xpath("//script[starts-with(@type, 'application/ld+json')]")
654+
elements.each do |element|
651655
content = element.inner_html
652656
validate_input(content, url: url) if options[:validate]
653657
r = MultiJson.load(content, options)
@@ -660,7 +664,8 @@ def load_html(input, url:, library: nil, extractAllScripts: false, **options)
660664
res
661665
else
662666
# Find the first script with type application/ld+json.
663-
element = input.at_xpath("//script[starts-with(@type, 'application/ld+json')]")
667+
element = input.at_xpath("//script[starts-with(@type, 'application/ld+json;profile=#{profile}')]") if profile
668+
element ||= input.at_xpath("//script[starts-with(@type, 'application/ld+json')]")
664669
content = element ? element.inner_html : "[]"
665670
validate_input(content, url: url) if options[:validate]
666671
MultiJson.load(content, options)
@@ -669,6 +674,25 @@ def load_html(input, url:, library: nil, extractAllScripts: false, **options)
669674
raise JSON::LD::JsonLdError::InvalidScriptElement, e.message
670675
end
671676

677+
# Use from a different location
678+
# @see {#load_html}
679+
def self.load_html(input, **options)
680+
self.new([], nil).load_html(input, **options)
681+
end
682+
683+
##
684+
# Validate JSON using JsonLint, if loaded
685+
private
686+
def validate_input(input, url:)
687+
return unless defined?(JsonLint)
688+
jsonlint = JsonLint::Linter.new
689+
input = StringIO.new(input) unless input.respond_to?(:read)
690+
unless jsonlint.check_stream(input)
691+
raise JsonLdError::LoadingDocumentFailed, "url: #{url}\n" + jsonlint.errors[''].join("\n")
692+
end
693+
input.rewind
694+
end
695+
672696
##
673697
# A {RemoteDocument} is returned from a {documentLoader}.
674698
#

lib/json/ld/context.rb

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,14 @@ def parse(local_context, remote_contexts: [], from_term: nil)
486486
context_opts.delete(:headers)
487487
@options[:documentLoader].call(context.to_s, context_opts) do |remote_doc|
488488
# 3.2.5) Dereference context. If the dereferenced document has no top-level JSON object with an @context member, an invalid remote context has been detected and processing is aborted; otherwise, set context to the value of that member.
489-
jo = case remote_doc.document
490-
when String then MultiJson.load(remote_doc.document)
491-
else remote_doc.document
489+
jo = if remote_doc.content_type == 'text/html'
490+
API.load_html(remote_doc.document, url: context.to_s, profile: 'http://www.w3.org/ns/json-ld#context')
491+
elsif remote_doc.document.is_a?(String)
492+
MultiJson.load(remote_doc.document)
493+
else
494+
remote_doc.document
492495
end
496+
493497
raise JsonLdError::InvalidRemoteContext, "#{context}" unless jo.is_a?(Hash) && jo.has_key?('@context')
494498
context = jo['@context']
495499
end
@@ -701,6 +705,10 @@ def create_term_definition(local_context, term, defined, from_term: nil, protect
701705
raise JsonLdError::InvalidIRIMapping, "non-absolute @reverse IRI: #{definition.id} on term #{term.inspect}" unless
702706
definition.id.is_a?(RDF::URI) && definition.id.absolute?
703707

708+
if term.include?(':') && (term_iri = expand_iri(term)) != definition.id
709+
raise JsonLdError::InvalidIRIMapping, "term #{term} expands to #{definition.id}, not #{term_iri}"
710+
end
711+
704712
warn "[DEPRECATION] Blank Node terms deprecated in JSON-LD 1.1." if (processingMode || "json-ld-1.1") >= "json-ld-1.1" && definition.id.start_with?("_:")
705713

706714
# If value contains an @container member, set the container mapping of definition to its value; if its value is neither @set, @index, @type, @id, an absolute IRI nor null, an invalid reverse property error has been detected (reverse properties only support set- and index-containers) and processing is aborted.
@@ -723,6 +731,10 @@ def create_term_definition(local_context, term, defined, from_term: nil, protect
723731
raise JsonLdError::InvalidKeywordAlias, "expected value of @id to not be @context on term #{term.inspect}" if
724732
definition.id == '@context'
725733

734+
if term.match?(/:[^:]/) && (term_iri = expand_iri(term)) != definition.id
735+
raise JsonLdError::InvalidIRIMapping, "term #{term} expands to #{definition.id}, not #{term_iri}"
736+
end
737+
726738
warn "[DEPRECATION] Blank Node terms deprecated in JSON-LD 1.1." if (processingMode || "json-ld-1.1") >= "json-ld-1.1" && definition.id.start_with?("_:")
727739

728740
# If id ends with a gen-delim, it may be used as a prefix for simple terms

spec/context_spec.rb

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ def containers
3232
"homepage": {"@id": "http://xmlns.com/foaf/0.1/homepage", "@type": "@id"},
3333
"avatar": {"@id": "http://xmlns.com/foaf/0.1/avatar", "@type": "@id"}
3434
}
35-
}), base_uri: "http://example.com/context")
35+
}),
36+
base_uri: "http://example.com/context",
37+
headers: {content_type: "application/ld+json"})
3638
end
3739
subject {context}
3840

@@ -78,6 +80,90 @@ def containers
7880
}, logger)
7981
end
8082

83+
it "retrieves and parses a remote context document in HTML using the context profile" do
84+
remote_doc =
85+
RDF::Util::File::RemoteDocument.new(%q(
86+
<html><head>
87+
<script>Not This</script>
88+
<script type="application/ld+json">
89+
{
90+
"@context": {
91+
"homepage": {"@id": "http://example.com/this-would-be-wrong", "@type": "@id"},
92+
"avatar": {"@id": "http://example.com/this-would-be-wrong", "@type": "@id"}
93+
}
94+
}
95+
</script>
96+
<script type="application/ld+json;profile=http://www.w3.org/ns/json-ld#context">
97+
{
98+
"@context": {
99+
"xsd": "http://www.w3.org/2001/XMLSchema#",
100+
"name": "http://xmlns.com/foaf/0.1/name",
101+
"homepage": {"@id": "http://xmlns.com/foaf/0.1/homepage", "@type": "@id"},
102+
"avatar": {"@id": "http://xmlns.com/foaf/0.1/avatar", "@type": "@id"}
103+
}
104+
}
105+
</script>
106+
<script type="application/ld+json;profile=http://www.w3.org/ns/json-ld#context">
107+
{
108+
"@context": {
109+
"homepage": {"@id": "http://example.com/this-would-also-be-wrong", "@type": "@id"},
110+
"avatar": {"@id": "http://example.com/this-would-also-be-wrong", "@type": "@id"}
111+
}
112+
}
113+
</script>
114+
</head></html>
115+
),
116+
base_uri: "http://example.com/context",
117+
headers: {content_type: "text/html"})
118+
JSON::LD::Context::PRELOADED.clear
119+
expect(JSON::LD::API).to receive(:documentLoader).with("http://example.com/context", anything).and_yield(remote_doc)
120+
ec = subject.parse("http://example.com/context")
121+
expect(ec.send(:mappings)).to produce({
122+
"xsd" => "http://www.w3.org/2001/XMLSchema#",
123+
"name" => "http://xmlns.com/foaf/0.1/name",
124+
"homepage" => "http://xmlns.com/foaf/0.1/homepage",
125+
"avatar" => "http://xmlns.com/foaf/0.1/avatar"
126+
}, logger)
127+
end
128+
129+
it "retrieves and parses a remote context document in HTML" do
130+
remote_doc =
131+
RDF::Util::File::RemoteDocument.new(%q(
132+
<html><head>
133+
<script>Not This</script>
134+
<script type="application/ld+json">
135+
{
136+
"@context": {
137+
"xsd": "http://www.w3.org/2001/XMLSchema#",
138+
"name": "http://xmlns.com/foaf/0.1/name",
139+
"homepage": {"@id": "http://xmlns.com/foaf/0.1/homepage", "@type": "@id"},
140+
"avatar": {"@id": "http://xmlns.com/foaf/0.1/avatar", "@type": "@id"}
141+
}
142+
}
143+
</script>
144+
<script type="application/ld+json">
145+
{
146+
"@context": {
147+
"homepage": {"@id": "http://example.com/this-would-also-be-wrong", "@type": "@id"},
148+
"avatar": {"@id": "http://example.com/this-would-also-be-wrong", "@type": "@id"}
149+
}
150+
}
151+
</script>
152+
</head></html>
153+
),
154+
base_uri: "http://example.com/context",
155+
headers: {content_type: "text/html"})
156+
JSON::LD::Context::PRELOADED.clear
157+
expect(JSON::LD::API).to receive(:documentLoader).with("http://example.com/context", anything).and_yield(remote_doc)
158+
ec = subject.parse("http://example.com/context")
159+
expect(ec.send(:mappings)).to produce({
160+
"xsd" => "http://www.w3.org/2001/XMLSchema#",
161+
"name" => "http://xmlns.com/foaf/0.1/name",
162+
"homepage" => "http://xmlns.com/foaf/0.1/homepage",
163+
"avatar" => "http://xmlns.com/foaf/0.1/avatar"
164+
}, logger)
165+
end
166+
81167
it "notes non-existing @context" do
82168
expect {subject.parse(StringIO.new("{}"))}.to raise_error(JSON::LD::JsonLdError::InvalidRemoteContext)
83169
end
@@ -376,6 +462,16 @@ def containers
376462
"@prefix string" => {"foo" => {"@id" => 'http://example.org/', "@prefix" => "str"}},
377463
"@prefix array" => {"foo" => {"@id" => 'http://example.org/', "@prefix" => []}},
378464
"@prefix object" => {"foo" => {"@id" => 'http://example.org/', "@prefix" => {}}},
465+
"IRI term expands to different IRI" => {
466+
"ex" => "http://example.com/",
467+
"ex2" => "http://example.com/2/",
468+
"ex:foo" => "ex2:foo"
469+
},
470+
"IRI term expands to different IRI (reverse)" => {
471+
"ex" => "http://example.com/",
472+
"ex2" => "http://example.com/2/",
473+
"ex:foo" => {"@reverse" => "ex2:foo"}
474+
}
379475
}.each do |title, context|
380476
it title do
381477
expect {

0 commit comments

Comments
 (0)