Skip to content

Commit 1ed4441

Browse files
committed
feat: Handling invalid RDF in canonized JSON LD
Previously, we weren't dropping undefined terms from RDF datasets. Fix: changed the normalization approach through the json-gold library so that it returns a parsing error whenever invalid data is found in the dataset. Added error-handling logic for invalid RDF data errors, where the aries JSON-LD processor removes the invalid data from the dataset and tries again recursively. (Following digitalbazaar/jsonld.js#199) closes hyperledger-aries#1592 Signed-off-by: sudesh.shetty <[email protected]>
1 parent 7e4eafc commit 1ed4441

File tree

11 files changed

+533
-152
lines changed

11 files changed

+533
-152
lines changed

pkg/doc/signature/jsonld/processor.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*
2+
Copyright SecureKey Technologies Inc. All Rights Reserved.
3+
4+
SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
package jsonld
8+
9+
import (
10+
"fmt"
11+
"regexp"
12+
"strconv"
13+
"strings"
14+
15+
"github.com/piprate/json-gold/ld"
16+
17+
"github.com/hyperledger/aries-framework-go/pkg/common/log"
18+
)
19+
20+
const (
	// format is the N-Quads media type assigned to the json-gold options' Format field.
	format = "application/n-quads"
	// algorithm names the RDF dataset normalization algorithm handed to json-gold.
	algorithm = "URDNA2015"
	// handleNormalizeErr is the error-message fragment that marks an invalid-quad
	// parsing failure; the retry logic expects the offending line number to follow it.
	handleNormalizeErr = "Error while parsing N-Quads; invalid quad. line:"
)
25+
26+
// logger is the package-level logger the JSON-LD processor writes its warnings to.
var logger = log.New("aries-framework/json-ld-processor")
27+
28+
// invalidRDFLinePattern matches the (possibly empty) run of decimal digits at the
// very end of a string; findLineNumber applies it to parser error messages.
// nolint:gochecknoglobals
var invalidRDFLinePattern = regexp.MustCompile(`[0-9]*$`)
32+
33+
// Processor is the JSON-LD processor for aries.
// Its methods configure json-gold with the JSON-LD 1.1 processing mode
// (ld.JsonLd_1_1), see https://www.w3.org/TR/json-ld11/.
type Processor struct{}
37+
38+
// NewProcessor returns new JSON-LD processor for aries
39+
func NewProcessor() *Processor {
40+
return &Processor{}
41+
}
42+
43+
// GetCanonicalDocument returns canonized document of given json ld
44+
func (p *Processor) GetCanonicalDocument(doc map[string]interface{}) ([]byte, error) {
45+
proc := ld.NewJsonLdProcessor()
46+
options := ld.NewJsonLdOptions("")
47+
options.ProcessingMode = ld.JsonLd_1_1
48+
options.Algorithm = algorithm
49+
options.ProduceGeneralizedRdf = true
50+
51+
normalizedTriples, err := proc.Normalize(doc, options)
52+
if err != nil {
53+
normalizedTriples, err = p.retryForInvalidRDFError(proc, options, doc, err)
54+
if err != nil {
55+
return nil, fmt.Errorf("failed to normalize JSON-LD document: %w", err)
56+
}
57+
}
58+
59+
if ds, ok := normalizedTriples.(*ld.RDFDataset); ok {
60+
serializer := ld.NQuadRDFSerializer{}
61+
resp, err := serializer.Serialize(ds)
62+
63+
if err != nil {
64+
return nil, fmt.Errorf("failed to serialize normalized RDF dataset : %w", err)
65+
}
66+
67+
return []byte(resp.(string)), nil
68+
}
69+
70+
return nil, fmt.Errorf("failed to normalize JSON-LD document, unexpected RDF dataset")
71+
}
72+
73+
// Compact compacts given json ld object
74+
func (p *Processor) Compact(input, context interface{}, loader ld.DocumentLoader) (map[string]interface{}, error) {
75+
proc := ld.NewJsonLdProcessor()
76+
options := ld.NewJsonLdOptions("")
77+
options.ProcessingMode = ld.JsonLd_1_1
78+
options.Format = format
79+
options.ProduceGeneralizedRdf = true
80+
81+
if loader != nil {
82+
options.DocumentLoader = loader
83+
}
84+
85+
return proc.Compact(input, context, options)
86+
}
87+
88+
// retryForInvalidRDFError handles incorrect RDF data error and returns new data set by
89+
// removing invalid line from data set
90+
func (p *Processor) retryForInvalidRDFError(proc *ld.JsonLdProcessor, opts *ld.JsonLdOptions,
91+
doc map[string]interface{}, err error) (interface{}, error) {
92+
if err != nil && !strings.Contains(err.Error(), handleNormalizeErr) {
93+
return nil, err
94+
}
95+
96+
lineNumber, err := findLineNumber(err)
97+
if err != nil {
98+
return nil, err
99+
}
100+
101+
// handling invalid RDF data, by following pattern [https://github.com/digitalbazaar/jsonld.js/issues/199]
102+
logger.Warnf("Failed to normalize JSON-LD document due to invalid RDF, retrying after removing invalid data.")
103+
104+
// prepare data set
105+
opts.Format = ""
106+
107+
datasetObj, err := proc.ToRDF(doc, opts)
108+
if err != nil {
109+
return nil, fmt.Errorf("failed to create dataset: %w", err)
110+
}
111+
112+
dataset, ok := datasetObj.(*ld.RDFDataset)
113+
if !ok {
114+
return nil, fmt.Errorf("unexpected RDF data set found")
115+
}
116+
117+
// normalize dataset and get view
118+
opts.Format = format
119+
120+
r, err := ld.NewNormalisationAlgorithm(opts.Algorithm).Main(dataset, opts)
121+
if err != nil {
122+
return nil, fmt.Errorf("failed to create normalized state: %w", err)
123+
}
124+
125+
return p.parseNQuadsWithRetry(r.(string), lineNumber)
126+
}
127+
128+
// handleInvalidRDF handles invalid RDF data by discarding incorrect line
129+
func (p *Processor) parseNQuadsWithRetry(view string, lineNumber int) (interface{}, error) {
130+
logger.Warnf("Removing incorrect RDF from line number %d", lineNumber)
131+
// polish view and parse again
132+
view = removeQuad(view, lineNumber-1)
133+
134+
normalizedTriples, err := ld.ParseNQuads(view)
135+
if err != nil && strings.Contains(err.Error(), handleNormalizeErr) {
136+
lineNumber, err = findLineNumber(err)
137+
if err != nil {
138+
return nil, err
139+
}
140+
141+
return p.parseNQuadsWithRetry(view, lineNumber)
142+
}
143+
144+
return normalizedTriples, nil
145+
}
146+
147+
// removeQuad returns view with the line at the given zero-based index removed.
// Lines are delimited by "\n". An index outside the view's lines leaves the view
// unchanged instead of panicking.
func removeQuad(view string, index int) string {
	lines := strings.Split(view, "\n")
	if index < 0 || index >= len(lines) {
		return view
	}

	return strings.Join(append(lines[:index], lines[index+1:]...), "\n")
}
152+
153+
// findLineNumber finds problematic line number from error
154+
func findLineNumber(err error) (int, error) {
155+
s := invalidRDFLinePattern.FindString(err.Error())
156+
157+
i, err := strconv.Atoi(s)
158+
if err != nil {
159+
return -1, fmt.Errorf("unable to locate invalid RDF data line number: %w", err)
160+
}
161+
162+
return i, nil
163+
}

0 commit comments

Comments
 (0)