Skip to content

Commit 2a60213

Browse files
committed
address #524 fully
proper exposure of the error now to the best of my ability, for now.
1 parent 62c9890 commit 2a60213

File tree

1 file changed

+136
-91
lines changed

1 file changed

+136
-91
lines changed

functions/openapi/oas_schema.go

Lines changed: 136 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package openapi
77
import (
88
"crypto/sha256"
99
"fmt"
10+
"hash/fnv"
1011
"strings"
1112

1213
"github.com/daveshanley/vacuum/model"
@@ -35,7 +36,11 @@ type ErrorContext struct {
3536
InstanceLocation []string
3637
ErrorKind jsonschema.ErrorKind
3738
SchemaKeyword string // Extracted from SchemaURL: "unevaluatedProperties", "additionalProperties", etc.
38-
InstancePath string // e.g., "/paths/v1/webhooks-events"
39+
}
40+
41+
// InstancePath returns the instance location as a slash-delimited path (computed lazily)
42+
func (ec ErrorContext) InstancePath() string {
43+
return "/" + strings.Join(ec.InstanceLocation, "/")
3944
}
4045

4146
// NewErrorContext extracts semantic context from a ValidationError
@@ -44,7 +49,6 @@ func NewErrorContext(err *jsonschema.ValidationError) ErrorContext {
4449
SchemaURL: err.SchemaURL,
4550
InstanceLocation: err.InstanceLocation,
4651
ErrorKind: err.ErrorKind,
47-
InstancePath: "/" + strings.Join(err.InstanceLocation, "/"),
4852
}
4953
ctx.SchemaKeyword = extractSchemaKeyword(err.SchemaURL)
5054
return ctx
@@ -59,6 +63,33 @@ func extractSchemaKeyword(schemaURL string) string {
5963
return ""
6064
}
6165

66+
// String returns a string representation of the classification for debugging
67+
func (ec ErrorClassification) String() string {
68+
switch ec {
69+
case ErrorClassNoise:
70+
return "Noise"
71+
case ErrorClassLowPriority:
72+
return "LowPriority"
73+
case ErrorClassHighPriority:
74+
return "HighPriority"
75+
default:
76+
return "Unknown"
77+
}
78+
}
79+
80+
// isConstraintViolation checks if an ErrorKind represents a concrete constraint violation
81+
func isConstraintViolation(ek jsonschema.ErrorKind) bool {
82+
switch ek.(type) {
83+
case *kind.Type, *kind.Pattern, *kind.Format, *kind.Const,
84+
*kind.AdditionalProperties,
85+
*kind.MinLength, *kind.MaxLength, *kind.Minimum, *kind.Maximum,
86+
*kind.MinItems, *kind.MaxItems, *kind.MinProperties, *kind.MaxProperties,
87+
*kind.UniqueItems, *kind.PropertyNames, *kind.MultipleOf:
88+
return true
89+
}
90+
return false
91+
}
92+
6293
// ClassifyError determines how to handle an error based on full context
6394
func ClassifyError(ctx ErrorContext) ErrorClassification {
6495
if ctx.ErrorKind == nil {
@@ -70,19 +101,12 @@ func ClassifyError(ctx ErrorContext) ErrorClassification {
70101
return classifyFalseSchema(ctx)
71102
case *kind.Required:
72103
return classifyRequired(k)
73-
case *kind.Type, *kind.Pattern, *kind.Format, *kind.Const:
74-
return ErrorClassHighPriority
75-
case *kind.AdditionalProperties:
76-
return ErrorClassHighPriority
77-
case *kind.MinLength, *kind.MaxLength, *kind.Minimum, *kind.Maximum:
78-
return ErrorClassHighPriority
79-
case *kind.MinItems, *kind.MaxItems, *kind.MinProperties, *kind.MaxProperties:
80-
return ErrorClassHighPriority
81-
case *kind.UniqueItems, *kind.PropertyNames, *kind.MultipleOf:
82-
return ErrorClassHighPriority
83104
case *kind.Enum:
84105
return ErrorClassLowPriority
85106
default:
107+
if isConstraintViolation(ctx.ErrorKind) {
108+
return ErrorClassHighPriority
109+
}
86110
return ErrorClassLowPriority
87111
}
88112
}
@@ -115,22 +139,67 @@ type ErrorFormatter interface {
115139
Format(ctx ErrorContext) string
116140
}
117141

118-
// OpenAPIErrorFormatter provides OpenAPI-aware error formatting
119-
type OpenAPIErrorFormatter struct{}
142+
// DefaultErrorFormatter provides generic jsonschema error formatting
143+
type DefaultErrorFormatter struct{}
144+
145+
// Format converts ErrorKind to a readable string
146+
func (f DefaultErrorFormatter) Format(ctx ErrorContext) string {
147+
if ctx.ErrorKind == nil {
148+
return ""
149+
}
150+
switch k := ctx.ErrorKind.(type) {
151+
case *kind.Required:
152+
return fmt.Sprintf("missing properties: `%v`", k.Missing)
153+
case *kind.AdditionalProperties:
154+
return fmt.Sprintf("additional properties not allowed: `%v`", k.Properties)
155+
case *kind.Type:
156+
return fmt.Sprintf("expected type `%v`, got `%v`", k.Want, k.Got)
157+
case *kind.Enum:
158+
return fmt.Sprintf("value must be one of: `%v`", k.Want)
159+
case *kind.FalseSchema:
160+
return "property not allowed"
161+
case *kind.Pattern:
162+
return fmt.Sprintf("does not match pattern `%s`", k.Want)
163+
case *kind.MinLength:
164+
return fmt.Sprintf("length must be >= `%d`", k.Want)
165+
case *kind.MaxLength:
166+
return fmt.Sprintf("length must be <= `%d`", k.Want)
167+
case *kind.Minimum:
168+
return fmt.Sprintf("must be >= `%v`", k.Want)
169+
case *kind.Maximum:
170+
return fmt.Sprintf("must be <= `%v`", k.Want)
171+
case *kind.MinItems:
172+
return fmt.Sprintf("must have >= `%d` items", k.Want)
173+
case *kind.MaxItems:
174+
return fmt.Sprintf("must have <= `%d` items", k.Want)
175+
case *kind.MinProperties:
176+
return fmt.Sprintf("must have >= `%d` properties", k.Want)
177+
case *kind.MaxProperties:
178+
return fmt.Sprintf("must have <= `%d` properties", k.Want)
179+
case *kind.Const:
180+
return fmt.Sprintf("must be `%v`", k.Want)
181+
case *kind.Format:
182+
return fmt.Sprintf("invalid format: expected `%s`", k.Want)
183+
default:
184+
return fmt.Sprintf("%v", ctx.ErrorKind)
185+
}
186+
}
187+
188+
// OpenAPIErrorFormatter provides OpenAPI-aware error formatting with context-specific messages
189+
type OpenAPIErrorFormatter struct {
190+
DefaultErrorFormatter
191+
}
120192

121-
// Format generates a context-aware error message
193+
// Format generates a context-aware error message with OpenAPI-specific handling
122194
func (f OpenAPIErrorFormatter) Format(ctx ErrorContext) string {
123-
// Handle FalseSchema with context-specific messages
124195
if _, ok := ctx.ErrorKind.(*kind.FalseSchema); ok {
125196
return f.formatFalseSchema(ctx)
126197
}
127-
// Fall back to generic formatting
128-
return errorKindToString(ctx.ErrorKind)
198+
return f.DefaultErrorFormatter.Format(ctx)
129199
}
130200

131201
// formatFalseSchema generates specific messages for FalseSchema errors based on context
132202
func (f OpenAPIErrorFormatter) formatFalseSchema(ctx ErrorContext) string {
133-
// Check if this is a path validation error
134203
if len(ctx.InstanceLocation) >= 2 && ctx.InstanceLocation[0] == "paths" {
135204
pathKey := ctx.InstanceLocation[1]
136205
if !strings.HasPrefix(pathKey, "/") && !strings.HasPrefix(pathKey, "x-") {
@@ -139,14 +208,12 @@ func (f OpenAPIErrorFormatter) formatFalseSchema(ctx ErrorContext) string {
139208
return fmt.Sprintf("path `%s` is not allowed", pathKey)
140209
}
141210

142-
// Check for other unevaluatedProperties contexts
143211
if ctx.SchemaKeyword == "unevaluatedProperties" && len(ctx.InstanceLocation) > 0 {
144212
property := ctx.InstanceLocation[len(ctx.InstanceLocation)-1]
145213
return fmt.Sprintf("property `%s` is not allowed", property)
146214
}
147215

148-
// Generic fallback
149-
return "property not allowed by schema"
216+
return f.DefaultErrorFormatter.Format(ctx)
150217
}
151218

152219
// OASSchema will check that the document is a valid OpenAPI schema.
@@ -173,8 +240,6 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
173240
}
174241

175242
var results []model.RuleFunctionResult
176-
177-
// grab the original bytes and the spec info from context.
178243
info := context.SpecInfo
179244

180245
if info.SpecType == "" {
@@ -238,11 +303,10 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
238303
return results // Return any nullable violations even if document is otherwise valid
239304
}
240305

241-
// duplicates are possible, so we need to de-dupe them.
306+
// validation errors can appear multiple times across different schema branches; deduplicate by hash
242307
seen := make(map[string]*errors.SchemaValidationFailure)
243308
for i := range validationErrors {
244309
for y := range validationErrors[i].SchemaValidationErrors {
245-
// skip, seen it.
246310
if _, ok := seen[hashResult(validationErrors[i].SchemaValidationErrors[y])]; ok {
247311
continue
248312
}
@@ -288,12 +352,11 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
288352
}
289353
}
290354

291-
// Fallback to Reason if no leaf errors extracted
292355
if reason == "" {
293356
reason = schemaErr.Reason
294357
}
295358

296-
// Final fallback
359+
// guard against empty reason from upstream validator
297360
if reason == "" {
298361
reason = "schema validation failed"
299362
}
@@ -352,15 +415,40 @@ func checkForNullableKeyword(context model.RuleFunctionContext) []model.RuleFunc
352415
return results
353416
}
354417

355-
// extractLeafValidationErrors extracts only meaningful leaf error messages from a jsonschema.ValidationError tree
356-
// It uses ErrorContext and ClassifyError to filter noise from oneOf/anyOf schema structures
357-
// and provides context-aware formatting for OpenAPI-specific errors (issues #524, #766)
418+
// defaultFormatter is a package-level singleton to avoid repeated allocations
419+
var defaultFormatter = OpenAPIErrorFormatter{}
420+
421+
// hashInstanceLocation creates a fast hash of an instance location for deduplication
422+
func hashInstanceLocation(s []string) uint64 {
423+
h := fnv.New64a()
424+
for _, str := range s {
425+
h.Write([]byte(str))
426+
h.Write([]byte{0})
427+
}
428+
return h.Sum64()
429+
}
430+
431+
// addUniqueError appends a formatted error message if not already seen
432+
func addUniqueError(errors *[]string, seen map[string]bool, ctx ErrorContext) {
433+
msg := defaultFormatter.Format(ctx)
434+
fullMsg := "`" + ctx.InstancePath() + "` " + msg
435+
if !seen[fullMsg] {
436+
seen[fullMsg] = true
437+
*errors = append(*errors, fullMsg)
438+
}
439+
}
440+
441+
// extractLeafValidationErrors extracts meaningful validation errors from a jsonschema error tree.
442+
// It recursively walks to leaf nodes, classifies them by priority, and filters noise.
443+
// Returns high-priority errors if any exist, otherwise low-priority errors.
444+
// High-priority errors suppress low-priority errors for the same instance path.
445+
// Addresses issues #524 (path validation), #766 (oneOf/anyOf noise).
358446
func extractLeafValidationErrors(err *jsonschema.ValidationError) []string {
359-
var highPriority []string
360-
var lowPriority []string
361-
seen := make(map[string]bool)
362-
seenPaths := make(map[string]bool)
363-
formatter := OpenAPIErrorFormatter{}
447+
highPriority := make([]string, 0, 10)
448+
lowPriority := make([]string, 0, 5)
449+
seen := make(map[string]bool, 100)
450+
seenPaths := make(map[uint64]bool, 50)
451+
// prevent stack overflow from deeply nested schemas
364452
const maxDepth = 50
365453

366454
var extract func(e *jsonschema.ValidationError, depth int)
@@ -369,7 +457,6 @@ func extractLeafValidationErrors(err *jsonschema.ValidationError) []string {
369457
return
370458
}
371459

372-
// if this is a leaf node (no causes), extract the message
373460
if len(e.Causes) == 0 {
374461
ctx := NewErrorContext(e)
375462
classification := ClassifyError(ctx)
@@ -378,76 +465,34 @@ func extractLeafValidationErrors(err *jsonschema.ValidationError) []string {
378465
return
379466
}
380467

381-
msg := formatter.Format(ctx)
382-
if msg != "" {
383-
fullMsg := fmt.Sprintf("`%s` %s", ctx.InstancePath, msg)
384-
if !seen[fullMsg] {
385-
seen[fullMsg] = true
386-
387-
if classification == ErrorClassHighPriority {
388-
highPriority = append(highPriority, fullMsg)
389-
seenPaths[ctx.InstancePath] = true
390-
} else if !seenPaths[ctx.InstancePath] {
391-
lowPriority = append(lowPriority, fullMsg)
392-
}
468+
pathHash := hashInstanceLocation(ctx.InstanceLocation)
469+
470+
// seenPaths prevents low-priority errors for paths that already have high-priority errors
471+
if classification == ErrorClassHighPriority {
472+
if !seenPaths[pathHash] {
473+
addUniqueError(&highPriority, seen, ctx)
474+
seenPaths[pathHash] = true
393475
}
476+
} else if !seenPaths[pathHash] {
477+
addUniqueError(&lowPriority, seen, ctx)
394478
}
395479
}
396480

397-
// recurse into causes
398481
for _, cause := range e.Causes {
399482
extract(cause, depth+1)
400483
}
401484
}
402485

403486
extract(err, 0)
404487

405-
// Return high priority errors first, then low priority
406488
if len(highPriority) > 0 {
407489
return highPriority
408490
}
409491
return lowPriority
410492
}
411493

412-
// errorKindToString converts a jsonschema ErrorKind to a human-readable string
494+
// errorKindToString converts a jsonschema ErrorKind to a human-readable string.
495+
// Deprecated: Use DefaultErrorFormatter.Format instead.
413496
func errorKindToString(ek jsonschema.ErrorKind) string {
414-
if ek == nil {
415-
return ""
416-
}
417-
switch k := ek.(type) {
418-
case *kind.Required:
419-
return fmt.Sprintf("missing properties: `%v`", k.Missing)
420-
case *kind.AdditionalProperties:
421-
return fmt.Sprintf("additional properties not allowed: `%v`", k.Properties)
422-
case *kind.Type:
423-
return fmt.Sprintf("expected type `%v`, got `%v`", k.Want, k.Got)
424-
case *kind.Enum:
425-
return fmt.Sprintf("value must be one of: `%v`", k.Want)
426-
case *kind.FalseSchema:
427-
return "property not allowed"
428-
case *kind.Pattern:
429-
return fmt.Sprintf("does not match pattern `%s`", k.Want)
430-
case *kind.MinLength:
431-
return fmt.Sprintf("length must be >= `%d`", k.Want)
432-
case *kind.MaxLength:
433-
return fmt.Sprintf("length must be <= `%d`", k.Want)
434-
case *kind.Minimum:
435-
return fmt.Sprintf("must be >= `%v`", k.Want)
436-
case *kind.Maximum:
437-
return fmt.Sprintf("must be <= `%v`", k.Want)
438-
case *kind.MinItems:
439-
return fmt.Sprintf("must have >= `%d` items", k.Want)
440-
case *kind.MaxItems:
441-
return fmt.Sprintf("must have <= `%d` items", k.Want)
442-
case *kind.MinProperties:
443-
return fmt.Sprintf("must have >= `%d` properties", k.Want)
444-
case *kind.MaxProperties:
445-
return fmt.Sprintf("must have <= `%d` properties", k.Want)
446-
case *kind.Const:
447-
return fmt.Sprintf("must be `%v`", k.Want)
448-
case *kind.Format:
449-
return fmt.Sprintf("invalid format: expected `%s`", k.Want)
450-
default:
451-
return fmt.Sprintf("%v", ek)
452-
}
497+
return DefaultErrorFormatter{}.Format(ErrorContext{ErrorKind: ek})
453498
}

0 commit comments

Comments
 (0)