@@ -7,6 +7,7 @@ package openapi
77import (
88 "crypto/sha256"
99 "fmt"
10+ "hash/fnv"
1011 "strings"
1112
1213 "github.com/daveshanley/vacuum/model"
@@ -35,7 +36,11 @@ type ErrorContext struct {
3536 InstanceLocation []string
3637 ErrorKind jsonschema.ErrorKind
3738 SchemaKeyword string // Extracted from SchemaURL: "unevaluatedProperties", "additionalProperties", etc.
38- InstancePath string // e.g., "/paths/v1/webhooks-events"
39+ }
40+
41+ // InstancePath returns the instance location as a slash-delimited path (computed lazily)
42+ func (ec ErrorContext ) InstancePath () string {
43+ return "/" + strings .Join (ec .InstanceLocation , "/" )
3944}
4045
4146// NewErrorContext extracts semantic context from a ValidationError
@@ -44,7 +49,6 @@ func NewErrorContext(err *jsonschema.ValidationError) ErrorContext {
4449 SchemaURL : err .SchemaURL ,
4550 InstanceLocation : err .InstanceLocation ,
4651 ErrorKind : err .ErrorKind ,
47- InstancePath : "/" + strings .Join (err .InstanceLocation , "/" ),
4852 }
4953 ctx .SchemaKeyword = extractSchemaKeyword (err .SchemaURL )
5054 return ctx
@@ -59,6 +63,33 @@ func extractSchemaKeyword(schemaURL string) string {
5963 return ""
6064}
6165
66+ // String returns a string representation of the classification for debugging
67+ func (ec ErrorClassification ) String () string {
68+ switch ec {
69+ case ErrorClassNoise :
70+ return "Noise"
71+ case ErrorClassLowPriority :
72+ return "LowPriority"
73+ case ErrorClassHighPriority :
74+ return "HighPriority"
75+ default :
76+ return "Unknown"
77+ }
78+ }
79+
80+ // isConstraintViolation checks if an ErrorKind represents a concrete constraint violation
81+ func isConstraintViolation (ek jsonschema.ErrorKind ) bool {
82+ switch ek .(type ) {
83+ case * kind.Type , * kind.Pattern , * kind.Format , * kind.Const ,
84+ * kind.AdditionalProperties ,
85+ * kind.MinLength , * kind.MaxLength , * kind.Minimum , * kind.Maximum ,
86+ * kind.MinItems , * kind.MaxItems , * kind.MinProperties , * kind.MaxProperties ,
87+ * kind.UniqueItems , * kind.PropertyNames , * kind.MultipleOf :
88+ return true
89+ }
90+ return false
91+ }
92+
6293// ClassifyError determines how to handle an error based on full context
6394func ClassifyError (ctx ErrorContext ) ErrorClassification {
6495 if ctx .ErrorKind == nil {
@@ -70,19 +101,12 @@ func ClassifyError(ctx ErrorContext) ErrorClassification {
70101 return classifyFalseSchema (ctx )
71102 case * kind.Required :
72103 return classifyRequired (k )
73- case * kind.Type , * kind.Pattern , * kind.Format , * kind.Const :
74- return ErrorClassHighPriority
75- case * kind.AdditionalProperties :
76- return ErrorClassHighPriority
77- case * kind.MinLength , * kind.MaxLength , * kind.Minimum , * kind.Maximum :
78- return ErrorClassHighPriority
79- case * kind.MinItems , * kind.MaxItems , * kind.MinProperties , * kind.MaxProperties :
80- return ErrorClassHighPriority
81- case * kind.UniqueItems , * kind.PropertyNames , * kind.MultipleOf :
82- return ErrorClassHighPriority
83104 case * kind.Enum :
84105 return ErrorClassLowPriority
85106 default :
107+ if isConstraintViolation (ctx .ErrorKind ) {
108+ return ErrorClassHighPriority
109+ }
86110 return ErrorClassLowPriority
87111 }
88112}
@@ -115,22 +139,67 @@ type ErrorFormatter interface {
115139 Format (ctx ErrorContext ) string
116140}
117141
118- // OpenAPIErrorFormatter provides OpenAPI-aware error formatting
119- type OpenAPIErrorFormatter struct {}
142+ // DefaultErrorFormatter provides generic jsonschema error formatting
143+ type DefaultErrorFormatter struct {}
144+
145+ // Format converts ErrorKind to a readable string
146+ func (f DefaultErrorFormatter ) Format (ctx ErrorContext ) string {
147+ if ctx .ErrorKind == nil {
148+ return ""
149+ }
150+ switch k := ctx .ErrorKind .(type ) {
151+ case * kind.Required :
152+ return fmt .Sprintf ("missing properties: `%v`" , k .Missing )
153+ case * kind.AdditionalProperties :
154+ return fmt .Sprintf ("additional properties not allowed: `%v`" , k .Properties )
155+ case * kind.Type :
156+ return fmt .Sprintf ("expected type `%v`, got `%v`" , k .Want , k .Got )
157+ case * kind.Enum :
158+ return fmt .Sprintf ("value must be one of: `%v`" , k .Want )
159+ case * kind.FalseSchema :
160+ return "property not allowed"
161+ case * kind.Pattern :
162+ return fmt .Sprintf ("does not match pattern `%s`" , k .Want )
163+ case * kind.MinLength :
164+ return fmt .Sprintf ("length must be >= `%d`" , k .Want )
165+ case * kind.MaxLength :
166+ return fmt .Sprintf ("length must be <= `%d`" , k .Want )
167+ case * kind.Minimum :
168+ return fmt .Sprintf ("must be >= `%v`" , k .Want )
169+ case * kind.Maximum :
170+ return fmt .Sprintf ("must be <= `%v`" , k .Want )
171+ case * kind.MinItems :
172+ return fmt .Sprintf ("must have >= `%d` items" , k .Want )
173+ case * kind.MaxItems :
174+ return fmt .Sprintf ("must have <= `%d` items" , k .Want )
175+ case * kind.MinProperties :
176+ return fmt .Sprintf ("must have >= `%d` properties" , k .Want )
177+ case * kind.MaxProperties :
178+ return fmt .Sprintf ("must have <= `%d` properties" , k .Want )
179+ case * kind.Const :
180+ return fmt .Sprintf ("must be `%v`" , k .Want )
181+ case * kind.Format :
182+ return fmt .Sprintf ("invalid format: expected `%s`" , k .Want )
183+ default :
184+ return fmt .Sprintf ("%v" , ctx .ErrorKind )
185+ }
186+ }
187+
188+ // OpenAPIErrorFormatter provides OpenAPI-aware error formatting with context-specific messages
189+ type OpenAPIErrorFormatter struct {
190+ DefaultErrorFormatter
191+ }
120192
121- // Format generates a context-aware error message
193+ // Format generates a context-aware error message with OpenAPI-specific handling
122194func (f OpenAPIErrorFormatter ) Format (ctx ErrorContext ) string {
123- // Handle FalseSchema with context-specific messages
124195 if _ , ok := ctx .ErrorKind .(* kind.FalseSchema ); ok {
125196 return f .formatFalseSchema (ctx )
126197 }
127- // Fall back to generic formatting
128- return errorKindToString (ctx .ErrorKind )
198+ return f .DefaultErrorFormatter .Format (ctx )
129199}
130200
131201// formatFalseSchema generates specific messages for FalseSchema errors based on context
132202func (f OpenAPIErrorFormatter ) formatFalseSchema (ctx ErrorContext ) string {
133- // Check if this is a path validation error
134203 if len (ctx .InstanceLocation ) >= 2 && ctx .InstanceLocation [0 ] == "paths" {
135204 pathKey := ctx .InstanceLocation [1 ]
136205 if ! strings .HasPrefix (pathKey , "/" ) && ! strings .HasPrefix (pathKey , "x-" ) {
@@ -139,14 +208,12 @@ func (f OpenAPIErrorFormatter) formatFalseSchema(ctx ErrorContext) string {
139208 return fmt .Sprintf ("path `%s` is not allowed" , pathKey )
140209 }
141210
142- // Check for other unevaluatedProperties contexts
143211 if ctx .SchemaKeyword == "unevaluatedProperties" && len (ctx .InstanceLocation ) > 0 {
144212 property := ctx .InstanceLocation [len (ctx .InstanceLocation )- 1 ]
145213 return fmt .Sprintf ("property `%s` is not allowed" , property )
146214 }
147215
148- // Generic fallback
149- return "property not allowed by schema"
216+ return f .DefaultErrorFormatter .Format (ctx )
150217}
151218
152219// OASSchema will check that the document is a valid OpenAPI schema.
@@ -173,8 +240,6 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
173240 }
174241
175242 var results []model.RuleFunctionResult
176-
177- // grab the original bytes and the spec info from context.
178243 info := context .SpecInfo
179244
180245 if info .SpecType == "" {
@@ -238,11 +303,10 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
238303 return results // Return any nullable violations even if document is otherwise valid
239304 }
240305
241- // duplicates are possible, so we need to de-dupe them.
306+ // validation errors can appear multiple times across different schema branches; deduplicate by hash
242307 seen := make (map [string ]* errors.SchemaValidationFailure )
243308 for i := range validationErrors {
244309 for y := range validationErrors [i ].SchemaValidationErrors {
245- // skip, seen it.
246310 if _ , ok := seen [hashResult (validationErrors [i ].SchemaValidationErrors [y ])]; ok {
247311 continue
248312 }
@@ -288,12 +352,11 @@ func (os OASSchema) RunRule(nodes []*yaml.Node, context model.RuleFunctionContex
288352 }
289353 }
290354
291- // Fallback to Reason if no leaf errors extracted
292355 if reason == "" {
293356 reason = schemaErr .Reason
294357 }
295358
296- // Final fallback
359+ // guard against empty reason from upstream validator
297360 if reason == "" {
298361 reason = "schema validation failed"
299362 }
@@ -352,15 +415,40 @@ func checkForNullableKeyword(context model.RuleFunctionContext) []model.RuleFunc
352415 return results
353416}
354417
355- // extractLeafValidationErrors extracts only meaningful leaf error messages from a jsonschema.ValidationError tree
356- // It uses ErrorContext and ClassifyError to filter noise from oneOf/anyOf schema structures
357- // and provides context-aware formatting for OpenAPI-specific errors (issues #524, #766)
418+ // defaultFormatter is a package-level singleton to avoid repeated allocations
419+ var defaultFormatter = OpenAPIErrorFormatter {}
420+
421+ // hashInstanceLocation creates a fast hash of an instance location for deduplication
422+ func hashInstanceLocation (s []string ) uint64 {
423+ h := fnv .New64a ()
424+ for _ , str := range s {
425+ h .Write ([]byte (str ))
426+ h .Write ([]byte {0 })
427+ }
428+ return h .Sum64 ()
429+ }
430+
431+ // addUniqueError appends a formatted error message if not already seen
432+ func addUniqueError (errors * []string , seen map [string ]bool , ctx ErrorContext ) {
433+ msg := defaultFormatter .Format (ctx )
434+ fullMsg := "`" + ctx .InstancePath () + "` " + msg
435+ if ! seen [fullMsg ] {
436+ seen [fullMsg ] = true
437+ * errors = append (* errors , fullMsg )
438+ }
439+ }
440+
441+ // extractLeafValidationErrors extracts meaningful validation errors from a jsonschema error tree.
442+ // It recursively walks to leaf nodes, classifies them by priority, and filters noise.
443+ // Returns high-priority errors if any exist, otherwise low-priority errors.
444+ // High-priority errors suppress low-priority errors for the same instance path.
445+ // Addresses issues #524 (path validation), #766 (oneOf/anyOf noise).
358446func extractLeafValidationErrors (err * jsonschema.ValidationError ) []string {
359- var highPriority []string
360- var lowPriority []string
361- seen := make (map [string ]bool )
362- seenPaths := make (map [string ]bool )
363- formatter := OpenAPIErrorFormatter {}
447+ highPriority := make ( []string , 0 , 10 )
448+ lowPriority := make ( []string , 0 , 5 )
449+ seen := make (map [string ]bool , 100 )
450+ seenPaths := make (map [uint64 ]bool , 50 )
451+ // prevent stack overflow from deeply nested schemas
364452 const maxDepth = 50
365453
366454 var extract func (e * jsonschema.ValidationError , depth int )
@@ -369,7 +457,6 @@ func extractLeafValidationErrors(err *jsonschema.ValidationError) []string {
369457 return
370458 }
371459
372- // if this is a leaf node (no causes), extract the message
373460 if len (e .Causes ) == 0 {
374461 ctx := NewErrorContext (e )
375462 classification := ClassifyError (ctx )
@@ -378,76 +465,34 @@ func extractLeafValidationErrors(err *jsonschema.ValidationError) []string {
378465 return
379466 }
380467
381- msg := formatter .Format (ctx )
382- if msg != "" {
383- fullMsg := fmt .Sprintf ("`%s` %s" , ctx .InstancePath , msg )
384- if ! seen [fullMsg ] {
385- seen [fullMsg ] = true
386-
387- if classification == ErrorClassHighPriority {
388- highPriority = append (highPriority , fullMsg )
389- seenPaths [ctx .InstancePath ] = true
390- } else if ! seenPaths [ctx .InstancePath ] {
391- lowPriority = append (lowPriority , fullMsg )
392- }
468+ pathHash := hashInstanceLocation (ctx .InstanceLocation )
469+
470+ // seenPaths prevents low-priority errors for paths that already have high-priority errors
471+ if classification == ErrorClassHighPriority {
472+ if ! seenPaths [pathHash ] {
473+ addUniqueError (& highPriority , seen , ctx )
474+ seenPaths [pathHash ] = true
393475 }
476+ } else if ! seenPaths [pathHash ] {
477+ addUniqueError (& lowPriority , seen , ctx )
394478 }
395479 }
396480
397- // recurse into causes
398481 for _ , cause := range e .Causes {
399482 extract (cause , depth + 1 )
400483 }
401484 }
402485
403486 extract (err , 0 )
404487
405- // Return high priority errors first, then low priority
406488 if len (highPriority ) > 0 {
407489 return highPriority
408490 }
409491 return lowPriority
410492}
411493
412- // errorKindToString converts a jsonschema ErrorKind to a human-readable string
494+ // errorKindToString converts a jsonschema ErrorKind to a human-readable string.
495+ // Deprecated: Use DefaultErrorFormatter.Format instead.
413496func errorKindToString (ek jsonschema.ErrorKind ) string {
414- if ek == nil {
415- return ""
416- }
417- switch k := ek .(type ) {
418- case * kind.Required :
419- return fmt .Sprintf ("missing properties: `%v`" , k .Missing )
420- case * kind.AdditionalProperties :
421- return fmt .Sprintf ("additional properties not allowed: `%v`" , k .Properties )
422- case * kind.Type :
423- return fmt .Sprintf ("expected type `%v`, got `%v`" , k .Want , k .Got )
424- case * kind.Enum :
425- return fmt .Sprintf ("value must be one of: `%v`" , k .Want )
426- case * kind.FalseSchema :
427- return "property not allowed"
428- case * kind.Pattern :
429- return fmt .Sprintf ("does not match pattern `%s`" , k .Want )
430- case * kind.MinLength :
431- return fmt .Sprintf ("length must be >= `%d`" , k .Want )
432- case * kind.MaxLength :
433- return fmt .Sprintf ("length must be <= `%d`" , k .Want )
434- case * kind.Minimum :
435- return fmt .Sprintf ("must be >= `%v`" , k .Want )
436- case * kind.Maximum :
437- return fmt .Sprintf ("must be <= `%v`" , k .Want )
438- case * kind.MinItems :
439- return fmt .Sprintf ("must have >= `%d` items" , k .Want )
440- case * kind.MaxItems :
441- return fmt .Sprintf ("must have <= `%d` items" , k .Want )
442- case * kind.MinProperties :
443- return fmt .Sprintf ("must have >= `%d` properties" , k .Want )
444- case * kind.MaxProperties :
445- return fmt .Sprintf ("must have <= `%d` properties" , k .Want )
446- case * kind.Const :
447- return fmt .Sprintf ("must be `%v`" , k .Want )
448- case * kind.Format :
449- return fmt .Sprintf ("invalid format: expected `%s`" , k .Want )
450- default :
451- return fmt .Sprintf ("%v" , ek )
452- }
497+ return DefaultErrorFormatter {}.Format (ErrorContext {ErrorKind : ek })
453498}
0 commit comments