@@ -143,6 +143,8 @@ struct BuildArgs {
143
143
#[ arg( long) ]
144
144
html : bool ,
145
145
#[ arg( long) ]
146
+ csv : bool ,
147
+ #[ arg( long) ]
146
148
inline : bool ,
147
149
#[ arg( long) ]
148
150
ignore_html_whitespace : bool ,
@@ -175,7 +177,7 @@ fn is_html(s: &str) -> bool {
175
177
s. trim_start ( ) . starts_with ( '<' ) && s. trim_end ( ) . ends_with ( '>' )
176
178
}
177
179
178
- const IGNORE : & [ & str ] = & [
180
+ const IGNORED_KEYS : & [ & str ] = & [
179
181
"doc.flaws" ,
180
182
"blogMeta.readTime" ,
181
183
"doc.modified" ,
@@ -189,9 +191,70 @@ const IGNORE: &[&str] = &[
189
191
"doc.summary" ,
190
192
] ;
191
193
194
/// Path prefixes of files that are excluded from diffing altogether.
///
/// Despite the name, the entries are not glob patterns: `full_diff` tests
/// each entry against the file key with `str::starts_with`, so every entry
/// acts as a plain prefix.
///
/// The list is known at compile time, so it is stored as a static slice
/// (the same shape as `IGNORED_KEYS`) instead of a lazily allocated `Vec`.
static SKIP_GLOB_LIST: &[&str] = &[
    "docs/mdn/writing_guidelines/",
    "docs/mozilla/add-ons/webextensions/",
    "docs/mozilla/firefox/releases/",
];
201
+
202
/// Known and accepted differences that must not be reported.
///
/// Every entry is a `(file key, JSON path)` pair. `full_diff` looks up the
/// current file together with the key built by `make_key` and returns early
/// when the pair is present in this set.
static ALLOWLIST: LazyLock<HashSet<(&str, &str)>> = LazyLock::new(|| {
    HashSet::from([
        // example.com link that is properly escaped gets wrongly auto-linked; unfixable in yari.
        ("docs/glossary/http/index.json", "doc.body.0.value.content"),
        ("docs/learn/html/multimedia_and_embedding/other_embedding_technologies/index.json", "doc.body.4.value.content"),
        // Relative link to the MDN Playground: dead link in yari, rendered correctly in rari.
        ("docs/learn/learning_and_getting_help/index.json", "doc.body.3.value.content"),
        // rari reports 'unsupported templ: livesamplelink'; drop these once it is supported.
        ("docs/learn/forms/form_validation/index.json", "doc.body.12.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.9.value.content"),
        // <p> tags removed inside lists.
        ("docs/learn/server-side/express_nodejs/deployment/index.json", "doc.body.11.value.content"),
        // Link elements are structured differently; the rari output is the better one.
        ("docs/learn/common_questions/design_and_accessibility/design_for_all_types_of_users/index.json", "doc.body.5.value.content"),
        ("docs/learn/html/multimedia_and_embedding/video_and_audio_content/index.json", "doc.body.2.value.content"),
        // Changed ids; harmless.
        ("docs/learn/css/howto/css_faq/index.json", "doc.body.11.value.id"),
        ("docs/learn/forms/property_compatibility_table_for_form_controls/index.json", "doc.body.2.value.content"),
        ("docs/learn/html/howto/define_terms_with_html/index.json", "doc.body.0.value.content"),
        ("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.toc.3.id"),
        ("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.body.4.value.id"),
        ("docs/mdn/mdn_product_advisory_board/index.json", "doc.body.1.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.11.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.12.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.3.value.content"),
        // Absolute links turned into relative ones; harmless.
        ("docs/learn/forms/styling_web_forms/index.json", "doc.body.10.value.content"),
        ("docs/mdn/kitchensink/index.json", "doc.body.24.value.content"),
        // Encoding differences; harmless.
        ("docs/learn/html/introduction_to_html/html_text_fundamentals/index.json", "doc.body.15.value.content"),
        ("docs/learn/tools_and_testing/client-side_javascript_frameworks/vue_computed_properties/index.json", "doc.body.1.value.content"),
        // NOTE(review): this path ends in ".value.i", unlike the ".value.id" entry for the
        // same file above — confirm it is not a truncated key.
        ("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.body.4.value.i"),
        ("docs/mdn/writing_guidelines/page_structures/links/index.json", "doc.body.3.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/links/index.json", "doc.body.4.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/macros/commonly_used_macros/index.json", "doc.body.14.value.content"),
        // Internal linking that rari fixes.
        ("docs/mdn/community/discussions/index.json", "doc.body.0.value.content"),
        // Baseline differences; harmless.
        ("docs/mdn/kitchensink/index.json", "doc.baseline"),
        ("docs/mdn/writing_guidelines/page_structures/compatibility_tables/index.json", "doc.baseline"),
        // Whitespace differences; harmless.
        ("docs/mdn/kitchensink/index.json", "doc.body.23.value.title"),
        ("docs/mdn/writing_guidelines/howto/write_an_api_reference/index.json", "doc.body.8.value.content"),
        ("docs/mdn/writing_guidelines/page_structures/code_examples/index.json", "doc.body.7.value.content"),
        // Caused by a bug in yari.
        ("docs/mdn/writing_guidelines/howto/write_an_api_reference/information_contained_in_a_webidl_file/index.json", "doc.body.23.value.content"),
    ])
});
252
+
192
253
static WS_DIFF : LazyLock < Regex > =
193
254
LazyLock :: new ( || Regex :: new ( r#"(?<x>>)[\n ]+|[\n ]+(?<y></)"# ) . unwrap ( ) ) ;
194
255
256
+ static EMPTY_P_DIFF : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r#"<p>[\n ]*</p>"# ) . unwrap ( ) ) ;
257
+
195
258
static DIFF_MAP : LazyLock < Arc < DashMap < String , String > > > =
196
259
LazyLock :: new ( || Arc :: new ( DashMap :: new ( ) ) ) ;
197
260
@@ -204,6 +267,10 @@ fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementC
204
267
el. remove_attribute( "data-flaw-src" ) ;
205
268
Ok ( ( ) )
206
269
} ) ,
270
+ element!( "*[data-flaw]" , |el| {
271
+ el. remove_attribute( "data-flaw" ) ;
272
+ Ok ( ( ) )
273
+ } ) ,
207
274
// remove ids from notecards, example-headers, code-examples
208
275
element!( "div.notecard, div.example-header, div.code-example" , |el| {
209
276
el. remove_attribute( "id" ) ;
@@ -215,6 +282,7 @@ fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementC
215
282
fn full_diff (
216
283
lhs : & Value ,
217
284
rhs : & Value ,
285
+ file : & str ,
218
286
path : & [ PathIndex ] ,
219
287
diff : & mut BTreeMap < String , String > ,
220
288
fast : bool ,
@@ -227,9 +295,19 @@ fn full_diff(
227
295
}
228
296
}
229
297
}
298
+ let key = make_key ( path) ;
299
+
300
+ if SKIP_GLOB_LIST . iter ( ) . any ( |i| file. starts_with ( i) ) {
301
+ return ;
302
+ }
303
+
304
+ if ALLOWLIST . contains ( & ( file, & key) ) {
305
+ return ;
306
+ }
307
+
230
308
if lhs != rhs {
231
- let key = make_key ( path ) ;
232
- if IGNORE . iter ( ) . any ( |i| key . starts_with ( i ) ) || key == "doc.sidebarHTML" && !sidebars {
309
+ if IGNORED_KEYS . iter ( ) . any ( |i| key. starts_with ( i ) ) || key == "doc.sidebarHTML" && !sidebars
310
+ {
233
311
return ;
234
312
}
235
313
match ( lhs, rhs) {
@@ -241,6 +319,7 @@ fn full_diff(
241
319
full_diff (
242
320
lhs. get ( i) . unwrap_or ( & Value :: Null ) ,
243
321
rhs. get ( i) . unwrap_or ( & Value :: Null ) ,
322
+ file,
244
323
& path,
245
324
diff,
246
325
fast,
@@ -257,6 +336,7 @@ fn full_diff(
257
336
full_diff (
258
337
lhs. get ( key) . unwrap_or ( & Value :: Null ) ,
259
338
rhs. get ( key) . unwrap_or ( & Value :: Null ) ,
339
+ file,
260
340
& path,
261
341
diff,
262
342
fast,
@@ -281,6 +361,8 @@ fn full_diff(
281
361
if is_html ( & lhs) && is_html ( & rhs) {
282
362
let lhs_t = WS_DIFF . replace_all ( & lhs, "$x$y" ) ;
283
363
let rhs_t = WS_DIFF . replace_all ( & rhs, "$x$y" ) ;
364
+ let lhs_t = EMPTY_P_DIFF . replace_all ( & lhs_t, "" ) ;
365
+ let rhs_t = EMPTY_P_DIFF . replace_all ( & rhs_t, "" ) ;
284
366
let lhs_t = rewrite_str (
285
367
& lhs_t,
286
368
RewriteStrSettings {
@@ -360,7 +442,7 @@ fn main() -> Result<(), anyhow::Error> {
360
442
let left = v;
361
443
let right = b. get ( k) . unwrap_or ( & Value :: Null ) ;
362
444
let mut diff = BTreeMap :: new ( ) ;
363
- full_diff ( left, right, & [ ] , & mut diff, arg. fast , arg. sidebars ) ;
445
+ full_diff ( left, right, k , & [ ] , & mut diff, arg. fast , arg. sidebars ) ;
364
446
if !diff. is_empty ( ) {
365
447
return Some ( format ! (
366
448
r#"<li><span>{k}</span><div class="r"><pre><code>{}</code></pre></div></li>"# ,
@@ -412,11 +494,45 @@ fn main() -> Result<(), anyhow::Error> {
412
494
let mut file = File :: create ( & arg. out ) ?;
413
495
file. write_all ( html ( & out. into_iter ( ) . collect :: < String > ( ) ) . as_bytes ( ) ) ?;
414
496
}
497
+ if arg. csv {
498
+ let mut out = Vec :: new ( ) ;
499
+ out. push ( "File;JSON Path\n " . to_string ( ) ) ;
500
+ out. extend (
501
+ a. par_iter ( )
502
+ . filter_map ( |( k, v) | {
503
+ if b. get ( k) == Some ( v) {
504
+ same. fetch_add ( 1 , Relaxed ) ;
505
+ return None ;
506
+ }
507
+
508
+ let left = v;
509
+ let right = b. get ( k) . unwrap_or ( & Value :: Null ) ;
510
+ let mut diff = BTreeMap :: new ( ) ;
511
+ full_diff ( left, right, k, & [ ] , & mut diff, arg. fast , arg. sidebars ) ;
512
+ if !diff. is_empty ( ) {
513
+ return Some ( format ! (
514
+ "{}\n " ,
515
+ diff. into_keys( )
516
+ . map( |jsonpath| format!( "{};{}" , k, jsonpath) )
517
+ . collect:: <Vec <_>>( )
518
+ . join( "\n " )
519
+ ) ) ;
520
+ } else {
521
+ same. fetch_add ( 1 , Relaxed ) ;
522
+ }
523
+ None
524
+ } )
525
+ . collect :: < Vec < _ > > ( ) ,
526
+ ) ;
527
+ let mut file = File :: create ( & arg. out ) ?;
528
+ file. write_all ( out. into_iter ( ) . collect :: < String > ( ) . as_bytes ( ) ) ?;
529
+ }
415
530
416
531
println ! (
417
- "Took: {:?} - {}/{hits}" ,
532
+ "Took: {:?} - {}/{hits} ok, {} remaining " ,
418
533
start. elapsed( ) ,
419
- same. load( Relaxed )
534
+ same. load( Relaxed ) ,
535
+ hits - same. load( Relaxed )
420
536
) ;
421
537
}
422
538
}
0 commit comments