Skip to content

Commit 3ebf64b

Browse files
argl and fiji-flo authored
chore(diff): add allowlist support for diffing content (#32)
* add allowlist support for diffing content
* more ignored files, added the CSV output option for listing differences by file and JSON key

---------

Co-authored-by: Florian Dieminger <[email protected]>
1 parent 80c083a commit 3ebf64b

File tree

1 file changed

+122
-6
lines changed

1 file changed

+122
-6
lines changed

crates/diff-test/src/main.rs

Lines changed: 122 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ struct BuildArgs {
143143
#[arg(long)]
144144
html: bool,
145145
#[arg(long)]
146+
csv: bool,
147+
#[arg(long)]
146148
inline: bool,
147149
#[arg(long)]
148150
ignore_html_whitespace: bool,
@@ -175,7 +177,7 @@ fn is_html(s: &str) -> bool {
175177
s.trim_start().starts_with('<') && s.trim_end().ends_with('>')
176178
}
177179

178-
const IGNORE: &[&str] = &[
180+
const IGNORED_KEYS: &[&str] = &[
179181
"doc.flaws",
180182
"blogMeta.readTime",
181183
"doc.modified",
@@ -189,9 +191,70 @@ const IGNORE: &[&str] = &[
189191
"doc.summary",
190192
];
191193

194+
static SKIP_GLOB_LIST: LazyLock<Vec<&str>> = LazyLock::new(|| {
195+
vec![
196+
"docs/mdn/writing_guidelines/",
197+
"docs/mozilla/add-ons/webextensions/",
198+
"docs/mozilla/firefox/releases/",
199+
]
200+
});
201+
202+
static ALLOWLIST: LazyLock<HashSet<(&str, &str)>> = LazyLock::new(|| {
203+
vec![
204+
// Wrong auto-linking of example.com properly escaped link, unfixable in yari
205+
("docs/glossary/http/index.json", "doc.body.0.value.content"),
206+
("docs/learn/html/multimedia_and_embedding/other_embedding_technologies/index.json", "doc.body.4.value.content"),
207+
// Relative link to MDN Playground gets rendered as dead link in yari, correct in rari
208+
("docs/learn/learning_and_getting_help/index.json", "doc.body.3.value.content"),
209+
// 'unsupported templ: livesamplelink' in rari, remove when supported
210+
("docs/learn/forms/form_validation/index.json", "doc.body.12.value.content"),
211+
("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.9.value.content"),
212+
// p tag removal in lists
213+
("docs/learn/server-side/express_nodejs/deployment/index.json", "doc.body.11.value.content"),
214+
// link element re-structure, better in rari
215+
("docs/learn/common_questions/design_and_accessibility/design_for_all_types_of_users/index.json", "doc.body.5.value.content"),
216+
("docs/learn/html/multimedia_and_embedding/video_and_audio_content/index.json", "doc.body.2.value.content"),
217+
// id changes, no problem
218+
("docs/learn/css/howto/css_faq/index.json", "doc.body.11.value.id"),
219+
("docs/learn/forms/property_compatibility_table_for_form_controls/index.json", "doc.body.2.value.content"),
220+
("docs/learn/html/howto/define_terms_with_html/index.json", "doc.body.0.value.content"),
221+
("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.toc.3.id"),
222+
("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.body.4.value.id"),
223+
("docs/mdn/mdn_product_advisory_board/index.json", "doc.body.1.value.content"),
224+
("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.11.value.content"),
225+
("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.12.value.content"),
226+
("docs/mdn/writing_guidelines/page_structures/live_samples/index.json", "doc.body.3.value.content"),
227+
// absolute to relative link change, no problem
228+
("docs/learn/forms/styling_web_forms/index.json", "doc.body.10.value.content"),
229+
("docs/mdn/kitchensink/index.json", "doc.body.24.value.content"),
230+
// encoding changes, no problem
231+
("docs/learn/html/introduction_to_html/html_text_fundamentals/index.json", "doc.body.15.value.content"),
232+
("docs/learn/tools_and_testing/client-side_javascript_frameworks/vue_computed_properties/index.json", "doc.body.1.value.content"),
233+
("docs/learn/tools_and_testing/client-side_javascript_frameworks/react_interactivity_filtering_conditional_rendering/index.json", "doc.body.4.value.i"),
234+
("docs/mdn/writing_guidelines/page_structures/links/index.json", "doc.body.3.value.content"),
235+
("docs/mdn/writing_guidelines/page_structures/links/index.json", "doc.body.4.value.content"),
236+
("docs/mdn/writing_guidelines/page_structures/macros/commonly_used_macros/index.json", "doc.body.14.value.content"),
237+
// internal linking fixed in rari
238+
("docs/mdn/community/discussions/index.json", "doc.body.0.value.content"),
239+
// baseline change no problem
240+
("docs/mdn/kitchensink/index.json", "doc.baseline"),
241+
("docs/mdn/writing_guidelines/page_structures/compatibility_tables/index.json", "doc.baseline"),
242+
// whitespace changes no problem
243+
("docs/mdn/kitchensink/index.json", "doc.body.23.value.title"),
244+
("docs/mdn/writing_guidelines/howto/write_an_api_reference/index.json", "doc.body.8.value.content"),
245+
("docs/mdn/writing_guidelines/page_structures/code_examples/index.json", "doc.body.7.value.content"),
246+
// bug in yari
247+
("docs/mdn/writing_guidelines/howto/write_an_api_reference/information_contained_in_a_webidl_file/index.json", "doc.body.23.value.content"),
248+
]
249+
.into_iter()
250+
.collect()
251+
});
252+
192253
static WS_DIFF: LazyLock<Regex> =
193254
LazyLock::new(|| Regex::new(r#"(?<x>>)[\n ]+|[\n ]+(?<y></)"#).unwrap());
194255

256+
static EMPTY_P_DIFF: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<p>[\n ]*</p>"#).unwrap());
257+
195258
static DIFF_MAP: LazyLock<Arc<DashMap<String, String>>> =
196259
LazyLock::new(|| Arc::new(DashMap::new()));
197260

@@ -204,6 +267,10 @@ fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementC
204267
el.remove_attribute("data-flaw-src");
205268
Ok(())
206269
}),
270+
element!("*[data-flaw]", |el| {
271+
el.remove_attribute("data-flaw");
272+
Ok(())
273+
}),
207274
// remove ids from notecards, example-headers, code-examples
208275
element!("div.notecard, div.example-header, div.code-example", |el| {
209276
el.remove_attribute("id");
@@ -215,6 +282,7 @@ fn pre_diff_element_massaging_handlers<'a>() -> Vec<(Cow<'a, Selector>, ElementC
215282
fn full_diff(
216283
lhs: &Value,
217284
rhs: &Value,
285+
file: &str,
218286
path: &[PathIndex],
219287
diff: &mut BTreeMap<String, String>,
220288
fast: bool,
@@ -227,9 +295,19 @@ fn full_diff(
227295
}
228296
}
229297
}
298+
let key = make_key(path);
299+
300+
if SKIP_GLOB_LIST.iter().any(|i| file.starts_with(i)) {
301+
return;
302+
}
303+
304+
if ALLOWLIST.contains(&(file, &key)) {
305+
return;
306+
}
307+
230308
if lhs != rhs {
231-
let key = make_key(path);
232-
if IGNORE.iter().any(|i| key.starts_with(i)) || key == "doc.sidebarHTML" && !sidebars {
309+
if IGNORED_KEYS.iter().any(|i| key.starts_with(i)) || key == "doc.sidebarHTML" && !sidebars
310+
{
233311
return;
234312
}
235313
match (lhs, rhs) {
@@ -241,6 +319,7 @@ fn full_diff(
241319
full_diff(
242320
lhs.get(i).unwrap_or(&Value::Null),
243321
rhs.get(i).unwrap_or(&Value::Null),
322+
file,
244323
&path,
245324
diff,
246325
fast,
@@ -257,6 +336,7 @@ fn full_diff(
257336
full_diff(
258337
lhs.get(key).unwrap_or(&Value::Null),
259338
rhs.get(key).unwrap_or(&Value::Null),
339+
file,
260340
&path,
261341
diff,
262342
fast,
@@ -281,6 +361,8 @@ fn full_diff(
281361
if is_html(&lhs) && is_html(&rhs) {
282362
let lhs_t = WS_DIFF.replace_all(&lhs, "$x$y");
283363
let rhs_t = WS_DIFF.replace_all(&rhs, "$x$y");
364+
let lhs_t = EMPTY_P_DIFF.replace_all(&lhs_t, "");
365+
let rhs_t = EMPTY_P_DIFF.replace_all(&rhs_t, "");
284366
let lhs_t = rewrite_str(
285367
&lhs_t,
286368
RewriteStrSettings {
@@ -360,7 +442,7 @@ fn main() -> Result<(), anyhow::Error> {
360442
let left = v;
361443
let right = b.get(k).unwrap_or(&Value::Null);
362444
let mut diff = BTreeMap::new();
363-
full_diff(left, right, &[], &mut diff, arg.fast, arg.sidebars);
445+
full_diff(left, right, k, &[], &mut diff, arg.fast, arg.sidebars);
364446
if !diff.is_empty() {
365447
return Some(format!(
366448
r#"<li><span>{k}</span><div class="r"><pre><code>{}</code></pre></div></li>"#,
@@ -412,11 +494,45 @@ fn main() -> Result<(), anyhow::Error> {
412494
let mut file = File::create(&arg.out)?;
413495
file.write_all(html(&out.into_iter().collect::<String>()).as_bytes())?;
414496
}
497+
if arg.csv {
498+
let mut out = Vec::new();
499+
out.push("File;JSON Path\n".to_string());
500+
out.extend(
501+
a.par_iter()
502+
.filter_map(|(k, v)| {
503+
if b.get(k) == Some(v) {
504+
same.fetch_add(1, Relaxed);
505+
return None;
506+
}
507+
508+
let left = v;
509+
let right = b.get(k).unwrap_or(&Value::Null);
510+
let mut diff = BTreeMap::new();
511+
full_diff(left, right, k, &[], &mut diff, arg.fast, arg.sidebars);
512+
if !diff.is_empty() {
513+
return Some(format!(
514+
"{}\n",
515+
diff.into_keys()
516+
.map(|jsonpath| format!("{};{}", k, jsonpath))
517+
.collect::<Vec<_>>()
518+
.join("\n")
519+
));
520+
} else {
521+
same.fetch_add(1, Relaxed);
522+
}
523+
None
524+
})
525+
.collect::<Vec<_>>(),
526+
);
527+
let mut file = File::create(&arg.out)?;
528+
file.write_all(out.into_iter().collect::<String>().as_bytes())?;
529+
}
415530

416531
println!(
417-
"Took: {:?} - {}/{hits}",
532+
"Took: {:?} - {}/{hits} ok, {} remaining",
418533
start.elapsed(),
419-
same.load(Relaxed)
534+
same.load(Relaxed),
535+
hits - same.load(Relaxed)
420536
);
421537
}
422538
}

0 commit comments

Comments
 (0)