Skip to content

Commit dc37714

Browse files
committed
add tool to "enforce" semantic line breaks
See #1132
1 parent e68dfb8 commit dc37714

File tree

4 files changed

+327
-0
lines changed

4 files changed

+327
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ book
44
book.toml
55

66
ci/date-check/target/
7+
ci/semantic-line-breaks/target/
78

89
# Generated by check-in.sh
910
pulls.json

ci/semantic-line-breaks/Cargo.lock

+193
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ci/semantic-line-breaks/Cargo.toml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "semantic-line-breaks"
3+
version = "0.0.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
anyhow = "1"
8+
ignore = "0.4"
9+
10+
[dependencies.regex]
11+
version = "1"
12+
features = ["pattern"]

ci/semantic-line-breaks/src/main.rs

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
use std::{env, fs, process};
2+
3+
use anyhow::Result;
4+
use ignore::Walk;
5+
use regex::Regex;
6+
7+
fn main() -> Result<()> {
8+
let mut args = env::args();
9+
if args.len() == 1 {
10+
eprintln!("error: expected root Markdown directory as CLI argument");
11+
process::exit(1);
12+
}
13+
let root_dir = args.nth(1).unwrap();
14+
for result in Walk::new(root_dir) {
15+
let entry = result?;
16+
if entry.file_type().expect("no stdin").is_dir() {
17+
continue;
18+
}
19+
let path = entry.path();
20+
if let Some(extension) = path.extension() {
21+
if extension != "md" {
22+
continue;
23+
}
24+
} else {
25+
continue;
26+
}
27+
let old = fs::read_to_string(path)?;
28+
let new = comply(&old)?;
29+
if new != old {
30+
fs::write(path, new)?;
31+
}
32+
}
33+
Ok(())
34+
}
35+
36+
/// Rewrites Markdown `content` so that each sentence starts on its own line
/// ("semantic line breaks").
///
/// A line is split after every `.`, `?`, `;` or `!` that is followed by
/// whitespace. Each resulting piece is trimmed and prefixed with the exact
/// leading whitespace of the line it came from, so tab indentation survives.
///
/// Lines are copied through unchanged when they
/// - are blank,
/// - are a triple-backtick fence (optionally indented) or sit between two
///   such fences,
/// - match the ignore pattern anywhere in the line: a digit followed by `.`,
///   or `-`, `*`, or `r?`, each followed by whitespace,
/// - contain `e.g.` or `i.e.`, whose dots do not end a sentence,
/// - contain ` | `, which is treated as a table row.
///
/// Every output line is terminated with `\n` (so `\r\n` input is normalised
/// and a missing final newline is added). Empty input yields empty output.
///
/// # Errors
///
/// Returns an error if either regular expression fails to compile.
fn comply(content: &str) -> Result<String> {
    let split_re = Regex::new(r"(\.|\?|;|!)\s+")?;
    let ignore_re = Regex::new(r"(\d\.|\-|\*|r\?)\s+")?;

    let mut output = String::with_capacity(content.len());
    let mut in_code_block = false;

    for raw_line in content.lines() {
        let line = raw_line.trim_end();

        // Track fences before any other filter so that a fence line can never
        // be skipped by an unrelated rule and leave `in_code_block` stale.
        // `trim_start` lets fences nested in list items be recognised too.
        if line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
            output.push_str(raw_line);
            output.push('\n');
            continue;
        }

        // The ignore pattern is matched against the untrimmed line, exactly
        // as before, so trailing whitespace can still satisfy its `\s+`.
        let keep_as_is = in_code_block
            || line.is_empty()
            || ignore_re.is_match(raw_line)
            || line.contains("e.g.")
            || line.contains("i.e.")
            || line.contains(" | ")
            || !split_re.is_match(line);
        if keep_as_is {
            output.push_str(raw_line);
            output.push('\n');
            continue;
        }

        // Reuse the original indentation verbatim instead of re-creating it
        // from a byte count, which would turn tabs into spaces.
        let indent = &line[..line.len() - line.trim_start().len()];
        for portion in line.split_inclusive(&split_re) {
            output.push_str(indent);
            output.push_str(portion.trim());
            output.push('\n');
        }
    }
    Ok(output)
}

/// Regression checks for empty input, tab indentation and indented fences.
#[test]
fn comply_handles_edge_cases() {
    // An empty document stays empty instead of gaining a lone newline.
    assert_eq!(comply("").unwrap(), "");
    // Tab indentation is carried over to every piece of a split line.
    assert_eq!(comply("\tone. two\n").unwrap(), "\tone.\n\ttwo\n");
    // Code inside an indented fenced block is not split.
    let fenced = "  ```\n  a; b\n  ```\n";
    assert_eq!(comply(fenced).unwrap(), fenced);
}
90+
91+
/// Checks that `comply` splits plain prose after sentence punctuation while
/// leaving the heading, the numbered item, the table row, the `e.g.`/`i.e.`
/// line and the fenced code of this fixture untouched.
#[test]
fn test() {
    let input = "\
# some heading

must! be; split? now.
1. ignore numbered
ignore | tables
ignore e.g. and i.e. for realsies
```
some code. block
```
some more text.
";
    let expected = "\
# some heading

must!
be;
split?
now.
1. ignore numbered
ignore | tables
ignore e.g. and i.e. for realsies
```
some code. block
```
some more text.
";
    let actual = comply(input).unwrap();
    assert_eq!(actual, expected);
}

0 commit comments

Comments
 (0)