Skip to content

Commit 5b96336

Browse files
committed
Add xgettext command to extract translatable strings
This command is one half of a Gettext-based translation (i18n) workflow. It iterates over each chapter and extracts all translatable text into a `messages.pot` file. The text is split on paragraph boundaries, which helps ensure less churn in the output when the text is edited. The other half of the workflow is a `gettext` command which will take a source Markdown file and an `xx.po` file and output a translated Markdown file. Part of the solution for #5.
1 parent 40c06f5 commit 5b96336

File tree

5 files changed

+155
-0
lines changed

5 files changed

+155
-0
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ shlex = "1"
3434
tempfile = "3.0"
3535
toml = "0.5.1"
3636
topological-sort = "0.1.0"
37+
polib = "0.1.0"
3738

3839
# Watch feature
3940
notify = { version = "4.0", optional = true }

src/cmd/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ pub mod serve;
88
pub mod test;
99
#[cfg(feature = "watch")]
1010
pub mod watch;
11+
pub mod xgettext;

src/cmd/xgettext.rs

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
use crate::get_book_dir;
2+
use anyhow::Context;
3+
use clap::{arg, App, ArgMatches};
4+
use lazy_static::lazy_static;
5+
use mdbook::MDBook;
6+
use polib::catalog::Catalog;
7+
use polib::message::Message;
8+
use regex::Regex;
9+
use std::path::Path;
10+
11+
// Create clap subcommand arguments
12+
pub fn make_subcommand<'help>() -> App<'help> {
13+
App::new("xgettext")
14+
.about("Extract translatable strings from all chapters")
15+
.arg(arg!(-o --output [FILE]
16+
"Write output to the specified file. Defaults to `messages.pot`."
17+
))
18+
.arg(arg!([dir]
19+
"Root directory for the book{n}\
20+
(Defaults to the Current Directory when omitted)"
21+
))
22+
}
23+
24+
/// Extract paragraphs from text.
25+
///
26+
/// Paragraphs are separated by at least two newlines. Returns an
27+
/// iterator over line numbers (starting from 1) and paragraphs.
28+
pub fn extract_paragraphs(text: &str) -> impl Iterator<Item = (usize, &str)> {
29+
// TODO: This could be make more sophisticated by parsing the
30+
// Markdown and stripping off the markup characters.
31+
//
32+
// As an example, a header like "## My heading" could become just
33+
// "My heading" in the `.pot` file. Similarly, paragraphs could be
34+
// unfolded and list items could be translated one-by-one.
35+
lazy_static! {
36+
static ref PARAGRAPH_SEPARATOR: Regex = Regex::new(r"\n\n+").unwrap();
37+
}
38+
39+
// Skip over leading empty lines.
40+
let trimmed = text.trim_start_matches('\n');
41+
let mut matches = PARAGRAPH_SEPARATOR.find_iter(trimmed);
42+
let mut lineno = 1 + text.len() - trimmed.len();
43+
let mut last = 0;
44+
45+
std::iter::from_fn(move || match matches.next() {
46+
Some(m) => {
47+
let result = (lineno, &trimmed[last..m.start()]);
48+
lineno += trimmed[last..m.end()].lines().count();
49+
last = m.end();
50+
Some(result)
51+
}
52+
None => {
53+
if last < trimmed.len() {
54+
let result = (lineno, &trimmed[last..]);
55+
last = trimmed.len();
56+
Some(result)
57+
} else {
58+
None
59+
}
60+
}
61+
})
62+
}
63+
64+
// Xgettext command implementation
65+
pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> {
66+
let book_dir = get_book_dir(args);
67+
let book = MDBook::load(&book_dir)?;
68+
69+
let mut catalog = Catalog::new();
70+
71+
for item in book.iter() {
72+
match item {
73+
mdbook::BookItem::Chapter(chapter) if !chapter.is_draft_chapter() => {
74+
for (lineno, paragraph) in extract_paragraphs(&chapter.content) {
75+
let source = &chapter
76+
.source_path
77+
.as_ref()
78+
.map(|path| format!("{}:{}", path.to_string_lossy(), lineno))
79+
.unwrap_or_default();
80+
catalog.add_message(Message::new_singular("", source, "", "", &paragraph, ""));
81+
}
82+
}
83+
mdbook::BookItem::PartTitle(part_title) => {
84+
// TODO: would it be better to process SUMMARY.md like
85+
// a normal chapter and split the text by paragraph?
86+
catalog.add_message(Message::new_singular(
87+
"",
88+
"SUMMARY.md",
89+
"",
90+
"",
91+
&part_title,
92+
"",
93+
));
94+
}
95+
_ => {}
96+
}
97+
}
98+
99+
let output_path = Path::new(args.value_of("output").unwrap_or("messages.pot"));
100+
polib::po_file::write(&catalog, output_path)
101+
.with_context(|| format!("Could not write {:?}", output_path))?;
102+
103+
Ok(())
104+
}
105+
106+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_paragraphs` and gathers the results into a `Vec`
    /// so they can be compared with `assert_eq!`.
    fn collect_paragraphs(text: &str) -> Vec<(usize, &str)> {
        extract_paragraphs(text).collect()
    }

    #[test]
    fn test_extract_paragraphs_empty() {
        // Empty input yields no paragraphs at all.
        assert!(collect_paragraphs("").is_empty());
    }

    #[test]
    fn test_extract_paragraphs_single_line() {
        let expected = vec![(1, "This is a paragraph.")];
        assert_eq!(collect_paragraphs("This is a paragraph."), expected);
    }

    #[test]
    fn test_extract_paragraphs_simple() {
        // The second paragraph starts on line 4: two text lines and one
        // blank line precede it.
        let expected = vec![(1, "This is\na paragraph."), (4, "Next paragraph.")];
        assert_eq!(
            collect_paragraphs("This is\na paragraph.\n\nNext paragraph."),
            expected
        );
    }

    #[test]
    fn test_extract_paragraphs_leading_newlines() {
        // Three leading newlines push the paragraph down to line 4.
        let expected = vec![(4, "This is\na paragraph.")];
        assert_eq!(
            collect_paragraphs("\n\n\nThis is\na paragraph."),
            expected
        );
    }
}

src/main.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ fn main() {
3434
#[cfg(feature = "serve")]
3535
Some(("serve", sub_matches)) => cmd::serve::execute(sub_matches),
3636
Some(("test", sub_matches)) => cmd::test::execute(sub_matches),
37+
Some(("xgettext", sub_matches)) => cmd::xgettext::execute(sub_matches),
3738
Some(("completions", sub_matches)) => (|| {
3839
let shell: Shell = sub_matches
3940
.value_of("shell")
@@ -76,6 +77,7 @@ fn create_clap_app() -> App<'static> {
7677
.subcommand(cmd::build::make_subcommand())
7778
.subcommand(cmd::test::make_subcommand())
7879
.subcommand(cmd::clean::make_subcommand())
80+
.subcommand(cmd::xgettext::make_subcommand())
7981
.subcommand(
8082
App::new("completions")
8183
.about("Generate shell completions for your shell to stdout")

0 commit comments

Comments
 (0)