Skip to content

Add commands for Gettext-based translations i18n #1864

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ shlex = "1"
tempfile = "3.0"
toml = "0.5.1"
topological-sort = "0.1.0"
polib = "0.1.0"

# Watch feature
notify = { version = "4.0", optional = true }
Expand Down
101 changes: 101 additions & 0 deletions src/cmd/gettext.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use crate::cmd::xgettext::extract_paragraphs;
use crate::get_book_dir;
use crate::utils;
use anyhow::anyhow;
use anyhow::Context;
use clap::{arg, App, Arg, ArgMatches};
use mdbook::book::Chapter;
use mdbook::BookItem;
use mdbook::MDBook;
use polib::catalog::Catalog;
use polib::po_file::parse;
use std::path::Path;

/// Build the clap definition of the `gettext` subcommand.
///
/// The subcommand takes an optional `--dest-dir`/`-d` option, a required
/// `po` positional argument and an optional `dir` positional argument.
pub fn make_subcommand<'help>() -> App<'help> {
    // Where the translated book is written.
    let dest_dir = Arg::new("dest-dir")
        .short('d')
        .long("dest-dir")
        .value_name("dest-dir")
        .help(
            "Output directory for the translated book{n}\
             Relative paths are interpreted relative to the book's root directory{n}\
             If omitted, mdBook defaults to `./src/xx` where `xx` is the language of the PO file.",
        );

    // Positional arguments; they are registered below in this order.
    let po_file = arg!(<po> "PO file to generate translation for");
    let book_root = arg!([dir]
        "Root directory for the book{n}\
         (Defaults to the Current Directory when omitted)"
    );

    App::new("gettext")
        .about("Output translated book")
        .arg(dest_dir)
        .arg(po_file)
        .arg(book_root)
}

/// Translate `text` paragraph by paragraph using the messages in `catalog`.
///
/// Each paragraph yielded by `extract_paragraphs` is looked up in the
/// catalog. When a message with a non-empty `msgstr` is found, that
/// translation is emitted; otherwise the paragraph is copied unchanged.
/// Every emitted paragraph is followed by a single newline.
fn translate(text: &str, catalog: &Catalog) -> String {
    let mut rendered = String::with_capacity(text.len());
    // Line number (1-based, in the source text) that the next piece of
    // output corresponds to.
    let mut next_line = 1;

    for (start_line, source) in extract_paragraphs(text) {
        // Re-create the blank lines that separated this paragraph from the
        // previous one. This matters for code blocks, where blank lines can
        // be significant.
        for _ in next_line..start_line {
            rendered.push('\n');
        }
        next_line = next_line.max(start_line);
        next_line += source.lines().count();

        // Look up a usable (present and non-empty) translation.
        let msgstr = catalog
            .find_message(source)
            .and_then(|message| message.get_msgstr().ok())
            .filter(|candidate| !candidate.is_empty());
        let paragraph = match msgstr {
            Some(candidate) => candidate.as_str(),
            None => source,
        };

        rendered.push_str(paragraph);
        rendered.push('\n');
    }

    rendered
}

// Gettext command implementation
pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> {
let book_dir = get_book_dir(args);
let book = MDBook::load(&book_dir)?;

let po_file = Path::new(args.value_of("po").unwrap());
let lang = po_file
.file_stem()
.ok_or_else(|| anyhow!("Could not determine language from PO file {:?}", po_file))?;
let catalog = parse(po_file)
.map_err(|err| anyhow!(err.to_string()))
.with_context(|| format!("Could not parse PO file {:?}", po_file))?;
let dest_dir = book.root.join(match args.value_of("dest-dir") {
Some(path) => path.into(),
None => Path::new(&book.config.book.src).join(lang),
});

let summary_path = book_dir.join(&book.config.book.src).join("SUMMARY.md");
let summary = std::fs::read_to_string(&summary_path)?;
utils::fs::write_file(
&dest_dir,
"SUMMARY.md",
translate(&summary, &catalog).as_bytes(),
)?;

for item in book.iter() {
if let BookItem::Chapter(Chapter {
content,
path: Some(path),
..
}) = item
{
let output = translate(content, &catalog);
utils::fs::write_file(&dest_dir, path, output.as_bytes())?;
}
}

Ok(())
}
2 changes: 2 additions & 0 deletions src/cmd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

pub mod build;
pub mod clean;
pub mod gettext;
pub mod init;
#[cfg(feature = "serve")]
pub mod serve;
pub mod test;
#[cfg(feature = "watch")]
pub mod watch;
pub mod xgettext;
158 changes: 158 additions & 0 deletions src/cmd/xgettext.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
use crate::get_book_dir;
use anyhow::Context;
use clap::{arg, App, ArgMatches};
use lazy_static::lazy_static;
use mdbook::book::Chapter;
use mdbook::{BookItem, Config, MDBook};
use polib::catalog::Catalog;
use polib::message::Message;
use regex::Regex;
use std::path::Path;

/// Build the clap definition of the `xgettext` subcommand.
///
/// The subcommand takes an optional `-o`/`--output` option and an optional
/// `dir` positional argument.
pub fn make_subcommand<'help>() -> App<'help> {
    let output_file = arg!(-o --output [FILE]
        "Write output to the specified file. Defaults to `messages.pot`."
    );
    let book_root = arg!([dir]
        "Root directory for the book{n}\
         (Defaults to the Current Directory when omitted)"
    );

    App::new("xgettext")
        .about("Extract translatable strings from all chapters")
        .arg(output_file)
        .arg(book_root)
}

/// Split `text` into paragraphs.
///
/// A paragraph boundary is a run of two or more consecutive newlines.
/// Leading newlines are skipped and trailing newlines are removed from the
/// final paragraph. The returned iterator yields, for every paragraph, the
/// 1-based line number on which it starts together with its text.
pub fn extract_paragraphs(text: &str) -> impl Iterator<Item = (usize, &str)> {
    // TODO: This could be made more sophisticated by parsing the Markdown
    // and stripping off the markup characters.
    //
    // For example, a header like "## My heading" could become just
    // "My heading" in the `.pot` file. Similarly, paragraphs could be
    // unfolded and list items could be translated one-by-one.
    lazy_static! {
        static ref PARAGRAPH_SEPARATOR: Regex = Regex::new(r"\n\n+").unwrap();
    }

    // Drop leading empty lines, remembering how many were skipped so the
    // first paragraph still reports its real line number.
    let body = text.trim_start_matches('\n');
    let skipped_newlines = text.len() - body.len();

    let mut separators = PARAGRAPH_SEPARATOR.find_iter(body);
    let mut next_lineno = 1 + skipped_newlines;
    // Byte offset in `body` where the next paragraph starts.
    let mut cursor = 0;

    std::iter::from_fn(move || {
        if let Some(separator) = separators.next() {
            let item = (next_lineno, &body[cursor..separator.start()]);
            // Advance past the paragraph and the blank lines that follow it.
            next_lineno += body[cursor..separator.end()].lines().count();
            cursor = separator.end();
            return Some(item);
        }

        // No separators left: emit the remaining text once, if there is any.
        if cursor >= body.len() {
            return None;
        }
        let item = (next_lineno, body[cursor..].trim_end_matches('\n'));
        cursor = body.len();
        Some(item)
    })
}

/// Add every paragraph of `content` to `catalog` as a message.
///
/// Each message records where the paragraph was found, formatted as
/// `path:line`, where the path is `reference` joined onto the book's source
/// directory from `config`. When the catalog already holds a message for the
/// same paragraph, the new location is appended to the existing ones on a
/// new line and the message is replaced.
fn add_messages<P: AsRef<Path>>(
    config: &Config,
    catalog: &mut Catalog,
    content: &str,
    reference: P,
) {
    let file = config.book.src.join(reference.as_ref());

    for (lineno, paragraph) in extract_paragraphs(content) {
        let location = format!("{}:{}", file.display(), lineno);
        let all_locations = if let Some(existing) = catalog.find_message(paragraph) {
            format!("{}\n{}", existing.source, location)
        } else {
            location
        };
        let entry = Message::new_singular("", &all_locations, "", "", paragraph, "");

        // Replace an existing message in place rather than adding a second
        // one: the catalog must not end up with duplicate msgids.
        if let Some(&index) = catalog.find_message_index(paragraph) {
            catalog.update_message_by_index(index, entry).unwrap();
        } else {
            catalog.add_message(entry);
        }
    }
}

// Xgettext command implementation
pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> {
let book_dir = get_book_dir(args);
let book = MDBook::load(&book_dir)?;

let mut catalog = Catalog::new();
catalog.metadata.content_type = String::from("text/plain; charset=UTF-8");

let summary_path = book_dir.join(&book.config.book.src).join("SUMMARY.md");
let summary = std::fs::read_to_string(&summary_path)?;
add_messages(&book.config, &mut catalog, &summary, "SUMMARY.md");

for item in book.iter() {
if let BookItem::Chapter(Chapter {
content,
path: Some(path),
..
}) = item
{
add_messages(&book.config, &mut catalog, content, path);
}
}

let output_path = Path::new(args.value_of("output").unwrap_or("messages.pot"));
polib::po_file::write(&catalog, output_path)
.with_context(|| format!("Could not write {:?}", output_path))?;

Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Run `extract_paragraphs` and gather the results for comparison.
    fn collect_paragraphs(text: &str) -> Vec<(usize, &str)> {
        extract_paragraphs(text).collect()
    }

    #[test]
    fn test_extract_paragraphs_empty() {
        // Empty input yields no paragraphs at all.
        let expected: Vec<(usize, &str)> = Vec::new();
        assert_eq!(collect_paragraphs(""), expected);
    }

    #[test]
    fn test_extract_paragraphs_single_line() {
        let expected = vec![(1, "This is a paragraph.")];
        assert_eq!(collect_paragraphs("This is a paragraph."), expected);
    }

    #[test]
    fn test_extract_paragraphs_simple() {
        // The second paragraph starts on line 4: two text lines, one blank.
        let expected = vec![(1, "This is\na paragraph."), (4, "Next paragraph.")];
        assert_eq!(
            collect_paragraphs("This is\na paragraph.\n\nNext paragraph."),
            expected
        );
    }

    #[test]
    fn test_extract_paragraphs_leading_newlines() {
        // Leading blank lines are skipped but still counted.
        let expected = vec![(4, "This is\na paragraph.")];
        assert_eq!(
            collect_paragraphs("\n\n\nThis is\na paragraph."),
            expected
        );
    }
}
4 changes: 4 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ fn main() {
#[cfg(feature = "serve")]
Some(("serve", sub_matches)) => cmd::serve::execute(sub_matches),
Some(("test", sub_matches)) => cmd::test::execute(sub_matches),
Some(("xgettext", sub_matches)) => cmd::xgettext::execute(sub_matches),
Some(("gettext", sub_matches)) => cmd::gettext::execute(sub_matches),
Some(("completions", sub_matches)) => (|| {
let shell: Shell = sub_matches
.value_of("shell")
Expand Down Expand Up @@ -76,6 +78,8 @@ fn create_clap_app() -> App<'static> {
.subcommand(cmd::build::make_subcommand())
.subcommand(cmd::test::make_subcommand())
.subcommand(cmd::clean::make_subcommand())
.subcommand(cmd::xgettext::make_subcommand())
.subcommand(cmd::gettext::make_subcommand())
.subcommand(
App::new("completions")
.about("Generate shell completions for your shell to stdout")
Expand Down