|
| 1 | +use crate::get_book_dir; |
| 2 | +use anyhow::Context; |
| 3 | +use clap::{arg, App, ArgMatches}; |
| 4 | +use lazy_static::lazy_static; |
| 5 | +use mdbook::MDBook; |
| 6 | +use polib::catalog::Catalog; |
| 7 | +use polib::message::Message; |
| 8 | +use regex::Regex; |
| 9 | +use std::path::Path; |
| 10 | + |
| 11 | +// Create clap subcommand arguments |
| 12 | +pub fn make_subcommand<'help>() -> App<'help> { |
| 13 | + App::new("xgettext") |
| 14 | + .about("Extract translatable strings from all chapters") |
| 15 | + .arg(arg!(-o --output [FILE] |
| 16 | + "Write output to the specified file. Defaults to `messages.pot`." |
| 17 | + )) |
| 18 | + .arg(arg!([dir] |
| 19 | + "Root directory for the book{n}\ |
| 20 | + (Defaults to the Current Directory when omitted)" |
| 21 | + )) |
| 22 | +} |
| 23 | + |
/// Extract paragraphs from text.
///
/// Paragraphs are separated by two or more consecutive newlines (`\n`).
/// Leading and trailing newlines never belong to a paragraph, so a text
/// that ends in a single `\n` does not leak that newline into its last
/// paragraph.
///
/// Returns an iterator over `(line number, paragraph)` pairs. The line
/// number starts from 1 and is the line of `text` on which the paragraph
/// begins.
pub fn extract_paragraphs(text: &str) -> impl Iterator<Item = (usize, &str)> {
    // TODO: This could be made more sophisticated by parsing the
    // Markdown and stripping off the markup characters.
    //
    // As an example, a header like "## My heading" could become just
    // "My heading" in the `.pot` file. Similarly, paragraphs could be
    // unfolded and list items could be translated one-by-one.

    // Skip over leading empty lines. Every stripped `\n` is exactly one
    // byte and one line, so the byte difference is the number of lines
    // skipped.
    let without_leading = text.trim_start_matches('\n');
    let mut lineno = 1 + (text.len() - without_leading.len());

    // Drop trailing newlines so that the final paragraph does not end in
    // a stray `\n`. After this, `rest` neither starts nor ends with a
    // newline, which means every separator found below is followed by a
    // non-empty paragraph.
    let mut rest = without_leading.trim_end_matches('\n');

    std::iter::from_fn(move || {
        if rest.is_empty() {
            return None;
        }

        let start = lineno;
        let paragraph = match rest.find("\n\n") {
            Some(end) => {
                let (paragraph, tail) = rest.split_at(end);
                // The separator is the whole run of newlines, not just
                // the first two.
                let next = tail.trim_start_matches('\n');
                // Advance by the line breaks inside the paragraph plus
                // the ones making up the separator.
                lineno += paragraph.matches('\n').count() + (tail.len() - next.len());
                rest = next;
                paragraph
            }
            // No further separator: the remainder is the last paragraph.
            None => std::mem::take(&mut rest),
        };

        Some((start, paragraph))
    })
}
| 63 | + |
| 64 | +// Xgettext command implementation |
| 65 | +pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> { |
| 66 | + let book_dir = get_book_dir(args); |
| 67 | + let book = MDBook::load(&book_dir)?; |
| 68 | + |
| 69 | + let mut catalog = Catalog::new(); |
| 70 | + |
| 71 | + for item in book.iter() { |
| 72 | + match item { |
| 73 | + mdbook::BookItem::Chapter(chapter) if !chapter.is_draft_chapter() => { |
| 74 | + for (lineno, paragraph) in extract_paragraphs(&chapter.content) { |
| 75 | + let source = &chapter |
| 76 | + .source_path |
| 77 | + .as_ref() |
| 78 | + .map(|path| format!("{}:{}", path.to_string_lossy(), lineno)) |
| 79 | + .unwrap_or_default(); |
| 80 | + catalog.add_message(Message::new_singular("", source, "", "", ¶graph, "")); |
| 81 | + } |
| 82 | + } |
| 83 | + mdbook::BookItem::PartTitle(part_title) => { |
| 84 | + // TODO: would it be better to process SUMMARY.md like |
| 85 | + // a normal chapter and split the text by paragraph? |
| 86 | + catalog.add_message(Message::new_singular( |
| 87 | + "", |
| 88 | + "SUMMARY.md", |
| 89 | + "", |
| 90 | + "", |
| 91 | + &part_title, |
| 92 | + "", |
| 93 | + )); |
| 94 | + } |
| 95 | + _ => {} |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + let output_path = Path::new(args.value_of("output").unwrap_or("messages.pot")); |
| 100 | + polib::po_file::write(&catalog, output_path) |
| 101 | + .with_context(|| format!("Could not write {:?}", output_path))?; |
| 102 | + |
| 103 | + Ok(()) |
| 104 | +} |
| 105 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the output of `extract_paragraphs` into a `Vec` so that it
    /// can be compared against the expected paragraphs.
    fn paragraphs(text: &str) -> Vec<(usize, &str)> {
        extract_paragraphs(text).collect()
    }

    #[test]
    fn test_extract_paragraphs_empty() {
        assert!(paragraphs("").is_empty());
    }

    #[test]
    fn test_extract_paragraphs_single_line() {
        let expected = vec![(1, "This is a paragraph.")];
        assert_eq!(paragraphs("This is a paragraph."), expected);
    }

    #[test]
    fn test_extract_paragraphs_simple() {
        let expected = vec![(1, "This is\na paragraph."), (4, "Next paragraph.")];
        assert_eq!(
            paragraphs("This is\na paragraph.\n\nNext paragraph."),
            expected
        );
    }

    #[test]
    fn test_extract_paragraphs_leading_newlines() {
        let expected = vec![(4, "This is\na paragraph.")];
        assert_eq!(paragraphs("\n\n\nThis is\na paragraph."), expected);
    }
}
0 commit comments