Skip to content

Commit b6975b3

Browse files
committed
Refactor BAM cache path helper
1 parent 426a917 commit b6975b3

File tree

5 files changed

+187
-62
lines changed

5 files changed

+187
-62
lines changed

docs/coverm-contig.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,12 @@ <h1>OUTPUT</h1>
401401
</dl>
402402
<!-- -->
403403
<dl>
404+
<dt><strong>--bam-file-cache-names</strong> <em>FILE</em>...</dt>
405+
<dd><p>Output BAM files generated during alignment to these files. The order of files should correspond to: single-ended reads (-s), -1/-2, --coupled, --interleaved. [default: not used]</p>
406+
</dd>
407+
</dl>
408+
<!-- -->
409+
<dl>
404410
<dt><strong>--discard-unmapped</strong></dt>
405411
<dd><p>Exclude unmapped reads from cached BAM files. [default: not set]</p>
406412
</dd>

docs/coverm-genome.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,12 @@ <h1>OUTPUT</h1>
582582
</dl>
583583
<!-- -->
584584
<dl>
585+
<dt><strong>--bam-file-cache-names</strong> <em>FILE</em>...</dt>
586+
<dd><p>Output BAM files generated during alignment to these files. The order of files should correspond to: single-ended reads (-s), -1/-2, --coupled, --interleaved. [default: not used]</p>
587+
</dd>
588+
</dl>
589+
<!-- -->
590+
<dl>
585591
<dt><strong>--discard-unmapped</strong></dt>
586592
<dd><p>Exclude unmapped reads from cached BAM files. [default: not set]</p>
587593
</dd>

src/bin/coverm.rs

Lines changed: 96 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,24 +1354,7 @@ where
13541354
Some(prev) => Some(format!("{prev}|{reference_name}")),
13551355
None => Some(reference_name),
13561356
};
1357-
let bam_file_cache = |naming_readset| -> Option<String> {
1358-
let bam_file_cache_path;
1359-
match m.contains_id("bam-file-cache-directory") {
1360-
false => None,
1361-
true => {
1362-
bam_file_cache_path = generate_cached_bam_file_name(
1363-
m.get_one::<String>("bam-file-cache-directory").unwrap(),
1364-
match reference_tempfile {
1365-
Some(_) => CONCATENATED_REFERENCE_CACHE_STEM,
1366-
None => reference,
1367-
},
1368-
naming_readset,
1369-
);
1370-
info!("Caching BAM file to {bam_file_cache_path}");
1371-
Some(bam_file_cache_path)
1372-
}
1373-
}
1374-
};
1357+
let mut bam_file_cache = build_bam_file_cache_fn(m, reference_tempfile, reference);
13751358

13761359
for p in reference_wise_params {
13771360
bam_readers.push(
@@ -1382,7 +1365,9 @@ where
13821365
p.read2,
13831366
p.read_format.clone(),
13841367
p.threads,
1385-
bam_file_cache(p.read1).as_ref().map(String::as_ref),
1368+
bam_file_cache(p.read1, p.read2)
1369+
.as_ref()
1370+
.map(String::as_ref),
13861371
discard_unmapped,
13871372
p.mapping_options,
13881373
),
@@ -1432,24 +1417,7 @@ fn get_streamed_bam_readers(
14321417
let index = setup_mapping_index(&reference_wise_params, m, mapping_program);
14331418

14341419
let reference = reference_wise_params.reference;
1435-
let bam_file_cache = |naming_readset| -> Option<String> {
1436-
let bam_file_cache_path;
1437-
match m.contains_id("bam-file-cache-directory") {
1438-
false => None,
1439-
true => {
1440-
bam_file_cache_path = generate_cached_bam_file_name(
1441-
m.get_one::<String>("bam-file-cache-directory").unwrap(),
1442-
match reference_tempfile {
1443-
Some(_) => CONCATENATED_REFERENCE_CACHE_STEM,
1444-
None => reference,
1445-
},
1446-
naming_readset,
1447-
);
1448-
info!("Caching BAM file to {bam_file_cache_path}");
1449-
Some(bam_file_cache_path)
1450-
}
1451-
}
1452-
};
1420+
let mut bam_file_cache = build_bam_file_cache_fn(m, reference_tempfile, reference);
14531421

14541422
for p in reference_wise_params {
14551423
bam_readers.push(
@@ -1460,7 +1428,9 @@ fn get_streamed_bam_readers(
14601428
p.read2,
14611429
p.read_format.clone(),
14621430
p.threads,
1463-
bam_file_cache(p.read1).as_ref().map(String::as_ref),
1431+
bam_file_cache(p.read1, p.read2)
1432+
.as_ref()
1433+
.map(String::as_ref),
14641434
discard_unmapped,
14651435
p.mapping_options,
14661436
reference_tempfile.is_none(),
@@ -1580,6 +1550,90 @@ fn setup_bam_cache_directory(cache_directory: &str) {
15801550
}
15811551
}
15821552

1553+
fn build_cache_name_iter(m: &clap::ArgMatches) -> Option<std::vec::IntoIter<String>> {
1554+
if m.contains_id("bam-file-cache-names") {
1555+
let names: Vec<String> = m
1556+
.get_many::<String>("bam-file-cache-names")
1557+
.unwrap()
1558+
.map(|s| s.to_string())
1559+
.collect();
1560+
1561+
let single_count = m.get_many::<String>("single").map(|v| v.len()).unwrap_or(0);
1562+
let read1_count = m.get_many::<String>("read1").map(|v| v.len()).unwrap_or(0);
1563+
let coupled_count = m
1564+
.get_many::<String>("coupled")
1565+
.map(|v| v.len() / 2)
1566+
.unwrap_or(0);
1567+
let interleaved_count = m
1568+
.get_many::<String>("interleaved")
1569+
.map(|v| v.len())
1570+
.unwrap_or(0);
1571+
let expected = single_count + read1_count + coupled_count + interleaved_count;
1572+
if names.len() != expected {
1573+
error!(
1574+
"--bam-file-cache-names specified {names_len} names but {expected} read sets were provided",
1575+
names_len = names.len()
1576+
);
1577+
process::exit(1);
1578+
}
1579+
1580+
let mut idx = 0;
1581+
let single_names = names[idx..idx + single_count].to_vec();
1582+
idx += single_count;
1583+
let read1_names = names[idx..idx + read1_count].to_vec();
1584+
idx += read1_count;
1585+
let coupled_names = names[idx..idx + coupled_count].to_vec();
1586+
idx += coupled_count;
1587+
let interleaved_names = names[idx..idx + interleaved_count].to_vec();
1588+
1589+
let mut iter_order = Vec::new();
1590+
iter_order.extend(read1_names);
1591+
iter_order.extend(coupled_names);
1592+
iter_order.extend(interleaved_names);
1593+
iter_order.extend(single_names);
1594+
Some(iter_order.into_iter())
1595+
} else {
1596+
None
1597+
}
1598+
}
1599+
1600+
fn build_bam_file_cache_fn<'a>(
1601+
m: &'a clap::ArgMatches,
1602+
reference_tempfile: &'a Option<NamedTempFile>,
1603+
reference: &'a str,
1604+
) -> impl FnMut(&str, Option<&str>) -> Option<String> + 'a {
1605+
let mut bam_cache_name_iter = build_cache_name_iter(m);
1606+
move |read1: &str, read2: Option<&str>| -> Option<String> {
1607+
if let Some(iter) = bam_cache_name_iter.as_mut() {
1608+
let name = iter.next().unwrap_or_else(|| {
1609+
error!("Not enough BAM file cache names specified");
1610+
process::exit(1);
1611+
});
1612+
match read2 {
1613+
Some(r2) => info!("Caching BAM file to {name} for readset {read1} {r2}"),
1614+
None => info!("Caching BAM file to {name} for readset {read1}"),
1615+
}
1616+
Some(name)
1617+
} else if m.contains_id("bam-file-cache-directory") {
1618+
let path = generate_cached_bam_file_name(
1619+
m.get_one::<String>("bam-file-cache-directory").unwrap(),
1620+
match reference_tempfile {
1621+
Some(_) => CONCATENATED_REFERENCE_CACHE_STEM,
1622+
None => reference,
1623+
},
1624+
read1,
1625+
);
1626+
match read2 {
1627+
Some(r2) => info!("Caching BAM file to {path} for readset {read1} {r2}"),
1628+
None => info!("Caching BAM file to {path} for readset {read1}"),
1629+
}
1630+
Some(path)
1631+
} else {
1632+
None
1633+
}
1634+
}
1635+
}
1636+
15831637
fn get_streamed_filtered_bam_readers(
15841638
m: &clap::ArgMatches,
15851639
mapping_program: MappingProgram,
@@ -1599,24 +1653,7 @@ fn get_streamed_filtered_bam_readers(
15991653
let index = setup_mapping_index(&reference_wise_params, m, mapping_program);
16001654

16011655
let reference = reference_wise_params.reference;
1602-
let bam_file_cache = |naming_readset| -> Option<String> {
1603-
let bam_file_cache_path;
1604-
match m.contains_id("bam-file-cache-directory") {
1605-
false => None,
1606-
true => {
1607-
bam_file_cache_path = generate_cached_bam_file_name(
1608-
m.get_one::<String>("bam-file-cache-directory").unwrap(),
1609-
match reference_tempfile {
1610-
Some(_) => CONCATENATED_REFERENCE_CACHE_STEM,
1611-
None => reference,
1612-
},
1613-
naming_readset,
1614-
);
1615-
info!("Caching BAM file to {bam_file_cache_path}");
1616-
Some(bam_file_cache_path)
1617-
}
1618-
}
1619-
};
1656+
let mut bam_file_cache = build_bam_file_cache_fn(m, reference_tempfile, reference);
16201657

16211658
for p in reference_wise_params {
16221659
bam_readers.push(
@@ -1627,7 +1664,9 @@ fn get_streamed_filtered_bam_readers(
16271664
p.read2,
16281665
p.read_format.clone(),
16291666
p.threads,
1630-
bam_file_cache(p.read1).as_ref().map(String::as_ref),
1667+
bam_file_cache(p.read1, p.read2)
1668+
.as_ref()
1669+
.map(String::as_ref),
16311670
filter_params.flag_filters.clone(),
16321671
filter_params.min_aligned_length_single,
16331672
filter_params.min_percent_identity_single,

src/cli.rs

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,13 @@ pub fn contig_full_help() -> Manual {
605605
[default: not used]",
606606
),
607607
)
608+
.option(
609+
Opt::new("FILE")
610+
.long("--bam-file-cache-names")
611+
.help(
612+
"Output BAM files generated during alignment to these files. The order of files should correspond to: single-ended reads (-s), -1/-2, --coupled, --interleaved. [default: not used]",
613+
),
614+
)
608615
.flag(
609616
Flag::new()
610617
.long("--discard-unmapped")
@@ -847,6 +854,13 @@ pub fn genome_full_help() -> Manual {
847854
[default: not used]",
848855
),
849856
)
857+
.option(
858+
Opt::new("FILE")
859+
.long("--bam-file-cache-names")
860+
.help(
861+
"Output BAM files generated during alignment to these files. The order of files should correspond to: single-ended reads (-s), -1/-2, --coupled, --interleaved. [default: not used]",
862+
),
863+
)
850864
.flag(
851865
Flag::new()
852866
.long("--discard-unmapped")
@@ -1163,7 +1177,20 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
11631177
.arg(
11641178
Arg::new("bam-file-cache-directory")
11651179
.long("bam-file-cache-directory")
1166-
.conflicts_with("bam-files"),
1180+
.conflicts_with("bam-files")
1181+
.conflicts_with("bam-file-cache-names"),
1182+
)
1183+
.arg(
1184+
Arg::new("bam-file-cache-names")
1185+
.long("bam-file-cache-names")
1186+
.action(clap::ArgAction::Append)
1187+
.num_args(1..)
1188+
.conflicts_with("bam-files")
1189+
.conflicts_with("bam-file-cache-directory"),
1190+
)
1191+
.group(
1192+
ArgGroup::new("bam-file-cache")
1193+
.args(["bam-file-cache-directory", "bam-file-cache-names"]),
11671194
)
11681195
.arg(
11691196
Arg::new("threads")
@@ -1215,7 +1242,7 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
12151242
.arg(
12161243
Arg::new("discard-unmapped")
12171244
.long("discard-unmapped")
1218-
.requires("bam-file-cache-directory")
1245+
.requires("bam-file-cache")
12191246
.action(clap::ArgAction::SetTrue),
12201247
)
12211248
.arg(
@@ -1684,7 +1711,20 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
16841711
.arg(
16851712
Arg::new("bam-file-cache-directory")
16861713
.long("bam-file-cache-directory")
1687-
.conflicts_with("bam-files"),
1714+
.conflicts_with("bam-files")
1715+
.conflicts_with("bam-file-cache-names"),
1716+
)
1717+
.arg(
1718+
Arg::new("bam-file-cache-names")
1719+
.long("bam-file-cache-names")
1720+
.action(clap::ArgAction::Append)
1721+
.num_args(1..)
1722+
.conflicts_with("bam-files")
1723+
.conflicts_with("bam-file-cache-directory"),
1724+
)
1725+
.group(
1726+
ArgGroup::new("bam-file-cache")
1727+
.args(["bam-file-cache-directory", "bam-file-cache-names"]),
16881728
)
16891729
.arg(
16901730
Arg::new("threads")
@@ -1735,7 +1775,7 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
17351775
.arg(
17361776
Arg::new("discard-unmapped")
17371777
.long("discard-unmapped")
1738-
.requires("bam-file-cache-directory")
1778+
.requires("bam-file-cache")
17391779
.action(clap::ArgAction::SetTrue),
17401780
)
17411781
.arg(

tests/test_cmdline.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,12 @@ mod tests {
428428

429429
#[test]
430430
fn test_unwriteable_cache_bam_files() {
431+
let td = tempfile::TempDir::new().unwrap();
432+
let dir = td.path().to_path_buf();
433+
let mut perms = std::fs::metadata(&dir).unwrap().permissions();
434+
perms.set_readonly(true);
435+
std::fs::set_permissions(&dir, perms).unwrap();
436+
431437
Assert::main_binary()
432438
.with_args(&[
433439
"contig",
@@ -437,10 +443,38 @@ mod tests {
437443
"--reference",
438444
"tests/data/7seqs.fna",
439445
"--bam-file-cache-directory",
440-
"/",
446+
dir.to_str().unwrap(),
441447
])
442448
.fails()
443449
.unwrap();
450+
451+
let mut perms = std::fs::metadata(&dir).unwrap().permissions();
452+
perms.set_readonly(false);
453+
std::fs::set_permissions(&dir, perms).unwrap();
454+
}
455+
456+
#[test]
fn test_cache_bam_files_names() {
    // Ask for the cached BAM to be written to an explicit file name inside a
    // fresh temporary directory.
    let cache_dir = tempfile::TempDir::new().unwrap();
    let cached_bam = cache_dir.path().join("cached.bam");
    let cached_bam_arg = cached_bam.to_str().unwrap();

    Assert::main_binary()
        .with_args(&[
            "contig",
            "--coupled",
            "tests/data/reads_for_seq1_and_seq2.1.fq.gz",
            "tests/data/reads_for_seq1_and_seq2.2.fq.gz",
            "--output-format",
            "sparse",
            "--reference",
            "tests/data/7seqs.fna",
            "-p",
            "minimap2-sr",
            "--bam-file-cache-names",
            cached_bam_arg,
        ])
        .succeeds()
        .unwrap();

    // The run must have produced the BAM file at the requested path.
    assert!(cached_bam.is_file());
}
445479

446480
#[test]

0 commit comments

Comments
 (0)