diff --git a/README.md b/README.md index cf3087e..a46dfa2 100644 --- a/README.md +++ b/README.md @@ -102,21 +102,30 @@ tsalign align -p pair.fa -o alignment.toml -c sample_tsa_config --memory-limit 1 tsalign align -p pair.fa -o alignment-no-ts.toml -c sample_tsa_config --memory-limit 1000000000 --no-ts ``` -Then, create the visualisation as follows: +Then, create the visualisation as follows (`-t` is for plain text): ```bash -tsalign show -i alignment.toml -n alignment-no-ts.toml +tsalign show -i alignment.toml -n alignment-no-ts.toml -t ``` -If you want more than just a simple command-line visualisation, use the parameter `-s visualisation.svg` to render the visualisation as SVG. +Note that `-t` supports only simple cases of template switches, and may crash for more complicated cases. +For a more detailed and robust visualisation, use the parameter `-s visualisation.svg` to render the visualisation as SVG. Since SVGs are not always well supported, you can also use the switch `-p` to render the visualisation also as PNG. Note that the `-p` switch requires setting the `-s` parameter. +```bash +tsalign show -i alignment.toml -n alignment-no-ts.toml -ps visualisation.svg +``` + There are some switches to add more detail to the SVG and PNG visualisation. * `-a` adds arrows for the jumps of the alignments * `-c` renders more of the complements of the input sequences +```bash +tsalign show -i alignment.toml -n alignment-no-ts.toml -ps visualisation.svg -ac +``` + ### Setting the alignment range When searching for template switches, researchers are often interested in specific mutation hotspots that are only tens of characters long, while the 2-3-alignment of the template switch may also align outside of the mutation hotspot. diff --git a/lib_tsshow/src/plain_text/mutlipair_alignment_renderer.rs b/lib_tsshow/src/plain_text/mutlipair_alignment_renderer.rs index d24985b..2684f2a 100644 --- a/lib_tsshow/src/plain_text/mutlipair_alignment_renderer.rs +++ b/lib_tsshow/src/plain_text/mutlipair_alignment_renderer.rs @@ -212,6 +212,7 @@ impl do_lowercasing: bool, invert_alignment: bool, ) -> AlignmentRenderResult { + let alignment = alignment.into_iter(); let mut reference_gaps = Vec::new(); let (reference_sequence_name, mut translated_reference_sequence) = self @@ -223,6 +224,14 @@ impl translated_reference_sequence.len() ); trace!("translated_reference_sequence_offset: {rendered_sequence_offset}"); + trace!( + "alignment_length: ({}, {})", + alignment.size_hint().0, + alignment + .size_hint() + .1 + .map_or(String::from("unknown"), |len| len.to_string()) + ); let (query_sequence_name, mut translated_query_sequence) = self.sequences.remove_entry(query_sequence_name).unwrap(); @@ -233,14 +242,14 @@ impl let mut query_offset_column = None; let mut query_limit_column = None; - for alignment_type in alignment.into_iter().map(|alignment_type| { + for alignment_type in alignment.map(|alignment_type| { if invert_alignment { alignment_type.inverted() } else { alignment_type } }) { - //trace!("alignment_type: {alignment_type}"); + trace!("alignment_type: {alignment_type}"); while matches!( translated_reference_sequence @@ -248,7 +257,7 @@ impl .map(Character::kind), Some(CharacterKind::Blank) ) { - //trace!("Skipping blank"); + trace!("Skipping blank"); translated_query_sequence.push(Character::new_blank(blank_data_generator())); index += 1; } @@ -348,7 +357,11 @@ impl } } - assert!(index <= translated_reference_sequence.len()); + assert!( + index <= translated_reference_sequence.len(), + "index: {index}, translated_reference_sequence.len(): {}", + translated_reference_sequence.len(), + ); } assert!(extension.next().is_none()); diff --git a/lib_tsshow/src/svg.rs b/lib_tsshow/src/svg.rs index edeadad..66e1369 100644 --- a/lib_tsshow/src/svg.rs +++ b/lib_tsshow/src/svg.rs @@ -75,7 +75,7 @@ pub fn create_ts_svg( else { return Err(Error::AlignmentHasNoTarget); }; - debug!("Alignment: {alignment:?}"); + debug!("Alignment: {}", alignment.cigar()); let reference = &statistics.sequences.reference; let query = &statistics.sequences.query; diff --git a/lib_tsshow/src/ts_arrangement/inner.rs b/lib_tsshow/src/ts_arrangement/inner.rs index 9d7e390..0435f92 100644 --- a/lib_tsshow/src/ts_arrangement/inner.rs +++ b/lib_tsshow/src/ts_arrangement/inner.rs @@ -56,24 +56,43 @@ impl TsInnerArrangement { ); for ts in template_switches { - trace!("source_inner: {:?}", ts.inner); + trace!( + "source_inner: {:?}", + ts.inner + .iter() + .map(|c| format!("{}", c.source_column())) + .collect::>() + ); + trace!("inner_alignment: {:?}", ts.inner_alignment.cigar()); let (mut sp2_secondary, mut sp3_secondary) = match ts.secondary { TemplateSwitchSecondary::Reference => ( source_arrangement .try_reference_source_to_arrangement_column(ts.sp2_secondary) - .unwrap_or_else(|| source_arrangement.reference().len().into()), + .unwrap_or_else(|| { + trace!("SP2 is at reference end"); + source_arrangement.reference().len().into() + }), source_arrangement .try_reference_source_to_arrangement_column(ts.sp3_secondary) - .unwrap_or_else(|| source_arrangement.reference().len().into()), + .unwrap_or_else(|| { + trace!("SP3 is at reference end"); + source_arrangement.reference().len().into() + }), ), TemplateSwitchSecondary::Query => ( source_arrangement .try_query_source_to_arrangement_column(ts.sp2_secondary) - .unwrap_or_else(|| source_arrangement.query().len().into()), + .unwrap_or_else(|| { + trace!("SP2 is at query end"); + source_arrangement.query().len().into() + }), source_arrangement .try_query_source_to_arrangement_column(ts.sp3_secondary) - .unwrap_or_else(|| source_arrangement.query().len().into()), + .unwrap_or_else(|| { + trace!("SP3 is at query end"); + source_arrangement.query().len().into() + }), ), }; let forward = sp2_secondary < sp3_secondary; diff --git a/lib_tsshow/src/ts_arrangement/source.rs b/lib_tsshow/src/ts_arrangement/source.rs index 108f8ae..7ba22a0 100644 --- a/lib_tsshow/src/ts_arrangement/source.rs +++ b/lib_tsshow/src/ts_arrangement/source.rs @@ -6,7 +6,7 @@ use lib_tsalign::a_star_aligner::{ AlignmentType, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, }, }; -use log::trace; +use log::{debug, trace}; use tagged_vec::TaggedVec; use super::{ @@ -98,6 +98,12 @@ impl TsSourceArrangement { debug_assert_eq!(current_reference_index, current_query_index); while let Some(alignment_type) = alignment.next() { + trace!( + "Source alignment type: {alignment_type}; R/Q indices: {current_reference_index}/{current_query_index} {}/{}", + result.reference_arrangement_to_source_column(current_reference_index), + result.query_arrangement_to_source_column(current_query_index), + ); + match alignment_type { AlignmentType::PrimaryInsertion | AlignmentType::PrimaryFlankInsertion => { result.reference.insert( @@ -213,6 +219,7 @@ impl TsSourceArrangement { current_reference_index: &mut ArrangementColumn, current_query_index: &mut ArrangementColumn, ) -> TemplateSwitch { + debug!("Aligning template switch #{ts_index}"); let sp1_reference = self.reference_arrangement_to_arrangement_char_column(*current_reference_index); let sp1_query = self.query_arrangement_to_arrangement_char_column(*current_query_index); @@ -220,18 +227,29 @@ impl TsSourceArrangement { TemplateSwitchSecondary::Reference => { let source_current_reference_index = self.reference_arrangement_to_source_column(*current_reference_index); + let adjusted_source_current_reference_index = + source_current_reference_index + .checked_sub(self.count_reference_copy_chars_before_next_real_char( + *current_reference_index, + )) + .unwrap(); trace!( - "current_reference_index: {current_reference_index} -> {source_current_reference_index}" + "current_reference_index: {current_reference_index} -> {source_current_reference_index} -> {adjusted_source_current_reference_index}" ); - source_current_reference_index + adjusted_source_current_reference_index } TemplateSwitchSecondary::Query => { let source_current_query_index = self.query_arrangement_to_source_column(*current_query_index); + let adjusted_source_current_query_index = source_current_query_index + .checked_sub( + self.count_query_copy_chars_before_next_real_char(*current_query_index), + ) + .unwrap(); trace!( - "current_query_index: {current_query_index} -> {source_current_query_index}" + "current_query_index: {current_query_index} -> {source_current_query_index} -> {adjusted_source_current_query_index}" ); - source_current_query_index + adjusted_source_current_query_index } } + first_offset; trace!("first_offset: {first_offset}"); @@ -242,21 +260,22 @@ impl TsSourceArrangement { let mut inner_alignment = Alignment::new(); let anti_primary_gap = loop { - match alignment.next() { - Some(AlignmentType::TemplateSwitchExit { anti_primary_gap }) => { + let alignment_type = alignment.next().unwrap_or_else(|| unreachable!()); + trace!("Secondary alignment type: {alignment_type}"); + + match alignment_type { + AlignmentType::TemplateSwitchExit { anti_primary_gap } => { break anti_primary_gap; } - Some(alignment_type @ AlignmentType::SecondaryDeletion) => { + AlignmentType::SecondaryDeletion => { match ts_direction { TemplateSwitchDirection::Forward => sp3_secondary += 1, TemplateSwitchDirection::Reverse => sp3_secondary -= 1, } inner_alignment.push(alignment_type); } - Some( - alignment_type @ (AlignmentType::SecondarySubstitution - | AlignmentType::SecondaryMatch), - ) => { + + AlignmentType::SecondarySubstitution | AlignmentType::SecondaryMatch => { match ts_direction { TemplateSwitchDirection::Forward => sp3_secondary += 1, TemplateSwitchDirection::Reverse => sp3_secondary -= 1, @@ -264,11 +283,11 @@ impl TsSourceArrangement { primary_inner_length += 1; inner_alignment.push(alignment_type); } - Some(alignment_type @ AlignmentType::SecondaryInsertion) => { + AlignmentType::SecondaryInsertion => { primary_inner_length += 1; inner_alignment.push(alignment_type); } - Some(AlignmentType::SecondaryRoot) => { /* Do nothing */ } + AlignmentType::SecondaryRoot => { /* Do nothing */ } _ => unreachable!(), } }; @@ -690,6 +709,26 @@ impl TsSourceArrangement { .nth(column.primitive()) .unwrap() } + + fn count_reference_copy_chars_before_next_real_char(&self, offset: ArrangementColumn) -> usize { + Self::count_copy_chars_before_next_real_char(&self.reference, offset) + } + + fn count_query_copy_chars_before_next_real_char(&self, offset: ArrangementColumn) -> usize { + Self::count_copy_chars_before_next_real_char(&self.query, offset) + } + + fn count_copy_chars_before_next_real_char( + sequence: &TaggedVec, + offset: ArrangementColumn, + ) -> usize { + sequence + .iter_values() + .skip(offset.into()) + .take_while(|c| !c.is_source_char()) + .filter(|c| c.is_char() && c.is_copy()) + .count() + } } impl SourceChar { @@ -808,6 +847,7 @@ impl Char for SourceChar { matches!(self, Self::Blank) } + /// Returns true if this is a source char and not a copy of a source char. fn is_source_char(&self) -> bool { matches!( self, diff --git a/test_files/LINC00271_92.sh b/test_files/LINC00271_92.sh index aa863bc..b62b1ff 100755 --- a/test_files/LINC00271_92.sh +++ b/test_files/LINC00271_92.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash cargo build -cargo build --release if [ ! -f test_files/LINC00271_92.toml ]; then + cargo build --release target/release/tsalign align -p test_files/LINC00271_92.fa -o test_files/LINC00271_92.toml --alignment-method a-star-template-switch --skip-characters 'N-' --alphabet dna -c test_files/config/bench --ts-node-ord-strategy anti-diagonal --ts-min-length-strategy preprocess-price -k 0 --max-chaining-successors 0 --max-exact-cost-function-cost 0 --chaining-closed-list special --chaining-open-list linear-heap --rq-ranges R196..227Q196..202 target/release/tsalign align -p test_files/LINC00271_92.fa -o test_files/LINC00271_92.no_ts.toml --alignment-method a-star-template-switch --skip-characters 'N-' --alphabet dna -c test_files/config/bench --ts-node-ord-strategy anti-diagonal --ts-min-length-strategy preprocess-price -k 0 --max-chaining-successors 0 --max-exact-cost-function-cost 0 --chaining-closed-list special --chaining-open-list linear-heap --rq-ranges R196..227Q196..202 --no-ts fi -target/debug/tsalign show -i test_files/LINC00271_92.toml -n test_files/LINC00271_92.no_ts.toml -ps test_files/LINC00271_92.svg +target/debug/tsalign show -i test_files/LINC00271_92.toml -n test_files/LINC00271_92.no_ts.toml -ps test_files/LINC00271_92.svg --log-level trace diff --git a/test_files/twin_show_repeated_chars.fa b/test_files/twin_show_repeated_chars.fa new file mode 100644 index 0000000..a21b038 --- /dev/null +++ b/test_files/twin_show_repeated_chars.fa @@ -0,0 +1,14 @@ +>Reference +AAAAAAAAAAAA +TCTCGG +TTTTCCTTTT +TCTC +GGGGCCGGGG +CCCCCCCCCCCC +>Query +AAAAAGGAAAAA +TCTCGG + + + +CCCCCGGCCCCC \ No newline at end of file diff --git a/tsalign/src/show.rs b/tsalign/src/show.rs index 22bd191..52e3bb3 100644 --- a/tsalign/src/show.rs +++ b/tsalign/src/show.rs @@ -4,14 +4,14 @@ use std::{ path::PathBuf, }; -use anyhow::{Context, Result}; +use anyhow::{Context, Result, bail}; use clap::Parser; use lib_tsshow::{ plain_text::show_template_switches, svg::{SvgConfig, create_error_svg, create_ts_svg}, svg_to_png, }; -use log::{LevelFilter, info, warn}; +use log::{LevelFilter, error, info, warn}; use simplelog::{ColorChoice, TermLogger, TerminalMode}; #[derive(Parser)] @@ -40,6 +40,10 @@ pub struct Cli { #[clap(long, short = 'p')] png: bool, + /// Output the template switches in plain text format to stdout. + #[clap(long, short = 't')] + plain_text: bool, + /// Always render the SVG (and optionally PNG), even if an error occurs. In the case of an error, the SVG simply contains the error message. #[clap(long, short = 'r')] render_always: bool, @@ -62,6 +66,11 @@ pub fn cli(cli: Cli) -> Result<()> { ) .unwrap(); + if cli.svg.is_none() && !cli.plain_text { + error!("Neither --svg nor --plain-text is set. Nothing to do."); + bail!("Neither --svg nor --plain-text is set. Nothing to do."); + } + info!("Reading tsalign output toml file {:?}", cli.input); let mut buffer = String::new(); File::open(cli.input) @@ -81,7 +90,9 @@ pub fn cli(cli: Cli) -> Result<()> { toml::from_str(&buffer).unwrap_or_else(|error| panic!("Error parsing input file: {error}")) }); - show_template_switches(stdout(), &result, &no_ts_result)?; + if cli.plain_text { + show_template_switches(stdout(), &result, &no_ts_result)?; + } if let Some(svg_out_path) = cli.svg.as_ref() { let mut svg = Vec::new();