vg
tools for working with variation graphs
Public Types | Public Member Functions | Public Attributes | Protected Member Functions | Static Protected Member Functions | Protected Attributes | List of all members
vg::Surjector Class Reference

#include <surjector.hpp>

Inheritance diagram for vg::Surjector:
vg::AlignerClient

Public Types

using path_chunk_t = pair< pair< string::const_iterator, string::const_iterator >, Path >
 a local type that represents a read interval matched to a portion of the alignment path More...
 

Public Member Functions

 Surjector (const PathPositionHandleGraph *graph)
 
Alignment surject (const Alignment &source, const unordered_set< path_handle_t > &paths, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
Alignment surject (const Alignment &source, const unordered_set< path_handle_t > &paths, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
multipath_alignment_t surject (const multipath_alignment_t &source, const unordered_set< path_handle_t > &paths, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
- Public Member Functions inherited from vg::AlignerClient
void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 Set all the aligner scoring parameters and create the stored aligner instances. More...
 
void set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
void set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 

Public Attributes

int64_t min_splice_length = 20
 the minimum length deletion that the spliced algorithm will interpret as a splice event More...
 
int64_t dominated_path_chunk_diff = 10
 
- Public Attributes inherited from vg::AlignerClient
bool adjust_alignments_for_base_quality = false
 

Protected Member Functions

void surject_internal (const Alignment *source_aln, const multipath_alignment_t *source_mp_aln, Alignment *aln_out, multipath_alignment_t *mp_aln_out, const unordered_set< path_handle_t > &paths, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out, bool allow_negative_scores, bool preserve_deletions) const
 
Alignment realigning_surject (const PathPositionHandleGraph *graph, const Alignment &source, const path_handle_t &path_handle, const vector< path_chunk_t > &path_chunks, pair< step_handle_t, step_handle_t > &path_range_out, bool allow_negative_scores, bool preserve_N_alignments=false, bool preserve_tail_indel_anchors=false) const
 
multipath_alignment_t spliced_surject (const PathPositionHandleGraph *path_position_graph, const string &src_sequence, const string &src_quality, const int32_t src_mapping_quality, const path_handle_t &path_handle, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections, pair< step_handle_t, step_handle_t > &path_range_out, bool allow_negative_scores, bool deletions_as_splices) const
 
unordered_map< path_handle_t, pair< vector< path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > extract_overlapping_paths (const PathPositionHandleGraph *graph, const Alignment &source, const unordered_set< path_handle_t > &surjection_paths) const
 get the chunks of the alignment path that follow the given reference paths More...
 
unordered_map< path_handle_t, pair< vector< path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > extract_overlapping_paths (const PathPositionHandleGraph *graph, const multipath_alignment_t &source, const unordered_set< path_handle_t > &surjection_paths, unordered_map< path_handle_t, vector< tuple< size_t, size_t, int32_t >>> &connections_out) const
 same semantics except for a multipath alignment More...
 
void filter_redundant_path_chunks (vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 
pair< size_t, size_t > compute_path_interval (const PathPositionHandleGraph *graph, const Alignment &source, path_handle_t path_handle, const vector< path_chunk_t > &path_chunks) const
 compute the widest interval of path positions that the realigned sequence could align to More...
 
unordered_map< id_t, pair< id_t, bool > > extract_linearized_path_graph (const PathPositionHandleGraph *graph, MutableHandleGraph *into, path_handle_t path_handle, size_t first, size_t last) const
 make a linear graph that corresponds to a path interval, possibly duplicating nodes in case of cycles More...
 
void set_path_position (const PathPositionHandleGraph *graph, const pos_t &init_surj_pos, const pos_t &final_surj_pos, const step_handle_t &range_begin, const step_handle_t &range_end, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out) const
 use the graph position bounds and the path range bounds to assign a path position to a surjected read More...
 
vector< vector< size_t > > reverse_adjacencies (const vector< vector< size_t >> &adj) const
 reverses an adjacency list More...
 
vector< size_t > connected_components (const vector< vector< size_t >> &adj, const vector< vector< size_t >> &rev_adj, size_t *num_comps_out) const
 
vector< vector< size_t > > transitive_reduction (const vector< vector< size_t >> &adj) const
 returns the transitive reduction of a topologically sorted DAG's adjacency list More...
 
vector< vector< size_t > > remove_dominated_chunks (const string &src_sequence, const vector< vector< size_t >> &adj, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 eliminate any path chunks that have the exact same colinearities as another but are much shorter More...
 
vector< pair< vector< size_t >, vector< size_t > > > find_constriction_bicliques (const vector< vector< size_t >> &adj, const string &src_sequence, const vector< path_chunk_t > &path_chunks, const vector< tuple< size_t, size_t, int32_t >> &connections) const
 
void prune_unconnectable (vector< vector< size_t >> &adj, vector< vector< tuple< size_t, int32_t, bool >>> &splice_adj, vector< size_t > &component, vector< vector< size_t >> &comp_groups, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks) const
 
- Protected Member Functions inherited from vg::AlignerClient
 AlignerClient (double gc_content_estimate=vg::default_gc_content)
 
const GSSWAligner * get_aligner (bool have_qualities=true) const
 
const QualAdjAligner * get_qual_adj_aligner () const
 
const Aligner * get_regular_aligner () const
 

Static Protected Member Functions

static Alignment make_null_alignment (const Alignment &source)
 make a sentinel meant to indicate an unmapped read More...
 
static multipath_alignment_t make_null_mp_alignment (const multipath_alignment_t &source)
 

Protected Attributes

const PathPositionHandleGraph * graph = nullptr
 the graph we're surjecting onto More...
 

Additional Inherited Members

- Static Public Member Functions inherited from vg::AlignerClient
static int8_t * parse_matrix (std::istream &matrix_stream)
 Allocates an array to hold a 4x4 substitution matrix and returns it. More...
 

Member Typedef Documentation

◆ path_chunk_t

using vg::Surjector::path_chunk_t = pair<pair<string::const_iterator, string::const_iterator>, Path>

a local type that represents a read interval matched to a portion of the alignment path

Constructor & Destructor Documentation

◆ Surjector()

vg::Surjector::Surjector ( const PathPositionHandleGraph *  graph)

Member Function Documentation

◆ compute_path_interval()

pair< size_t, size_t > vg::Surjector::compute_path_interval ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
path_handle_t  path_handle,
const vector< path_chunk_t > &  path_chunks 
) const
protected

compute the widest interval of path positions that the realigned sequence could align to

◆ connected_components()

vector< size_t > vg::Surjector::connected_components ( const vector< vector< size_t >> &  adj,
const vector< vector< size_t >> &  rev_adj,
size_t *  num_comps_out 
) const
protected

returns a vector assigning each node to a connected component; requires both the forward and reverse adjacency lists. Optionally also returns the total number of components

◆ extract_linearized_path_graph()

unordered_map< id_t, pair< id_t, bool > > vg::Surjector::extract_linearized_path_graph ( const PathPositionHandleGraph *  graph,
MutableHandleGraph *  into,
path_handle_t  path_handle,
size_t  first,
size_t  last 
) const
protected

make a linear graph that corresponds to a path interval, possibly duplicating nodes in case of cycles

◆ extract_overlapping_paths() [1/2]

unordered_map< path_handle_t, pair< vector< Surjector::path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > vg::Surjector::extract_overlapping_paths ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
const unordered_set< path_handle_t > &  surjection_paths 
) const
protected

get the chunks of the alignment path that follow the given reference paths

◆ extract_overlapping_paths() [2/2]

unordered_map< path_handle_t, pair< vector< Surjector::path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > vg::Surjector::extract_overlapping_paths ( const PathPositionHandleGraph *  graph,
const multipath_alignment_t &  source,
const unordered_set< path_handle_t > &  surjection_paths,
unordered_map< path_handle_t, vector< tuple< size_t, size_t, int32_t >>> &  connections_out 
) const
protected

same semantics except for a multipath alignment

◆ filter_redundant_path_chunks()

void vg::Surjector::filter_redundant_path_chunks ( vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

remove any path chunks and corresponding ref chunks that are identical to a longer path chunk over the region where they overlap

◆ find_constriction_bicliques()

vector< pair< vector< size_t >, vector< size_t > > > vg::Surjector::find_constriction_bicliques ( const vector< vector< size_t >> &  adj,
const string &  src_sequence,
const vector< path_chunk_t > &  path_chunks,
const vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

returns all sets of chunks such that 1) all of the chunks in the left set abut all of the chunks in the right set on the read, 2) all source-to-sink paths in the connected component go through an edge between the left and right sides, 3) all of the chunks that do not have a connection between them are fully connected (i.e. form a biclique)

◆ make_null_alignment()

Alignment vg::Surjector::make_null_alignment ( const Alignment &  source)
static protected

make a sentinel meant to indicate an unmapped read

◆ make_null_mp_alignment()

multipath_alignment_t vg::Surjector::make_null_mp_alignment ( const multipath_alignment_t &  source)
static protected

◆ prune_unconnectable()

void vg::Surjector::prune_unconnectable ( vector< vector< size_t >> &  adj,
vector< vector< tuple< size_t, int32_t, bool >>> &  splice_adj,
vector< size_t > &  component,
vector< vector< size_t >> &  comp_groups,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks 
) const
protected

◆ realigning_surject()

Alignment vg::Surjector::realigning_surject ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
const path_handle_t &  path_handle,
const vector< path_chunk_t > &  path_chunks,
pair< step_handle_t, step_handle_t > &  path_range_out,
bool  allow_negative_scores,
bool  preserve_N_alignments = false,
bool  preserve_tail_indel_anchors = false 
) const
protected

◆ remove_dominated_chunks()

vector< vector< size_t > > vg::Surjector::remove_dominated_chunks ( const string &  src_sequence,
const vector< vector< size_t >> &  adj,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

eliminate any path chunks that have the exact same colinearities as another but are much shorter

◆ reverse_adjacencies()

vector< vector< size_t > > vg::Surjector::reverse_adjacencies ( const vector< vector< size_t >> &  adj) const
protected

reverses an adjacency list

◆ set_path_position()

void vg::Surjector::set_path_position ( const PathPositionHandleGraph *  graph,
const pos_t &  init_surj_pos,
const pos_t &  final_surj_pos,
const step_handle_t &  range_begin,
const step_handle_t &  range_end,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out 
) const
protected

use the graph position bounds and the path range bounds to assign a path position to a surjected read

◆ spliced_surject()

multipath_alignment_t vg::Surjector::spliced_surject ( const PathPositionHandleGraph *  path_position_graph,
const string &  src_sequence,
const string &  src_quality,
const int32_t  src_mapping_quality,
const path_handle_t &  path_handle,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections,
pair< step_handle_t, step_handle_t > &  path_range_out,
bool  allow_negative_scores,
bool  deletions_as_splices 
) const
protected

◆ surject() [1/3]

Alignment vg::Surjector::surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Extract the portions of an alignment that are on a chosen set of paths and try to realign the portions that are off of the chosen paths to the intervening path segments to obtain an alignment that is fully restricted to the paths.

Replaces the alignment's refpos with the path name, position, and strand the alignment has been surjected to.

Optionally either allow softclips so that the alignment has a nonnegative score on the path or require the full-length alignment, possibly creating a negative score.

Also optionally leaves deletions against the reference path in the final alignment (useful for splicing).

◆ surject() [2/3]

Alignment vg::Surjector::surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Extract the portions of an alignment that are on a chosen set of paths and try to realign the portions that are off of the chosen paths to the intervening path segments to obtain an alignment that is fully restricted to the paths.

Also returns the path name, position, and strand of the new alignment.

Optionally either allow softclips so that the alignment has a nonnegative score on the path or require the full-length alignment, possibly creating a negative score.

Also optionally leaves deletions against the reference path in the final alignment (useful for splicing).

◆ surject() [3/3]

multipath_alignment_t vg::Surjector::surject ( const multipath_alignment_t &  source,
const unordered_set< path_handle_t > &  paths,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Same semantics as with alignments except that connections are always preserved as splices. The output consists of a multipath alignment with a single path, separated by splices (either from large deletions or from connections)

◆ surject_internal()

void vg::Surjector::surject_internal ( const Alignment *  source_aln,
const multipath_alignment_t *  source_mp_aln,
Alignment *  aln_out,
multipath_alignment_t *  mp_aln_out,
const unordered_set< path_handle_t > &  paths,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out,
bool  allow_negative_scores,
bool  preserve_deletions 
) const
protected

◆ transitive_reduction()

vector< vector< size_t > > vg::Surjector::transitive_reduction ( const vector< vector< size_t >> &  adj) const
protected

returns the transitive reduction of a topologically sorted DAG's adjacency list

Member Data Documentation

◆ dominated_path_chunk_diff

int64_t vg::Surjector::dominated_path_chunk_diff = 10

◆ graph

const PathPositionHandleGraph* vg::Surjector::graph = nullptr
protected

the graph we're surjecting onto

◆ min_splice_length

int64_t vg::Surjector::min_splice_length = 20

the minimum length deletion that the spliced algorithm will interpret as a splice event


The documentation for this class was generated from the following files: