seismicrna package
Subpackages
- seismicrna.align package
- Subpackages
- Submodules
FastqUnit
FastqUnit.BOWTIE2_FLAGS
FastqUnit.KEY_DINTER
FastqUnit.KEY_DMATED
FastqUnit.KEY_DSINGLE
FastqUnit.KEY_INTER
FastqUnit.KEY_MATE1
FastqUnit.KEY_MATE2
FastqUnit.KEY_MATED
FastqUnit.KEY_SINGLE
FastqUnit.MAX_PHRED_ENC
FastqUnit.bowtie2_inputs
FastqUnit.fields()
FastqUnit.from_paths()
FastqUnit.get_sample_ref_exts()
FastqUnit.kind
FastqUnit.n_reads
FastqUnit.parent
FastqUnit.phred_arg
FastqUnit.seg_types
FastqUnit.to_new()
MissingFastqMate
MissingFastqMate1
MissingFastqMate2
count_fastq_reads()
fastq_gz()
format_phred_arg()
get_args_count_fastq_reads()
parse_stdout_count_fastq_reads()
run()
AlignRefReport
AlignReport
AlignSampleReport
run()
split_xam_file()
align_samples()
calc_flags_sep_strands()
check_fqs_xams()
extract_reference()
format_ref_reverse()
fq_pipeline()
fqs_pipeline()
list_alignments()
list_fqs_xams()
merge_nondemult_fqs()
separate_strands()
split_references()
write_tmp_ref_files()
bowtie2_build_cmd()
bowtie2_cmd()
export_cmd()
fastp_cmd()
flags_cmd()
flags_cmds()
get_bowtie2_index_paths()
parse_bowtie2()
realign_cmd()
xamgen_cmd()
- seismicrna.cleanfa package
- seismicrna.cluster package
- Subpackages
- Submodules
ClusterMutsBatch
ClusterReadBatch
ClusterDataset
ClusterMutsDataset
ClusterReadDataset
JoinClusterMutsDataset
get_clust_params()
EMRun
EMRunsK
assign_clusterings()
calc_mean_arcsine_distance_clusters()
calc_mean_pearson_clusters()
find_best_k()
get_common_k()
sort_runs()
ClusterBatchIO
ClusterBatchWriter
ClusterIO
bootstrap_jackpot_scores()
calc_jackpot_quotient()
calc_jackpot_score()
calc_jackpot_score_ci()
calc_semi_g_anomaly()
linearize_ends_matrix()
sim_obs_exp()
run()
calc_marginal()
calc_marginal_resps()
- Cluster – Names Module
assemble_log_obs_exp()
format_exp_count_col()
graph_log_obs_exp()
table_log_obs_exp()
write_obs_exp_counts()
get_table_path()
write_mus()
write_pis()
write_single_run_table()
ClusterReport
JoinClusterReport
graph_attr()
graph_attrs()
tabulate()
tabulate_attr()
write_summaries()
write_table()
ClusterDatasetTabulator
UniqReads
UniqReads.from_dataset()
UniqReads.from_dataset_contig()
UniqReads.get_cov_matrix()
UniqReads.get_mut_matrix()
UniqReads.log_obs
UniqReads.num_batches
UniqReads.num_nonuniq
UniqReads.num_obs
UniqReads.num_uniq
UniqReads.read_end3s_zero
UniqReads.read_end5s_zero
UniqReads.ref
UniqReads.seg_end3s_zero
UniqReads.seg_end5s_zero
UniqReads.uniq_names
get_uniq_reads()
cluster()
run_k()
run_ks()
- seismicrna.core package
- Subpackages
- seismicrna.core.arg package
- seismicrna.core.batch package
- seismicrna.core.extern package
- seismicrna.core.io package
- seismicrna.core.mu package
- seismicrna.core.ngs package
- seismicrna.core.rel package
- seismicrna.core.rna package
- seismicrna.core.seq package
- seismicrna.core.table package
- seismicrna.core.tests package
- Submodules
calc_inverse()
check_naturals()
ensure_order()
ensure_same_length()
find_dims()
get_length()
list_naturals()
locate_elements()
sanitize_values()
triangular()
Dataset
FailedToLoadDatasetError
LoadFunction
LoadedDataset
MergedDataset
MergedRegionDataset
MergedUnbiasDataset
MissingBatchError
MissingBatchTypeError
MultistepDataset
MultistepDataset.data_dirs
MultistepDataset.get_batch()
MultistepDataset.get_dataset1_load_func()
MultistepDataset.get_dataset1_report_file()
MultistepDataset.get_dataset2_load_func()
MultistepDataset.get_dataset2_type()
MultistepDataset.get_report_type()
MultistepDataset.load_dataset1()
MultistepDataset.load_dataset2()
MultistepDataset.num_batches
MultistepDataset.refseq
MultistepDataset.timestamp
MutsDataset
RegionDataset
ReversedTimeStampError
TallDataset
UnbiasDataset
WideDataset
WideMutsDataset
load_datasets()
IncompatibleValuesError
InconsistentValueError
OutOfBoundsError
ClustHeader
Header
RelClustHeader
RelHeader
deduplicate_rels()
format_clust_name()
format_clust_names()
list_clusts()
list_k_clusts()
list_ks_clusts()
make_header()
parse_header()
validate_k_clust()
validate_ks()
JoinMutsDataset
JoinReport
AnsiCode
ConsoleStream
FileStream
Filterer
Formatter
Level
Logger
LoggerConfig
Message
Stream
erase_config()
exc_info()
format_console_color()
format_console_plain()
format_logfile()
get_config()
log_exceptions()
restore_config()
set_config()
Field
Path
PathError
PathTypeError
PathValueError
Segment
WrongFileExtensionError
build()
builddir()
buildpar()
cast_path()
check_file_extension()
create_path_type()
deduplicate()
deduplicated()
fill_whitespace()
find_files()
find_files_chain()
get_fields_in_seg_types()
get_seismicrna_project_dir()
get_seismicrna_source_dir()
mkdir_if_needed()
parse()
parse_top_separate()
path_matches()
randdir()
rmdir_if_needed()
sanitize()
symlink_if_needed()
transpath()
transpaths()
validate_int()
validate_str()
validate_top()
stochastic_round()
BatchedRefseqReport
BatchedReport
Field
InvalidReportFieldKeyError
InvalidReportFieldTitleError
MissingFieldWithNoDefaultError
OptionField
RefseqReport
Report
ReportDoesNotHaveFieldError
ReportFieldAttributeError
ReportFieldError
ReportFieldKeyError
ReportFieldTypeError
ReportFieldValueError
calc_dt_minutes()
calc_taken()
default_key()
field_keys()
field_titles()
fields()
get_oconv_dict()
get_oconv_dict_list()
get_oconv_float()
get_oconv_list()
iconv_array_int()
iconv_datetime()
iconv_dict_str_dict_int_dict_int_int()
iconv_dict_str_int()
iconv_int_keys()
key_to_title()
lookup_key()
lookup_title()
oconv_datetime()
log_command()
run_func()
calc_beta_mv()
calc_beta_params()
calc_dirichlet_mv()
calc_dirichlet_params()
Task
as_list_of_tuples()
calc_pool_size()
dispatch()
get_release_working_dirs()
release_to_out()
with_tmp_dir()
fit_uint_size()
fit_uint_type()
get_byte_dtype()
get_dtype()
get_max_uint()
get_max_value()
get_uint_dtype()
get_uint_size()
get_uint_type()
calc_n_reads_per_pos()
calc_p_clust()
calc_p_clust_given_ends_noclose()
calc_p_clust_given_noclose()
calc_p_ends()
calc_p_ends_given_clust_noclose()
calc_p_ends_given_noclose()
calc_p_ends_observed()
calc_p_mut_given_span()
calc_p_mut_given_span_noclose()
calc_p_noclose()
calc_p_noclose_given_clust()
calc_p_noclose_given_ends()
calc_p_noclose_given_ends_auto()
calc_p_nomut_window()
calc_params()
calc_params_observed()
calc_rectangular_sum()
triu_allclose()
triu_dot()
triu_log()
triu_sum()
format_version()
parse_version()
need_write()
write_mode()
- Subpackages
- seismicrna.demult package
- Submodules
Sequence_Obj
append_files()
check_all_done()
check_done()
create_report()
demultiplex_run()
finds_multigrepped_reads()
grep_both_fastq()
make_dict_from_fasta()
make_sequence_objects_from_csv()
makes_dict_from_fastq()
parallel_grepping()
regular_grepping()
resolve_or_analyze_multigrepped_reads()
reverse_compliment()
run_multi_greps()
run_seqkit_grep()
run_seqkit_grep_function()
super_fastq
run_dm()
- Submodules
- seismicrna.draw package
- Submodules
ColorBlock
JinjaData
RNArtistRun
RNArtistRun.best_struct
RNArtistRun.color_dict
RNArtistRun.edited_numbers
RNArtistRun.get_ct_file()
RNArtistRun.get_db_file()
RNArtistRun.get_script_file()
RNArtistRun.get_svg_file()
RNArtistRun.get_varna_color_file()
RNArtistRun.process_struct()
RNArtistRun.run()
RNArtistRun.table
RNArtistRun.table_class
RNArtistRun.table_classes
RNArtistRun.table_file
RNArtistRun.table_loader
build_jinja_data()
draw()
parse_color_file()
run()
- Submodules
- seismicrna.export package
- Submodules
run()
combine_metadata()
parse_refs_metadata()
parse_samples_metadata()
conform_series()
export_sample()
format_metadata()
get_db_structs()
get_ref_metadata()
get_reg_metadata()
get_sample_data()
get_sample_metadata()
get_table_data()
iter_clust_table_data()
iter_pos_table_data()
iter_pos_table_series()
iter_pos_table_struct()
iter_read_table_data()
iter_table_data()
- Submodules
- seismicrna.fold package
- Submodules
run_datapath()
find_ct_files()
load_ct_structs()
fold_profile()
fold_region()
load_foldable_tables()
run()
FoldReport
ConnectivityTableAlreadyRetitledError
RNAStructureConnectivityTableTitleLineFormatError
check_data_path()
fold()
format_retitled_ct_line()
guess_data_path()
make_fold_cmd()
parse_energy()
parse_rnastructure_ct_title()
require_data_path()
retitle_ct()
- Submodules
- seismicrna.graph package
- Submodules
ClusterAbundanceGraph
ClusterAbundanceGraph.col_tracks
ClusterAbundanceGraph.data
ClusterAbundanceGraph.details
ClusterAbundanceGraph.get_traces()
ClusterAbundanceGraph.graph_kind()
ClusterAbundanceGraph.path_subject
ClusterAbundanceGraph.predicate
ClusterAbundanceGraph.row_tracks
ClusterAbundanceGraph.what()
ClusterAbundanceGraph.x_title
ClusterAbundanceGraph.y_title
ClusterAbundanceRunner
ClusterAbundanceWriter
RollingAUCGraph
RollingAUCRunner
RollingAUCWriter
Annotation
BaseGraph
BaseGraph.annotations
BaseGraph.data
BaseGraph.details
BaseGraph.figure
BaseGraph.get_path()
BaseGraph.get_path_fields()
BaseGraph.get_path_segs()
BaseGraph.get_traces()
BaseGraph.graph_filename
BaseGraph.graph_kind()
BaseGraph.path_subject
BaseGraph.predicate
BaseGraph.ref
BaseGraph.reg
BaseGraph.sample
BaseGraph.seq
BaseGraph.title
BaseGraph.title_action_sample
BaseGraph.top
BaseGraph.what()
BaseGraph.write()
BaseGraph.write_csv()
BaseGraph.write_html()
BaseGraph.write_pdf()
BaseGraph.write_png()
BaseGraph.write_svg()
BaseGraph.x_title
BaseGraph.y_title
BaseRunner
BaseWriter
get_action_name()
make_path_subject()
make_title_action_sample()
ClusterGroupGraph
ClusterGroupRunner
cgroup_table()
get_ks()
get_ks_clusts()
make_tracks()
ColorMap
ColorMapGraph
RelColorMap
SeqColorMap
get_cmap()
get_colormaps()
RollingCorrelationGraph
RollingCorrelationRunner
RollingCorrelationWriter
DatasetGraph
DatasetRunner
DatasetWriter
DeltaProfileGraph
DeltaProfileRunner
DeltaProfileWriter
RollingGiniGraph
RollingGiniRunner
RollingGiniWriter
HistogramGraph
HistogramRunner
HistogramWriter
get_edges_index()
PositionHistogramGraph
PositionHistogramRunner
PositionHistogramWriter
ReadHistogramGraph
ReadHistogramRunner
ReadHistogramWriter
MutationDistanceGraph
MutationDistanceGraph.data
MutationDistanceGraph.g_test
MutationDistanceGraph.get_cmap_type()
MutationDistanceGraph.get_traces()
MutationDistanceGraph.graph_kind()
MutationDistanceGraph.hists
MutationDistanceGraph.loc_clusters
MutationDistanceGraph.max_read_length
MutationDistanceGraph.row_tracks
MutationDistanceGraph.table
MutationDistanceGraph.tabulator
MutationDistanceGraph.what()
MutationDistanceGraph.x_title
MutationDistanceGraph.y_title
MutationDistanceRunner
MutationDistanceWriter
get_null_name()
OneSourceClusterGroupGraph
OneSourceGraph
StructOneTableGraph
StructOneTableRunner
StructOneTableWriter
OneTableGraph
OneTableRelClusterGroupGraph
OneTableRelClusterGroupRunner
OneTableRelClusterGroupWriter
OneTableRunner
OneTableWriter
PositionCorrelationGraph
PositionCorrelationRunner
PositionCorrelationWriter
calc_phi()
PositionPairGraph
PositionPairRunner
PositionPairWriter
calc_phi()
MultiRelsProfileGraph
OneRelProfileGraph
ProfileGraph
ProfileRunner
ProfileWriter
MultiRelsGraph
OneRelGraph
RelGraph
RelRunner
ROCGraph
ROCRunner
ROCWriter
rename_columns()
RollingGraph
RollingRunner
ScatterGraph
ScatterRunner
ScatterWriter
RollingSNRGraph
RollingSNRRunner
RollingSNRWriter
RollingStatGraph
RollingStatRunner
RollingStatWriter
AbundanceTableRunner
PositionTableRunner
ReadTableRunner
RelTableGraph
RelTableRunner
TableGraph
TableRunner
TableWriter
load_abundance_tables()
load_pos_tables()
load_read_tables()
get_hist_trace()
get_line_trace()
get_pairwise_position_trace()
get_roc_trace()
get_rolling_auc_trace()
get_seq_base_bar_trace()
get_seq_base_scatter_trace()
get_seq_line_trace()
get_seq_stack_bar_trace()
iter_hist_traces()
iter_line_traces()
iter_roc_traces()
iter_rolling_auc_traces()
iter_seq_base_bar_traces()
iter_seq_base_scatter_traces()
iter_seq_line_traces()
iter_seqbar_stack_traces()
iter_stack_bar_traces()
TwoTableGraph
TwoTableMergedClusterGroupGraph
TwoTableRelClusterGroupGraph
TwoTableRelClusterGroupRunner
TwoTableRelClusterGroupWriter
TwoTableRunner
TwoTableWriter
iter_table_pairs()
- Submodules
- seismicrna.lists package
- seismicrna.mask package
- Subpackages
- Submodules
MaskMutsBatch
MaskReadBatch
PartialReadBatch
PartialRegionMutsBatch
apply_mask()
JoinMaskMutsDataset
MaskDataset
MaskMutsDataset
MaskReadDataset
MaskBatchIO
MaskIO
load_regions()
run()
JoinMaskReport
MaskReport
MaskBatchTabulator
MaskCountTabulator
MaskDatasetTabulator
MaskPositionTable
MaskPositionTableLoader
MaskPositionTableWriter
MaskReadTable
MaskReadTableLoader
MaskReadTableWriter
MaskTable
MaskTabulator
PartialDatasetTabulator
PartialPositionTable
PartialReadTable
PartialTable
PartialTabulator
adjust_counts()
Masker
Masker.CHECKSUM_KEY
Masker.MASK_POS_FMUT
Masker.MASK_POS_NINFO
Masker.MASK_READ_DISCONTIG
Masker.MASK_READ_FINFO
Masker.MASK_READ_FMUT
Masker.MASK_READ_GAP
Masker.MASK_READ_INIT
Masker.MASK_READ_KEPT
Masker.MASK_READ_LIST
Masker.MASK_READ_NCOV
Masker.PATTERN_KEY
Masker.create_report()
Masker.mask()
Masker.n_reads_discontig
Masker.n_reads_init
Masker.n_reads_kept
Masker.n_reads_list
Masker.n_reads_max_fmut
Masker.n_reads_min_finfo
Masker.n_reads_min_gap
Masker.n_reads_min_ncov
Masker.pos_gu
Masker.pos_kept
Masker.pos_list
Masker.pos_max_fmut
Masker.pos_min_ninfo
Masker.pos_polya
Masker.read_names_dataset
mask_region()
- seismicrna.relate package
- Subpackages
- Submodules
FullReadBatch
FullRegionMutsBatch
ReadNamesBatch
RelateBatch
format_read_name()
AverageDataset
NamesDataset
PoolDataset
PoolReadNamesDataset
ReadNamesDataset
RelateDataset
RelateMutsDataset
ReadNamesBatchIO
RelateBatchIO
RelateIO
from_reads()
check_duplicates()
run()
PoolReport
RelateReport
refseq_file_auto_fields()
refseq_file_path()
refseq_file_seg_types()
XamViewer
line_attrs()
tmp_xam_cmd()
simulate_batch()
simulate_batches()
simulate_cluster()
simulate_relate()
generate_both_strands()
write_both_strands()
AverageTable
AverageTabulator
FullPositionTable
FullReadTable
FullTable
FullTabulator
PositionTableLoader
ReadTableLoader
RelTypeTableLoader
RelateBatchTabulator
RelateCountTabulator
RelateDatasetTabulator
RelatePositionTable
RelatePositionTableLoader
RelatePositionTableWriter
RelateReadTable
RelateReadTableLoader
RelateReadTableWriter
RelateTable
RelateTabulator
TableLoader
RelationWriter
generate_batch()
relate_records()
relate_xam()
- seismicrna.renumct package
- seismicrna.sim package
- Subpackages
- Submodules
load_pclust()
run()
sim_pclust()
sim_pclust_ct()
load_pends()
run()
sim_pends()
sim_pends_ct()
from_param_dir()
from_report()
generate_fastq()
generate_fastq_record()
run()
fold_region()
get_ct_path()
run()
calc_pmut_pattern()
load_pmut()
make_pmut_means()
make_pmut_means_paired()
make_pmut_means_unpaired()
run()
run_struct()
sim_pmut()
verify_proportions()
run()
get_fasta_path()
run()
from_param_dir()
get_param_dir_fields()
load_param_dir()
run()
run()
- seismicrna.test package
- seismicrna.tests package
- Submodules
TestCalcRegions
TestEnsembles
TestEnsembles.MODULES
TestEnsembles.PROFILE
TestEnsembles.REF
TestEnsembles.REFS
TestEnsembles.SAMPLE
TestEnsembles.SIM_DIR
TestEnsembles.run_ensembles()
TestEnsembles.setUp()
TestEnsembles.sim_data()
TestEnsembles.tearDown()
TestEnsembles.test_modules012_read120()
TestEnsembles.test_modules012_read180()
TestEnsembles.test_modules012_read60()
TestEnsembles.test_modules02_read60()
TestWorkflow
TestWorkflowTwoOutDirs
TestWorkflowTwoOutDirs.CJOINED
TestWorkflowTwoOutDirs.MJOINED
TestWorkflowTwoOutDirs.NUMBERS
TestWorkflowTwoOutDirs.OUT_DIR
TestWorkflowTwoOutDirs.OUT_DIRS
TestWorkflowTwoOutDirs.POOLED
TestWorkflowTwoOutDirs.REF
TestWorkflowTwoOutDirs.REFS
TestWorkflowTwoOutDirs.SAMPLE
TestWorkflowTwoOutDirs.SIM_DIR
TestWorkflowTwoOutDirs.SIM_DIRS
TestWorkflowTwoOutDirs.check_no_identical()
TestWorkflowTwoOutDirs.setUp()
TestWorkflowTwoOutDirs.tearDown()
TestWorkflowTwoOutDirs.test_wf_two_out_dirs()
list_actions()
list_profiles()
list_step_dir_contents()
- Submodules
Submodules
- exception seismicrna.ensembles.CalcRefRegionLengthError
Bases:
ValueError
Error when calculating mutation densities.
- class seismicrna.ensembles.RegionInfo(reg: str, end5: int, end3: int, ks: Iterable[int], report_file: Path, verify_times: bool, max_procs: int)
Bases:
object
- property clust_params
- property ends
- seismicrna.ensembles.calc_ref_region_length(datasets: Iterable[MutsDataset], pattern: RelPattern, mask_discontig: bool, min_mut_gap: int)
- seismicrna.ensembles.calc_regions(total_end5: int, total_end3: int, region_length: int, region_min_overlap: float)
- seismicrna.ensembles.generate_regions(input_path: Iterable[str | Path], coords: Iterable[tuple[str, int, int]], primers: Iterable[tuple[str, DNA, DNA]], primer_gap: int, regions_file: str | None, region_length: int, region_min_overlap: float, mask_del: bool, mask_ins: bool, mask_mut: list[str], mask_discontig: bool, min_mut_gap: int)
For each reference, list the regions over which to mask.
- seismicrna.ensembles.group_clusters(cluster_dirs: Iterable[Path], max_marcd_join, verify_times: bool, max_procs: int)
- seismicrna.ensembles.run(input_path: Iterable[str | Path], *, tmp_pfx: str | Path = './tmp', keep_tmp: bool = False, brotli_level: int = 10, force: bool = False, max_procs: int = 4, mask_coords: Iterable[tuple[str, int, int]] = (), mask_primers: Iterable[tuple[str, DNA, DNA]] = (), primer_gap: int = 0, mask_regions_file: str | None = None, mask_del: bool = True, mask_ins: bool = True, mask_mut: Iterable[str] = (), mask_polya: int = 5, mask_gu: bool = True, mask_pos: Iterable[tuple[str, int]] = (), mask_pos_file: str | None = None, mask_read: Iterable[str] = (), mask_read_file: str | None = None, mask_discontig: bool = True, min_ncov_read: int = 1, min_finfo_read: float = 0.95, max_fmut_read: float = 1.0, min_mut_gap: int = 3, min_ninfo_pos: int = 1000, max_fmut_pos: float = 1.0, quick_unbias: bool = True, quick_unbias_thresh: float = 0.001, max_mask_iter: int = 0, mask_pos_table: bool = True, mask_read_table: bool = True, min_clusters: int = 1, max_clusters: int = 0, em_runs: int = 12, jackpot: bool = True, jackpot_conf_level: float = 0.95, max_jackpot_quotient: float = 1.1, min_em_iter: int = 10, max_em_iter: int = 500, em_thresh: float = 0.37, min_marcd_run: float = 0.0175, max_pearson_run: float = 0.9, max_loglike_vs_best: float = 0.0, min_pearson_vs_best: float = 0.98, max_marcd_vs_best: float = 0.005, try_all_ks: bool = False, write_all_ks: bool = False, cluster_pos_table: bool = True, cluster_abundance_table: bool = True, verify_times: bool = True, joined: str = '', region_length: int = 0, region_min_overlap: float = 0.6666666666666666, max_marcd_join: float = 1.5)
Infer independent structure ensembles along an entire RNA.
- Parameters:
tmp_pfx (
str | pathlib.Path
) – Write all temporary files to a directory with this prefix [keyword-only, default: './tmp']keep_tmp (
bool
) – Keep temporary files after finishing [keyword-only, default: False]brotli_level (
int
) – Compress pickle files with this level of Brotli (0 - 11) [keyword-only, default: 10]force (
bool
) – Force all tasks to run, overwriting any existing output files [keyword-only, default: False]max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]mask_coords (
Iterable
) – Select a region of a reference given its 5’ and 3’ end coordinates [keyword-only, default: ()]mask_primers (
Iterable
) – Select a region of a reference given its forward and reverse primers [keyword-only, default: ()]primer_gap (
int
) – Leave a gap of this many bases between the primer and the region [keyword-only, default: 0]mask_regions_file (
str | None
) – Select regions of references from coordinates/primers in a CSV file [keyword-only, default: None]mask_del (
bool
) – Mask deletions [keyword-only, default: True]mask_ins (
bool
) – Mask insertions [keyword-only, default: True]mask_mut (
Iterable
) – Mask this type of mutation [keyword-only, default: ()]mask_polya (
int
) – Mask stretches of at least this many consecutive A bases (0 disables) [keyword-only, default: 5]mask_gu (
bool
) – Mask G and U bases [keyword-only, default: True]mask_pos (
Iterable
) – Mask this position in this reference [keyword-only, default: ()]mask_pos_file (
str | None
) – Mask positions in references from a file [keyword-only, default: None]mask_read (
Iterable
) – Mask the read with this name [keyword-only, default: ()]mask_read_file (
str | None
) – Mask the reads with names in this file [keyword-only, default: None]mask_discontig (
bool
) – Mask paired-end reads with discontiguous mates [keyword-only, default: True]min_ncov_read (
int
) – Mask reads with fewer than this many bases covering the region [keyword-only, default: 1]min_finfo_read (
float
) – Mask reads with less than this fraction of informative base calls [keyword-only, default: 0.95]max_fmut_read (
float
) – Mask reads with more than this fraction of mutated base calls [keyword-only, default: 1.0]min_mut_gap (
int
) – Mask reads with two mutations separated by fewer than this many bases [keyword-only, default: 3]min_ninfo_pos (
int
) – Mask positions with fewer than this many informative base calls [keyword-only, default: 1000]max_fmut_pos (
float
) – Mask positions with more than this fraction of mutated base calls [keyword-only, default: 1.0]quick_unbias (
bool
) – Correct observer bias using a quick (typically linear time) heuristic [keyword-only, default: True]quick_unbias_thresh (
float
) – Treat mutated fractions under this threshold as 0 with --quick-unbias [keyword-only, default: 0.001]max_mask_iter (
int
) – Stop masking after this many iterations (0 for no limit) [keyword-only, default: 0]mask_pos_table (
bool
) – Tabulate relationships per position for mask data [keyword-only, default: True]mask_read_table (
bool
) – Tabulate relationships per read for mask data [keyword-only, default: True]min_clusters (
int
) – Start at this many clusters [keyword-only, default: 1]max_clusters (
int
) – Stop at this many clusters (0 for no limit) [keyword-only, default: 0]em_runs (
int
) – Run EM this many times for each number of clusters (K) except K = 1 [keyword-only, default: 12]jackpot (
bool
) – Calculate the jackpotting quotient to find over-represented reads [keyword-only, default: True]jackpot_conf_level (
float
) – Confidence level for the jackpotting quotient confidence interval [keyword-only, default: 0.95]max_jackpot_quotient (
float
) – Remove runs whose jackpotting quotient exceeds this limit [keyword-only, default: 1.1]min_em_iter (
int
) – Run EM for at least this many iterations (times number of clusters) [keyword-only, default: 10]max_em_iter (
int
) – Run EM for at most this many iterations (times number of clusters) [keyword-only, default: 500]em_thresh (
float
) – Stop EM when the log likelihood increases by less than this threshold [keyword-only, default: 0.37]min_marcd_run (
float
) – Remove runs with two clusters different by less than this MARCD [keyword-only, default: 0.0175]max_pearson_run (
float
) – Remove runs with two clusters more similar than this correlation [keyword-only, default: 0.9]max_loglike_vs_best (
float
) – Remove Ks with a log likelihood gap larger than this (0 for no limit) [keyword-only, default: 0.0]min_pearson_vs_best (
float
) – Remove Ks where every run has less than this correlation vs. the best [keyword-only, default: 0.98]max_marcd_vs_best (
float
) – Remove Ks where every run has more than this MARCD vs. the best [keyword-only, default: 0.005]try_all_ks (
bool
) – Try all numbers of clusters (Ks), even after finding the best number [keyword-only, default: False]write_all_ks (
bool
) – Write all numbers of clusters (Ks), rather than only the best number [keyword-only, default: False]cluster_pos_table (
bool
) – Tabulate relationships per position for cluster data [keyword-only, default: True]cluster_abundance_table (
bool
) – Tabulate number of reads per cluster for cluster data [keyword-only, default: True]verify_times (
bool
) – Verify that report files from later steps have later timestamps [keyword-only, default: True]joined (
str
) – Name of the region formed by joining other regions [keyword-only, default: '']region_length (
int
) – Make each region this length (if 0, then calculate the length over which the average read has 2 mutations) [keyword-only, default: 0]region_min_overlap (
float
) – Make adjacent regions overlap by at least this fraction of length [keyword-only, default: 0.6666666666666666]max_marcd_join (
float
) – Join regions with the same numbers of clusters only if the mean arcsine distance (MARCD) of their mutation rates and proportions does not exceed this threshold [keyword-only, default: 1.5]
- seismicrna.join.join_regions(out_dir: Path, name: str, sample: str, ref: str, regs: Iterable[str], clustered: bool, *, clusts: dict[str, dict[int, dict[int, int]]], mask_pos_table: bool, mask_read_table: bool, cluster_pos_table: bool, cluster_abundance_table: bool, verify_times: bool, n_procs: int, force: bool, tmp_pfx, keep_tmp)
Join one or more regions (horizontally).
- Parameters:
out_dir (
pathlib.Path
) – Output directory.name (
str
) – Name of the joined region.sample (
str
) – Name of the sample.ref (
str
) – Name of the reference.regs (
Iterable[str]
) – Names of the regions being joined.clustered (
bool
) – Whether the dataset is clustered.tmp_dir (
Path
) – Temporary directory.clusts (
dict[str
,dict[int
,dict[int
,int]]]
) – For each region, for each number of clusters, the cluster from the original region to use as the cluster in the joined region (ignored if clustered is False).mask_pos_table (
bool
) – Tabulate relationships per position for mask data.mask_read_table (
bool
) – Tabulate relationships per read for mask data.cluster_pos_table (
bool
) – Tabulate relationships per position for cluster data.cluster_abundance_table (
bool
) – Tabulate number of reads per cluster for cluster data.verify_times (
bool
) – Verify that report files from later steps have later timestamps.n_procs (
int
) – Number of processors to use.force (
bool
) – Force the report to be written, even if it exists.
- Returns:
Path of the Join report file.
- Return type:
pathlib.Path
- seismicrna.join.joined_mask_report_exists(top: Path, sample: str, ref: str, joined: str, regs: Iterable[str])
Return whether a mask report for the joined region exists.
- seismicrna.join.run(input_path: Iterable[str | Path], *, joined: str = '', join_clusts: str | None = None, mask_pos_table: bool = True, mask_read_table: bool = True, cluster_pos_table: bool = True, cluster_abundance_table: bool = True, verify_times: bool = True, tmp_pfx: str | Path = './tmp', keep_tmp: bool = False, max_procs: int = 4, force: bool = False) list[Path]
Merge regions (horizontally) from the Mask or Cluster step.
- Parameters:
joined (
str
) – Name of the region formed by joining other regions [keyword-only, default: '']join_clusts (
str | None
) – Specify which clusters to join using this CSV file [keyword-only, default: None]mask_pos_table (
bool
) – Tabulate relationships per position for mask data [keyword-only, default: True]mask_read_table (
bool
) – Tabulate relationships per read for mask data [keyword-only, default: True]cluster_pos_table (
bool
) – Tabulate relationships per position for cluster data [keyword-only, default: True]cluster_abundance_table (
bool
) – Tabulate number of reads per cluster for cluster data [keyword-only, default: True]verify_times (
bool
) – Verify that report files from later steps have later timestamps [keyword-only, default: True]tmp_pfx (
str | pathlib.Path
) – Write all temporary files to a directory with this prefix [keyword-only, default: './tmp']keep_tmp (
bool
) – Keep temporary files after finishing [keyword-only, default: False]max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]force (
bool
) – Force all tasks to run, overwriting any existing output files [keyword-only, default: False]
- seismicrna.join.write_report(report_type: type[JoinReport], out_dir: Path, **kwargs)
- seismicrna.migrate.migrate_cluster_reg_dir(reg_dir: Path)
- seismicrna.migrate.migrate_fold_reg_dir(reg_dir: Path)
- seismicrna.migrate.migrate_graph_file(graph_file: Path)
- seismicrna.migrate.migrate_mask_reg_dir(reg_dir: Path)
- seismicrna.migrate.migrate_relate_ref_dir(ref_dir: Path)
- seismicrna.migrate.migrate_table_reg_dir(reg_dir: Path)
- seismicrna.migrate.run(input_path: Iterable[str | Path], *, max_procs: int = 4) list[Path]
Migrate output directories from v0.21 to v0.22.
- Parameters:
max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]
- seismicrna.pool.pool_samples(out_dir: Path, name: str, ref: str, samples: Iterable[str], *, relate_pos_table: bool, relate_read_table: bool, verify_times: bool, n_procs: int, force: bool, tmp_pfx, keep_tmp)
Pool one or more samples (vertically).
- Parameters:
out_dir (
pathlib.Path
) – Output directory.name (
str
) – Name of the pool.ref (
str
) – Name of the reference.samples (
Iterable[str]
) – Names of the samples in the pool.tmp_dir (
Path
) – Temporary directory.relate_pos_table (
bool
) – Tabulate relationships per position for relate data.relate_read_table (
bool
) – Tabulate relationships per read for relate data.verify_times (
bool
) – Verify that report files from later steps have later timestamps.n_procs (
int
) – Number of processors to use.force (
bool
) – Force the report to be written, even if it exists.
- Returns:
Path of the Pool report file.
- Return type:
pathlib.Path
- seismicrna.pool.run(input_path: Iterable[str | Path], *, pooled: str = '', relate_pos_table: bool = True, relate_read_table: bool = False, verify_times: bool = True, tmp_pfx: str | Path = './tmp', keep_tmp: bool = False, max_procs: int = 4, force: bool = False) list[Path]
Merge samples (vertically) from the Relate step.
- Parameters:
pooled (
str
) – Pooled sample name [keyword-only, default: '']relate_pos_table (
bool
) – Tabulate relationships per position for relate data [keyword-only, default: True]relate_read_table (
bool
) – Tabulate relationships per read for relate data [keyword-only, default: False]verify_times (
bool
) – Verify that report files from later steps have later timestamps [keyword-only, default: True]tmp_pfx (
str | pathlib.Path
) – Write all temporary files to a directory with this prefix [keyword-only, default: './tmp']keep_tmp (
bool
) – Keep temporary files after finishing [keyword-only, default: False]max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]force (
bool
) – Force all tasks to run, overwriting any existing output files [keyword-only, default: False]
- seismicrna.pool.write_report(out_dir: Path, **kwargs)
- seismicrna.table.get_dataset_flags(dataset: MutsDataset, relate_pos_table: bool, relate_read_table: bool, mask_pos_table: bool, mask_read_table: bool, cluster_pos_table: bool, cluster_abundance_table: bool)
Return the tabulator and table flags for a dataset.
- seismicrna.table.get_tabulator_type(dataset_type: type[Dataset], count: bool = False)
Determine which class of Tabulator can process the dataset.
- seismicrna.table.load_all_datasets(input_path: Iterable[str | Path], verify_times: bool)
Load datasets from all steps of the workflow.
- seismicrna.table.run(input_path: Iterable[str | Path], *, relate_pos_table: bool = True, relate_read_table: bool = False, mask_pos_table: bool = True, mask_read_table: bool = True, cluster_pos_table: bool = True, cluster_abundance_table: bool = True, verify_times: bool = True, max_procs: int = 4, force: bool = False) list[Path]
Tabulate counts of relationships per read and position.
- Parameters:
relate_pos_table (
bool
) – Tabulate relationships per position for relate data [keyword-only, default: True]relate_read_table (
bool
) – Tabulate relationships per read for relate data [keyword-only, default: False]mask_pos_table (
bool
) – Tabulate relationships per position for mask data [keyword-only, default: True]mask_read_table (
bool
) – Tabulate relationships per read for mask data [keyword-only, default: True]cluster_pos_table (
bool
) – Tabulate relationships per position for cluster data [keyword-only, default: True]cluster_abundance_table (
bool
) – Tabulate number of reads per cluster for cluster data [keyword-only, default: True]verify_times (
bool
) – Verify that report files from later steps have later timestamps [keyword-only, default: True]max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]force (
bool
) – Force all tasks to run, overwriting any existing output files [keyword-only, default: False]
- seismicrna.table.tabulate(dataset: MutsDataset, tabulator_type: type[DatasetTabulator], pos_table: bool, read_table: bool, clust_table: bool, force: bool, n_procs: int)
- seismicrna.wf.run(fasta: str | Path, input_path: Iterable[str | Path], *, out_dir: str | Path = './out', tmp_pfx: str | Path = './tmp', keep_tmp: bool = False, brotli_level: int = 10, force: bool = False, max_procs: int = 4, fastqz: Iterable[str | Path] = (), fastqy: Iterable[str | Path] = (), fastqx: Iterable[str | Path] = (), phred_enc: int = 33, demulti_overwrite: bool = False, demult_on: bool = False, parallel_demultiplexing: bool = False, clipped: int = 0, mismatch_tolerence: int = 0, index_tolerance: int = 0, barcode_start: int = 0, barcode_end: int = 0, dmfastqz: Iterable[str | Path] = (), dmfastqy: Iterable[str | Path] = (), dmfastqx: Iterable[str | Path] = (), fastp: bool = True, fastp_5: bool = False, fastp_3: bool = True, fastp_w: int = 6, fastp_m: int = 25, fastp_poly_g: str = 'auto', fastp_poly_g_min_len: int = 10, fastp_poly_x: bool = False, fastp_poly_x_min_len: int = 10, fastp_adapter_trimming: bool = True, fastp_adapter_1: str = '', fastp_adapter_2: str = '', fastp_adapter_fasta: str | None = None, fastp_detect_adapter_for_pe: bool = True, fastp_min_length: int = 9, bt2_local: bool = True, bt2_discordant: bool = False, bt2_mixed: bool = False, bt2_dovetail: bool = False, bt2_contain: bool = True, bt2_score_min_e2e: str = 'L,-1,-0.8', bt2_score_min_loc: str = 'L,1,0.8', bt2_i: int = 0, bt2_x: int = 600, bt2_gbar: int = 4, bt2_l: int = 20, bt2_s: str = 'L,1,0.1', bt2_d: int = 4, bt2_r: int = 2, bt2_dpad: int = 2, bt2_orient: str = 'fr', bt2_un: bool = True, min_mapq: int = 25, sep_strands: bool = False, f1r2_fwd: bool = False, rev_label: str = '-rev', min_phred: int = 25, min_reads: int = 1000, insert3: bool = True, ambindel: bool = True, overhangs: bool = True, clip_end5: int = 4, clip_end3: int = 4, batch_size: int = 65536, write_read_names: bool = False, relate_pos_table: bool = True, relate_read_table: bool = False, relate_cx: bool = True, mask_coords: Iterable[tuple[str, int, int]] = (), mask_primers: Iterable[tuple[str, DNA, DNA]] = (), 
primer_gap: int = 0, mask_regions_file: str | None = None, mask_del: bool = True, mask_ins: bool = True, mask_mut: Iterable[str] = (), mask_polya: int = 5, mask_gu: bool = True, mask_pos: Iterable[tuple[str, int]] = (), mask_pos_file: str | None = None, mask_read: Iterable[str] = (), mask_read_file: str | None = None, mask_discontig: bool = True, min_ncov_read: int = 1, min_finfo_read: float = 0.95, max_fmut_read: float = 1.0, min_mut_gap: int = 3, min_ninfo_pos: int = 1000, max_fmut_pos: float = 1.0, quick_unbias: bool = True, quick_unbias_thresh: float = 0.001, max_mask_iter: int = 0, mask_pos_table: bool = True, mask_read_table: bool = True, cluster: bool = False, min_clusters: int = 1, max_clusters: int = 0, em_runs: int = 12, jackpot: bool = True, jackpot_conf_level: float = 0.95, max_jackpot_quotient: float = 1.1, min_em_iter: int = 10, max_em_iter: int = 500, em_thresh: float = 0.37, min_marcd_run: float = 0.0175, max_pearson_run: float = 0.9, max_loglike_vs_best: float = 0.0, min_pearson_vs_best: float = 0.98, max_marcd_vs_best: float = 0.005, try_all_ks: bool = False, write_all_ks: bool = False, cluster_pos_table: bool = True, cluster_abundance_table: bool = True, verify_times: bool = True, fold: bool = False, fold_coords: Iterable[tuple[str, int, int]] = (), fold_primers: Iterable[tuple[str, DNA, DNA]] = (), fold_regions_file: str | None = None, fold_full: bool = True, quantile: float = 0.0, fold_temp: float = 310.15, fold_constraint: str | None = None, fold_md: int = 0, fold_mfe: bool = False, fold_max: int = 20, fold_percent: float = 20.0, draw: bool = False, struct_num: Iterable[int] = (), color: bool = True, export: bool = False, samples_meta: str = None, refs_meta: str = None, all_pos: bool = True, cgroup: str = 'k', hist_bins: int = 10, hist_margin: float = 0.1, struct_file: Iterable[str | Path] = (), window: int = 45, winmin: int = 9, csv: bool = True, html: bool = True, svg: bool = False, pdf: bool = False, png: bool = False, graph_mprof: bool = 
True, graph_tmprof: bool = True, graph_ncov: bool = True, graph_mhist: bool = True, graph_abundance: bool = True, graph_giniroll: bool = False, graph_roc: bool = True, graph_aucroll: bool = False, graph_poscorr: bool = False, graph_mutdist: bool = False, mutdist_null: bool = True)
Run the entire workflow.
- Parameters:
out_dir (
str | pathlib.Path
) – Write all output files to this directory [keyword-only, default: ‘./out’]tmp_pfx (
str | pathlib.Path
) – Write all temporary files to a directory with this prefix [keyword-only, default: ‘./tmp’]keep_tmp (
bool
) – Keep temporary files after finishing [keyword-only, default: False]brotli_level (
int
) – Compress pickle files with this level of Brotli (0 - 11) [keyword-only, default: 10]force (
bool
) – Force all tasks to run, overwriting any existing output files [keyword-only, default: False]max_procs (
int
) – Run up to this many processes simultaneously [keyword-only, default: 4]fastqz (
Iterable
) – FASTQ file(s) of single-end reads [keyword-only, default: ()]fastqy (
Iterable
) – FASTQ file(s) of paired-end reads with mates 1 and 2 interleaved [keyword-only, default: ()]fastqx (
Iterable
) – FASTQ files of paired-end reads with mates 1 and 2 in separate files [keyword-only, default: ()]phred_enc (
int
) – Specify the Phred score encoding of FASTQ and SAM/BAM/CRAM files [keyword-only, default: 33]demulti_overwrite (
bool
) – Designates whether to overwrite the grepped FASTQ file; should only be used when changing settings on the same sample [keyword-only, default: False]demult_on (
bool
) – Enable demultiplexing [keyword-only, default: False]parallel_demultiplexing (
bool
) – Whether to run demultiplexing at maximum speed by submitting multithreaded grep functions [keyword-only, default: False]clipped (
int
) – Designates the number of clipped patterns to search for in the sample; will raise computation time [keyword-only, default: 0]mismatch_tolerence (
int
) – Designates the number of mismatches allowed in a string for it to still be considered a valid pattern match. Will increase non-parallel computation at a factorial rate; use caution going above 2 mismatches. Does not apply to clipped sequences. [keyword-only, default: 0]index_tolerance (
int
) – Designates the maximum distance from the reference index at which the pattern may be found in a read [keyword-only, default: 0]barcode_start (
int
) – Index of start of barcode [keyword-only, default: 0]barcode_end (
int
) – Length of barcode [keyword-only, default: 0]dmfastqz (
Iterable
) – Demultiplexed FASTQ files of single-end reads [keyword-only, default: ()]dmfastqy (
Iterable
) – Demultiplexed FASTQ files of paired-end reads interleaved in one file [keyword-only, default: ()]dmfastqx (
Iterable
) – Demultiplexed FASTQ files of mate 1 and mate 2 reads [keyword-only, default: ()]fastp (
bool
) – Use fastp to QC, filter, and trim reads before alignment [keyword-only, default: True]fastp_5 (
bool
) – Trim low-quality bases from the 5’ ends of reads [keyword-only, default: False]fastp_3 (
bool
) – Trim low-quality bases from the 3’ ends of reads [keyword-only, default: True]fastp_w (
int
) – Use this window size (nt) for --fastp-5 and --fastp-3 [keyword-only, default: 6]fastp_m (
int
) – Use this mean quality threshold for --fastp-5 and --fastp-3 [keyword-only, default: 25]fastp_poly_g (
str
) – Trim poly(G) tails (two-color sequencing artifacts) from the 3’ end [keyword-only, default: ‘auto’]fastp_poly_g_min_len (
int
) – Minimum number of Gs to consider a poly(G) tail for --fastp-poly-g [keyword-only, default: 10]fastp_poly_x (
bool
) – Trim poly(X) tails (i.e. of any nucleotide) from the 3’ end [keyword-only, default: False]fastp_poly_x_min_len (
int
) – Minimum number of bases to consider a poly(X) tail for --fastp-poly-x [keyword-only, default: 10]fastp_adapter_trimming (
bool
) – Trim adapter sequences from the 3’ ends of reads [keyword-only, default: True]fastp_adapter_1 (
str
) – Trim this adapter sequence from the 3’ ends of read 1s [keyword-only, default: ‘’]fastp_adapter_2 (
str
) – Trim this adapter sequence from the 3’ ends of read 2s [keyword-only, default: ‘’]fastp_adapter_fasta (
str | None
) – Trim adapter sequences in this FASTA file from the 3’ ends of reads [keyword-only, default: None]fastp_detect_adapter_for_pe (
bool
) – Automatically detect the adapter sequences for paired-end reads [keyword-only, default: True]fastp_min_length (
int
) – Discard reads shorter than this length [keyword-only, default: 9]bt2_local (
bool
) – Align reads in local mode rather than end-to-end mode [keyword-only, default: True]bt2_discordant (
bool
) – Output paired-end reads whose mates align discordantly [keyword-only, default: False]bt2_mixed (
bool
) – Attempt to align individual mates of pairs that fail to align [keyword-only, default: False]bt2_dovetail (
bool
) – Consider dovetailed mate pairs to align concordantly [keyword-only, default: False]bt2_contain (
bool
) – Consider nested mate pairs to align concordantly [keyword-only, default: True]bt2_score_min_e2e (
str
) – Discard alignments that score below this threshold in end-to-end mode [keyword-only, default: ‘L,-1,-0.8’]bt2_score_min_loc (
str
) – Discard alignments that score below this threshold in local mode [keyword-only, default: ‘L,1,0.8’]bt2_i (
int
) – Discard paired-end alignments shorter than this many bases [keyword-only, default: 0]bt2_x (
int
) – Discard paired-end alignments longer than this many bases [keyword-only, default: 600]bt2_gbar (
int
) – Do not place gaps within this many bases from the end of a read [keyword-only, default: 4]bt2_l (
int
) – Use this seed length for Bowtie2 [keyword-only, default: 20]bt2_s (
str
) – Seed Bowtie2 alignments at this interval [keyword-only, default: ‘L,1,0.1’]bt2_d (
int
) – Discard alignments if over this many consecutive seed extensions fail [keyword-only, default: 4]bt2_r (
int
) – Re-seed reads with repetitive seeds up to this many times [keyword-only, default: 2]bt2_dpad (
int
) – Pad the alignment matrix with this many bases (to allow gaps) [keyword-only, default: 2]bt2_orient (
str
) – Require paired mates to have this orientation [keyword-only, default: ‘fr’]bt2_un (
bool
) – Output unaligned reads to a FASTQ file [keyword-only, default: True]min_mapq (
int
) – Discard reads with mapping qualities below this threshold [keyword-only, default: 25]sep_strands (
bool
) – Separate each alignment map into forward- and reverse-strand reads [keyword-only, default: False]f1r2_fwd (
bool
) – With --sep-strands, consider forward mate 1s and reverse mate 2s to be forward-stranded [keyword-only, default: False]rev_label (
str
) – With --sep-strands, add this label to each reverse-strand reference [keyword-only, default: ‘-rev’]min_phred (
int
) – Mark base calls with Phred scores lower than this threshold as ambiguous [keyword-only, default: 25]min_reads (
int
) – Discard alignment maps with fewer than this many reads [keyword-only, default: 1000]insert3 (
bool
) – Mark each insertion on the base to its 3’ (True) or 5’ (False) side [keyword-only, default: True]ambindel (
bool
) – Mark all ambiguous insertions and deletions (indels) [keyword-only, default: True]overhangs (
bool
) – Retain the overhangs of paired-end mates that dovetail [keyword-only, default: True]clip_end5 (
int
) – Clip this many bases from the 5’ end of each read [keyword-only, default: 4]clip_end3 (
int
) – Clip this many bases from the 3’ end of each read [keyword-only, default: 4]batch_size (
int
) – Limit batches to at most this many reads [keyword-only, default: 65536]write_read_names (
bool
) – Write the name of each read in a second set of batches (necessary for the options --mask-read or --mask-read-file) [keyword-only, default: False]relate_pos_table (
bool
) – Tabulate relationships per position for relate data [keyword-only, default: True]relate_read_table (
bool
) – Tabulate relationships per read for relate data [keyword-only, default: False]relate_cx (
bool
) – Use a fast (C extension module) version of the relate algorithm; the slow (Python) version is still available as a fallback if the C extension cannot be loaded, and for debugging/benchmarking [keyword-only, default: True]mask_coords (
Iterable
) – Select a region of a reference given its 5’ and 3’ end coordinates [keyword-only, default: ()]mask_primers (
Iterable
) – Select a region of a reference given its forward and reverse primers [keyword-only, default: ()]primer_gap (
int
) – Leave a gap of this many bases between the primer and the region [keyword-only, default: 0]mask_regions_file (
str | None
) – Select regions of references from coordinates/primers in a CSV file [keyword-only, default: None]mask_del (
bool
) – Mask deletions [keyword-only, default: True]mask_ins (
bool
) – Mask insertions [keyword-only, default: True]mask_mut (
Iterable
) – Mask this type of mutation [keyword-only, default: ()]mask_polya (
int
) – Mask stretches of at least this many consecutive A bases (0 disables) [keyword-only, default: 5]mask_gu (
bool
) – Mask G and U bases [keyword-only, default: True]mask_pos (
Iterable
) – Mask this position in this reference [keyword-only, default: ()]mask_pos_file (
str | None
) – Mask positions in references from a file [keyword-only, default: None]mask_read (
Iterable
) – Mask the read with this name [keyword-only, default: ()]mask_read_file (
str | None
) – Mask the reads with names in this file [keyword-only, default: None]mask_discontig (
bool
) – Mask paired-end reads with discontiguous mates [keyword-only, default: True]min_ncov_read (
int
) – Mask reads with fewer than this many bases covering the region [keyword-only, default: 1]min_finfo_read (
float
) – Mask reads with less than this fraction of informative base calls [keyword-only, default: 0.95]max_fmut_read (
float
) – Mask reads with more than this fraction of mutated base calls [keyword-only, default: 1.0]min_mut_gap (
int
) – Mask reads with two mutations separated by fewer than this many bases [keyword-only, default: 3]min_ninfo_pos (
int
) – Mask positions with fewer than this many informative base calls [keyword-only, default: 1000]max_fmut_pos (
float
) – Mask positions with more than this fraction of mutated base calls [keyword-only, default: 1.0]quick_unbias (
bool
) – Correct observer bias using a quick (typically linear time) heuristic [keyword-only, default: True]quick_unbias_thresh (
float
) – Treat mutated fractions under this threshold as 0 with --quick-unbias [keyword-only, default: 0.001]max_mask_iter (
int
) – Stop masking after this many iterations (0 for no limit) [keyword-only, default: 0]mask_pos_table (
bool
) – Tabulate relationships per position for mask data [keyword-only, default: True]mask_read_table (
bool
) – Tabulate relationships per read for mask data [keyword-only, default: True]cluster (
bool
) – Cluster reads to find alternative structures [keyword-only, default: False]min_clusters (
int
) – Start at this many clusters [keyword-only, default: 1]max_clusters (
int
) – Stop at this many clusters (0 for no limit) [keyword-only, default: 0]em_runs (
int
) – Run EM this many times for each number of clusters (K) except K = 1 [keyword-only, default: 12]jackpot (
bool
) – Calculate the jackpotting quotient to find over-represented reads [keyword-only, default: True]jackpot_conf_level (
float
) – Confidence level for the jackpotting quotient confidence interval [keyword-only, default: 0.95]max_jackpot_quotient (
float
) – Remove runs whose jackpotting quotient exceeds this limit [keyword-only, default: 1.1]min_em_iter (
int
) – Run EM for at least this many iterations (times number of clusters) [keyword-only, default: 10]max_em_iter (
int
) – Run EM for at most this many iterations (times number of clusters) [keyword-only, default: 500]em_thresh (
float
) – Stop EM when the log likelihood increases by less than this threshold [keyword-only, default: 0.37]min_marcd_run (
float
) – Remove runs with two clusters different by less than this MARCD [keyword-only, default: 0.0175]max_pearson_run (
float
) – Remove runs with two clusters more similar than this correlation [keyword-only, default: 0.9]max_loglike_vs_best (
float
) – Remove Ks with a log likelihood gap larger than this (0 for no limit) [keyword-only, default: 0.0]min_pearson_vs_best (
float
) – Remove Ks where every run has less than this correlation vs. the best [keyword-only, default: 0.98]max_marcd_vs_best (
float
) – Remove Ks where every run has more than this MARCD vs. the best [keyword-only, default: 0.005]try_all_ks (
bool
) – Try all numbers of clusters (Ks), even after finding the best number [keyword-only, default: False]write_all_ks (
bool
) – Write all numbers of clusters (Ks), rather than only the best number [keyword-only, default: False]cluster_pos_table (
bool
) – Tabulate relationships per position for cluster data [keyword-only, default: True]cluster_abundance_table (
bool
) – Tabulate number of reads per cluster for cluster data [keyword-only, default: True]verify_times (
bool
) – Verify that report files from later steps have later timestamps [keyword-only, default: True]fold (
bool
) – Predict the secondary structure using the RNAstructure Fold program [keyword-only, default: False]fold_coords (
Iterable
) – Fold a region of a reference given its 5’ and 3’ end coordinates [keyword-only, default: ()]fold_primers (
Iterable
) – Fold a region of a reference given its forward and reverse primers [keyword-only, default: ()]fold_regions_file (
str | None
) – Fold regions of references from coordinates/primers in a CSV file [keyword-only, default: None]fold_full (
bool
) – If no regions are specified, whether to default to the full region or to the table’s region [keyword-only, default: True]quantile (
float
) – Normalize and winsorize ratios to this quantile (0.0 disables) [keyword-only, default: 0.0]fold_temp (
float
) – Predict structures at this temperature (Kelvin) [keyword-only, default: 310.15]fold_constraint (
str | None
) – Force bases to be paired/unpaired from a file of constraints [keyword-only, default: None]fold_md (
int
) – Limit base pair distances to this number of bases (0 for no limit) [keyword-only, default: 0]fold_mfe (
bool
) – Predict only the minimum free energy (MFE) structure [keyword-only, default: False]fold_max (
int
) – Output at most this many structures (overridden by --fold-mfe) [keyword-only, default: 20]fold_percent (
float
) – Stop outputting structures when the % difference in energy exceeds this value (overridden by --fold-mfe) [keyword-only, default: 20.0]draw (
bool
) – Draw secondary structures with RNArtist. [keyword-only, default: False]struct_num (
Iterable
) – Draw the specified structure (zero-indexed) or -1 for all structures. By default, draw the structure with the best AUROC. [keyword-only, default: ()]color (
bool
) – Color bases by their reactivity [keyword-only, default: True]export (
bool
) – Export each sample to SEISMICgraph (https://seismicrna.org) [keyword-only, default: False]samples_meta (
str
) – Add sample metadata from this CSV file to exported results [keyword-only, default: None]refs_meta (
str
) – Add reference metadata from this CSV file to exported results [keyword-only, default: None]all_pos (
bool
) – Export all positions (not just unmasked positions) [keyword-only, default: True]cgroup (
str
) – Put each Cluster in its own file, each K in its own file, or All clusters in one file [keyword-only, default: ‘k’]hist_bins (
int
) – Number of bins in each histogram; must be ≥ 1 [keyword-only, default: 10]hist_margin (
float
) – Autofill margins of at most this width in histograms of ratios [keyword-only, default: 0.1]struct_file (
Iterable
) – Compare mutational profiles to the structure(s) in this CT file [keyword-only, default: ()]window (
int
) – Use a sliding window of this many bases [keyword-only, default: 45]winmin (
int
) – Mask sliding windows with fewer than this many data points [keyword-only, default: 9]csv (
bool
) – Output the data for each graph in a Comma-Separated Values file [keyword-only, default: True]html (
bool
) – Output each graph in an interactive HyperText Markup Language file [keyword-only, default: True]svg (
bool
) – Output each graph in a Scalable Vector Graphics file [keyword-only, default: False]pdf (
bool
) – Output each graph in a Portable Document Format file [keyword-only, default: False]png (
bool
) – Output each graph in a Portable Network Graphics file [keyword-only, default: False]graph_mprof (
bool
) – Graph mutational profiles [keyword-only, default: True]graph_tmprof (
bool
) – Graph typed mutational profiles [keyword-only, default: True]graph_ncov (
bool
) – Graph coverages per position [keyword-only, default: True]graph_mhist (
bool
) – Graph histograms of mutations per read [keyword-only, default: True]graph_abundance (
bool
) – Graph abundance of each cluster [keyword-only, default: True]graph_giniroll (
bool
) – Graph rolling Gini coefficients [keyword-only, default: False]graph_roc (
bool
) – Graph receiver operating characteristic curves [keyword-only, default: True]graph_aucroll (
bool
) – Graph rolling areas under receiver operating characteristic curves [keyword-only, default: False]graph_poscorr (
bool
) – Graph phi correlations between positions [keyword-only, default: False]graph_mutdist (
bool
) – Graph distances between mutations [keyword-only, default: False]mutdist_null (
bool
) – Include the null distribution of distances between mutations [keyword-only, default: True]