RNAlib-2.1.9
|
Various utility- and helper-functions used throughout the Vienna RNA package. More...
Go to the source code of this file.
Macros | |
#define | VRNA_INPUT_ERROR 1U |
#define | VRNA_INPUT_QUIT 2U |
#define | VRNA_INPUT_MISC 4U |
#define | VRNA_INPUT_FASTA_HEADER 8U |
#define | VRNA_INPUT_SEQUENCE 16U |
#define | VRNA_INPUT_CONSTRAINT 32U |
#define | VRNA_INPUT_NO_TRUNCATION 256U |
#define | VRNA_INPUT_NO_REST 512U |
#define | VRNA_INPUT_NO_SPAN 1024U |
#define | VRNA_INPUT_NOSKIP_BLANK_LINES 2048U |
#define | VRNA_INPUT_BLANK_LINE 4096U |
#define | VRNA_INPUT_NOSKIP_COMMENTS 128U |
#define | VRNA_INPUT_COMMENT 8192U |
#define | VRNA_CONSTRAINT_PIPE 1U |
#define | VRNA_CONSTRAINT_DOT 2U |
#define | VRNA_CONSTRAINT_X 4U |
#define | VRNA_CONSTRAINT_ANG_BRACK 8U |
#define | VRNA_CONSTRAINT_RND_BRACK 16U |
#define | VRNA_CONSTRAINT_MULTILINE 32U |
#define | VRNA_CONSTRAINT_NO_HEADER 64U |
#define | VRNA_CONSTRAINT_ALL 128U |
#define | VRNA_CONSTRAINT_G 256U |
#define | VRNA_OPTION_MULTILINE 32U |
#define | MIN2(A, B) ((A) < (B) ? (A) : (B)) |
#define | MAX2(A, B) ((A) > (B) ? (A) : (B)) |
#define | MIN3(A, B, C) (MIN2( (MIN2((A),(B))) ,(C))) |
#define | MAX3(A, B, C) (MAX2( (MAX2((A),(B))) ,(C))) |
#define | XSTR(s) STR(s) |
#define | STR(s) #s |
#define | FILENAME_MAX_LENGTH 80 |
Maximum length of filenames that are generated by our programs. More... | |
#define | FILENAME_ID_LENGTH 42 |
Maximum length of id taken from fasta header for filename generation. More... | |
Functions | |
void * | space (unsigned size) |
Allocate space safely. More... | |
void * | xrealloc (void *p, unsigned size) |
Reallocate space safely. More... | |
void | nrerror (const char message[]) |
Die with an error message. More... | |
void | warn_user (const char message[]) |
Print a warning message. More... | |
void | init_rand (void) |
Make random number seeds. | |
double | urn (void) |
get a random number from [0..1] More... | |
int | int_urn (int from, int to) |
Generates a pseudo random integer in a specified range. More... | |
char * | time_stamp (void) |
Get a timestamp. More... | |
char * | random_string (int l, const char symbols[]) |
Create a random string using characters from a specified symbol set. More... | |
int | hamming (const char *s1, const char *s2) |
Calculate hamming distance between two sequences. More... | |
int | hamming_bound (const char *s1, const char *s2, int n) |
Calculate hamming distance between two sequences up to a specified length. More... | |
char * | get_line (FILE *fp) |
Read a line of arbitrary length from a stream. More... | |
unsigned int | get_input_line (char **string, unsigned int options) |
unsigned int | read_record (char **header, char **sequence, char ***rest, unsigned int options) |
Get a data record from stdin. More... | |
char * | pack_structure (const char *struc) |
Pack secondary structure, 5:1 compression using base 3 encoding. More... | |
char * | unpack_structure (const char *packed) |
Unpack secondary structure previously packed with pack_structure() More... | |
short * | make_pair_table (const char *structure) |
Create a pair table of a secondary structure. More... | |
short * | copy_pair_table (const short *pt) |
Get an exact copy of a pair table. More... | |
short * | alimake_pair_table (const char *structure) |
short * | make_pair_table_snoop (const char *structure) |
int * | make_loop_index_pt (short *pt) |
Compute the "base pair" distance between two secondary structures s1 and s2. More... | |
void | print_tty_input_seq (void) |
Print a line to stdout that asks for an input sequence. More... | |
void | print_tty_input_seq_str (const char *s) |
Print a line with a user defined string and a ruler to stdout. More... | |
void | print_tty_constraint_full (void) |
Print structure constraint characters to stdout (full constraint support) | |
void | print_tty_constraint (unsigned int option) |
Print structure constraint characters to stdout. (constraint support is specified by option parameter) More... | |
void | str_DNA2RNA (char *sequence) |
Convert a DNA input sequence to RNA alphabet. More... | |
void | str_uppercase (char *sequence) |
Convert an input sequence to uppercase. More... | |
int * | get_iindx (unsigned int length) |
Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function related functions. More... | |
int * | get_indx (unsigned int length) |
Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions. More... | |
void | constrain_ptypes (const char *constraint, unsigned int length, char *ptype, int *BP, int min_loop_size, unsigned int idx_type) |
Insert constraining pair types according to constraint structure string. More... | |
Variables | |
unsigned short | xsubi [3] |
Current 48 bit random number. More... | |
Various utility- and helper-functions used throughout the Vienna RNA package.
#define VRNA_INPUT_ERROR 1U |
Output flag of get_input_line(): "An ERROR has occurred, maybe EOF"
#define VRNA_INPUT_QUIT 2U |
Output flag of get_input_line(): "the user requested quitting the program"
#define VRNA_INPUT_MISC 4U |
Output flag of get_input_line(): "something was read"
#define VRNA_INPUT_FASTA_HEADER 8U |
Input/Output flag of get_input_line():
if used as input option this tells get_input_line() that the data to be read should comply with the FASTA format
the function will return this flag if a fasta header was read
#define VRNA_INPUT_SEQUENCE 16U |
Input flag for get_input_line():
Tell get_input_line() that we assume to read a nucleotide sequence
#define VRNA_INPUT_CONSTRAINT 32U |
Input flag for get_input_line():
Tell get_input_line() that we assume to read a structure constraint
#define VRNA_INPUT_NO_TRUNCATION 256U |
Input switch for get_input_line(): "do not truncate the line by eliminating white spaces at end of line"
#define VRNA_INPUT_NO_REST 512U |
Input switch for read_record(): "do not fill rest array"
#define VRNA_INPUT_NO_SPAN 1024U |
Input switch for read_record(): "never allow data to span more than one line"
#define VRNA_INPUT_NOSKIP_BLANK_LINES 2048U |
Input switch for read_record(): "do not skip empty lines"
#define VRNA_INPUT_BLANK_LINE 4096U |
Output flag for read_record(): "read an empty line"
#define VRNA_INPUT_NOSKIP_COMMENTS 128U |
Input switch for get_input_line(): "do not skip comment lines"
#define VRNA_INPUT_COMMENT 8192U |
Output flag for read_record(): "read a comment"
#define VRNA_CONSTRAINT_PIPE 1U |
pipe sign '|' switch for structure constraints (paired with another base)
#define VRNA_CONSTRAINT_DOT 2U |
dot '.' switch for structure constraints (no constraint at all)
#define VRNA_CONSTRAINT_X 4U |
'x' switch for structure constraint (base must not pair)
#define VRNA_CONSTRAINT_ANG_BRACK 8U |
angle brackets '<', '>' switch for structure constraint (paired downstream/upstream)
#define VRNA_CONSTRAINT_RND_BRACK 16U |
round brackets '(',')' switch for structure constraint (base i pairs base j)
#define VRNA_CONSTRAINT_MULTILINE 32U |
constraint may span over several lines
#define VRNA_CONSTRAINT_NO_HEADER 64U |
do not print the header information line
#define VRNA_CONSTRAINT_ALL 128U |
placeholder for all constraining characters
#define VRNA_CONSTRAINT_G 256U |
'+' switch for structure constraint (base is involved in a gquad)
#define VRNA_OPTION_MULTILINE 32U |
Tell a function that an input is assumed to span several lines if used as input-option. A function might also return this state, telling that it has read data from multiple lines.
#define MIN2 | ( | A, | |
B | |||
) | ((A) < (B) ? (A) : (B)) |
Get the minimum of two comparable values
#define MAX2 | ( | A, | |
B | |||
) | ((A) > (B) ? (A) : (B)) |
Get the maximum of two comparable values
#define XSTR | ( | s | ) | STR(s) |
Stringify a macro after expansion
#define STR | ( | s | ) | #s |
Stringify a macro argument
#define FILENAME_MAX_LENGTH 80 |
Maximum length of filenames that are generated by our programs.
This definition should be used throughout the complete ViennaRNA package wherever a static array holding filenames of output files is declared.
#define FILENAME_ID_LENGTH 42 |
Maximum length of id taken from fasta header for filename generation.
this has to be smaller than FILENAME_MAX_LENGTH since in most cases, some suffix will be appended to the ID
void* space | ( | unsigned | size | ) |
Allocate space safely.
size | The size of the memory to be allocated in bytes |
void* xrealloc | ( | void * | p, |
unsigned | size | ||
) |
Reallocate space safely.
p | A pointer to the memory region to be reallocated |
size | The size of the memory to be allocated in bytes |
void nrerror | ( | const char | message[] | ) |
Die with an error message.
message | The error message to be printed before exiting with 'FAILURE' |
void warn_user | ( | const char | message[] | ) |
Print a warning message.
Print a warning message to stderr
message | The warning message |
double urn | ( | void | ) |
get a random number from [0..1]
int int_urn | ( | int | from, |
int | to | ||
) |
Generates a pseudo random integer in a specified range.
from | The first number in range |
to | The last number in range |
char* time_stamp | ( | void | ) |
Get a timestamp.
Returns a string containing the current date in the format
Fri Mar 19 21:10:57 1993
char* random_string | ( | int | l, |
const char | symbols[] | ||
) |
Create a random string using characters from a specified symbol set.
l | The length of the sequence |
symbols | The symbol set |
int hamming | ( | const char * | s1, |
const char * | s2 | ||
) |
Calculate hamming distance between two sequences.
Calculate the number of positions in which the two sequences s1 and s2 differ.
s1 | The first sequence |
s2 | The second sequence |
int hamming_bound | ( | const char * | s1, |
const char * | s2, | ||
int | n | ||
) |
Calculate hamming distance between two sequences up to a specified length.
This function is similar to hamming() but instead of comparing both sequences up to their actual length only the first 'n' characters are taken into account
s1 | The first sequence |
s2 | The second sequence |
char* get_line | ( | FILE * | fp | ) |
Read a line of arbitrary length from a stream.
Returns a pointer to the resulting string. The necessary memory is allocated and should be released using free() when the string is no longer needed.
fp | A file pointer to the stream where the function should read from |
unsigned int get_input_line | ( | char ** | string, |
unsigned int | options | ||
) |
Retrieve a line from 'stdin' safely while skipping comment characters and other features. This function returns the type of input it has read if recognized. An option argument allows switching between different reading modes.
Currently available options are:
VRNA_INPUT_NOPRINT_COMMENTS, VRNA_INPUT_NOSKIP_COMMENTS, VRNA_INPUT_NOELIM_WS_SUFFIX
pass a collection of options as one value like this:
get_input_line(string, option_1 | option_2 | option_n)
If the function recognizes the type of input, it will report it in the return value. It also reports if a user defined 'quit' command (@-sign on 'stdin') was given. Possible return values are:
VRNA_INPUT_FASTA_HEADER, VRNA_INPUT_ERROR, VRNA_INPUT_MISC, VRNA_INPUT_QUIT
string | A pointer to the character array that contains the line read |
options | A collection of options for switching the function's behavior |
unsigned int read_record | ( | char ** | header, |
char ** | sequence, | ||
char *** | rest, | ||
unsigned int | options | ||
) |
Get a data record from stdin.
This function may be used to obtain complete datasets from stdin. A dataset is always defined to contain at least a sequence. If data on stdin starts with a fasta header, i.e. a line like
>some header info
then read_record() will assume that the sequence that follows the header may span over several lines. To disable this behavior and to assign a single line to the argument 'sequence' one can pass VRNA_INPUT_NO_SPAN in the 'options' argument. If no fasta header is read in the beginning of a data block, a sequence must not span over multiple lines!
Unless the options VRNA_INPUT_NOSKIP_COMMENTS or VRNA_INPUT_NOSKIP_BLANK_LINES are passed, a sequence may be interrupted by lines starting with a comment character or empty lines.
A sequence is regarded as completely read if it was either assumed to not span over multiple lines, a secondary structure or structure constraint follows the sequence on the next line or a new header marks the beginning of a new sequence...
All lines following the sequence (this includes comments) and not initiating a new dataset are available through the line-array 'rest'. Here one can usually find the structure constraint or other information belonging to the current dataset. Filling of 'rest' may be prevented by passing VRNA_INPUT_NO_REST to the options argument.
The main purpose of this function is to be able to easily parse blocks of data from stdin in the header of a loop where all calculations for the appropriate data are done inside the loop. The loop may then be left on certain return values, e.g.:
char *id, *seq, **rest; int i; while(!(read_record(&id, &seq, &rest, 0) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ if(id) printf("%s\n", id); printf("%s\n", seq); if(rest) for(i=0;rest[i];i++) printf("%s\n", rest[i]); }
In the example above, the while loop will be terminated when read_record() returns either an error or a user initiated quit request. As long as data is read from stdin, the id is printed if it is available for the current block of data. The sequence will be printed in any case and if some more lines belong to the current block of data each line will be printed as well.
header | A pointer which will be set such that it points to the header of the record |
sequence | A pointer which will be set such that it points to the sequence of the record |
rest | A pointer which will be set such that it points to an array of lines which also belong to the record |
options | Some options which may be passed to alter the behavior of the function, use 0 for no options |
char* pack_structure | ( | const char * | struc | ) |
Pack secondary structure, 5:1 compression using base 3 encoding.
Returns a binary string encoding of the secondary structure using a 5:1 compression scheme. The string is NULL terminated and can therefore be used with standard string functions such as strcmp(). Useful for programs that need to keep many structures in memory.
struc | The secondary structure in dot-bracket notation |
char* unpack_structure | ( | const char * | packed | ) |
Unpack secondary structure previously packed with pack_structure()
Translate a compressed binary string produced by pack_structure() back into the familiar dot-bracket notation.
packed | The binary encoded packed secondary structure |
short* make_pair_table | ( | const char * | structure | ) |
Create a pair table of a secondary structure.
Returns a newly allocated table, such that table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure.
structure | The secondary structure in dot-bracket notation |
short* copy_pair_table | ( | const short * | pt | ) |
Get an exact copy of a pair table.
pt | The pair table to be copied |
short* alimake_pair_table | ( | const char * | structure | ) |
Pair table for snoop align
short* make_pair_table_snoop | ( | const char * | structure | ) |
returns a newly allocated table, such that: table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure. The special pseudoknotted H/ACA-mRNA structure is taken into account.
int* make_loop_index_pt | ( | short * | pt | ) |
Compute the "base pair" distance between two secondary structures s1 and s2.
The sequences should have the same length. dist = number of base pairs in one structure but not in the other; this is the same as the edit distance with open-pair/close-pair as move-set.
str1 | First structure in dot-bracket notation |
str2 | Second structure in dot-bracket notation |
void print_tty_input_seq | ( | void | ) |
Print a line to stdout that asks for an input sequence.
There will also be a ruler (scale line) printed that helps orientation of the sequence positions
void print_tty_input_seq_str | ( | const char * | s | ) |
Print a line with a user defined string and a ruler to stdout.
(usually this is used to ask for user input) There will also be a ruler (scale line) printed that helps orientation of the sequence positions
s | A user defined string that will be printed to stdout |
void print_tty_constraint | ( | unsigned int | option | ) |
Print structure constraint characters to stdout. (constraint support is specified by option parameter)
Currently available options are:
VRNA_CONSTRAINT_PIPE (paired with another base)
VRNA_CONSTRAINT_DOT (no constraint at all)
VRNA_CONSTRAINT_X (base must not pair)
VRNA_CONSTRAINT_ANG_BRACK (paired downstream/upstream)
VRNA_CONSTRAINT_RND_BRACK (base i pairs base j)
pass a collection of options as one value like this:
print_tty_constraint(option_1 | option_2 | option_n)
option | Option switch that tells which constraint help will be printed |
void str_DNA2RNA | ( | char * | sequence | ) |
Convert a DNA input sequence to RNA alphabet.
This function substitutes T and t with U and u, respectively.
sequence | The sequence to be converted |
void str_uppercase | ( | char * | sequence | ) |
Convert an input sequence to uppercase.
sequence | The sequence to be converted |
int* get_iindx | ( | unsigned int | length | ) |
Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function related functions.
Access of a position "(i,j)" is then accomplished by using
(i,j) ~ iindx[i]-j
This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout the ViennaRNA Package
Consult the implemented code to find out about the mapping formula ;)
length | The length of the RNA sequence |
int* get_indx | ( | unsigned int | length | ) |
Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions.
Access of a position "(i,j)" is then accomplished by using
(i,j) ~ indx[j]+i
This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout the ViennaRNA Package
Consult the implemented code to find out about the mapping formula ;)
length | The length of the RNA sequence |
void constrain_ptypes | ( | const char * | constraint, |
unsigned int | length, | ||
char * | ptype, | ||
int * | BP, | ||
int | min_loop_size, | ||
unsigned int | idx_type | ||
) |
Insert constraining pair types according to constraint structure string.
constraint | The structure constraint string |
length | The actual length of the sequence (constraint may be shorter) |
ptype | A pointer to the basepair type array |
min_loop_size | The minimal loop size (usually TURN ) |
idx_type | Define the access type for base pair type array (0 = indx, 1 = iindx) |