|
def | FileType (pathname) |
| Low level compression determination. More...
|
|
def | OpenFile4Reading (pathname, mode="r") |
| Opens file for reading. More...
|
|
def | TypeConvert (value, toType, warn=True) |
| Data conversion with error logging. More...
|
|
def | GetGRChVersion (dver) |
| Get the human Genome Reference Consortium genome build version used by Ensembl. More...
|
|
def | SplitGenomicCoord (coord, dver, warn=True, retainBuild=False) |
| Splits a standard string for genomic location. More...
|
|
def | SplitSNPGenomicCoord (coord, dver, warn=True) |
| Splits genomic coordinates from a SNP. More...
|
|
def | GetGenomicCoordinateString (build, chromosome, startPos, endPos=None, warn=True) |
| Create the Genomic Coordinate string in the format Build:Chromosome:Start-End or Build:Chromosome:Start (in the case where start and end are equal). More...
|
|
def | ComplementaryDNA (seq, reverse=False) |
| Compute the complementary DNA sequence of a given sequence string. More...
|
|
def | WCheckInputColumns (writer, cols, line, lineNr, permissiveness, warn=True, exitCode=1, index0=True) |
| Check applicability of column indices and apply permissiveness mode. More...
|
|
def | CheckInputColumns (cols, nrCols, lineNr, stop, warn=True, exitCode=1, index0=True, lastLen=None) |
| Check applicability of column indices. More...
|
|
def | CheckInputLengths (len, lastLen, lineNr, warn=True) |
| Check the length of an input line by comparing it to the last line. More...
|
|
def | ConvertInputIndices (cols, warn=True, exit=False) |
| Convert input column(s) to table indices. More...
|
|
Miscellaneous functions useful all over the project.
- Author
- Chris X. Weichenberger
- Date
- 2012-02-17
- Copyright
- See LICENSE file.
def common.Misc.CheckInputLengths |
( |
|
len, |
|
|
|
lastLen, |
|
|
|
lineNr, |
|
|
|
warn = True |
|
) |
| |
Check the length of an input line by comparing it to the last line.
@param len Length of the current input line
@param lastLen Length of the last input line
@param lineNr Line number of the input file, used for reporting
@param warn If set to @c False, warnings will not be issued.
@return @c True, if the lengths are the same. @c False, if the lengths are
different.
def common.Misc.ComplementaryDNA |
( |
|
seq, |
|
|
|
reverse = False |
|
) |
| |
Compute the complementary DNA sequence of a given sequence string.
For a given sequence of DNA bases, compute the case-sensitive sequence on
the reverse strand; gap characters are also allowed.
@param seq DNA sequence as a @c String, can be single character.
@param reverse If set to @c True, the string is also position-reversed.
@return Complementary DNA sequence.
def common.Misc.ConvertInputIndices |
( |
|
cols, |
|
|
|
warn = True , |
|
|
|
exit = False |
|
) |
| |
Convert input column(s) to table indices.
This function is meant for checking the validity of column indices passed
by the command line interface. A check for applicability of the indices in
the context of an input line is done by @ref CheckInputColumns and @ref
WCheckInputColumns.
@param cols Input columns as a List of Strings.
@param warn If set to @c False, prevent printing of error messages.
@param exit If set to @c True, the program execution stops with exit code
1.
@return flag,columns If the conversion was successful, @c flag is @c True
and @c columns holds the converted indices (starting from 0). If @c flag
is @c False, an empty list is returned, indicating a conversion error,
i.e. an input column was not an integer.
def common.Misc.FileType |
( |
|
pathname | ) |
|
Low level compression determination.
This function reliably determines whether the file is GZIP or COMPRESS
compressed, independently of its extension, by reading the magic number.
@param pathname Full path name of file.
@return FT_FAILED, FT_UNK, FT_COMPRESS, FT_GZIP, FT_BZIP2
def common.Misc.SplitGenomicCoord |
( |
|
coord, |
|
|
|
dver, |
|
|
|
warn = True , |
|
|
|
retainBuild = False |
|
) |
| |
Splits a standard string for genomic location.
The most generic location of a nucleotide in a genome in our system is given
by 'build:chromosome:position'. If @c build is missing, it is
replaced with the current Ensembl build version string.
@param coord Genomic location @c String.
@param dver Data version @c String for config file lookup.
@param warn If set to @c False, prevent printing of warnings.
@param retainBuild If set to @c True, the function returns the exact
genomic build given, even if it is not equal to the default genome build
version.
@return (build, chromosome, position) as (@c String, @c String, @c Int),
all of them @c None if an error occurred.
def common.Misc.SplitSNPGenomicCoord |
( |
|
coord, |
|
|
|
dver, |
|
|
|
warn = True |
|
) |
| |
Splits genomic coordinates from a SNP.
A SNP position in the genome can either be a single base pair, or it can
also be a short interval, as insertions and deletions affect more than a
single base pair. For example, rs60709680 codes for an insertion and
is represented as GRCh37:8:67970310-67970311 (build, chromosome, fromPos,
toPos).
@param coord Genomic SNP location @c String.
@param dver Data version @c String for config file lookup.
@param warn If set to @c False, prevent printing of warnings.
@return (build, chromosome, fromPos, toPos) as
(@c String, @c String, @c Int, @c Int). All @c None if an error
occurred. It is guaranteed that @c fromPos <= @c toPos.
def common.Misc.TypeConvert |
( |
|
value, |
|
|
|
toType, |
|
|
|
warn = True |
|
) |
| |
Data conversion with error logging.
Converts between built-in Python data types and conditionally issues an
error message if the conversion failed.
@param value Value to be converted, can be any built-in data type.
@param toType Data type to convert to, e.g. @c int.
@param warn If set to @c False, prevent printing of warnings.
@return conv. If conversion failed, @c conv will be set to @c None,
otherwise it holds the converted @c value.
def common.Misc.WCheckInputColumns |
( |
|
writer, |
|
|
|
cols, |
|
|
|
line, |
|
|
|
lineNr, |
|
|
|
permissiveness, |
|
|
|
warn = True , |
|
|
|
exitCode = 1 , |
|
|
|
index0 = True |
|
) |
| |
Check applicability of column indices and apply permissiveness mode.
This function checks if the values in @c cols are valid indices for
accessing columns represented in a @c list in @c line. Depending on the
permissiveness model, a call to this function may terminate the
program!
@param writer A @ref cls.DefaultTable.CDefaultTableWriter instance.
@param cols A @c list of @c ints to be checked if they can be used as
indices for a line. For convenience, this can also be an @c int in case
of a single column.
@param line A @c list of @c strings, representing the line to be
checked for indexing validity.
@param lineNr An @c int, indicating the line number in the file. Used for
reporting errors/warnings.
@param permissiveness One of the three possible permissiveness models:
@c PERMISSIVENESS_ECHO, write the line using parameter @c writer;
@c PERMISSIVENESS_SKIP, do not write the line;
@c PERMISSIVENESS_STOP, stop processing and exit with an error code.
@param warn If set to @c False, warnings will not be issued.
@param exitCode If the @c permissiveness is set to @c PERMISSIVENESS_STOP,
the program exits with this exit code.
@param index0 If set to @c True, values in @c cols are interpreted as
Python indices starting with 0. If set to @c False, indices are
interpreted to be starting with 1.
@return @c True, if all checks were fulfilled and the values in @c cols
can be used for indexing into the list @c line. @c False, if one of the
values would produce an indexing error in @c line. If the permissiveness
model equals @c PERMISSIVENESS_STOP, the program has been terminated
and does not return! Otherwise, warning messages have been written
accordingly (@c warn flag), and the line has been written if
@c PERMISSIVENESS_ECHO was used.