Package Bio :: Module utils
[hide private]
[frames] | [no frames]

Source Code for Module Bio.utils

  1  # Copyright 2000 by Andrew Dalke. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Miscellaneous functions for dealing with sequences (obsolete?).""" 
  8   
  9  import Seq 
 10  import Alphabet 
 11   
 12  from PropertyManager import default_manager 
 13   
def ungap(seq):
    """Return the ungapped version of a gap-encoded sequence (deprecated).

    Calls warnings.warn with a DeprecationWarning whose text points users
    at the ungap method of the Seq object instead.

    seq: an object exposing ``gap_char``, ``data`` and ``alphabet.alphabet``.

    Returns a new Seq.Seq holding the elements of ``seq.data`` that differ
    from ``seq.gap_char``, with ``seq.alphabet.alphabet`` as its alphabet.
    """
    #TODO - Fix this?  It currently assumes the outmost AlphabetEncoder
    #is for the gap.  Consider HasStopCodon(Gapped(Protein())) as a test case.
    import warnings
    warnings.warn("Bio.utils.ungap() has been deprecated, and we"
                  " intend to remove it in a future release of Biopython."
                  " Instead, please use the ungap method of the Seq object "
                  " (added in Biopython 1.53).",
                  DeprecationWarning)
    gap_symbol = seq.gap_char
    kept = [symbol for symbol in seq.data if symbol != gap_symbol]
    return Seq.Seq("".join(kept), seq.alphabet.alphabet)
30
def verify_alphabet(seq):
    """Check that a sequence only uses letters from its own alphabet.

    seq: an object exposing ``alphabet.letters`` (the permitted letters)
         and ``data`` (the elements to check).

    Returns 1 when every element of ``seq.data`` occurs in
    ``seq.alphabet.letters`` (including when the data is empty), and 0 as
    soon as an element is found that does not.
    """
    permitted = set(seq.alphabet.letters)
    for symbol in seq.data:
        if symbol not in permitted:
            return 0
    return 1
41
def count_monomers(seq):
    """Count the occurrences of every alphabet letter in a sequence.

    seq: an object exposing ``alphabet.letters`` (iterable of letters) and
         ``data`` (an object with a ``count(letter)`` method).

    Returns a dictionary with one key per letter of
    ``seq.alphabet.letters``; each value is ``seq.data.count(letter)``, so
    the key set is determined by the alphabet, not by the data.
    """
    # The data object's own count() method is used on purpose:
    # string.count(s, c) raised an AttributeError here
    # (bugfix by Iddo Friedberg, 16 Mar. 04).
    return dict((letter, seq.data.count(letter))
                for letter in seq.alphabet.letters)
49
def percent_monomers(seq):
    """Express the per-letter counts of a sequence as percentages.

    seq: an object supporting ``len()`` that is also accepted by
         count_monomers().

    Returns a dictionary with the same keys as count_monomers(seq); each
    value is that letter's count multiplied by 100 and divided by
    ``len(seq)``.

    Raises ZeroDivisionError when ``len(seq)`` is 0 and count_monomers()
    returned at least one letter.
    """
    length = len(seq)
    counts = count_monomers(seq)
    return dict((monomer, counts[monomer] * 100. / length)
                for monomer in counts)
57
def sum(seq, table, zero=0.0):
    """Add up the table entries looked up for each element of a sequence.

    Note that this function shadows the builtin ``sum`` inside this module.

    seq: either an object with a ``data`` attribute (which is then
         iterated) or any iterable, which is iterated directly.
    table: mapping indexed by the elements of the sequence.
    zero: starting value of the accumulation; returned unchanged when the
          sequence is empty.

    Returns ``zero`` plus ``table[element]`` for every element, added in
    iteration order.  Raises whatever ``table[element]`` raises for an
    element it does not contain (KeyError for a dictionary).
    """
    symbols = getattr(seq, "data", seq)
    running = zero
    for symbol in symbols:
        # Deliberately not "+=": a mutable ``zero`` must not be modified.
        running = running + table[symbol]
    return running
63 64 # For ranged addition
def sum_2ple(seq, table, zero=(0.0, 0.0)):
    """Add up pairs of table entries looked up for each sequence element.

    seq: either an object with a ``data`` attribute (which is then
         iterated) or any iterable, which is iterated directly.
    table: mapping from each element to a 2-item sequence.
    zero: pair of starting values for the two running totals.

    Returns a tuple ``(x, y)`` where x is the first starting value plus
    all first items, and y the second starting value plus all second
    items, added in iteration order.
    """
    first, second = zero
    for symbol in getattr(seq, "data", seq):
        delta_first, delta_second = table[symbol]
        first, second = first + delta_first, second + delta_second
    return (first, second)
73
def total_weight(seq, weight_table=None):
    """Return the summed weight of all elements of a sequence.

    seq: the sequence to weigh; its ``alphabet`` attribute is only read
         when no table is supplied.
    weight_table: mapping from sequence element to weight.  When None,
         the table is obtained from
         ``default_manager.resolve(seq.alphabet, "weight_table")``.

    Returns the result of this module's sum() (not the builtin) applied to
    the sequence and the chosen table.
    """
    table = weight_table
    if table is None:
        table = default_manager.resolve(seq.alphabet, "weight_table")
    return sum(seq, table)
78
def total_weight_range(seq, weight_table=None):
    """Return the pairwise-summed weight range of a sequence.

    seq: the sequence to weigh; its ``alphabet`` attribute is only read
         when no table is supplied.
    weight_table: mapping from sequence element to a 2-item entry.  When
         None, the table is obtained from
         ``default_manager.resolve(seq.alphabet, "weight_range_table")``.

    Returns the 2-tuple produced by sum_2ple() for the sequence and the
    chosen table.
    """
    table = weight_table
    if table is None:
        table = default_manager.resolve(seq.alphabet, "weight_range_table")
    return sum_2ple(seq, table)
83
def reduce_sequence(seq, reduction_table, new_alphabet=None):
    """Return an amino-acid sequence rewritten in a reduced alphabet.

    Each letter of the input is replaced by its entry in the
    letter-translation table passed.  Some "standard" tables are in
    Alphabet.Reduced.

    seq: a Seq.Seq type sequence
    reduction_table: a dictionary whose keys are the "from" alphabet, and
        values are the "to" alphabet (expected to be plain strings)
    new_alphabet: alphabet object to attach to the returned sequence.
        When None, a private copy of Alphabet.single_letter_alphabet is
        used, with ``letters`` set to the concatenated keys of
        reduction_table and ``size`` set to the length of that string.

    Returns a new Seq.Seq holding the translated letters.
    Raises KeyError if seq contains a letter that is not a key of
    reduction_table.
    """
    if new_alphabet is None:
        import copy
        # Work on a copy: Alphabet.single_letter_alphabet is a shared
        # module-level instance, so assigning letters/size on it directly
        # would change the generic alphabet for every other user.
        new_alphabet = copy.copy(Alphabet.single_letter_alphabet)
        # NOTE(review): the letters are taken from the table's keys (the
        # "from" alphabet) although the result consists of its values;
        # kept as-is for compatibility - confirm which was intended.
        new_alphabet.letters = "".join(reduction_table)
        new_alphabet.size = len(new_alphabet.letters)
    # Translate in a single pass rather than growing a Seq letter by
    # letter, which copied the whole sequence on every step.
    reduced = "".join([reduction_table[letter] for letter in seq])
    return Seq.Seq(reduced, new_alphabet)
101