1
2
3
4
5
6
7
8 """Alphabets used in Seq objects etc. to declare sequence type and letters.
9
10 This is used by sequences which contain a finite number of similar words.
11 """
12
14 size = None
15 letters = None
16
18 return self.__class__.__name__ + "()"
19
21 """Does this alphabet 'contain' the other (OBSOLETE?).
22
23 Returns a boolean. This relies on the Alphabet subclassing
24 hierarchy only, and does not check the letters property.
25 This isn't ideal, and doesn't seem to work as intended
26 with the AlphabetEncoder classes."""
27 return isinstance(other, self.__class__)
28
44
46 """Return an upper case variant of the current alphabet (PRIVATE)."""
47 if not self.letters or self.letters==self.letters.upper():
48
49 return self
50 else:
51
52 return self._case_less()
53
55 """Return a lower case variant of the current alphabet (PRIVATE)."""
56 if not self.letters or self.letters==self.letters.lower():
57
58 return self
59 else:
60
61 return self._case_less()
62
63 generic_alphabet = Alphabet()
64
68
69 single_letter_alphabet = SingleLetterAlphabet()
70
71
72
75
76 generic_protein = ProteinAlphabet()
77
78
81
82 generic_nucleotide = NucleotideAlphabet()
83
86
87 generic_dna = DNAAlphabet()
88
89
90
91
94
95 generic_rna = RNAAlphabet()
96
97
98
99
100
103
105 size = 3
106 letters = [
107 "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
108 "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
109 "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
110 ]
111
112
113
114
115
117 - def __init__(self, alphabet, new_letters):
125 if key[:2] == "__" and key[-2:] == "__":
126 raise AttributeError(key)
127 return getattr(self.alphabet, key)
128
130 return "%s(%r, %r)" % (self.__class__.__name__, self.alphabet,
131 self.new_letters)
132
134 """Does this alphabet 'contain' the other (OBSOLETE?).
135
136 This isn't implemented for the base AlphabetEncoder,
137 which will always return 0 (False)."""
138 return 0
139
143
147
148
149 -class Gapped(AlphabetEncoder):
150 - def __init__(self, alphabet, gap_char = "-"):
153
155 """Does this alphabet 'contain' the other (OBSOLETE?).
156
157 Returns a boolean. This relies on the Alphabet subclassing
158 hierarchy, and attempts to check the gap character. This fails
159 if the other alphabet does not have a gap character!
160 """
161 return other.gap_char == self.gap_char and \
162 self.alphabet.contains(other.alphabet)
163
165 """Return an upper case variant of the current alphabet (PRIVATE)."""
166 return Gapped(self.alphabet._upper(), self.gap_char.upper())
167
169 """Return a lower case variant of the current alphabet (PRIVATE)."""
170 return Gapped(self.alphabet._lower(), self.gap_char.lower())
171
172
174 - def __init__(self, alphabet, stop_symbol = "*"):
177
179 x = cmp(self.alphabet, other.alphabet)
180 if x == 0:
181 return cmp(self.stop_symbol, other.stop_symbol)
182 return x
183
185 """Does this alphabet 'contain' the other (OBSOLETE?).
186
187 Returns a boolean. This relies on the Alphabet subclassing
188 hierarchy, and attempts to check the stop symbol. This fails
189 if the other alphabet does not have a stop symbol!
190 """
191 return other.stop_symbol == self.stop_symbol and \
192 self.alphabet.contains(other.alphabet)
193
197
201
202
204 """Returns the non-gapped non-stop-codon Alphabet object (PRIVATE)."""
205 a = alphabet
206 while isinstance(a, AlphabetEncoder):
207 a = a.alphabet
208 assert isinstance(a, Alphabet), \
209 "Invalid alphabet found, %s" % repr(a)
210 return a
211
225
227 """Returns a common but often generic base alphabet object (PRIVATE).
228
229 This throws away any AlphabetEncoder information, e.g. Gapped alphabets.
230
231 Note that DNA+RNA -> Nucleotide, and Nucleotide+Protein -> generic single
232 letter. These DO NOT raise an exception!"""
233 common = None
234 for alpha in alphabets:
235 a = _get_base_alphabet(alpha)
236 if common is None:
237 common = a
238 elif common == a:
239 pass
240 elif isinstance(a, common.__class__):
241 pass
242 elif isinstance(common, a.__class__):
243 common = a
244 elif isinstance(a, NucleotideAlphabet) \
245 and isinstance(common, NucleotideAlphabet):
246
247 common = generic_nucleotide
248 elif isinstance(a, SingleLetterAlphabet) \
249 and isinstance(common, SingleLetterAlphabet):
250
251 common = single_letter_alphabet
252 else:
253
254 return generic_alphabet
255 if common is None:
256
257 return generic_alphabet
258 return common
259
261 """Returns a common but often generic alphabet object (PRIVATE).
262
263 Note that DNA+RNA -> Nucleotide, and Nucleotide+Protein -> generic single
264 letter. These DO NOT raise an exception!
265
266 This is aware of Gapped and HasStopCodon and new letters added by
267 other AlphabetEncoders. This WILL raise an exception if more than
268 one gap character or stop symbol is present."""
269 base = _consensus_base_alphabet(alphabets)
270 gap = None
271 stop = None
272 new_letters = ""
273 for alpha in alphabets:
274
275 if not hasattr(alpha, "gap_char"):
276 pass
277 elif gap is None:
278 gap = alpha.gap_char
279 elif gap == alpha.gap_char:
280 pass
281 else:
282 raise ValueError("More than one gap character present")
283
284 if not hasattr(alpha, "stop_symbol"):
285 pass
286 elif stop is None:
287 stop = alpha.stop_symbol
288 elif stop == alpha.stop_symbol:
289 pass
290 else:
291 raise ValueError("More than one stop symbol present")
292
293 if hasattr(alpha, "new_letters"):
294 for letter in alpha.new_letters:
295 if letter not in new_letters \
296 and letter != gap and letter != stop:
297 new_letters += letter
298
299 alpha = base
300 if new_letters:
301 alpha = AlphabetEncoder(alpha, new_letters)
302 if gap:
303 alpha = Gapped(alpha, gap_char=gap)
304 if stop:
305 alpha = HasStopCodon(alpha, stop_symbol=stop)
306 return alpha
307
309 """Returns True except for DNA+RNA or Nucleotide+Protein (PRIVATE).
310
311 This relies on the Alphabet subclassing hierarchy. It does not
312 check things like gap characters or stop symbols."""
313 dna, rna, nucl, protein = False, False, False, False
314 for alpha in alphabets:
315 a = _get_base_alphabet(alpha)
316 if isinstance(a, DNAAlphabet):
317 dna = True
318 nucl = True
319 if rna or protein : return False
320 elif isinstance(a, RNAAlphabet):
321 rna = True
322 nucl = True
323 if dna or protein : return False
324 elif isinstance(a, NucleotideAlphabet):
325 nucl = True
326 if protein : return False
327 elif isinstance(a, ProteinAlphabet):
328 protein = True
329 if nucl : return False
330 return True
331