Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.CodonTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Codon tables based on those from the NCBI. 
  5   
  6  These tables are based on parsing the NCBI file: 
  7  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 
  8   
  9  Last updated for Version 3.9 
 10  """ 
 11   
 12  from Bio import Alphabet 
 13  from Bio.Alphabet import IUPAC 
 14  from Bio.Data import IUPACData 
 15   
# Lookup tables for the codon tables defined in this module, keyed either by
# NCBI table id or by table name.  register_ncbi_table() fills the
# unambiguous_* and generic_* dictionaries; the ambiguous_generic_*
# dictionaries are filled near the end of the module.
unambiguous_dna_by_name = {}
unambiguous_dna_by_id = {}
unambiguous_rna_by_name = {}
unambiguous_rna_by_id = {}
generic_by_name = {} # unambiguous DNA or RNA
generic_by_id = {} # unambiguous DNA or RNA
ambiguous_generic_by_name = {} # ambiguous DNA or RNA
ambiguous_generic_by_id = {} # ambiguous DNA or RNA

# standard IUPAC unambiguous codons
# (both are set by register_ncbi_table() when the table with id 1 is
# registered)
standard_dna_table = None
standard_rna_table = None

# In the future, the back_table could return a statistically
# appropriate distribution of codons, so do not cache the results of
# back_table lookups!

class TranslationError(Exception):
    """Raised when a codon has no single valid translation.

    list_possible_proteins() raises it for an ambiguous codon that codes
    for both amino acids and stop codons, and AmbiguousForwardTable raises
    it when the possible amino acids share no ambiguous residue code.
    """
35
class CodonTable:
    """A codon table: the mapping between codons and amino acids.

    Attributes:
     - nucleotide_alphabet - alphabet object for the codon letters
     - protein_alphabet - alphabet object for the translated residues
     - forward_table - maps codon -> amino acid; stop codons are absent
     - back_table - maps amino acid -> codon, for back translations
     - start_codons, stop_codons - lists of codon strings
    """
    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = Alphabet.generic_protein

    forward_table = {}    # only includes codons which actually code
    back_table = {}       # for back translations
    start_codons = []
    stop_codons = []

    # Not always called from derived classes!
    def __init__(self, nucleotide_alphabet = nucleotide_alphabet,
                 protein_alphabet = protein_alphabet,
                 forward_table = forward_table, back_table = back_table,
                 start_codons = start_codons, stop_codons = stop_codons):
        """Store the six table components on the instance.

        Every default is the class-level object of the same name, so tables
        created without arguments share those (mutable) objects.
        """
        # NOTE(review): the constructor body was elided in the listing this
        # block was recovered from.  These assignments are reconstructed from
        # the way AmbiguousCodonTable passes its arguments positionally and
        # then reads forward_table / stop_codons back -- confirm against the
        # upstream file.
        self.nucleotide_alphabet = nucleotide_alphabet
        self.protein_alphabet = protein_alphabet
        self.forward_table = forward_table
        self.back_table = back_table
        self.start_codons = start_codons
        self.stop_codons = stop_codons

    def __str__(self):
        """Returns a simple text representation of the codon table

        e.g.
        >>> import Bio.Data.CodonTable
        >>> print(Bio.Data.CodonTable.standard_dna_table)
        >>> print(Bio.Data.CodonTable.generic_by_id[1])
        """
        # The base class defines neither "id" nor "names" (only the NCBI
        # tables set them), so read both defensively instead of letting
        # str() fail with AttributeError on a plain CodonTable.
        table_id = getattr(self, "id", None)
        names = getattr(self, "names", None)

        if table_id:
            answer = "Table %i" % table_id
        else:
            answer = "Table ID unknown"
        if names:
            # names may contain None (a table without an alternative name)
            answer += " " + ", ".join([n for n in names if n])

        #Use the main four letters (and the conventional ordering)
        #even for ambiguous tables
        letters = self.nucleotide_alphabet.letters
        if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
        or (letters is not None and "T" in letters):
            letters = "TCAG"
        else:
            #Should be either RNA or generic nucleotides,
            #e.g. Bio.Data.CodonTable.generic_by_id[1]
            letters = "UCAG"

        #Build the table...
        # Every column is nine characters wide, as fixed by the rule of nine
        # dashes: " %s" (codon) plus a five character amino acid cell.  The
        # header cell puts the second base over the middle letter of the
        # codons listed below it.
        rule = "\n--+" + "+".join(["---------"] * len(letters)) + "+--"
        answer += "\n\n  |" \
                  + "|".join(["  %s      " % c2 for c2 in letters]) + "|"
        answer += rule
        for c1 in letters:
            for c3 in letters:
                line = c1 + " |"
                for c2 in letters:
                    codon = c1 + c2 + c3
                    line += " %s" % codon
                    if codon in self.stop_codons:
                        line += " Stop|"
                        continue
                    try:
                        amino = self.forward_table[codon]
                    except (KeyError, TranslationError):
                        # not in the table, or no unique translation
                        amino = "?"
                    if codon in self.start_codons:
                        line += " %s(s)|" % amino
                    else:
                        line += " %s   |" % amino
                answer += "\n" + line + " " + c3
            answer += rule
        return answer
112
def make_back_table(table, default_stop_codon):
    """Invert a forward table into an amino acid -> codon dictionary.

    Arguments:
     - table - dictionary mapping codon -> amino acid
     - default_stop_codon - codon stored under the key None

    ONLY RETURNS A SINGLE CODON per amino acid: the codons are visited in
    sorted order and later ones overwrite earlier ones, so the codon that
    sorts last wins.
    """
    # Do the sort so changes in the hash implementation won't affect
    # the result when one amino acid is coded by more than one codon.
    # sorted() is used because keys() is not always a sortable list.
    back_table = {}
    for codon in sorted(table):
        back_table[table[codon]] = codon
    back_table[None] = default_stop_codon
    return back_table
123 124
class NCBICodonTable(CodonTable):
    """Codon table built from one NCBI genetic code entry.

    Uses generic nucleotides, so it can hold DNA and RNA codons together.
    """
    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back table.

        The first entry of stop_codons becomes the back translation of a
        stop (the None key of back_table).  CodonTable.__init__ is not
        called.
        """
        self.id, self.names = id, names
        self.forward_table = table
        self.back_table = make_back_table(table, stop_codons[0])
        self.start_codons, self.stop_codons = start_codons, stop_codons
136 137
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table whose codons use the unambiguous DNA alphabet."""
    nucleotide_alphabet = IUPAC.unambiguous_dna
140
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table whose codons use the unambiguous RNA alphabet."""
    nucleotide_alphabet = IUPAC.unambiguous_rna
143 144 145
def _rna_and_generic_codons(dna_codons):
    """Return (rna, generic) codon lists derived from a list of DNA codons.

    The RNA list has every T replaced by U.  The generic list holds each DNA
    codon followed by its RNA spelling, but only when the two differ, so a
    codon without a T (e.g. 'AGA') is listed once rather than twice.
    """
    rna_codons = []
    generic_codons = []
    for dna_codon in dna_codons:
        rna_codon = dna_codon.replace("T", "U")
        generic_codons.append(dna_codon)
        if rna_codon != dna_codon:
            generic_codons.append(rna_codon)
        rna_codons.append(rna_codon)
    return rna_codons, generic_codons


def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Create the DNA, RNA and generic tables for one NCBI entry and file them.

    Arguments:
     - name - NCBI name; may hold several names joined by "; " or " and "
     - alt_name - alternative name (e.g. 'SGC0') or None
     - id - NCBI table number
     - table - dictionary mapping DNA codon -> amino acid (no stop codons)
     - start_codons, stop_codons - lists of DNA codons

    The three tables are stored in the unambiguous_dna_*, unambiguous_rna_*
    and generic_* dictionaries under the id and under every name.  The table
    with id 1 also becomes standard_dna_table / standard_rna_table.
    """
    global standard_dna_table, standard_rna_table

    #In most cases names are divided by "; ", however there is also
    #'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
    names = [x.strip() for x in name.replace(" and ","; ").split("; ")]

    # replace all T's with U's for the RNA tables; the generic table
    # accepts both spellings of every codon
    rna_table = {}
    generic_table = {}
    for codon, val in table.items():
        rna_codon = codon.replace("T", "U")
        generic_table[codon] = val
        generic_table[rna_codon] = val
        rna_table[rna_codon] = val
    rna_start_codons, generic_start_codons = \
        _rna_and_generic_codons(start_codons)
    rna_stop_codons, generic_stop_codons = \
        _rna_and_generic_codons(stop_codons)

    # Each table gets its own names list.  alt_name is included even when it
    # is None, so the list may end with None.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)
    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)
    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic

    # Only real names become lookup keys
    if alt_name is not None:
        names.append(alt_name)

    for alias in names:
        unambiguous_dna_by_name[alias] = dna
        unambiguous_rna_by_name[alias] = rna
        generic_by_name[alias] = generic
198 199 ### These tables created from the data file 200 ### ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 201 ### using the following: 202 ##import re 203 ##for line in open("gc.prt").readlines(): 204 ## if line[:2] == " {": 205 ## names = [] 206 ## id = None 207 ## aa = None 208 ## start = None 209 ## bases = [] 210 ## elif line[:6] == " name": 211 ## names.append(re.search('"([^"]*)"', line).group(1)) 212 ## elif line[:8] == " name": 213 ## names.append(re.search('"(.*)$', line).group(1)) 214 ## elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n': 215 ## names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma" 216 ## elif line[:4] == " id": 217 ## id = int(re.search('(\d+)', line).group(1)) 218 ## elif line[:10] == " ncbieaa ": 219 ## aa = line[12:12+64] 220 ## elif line[:10] == " sncbieaa": 221 ## start = line[12:12+64] 222 ## elif line[:9] == " -- Base": 223 ## bases.append(line[12:12+64]) 224 ## elif line[:2] == " }": 225 ## assert names != [] and id is not None and aa is not None 226 ## assert start is not None and bases != [] 227 ## if len(names) == 1: 228 ## names.append(None) 229 ## print "register_ncbi_table(name = %s," % repr(names[0]) 230 ## print " alt_name = %s, id = %d," % \ 231 ## (repr(names[1]), id) 232 ## print " table = {" 233 ## s = " " 234 ## for i in range(64): 235 ## if aa[i] != "*": 236 ## t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i], 237 ## bases[2][i], aa[i]) 238 ## if len(s) + len(t) > 75: 239 ## print s 240 ## s = " " + t 241 ## else: 242 ## s = s + t 243 ## print s, "}," 244 245 ## s = " stop_codons = [" 246 ## for i in range(64): 247 ## if aa[i] == "*": 248 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 249 ## if len(s) + len(t) > 75: 250 ## print s 251 ## s = " " + t 252 ## else: 253 ## s = s + t 254 ## print s, "]," 255 256 ## s = " start_codons = [" 257 ## for i in range(64): 258 ## if start[i] == "M": 259 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 260 ## if len(s) + len(t) 
> 75: 261 ## print s 262 ## s = " " + t 263 ## else: 264 ## s = s + t 265 ## print s, "]" 266 ## print " )" 267 ## elif line[:2] == "--" or line == "\n" or line == "}\n" or \ 268 ## line == 'Genetic-code-table ::= {\n': 269 ## pass 270 ## else: 271 ## raise Exception("Unparsed: " + repr(line)) 272 273 register_ncbi_table(name = 'Standard', 274 alt_name = 'SGC0', id = 1, 275 table = { 276 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 277 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 278 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 279 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 280 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 281 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 282 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 283 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 284 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 285 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 286 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 287 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 288 'GGG': 'G', }, 289 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 290 start_codons = [ 'TTG', 'CTG', 'ATG', ] 291 ) 292 register_ncbi_table(name = 'Vertebrate Mitochondrial', 293 alt_name = 'SGC1', id = 2, 294 table = { 295 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 296 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 297 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 298 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 299 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 300 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 301 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 302 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 303 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 304 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 
'GCC': 'A', 305 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 306 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 307 stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ], 308 start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ] 309 ) 310 register_ncbi_table(name = 'Yeast Mitochondrial', 311 alt_name = 'SGC2', id = 3, 312 table = { 313 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 314 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 315 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 316 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 317 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 318 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 319 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 320 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 321 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 322 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 323 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 324 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 325 'GGA': 'G', 'GGG': 'G', }, 326 stop_codons = [ 'TAA', 'TAG', ], 327 start_codons = [ 'ATA', 'ATG', ] 328 ) 329 register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 330 alt_name = 'SGC3', id = 4, 331 table = { 332 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 333 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 334 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 335 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 336 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 337 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 338 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 339 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 340 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 341 'AGG': 
'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 342 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 343 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 344 'GGA': 'G', 'GGG': 'G', }, 345 stop_codons = [ 'TAA', 'TAG', ], 346 start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC', 347 'ATA', 'ATG', 'GTG', ] 348 ) 349 register_ncbi_table(name = 'Invertebrate Mitochondrial', 350 alt_name = 'SGC4', id = 5, 351 table = { 352 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 353 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 354 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 355 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 356 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 357 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 358 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 359 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 360 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 361 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 362 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 363 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 364 'GGA': 'G', 'GGG': 'G', }, 365 stop_codons = [ 'TAA', 'TAG', ], 366 start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG', 367 'GTG', ] 368 ) 369 register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 370 alt_name = 'SGC5', id = 6, 371 table = { 372 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 373 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 374 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 375 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 376 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 377 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 378 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 379 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 
'N', 380 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 381 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 382 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 383 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 384 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 385 stop_codons = [ 'TGA', ], 386 start_codons = [ 'ATG', ] 387 ) 388 register_ncbi_table(name = 'Echinoderm Mitochondrial; Flatworm Mitochondrial', 389 alt_name = 'SGC8', id = 9, 390 table = { 391 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 392 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 393 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 394 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 395 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 396 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 397 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 398 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 399 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 400 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 401 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 402 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 403 'GGA': 'G', 'GGG': 'G', }, 404 stop_codons = [ 'TAA', 'TAG', ], 405 start_codons = [ 'ATG', 'GTG', ] 406 ) 407 register_ncbi_table(name = 'Euplotid Nuclear', 408 alt_name = 'SGC9', id = 10, 409 table = { 410 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 411 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 412 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 413 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 414 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 415 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 416 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 417 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 418 'AAA': 
'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 419 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 420 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 421 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 422 'GGA': 'G', 'GGG': 'G', }, 423 stop_codons = [ 'TAA', 'TAG', ], 424 start_codons = [ 'ATG', ] 425 ) 426 register_ncbi_table(name = 'Bacterial and Plant Plastid', 427 alt_name = None, id = 11, 428 table = { 429 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 430 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 431 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 432 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 433 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 434 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 435 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 436 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 437 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 438 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 439 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 440 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 441 'GGG': 'G', }, 442 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 443 start_codons = [ 'TTG', 'CTG', 'ATT', 'ATC', 'ATA', 444 'ATG', 'GTG', ] 445 ) 446 register_ncbi_table(name = 'Alternative Yeast Nuclear', 447 alt_name = None, id = 12, 448 table = { 449 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 450 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 451 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 452 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 453 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 454 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 455 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 456 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 457 'AAG': 'K', 
'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 458 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 459 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 460 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 461 'GGG': 'G', }, 462 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 463 start_codons = [ 'CTG', 'ATG', ] 464 ) 465 register_ncbi_table(name = 'Ascidian Mitochondrial', 466 alt_name = None, id = 13, 467 table = { 468 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 469 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 470 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 471 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 472 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 473 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 474 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 475 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 476 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 477 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 478 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 479 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 480 'GGA': 'G', 'GGG': 'G', }, 481 stop_codons = [ 'TAA', 'TAG', ], 482 start_codons = [ 'TTG', 'ATA', 'ATG', 'GTG', ] 483 ) 484 register_ncbi_table(name = 'Alternative Flatworm Mitochondrial', 485 alt_name = None, id = 14, 486 table = { 487 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 488 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 489 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 490 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 491 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 492 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 493 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 494 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 495 'AAC': 'N', 'AAA': 'N', 'AAG': 
'K', 'AGT': 'S', 'AGC': 'S', 496 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 497 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 498 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 499 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 500 stop_codons = [ 'TAG', ], 501 start_codons = [ 'ATG', ] 502 ) 503 register_ncbi_table(name = 'Blepharisma Macronuclear', 504 alt_name = None, id = 15, 505 table = { 506 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 507 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 508 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 509 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 510 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 511 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 512 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 513 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 514 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 515 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 516 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 517 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 518 'GGA': 'G', 'GGG': 'G', }, 519 stop_codons = [ 'TAA', 'TGA', ], 520 start_codons = [ 'ATG', ] 521 ) 522 register_ncbi_table(name = 'Chlorophycean Mitochondrial', 523 alt_name = None, id = 16, 524 table = { 525 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 526 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 527 'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 528 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 529 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 530 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 531 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 532 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 533 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 534 'AGG': 
'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 535 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 536 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 537 'GGA': 'G', 'GGG': 'G', }, 538 stop_codons = [ 'TAA', 'TGA', ], 539 start_codons = [ 'ATG', ] 540 ) 541 register_ncbi_table(name = 'Trematode Mitochondrial', 542 alt_name = None, id = 21, 543 table = { 544 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 545 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 546 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 547 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 548 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 549 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 550 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 551 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 552 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 553 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 554 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 555 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 556 'GGA': 'G', 'GGG': 'G', }, 557 stop_codons = [ 'TAA', 'TAG', ], 558 start_codons = [ 'ATG', 'GTG', ] 559 ) 560 register_ncbi_table(name = 'Scenedesmus obliquus Mitochondrial', 561 alt_name = None, id = 22, 562 table = { 563 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 564 'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L', 565 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 566 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 567 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 568 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 569 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 570 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 571 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 572 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 
'GTG': 'V', 'GCT': 'A', 573 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 574 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 575 'GGG': 'G', }, 576 stop_codons = [ 'TCA', 'TAA', 'TGA', ], 577 start_codons = [ 'ATG', ] 578 ) 579 register_ncbi_table(name = 'Thraustochytrium Mitochondrial', 580 alt_name = None, id = 23, 581 table = { 582 'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 583 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 584 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 585 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 586 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 587 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 588 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 589 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 590 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 591 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 592 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 593 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 594 stop_codons = [ 'TTA', 'TAA', 'TAG', 'TGA', ], 595 start_codons = [ 'ATT', 'ATG', 'GTG', ] 596 ) 597 598 599 ######### Deal with ambiguous forward translations 600
class AmbiguousCodonTable(CodonTable):
    """Codon table that also translates codons containing ambiguity codes.

    Wraps an existing (unambiguous) codon table.  Attributes this object
    does not have itself are looked up on the wrapped table.
    """
    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of codon_table.

        Arguments:
         - codon_table - the unambiguous table to wrap
         - ambiguous_nucleotide_alphabet - alphabet for the codon letters
         - ambiguous_nucleotide_values - maps each nucleotide letter to the
           string of letters it can stand for
         - ambiguous_protein_alphabet - alphabet for the residues
         - ambiguous_protein_values - maps each residue letter to the
           string of residues it can stand for
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # The start and stop lists are extended with the
                            # ambiguous codons whose every expansion is
                            # already in the respective list.
                            list_ambiguous_codons(codon_table.start_codons,
                                                  ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons,
                                                  ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Look up a missing attribute on the wrapped table."""
        # __getattr__ only runs when normal lookup failed.  If the wrapped
        # table itself is what is missing (the instance was created without
        # __init__, e.g. while being copied or unpickled), asking for it
        # again below would recurse without end, so fail cleanly instead.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
628
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids an (ambiguous) codon can code for.

    Arguments:
     - codon - three letter string, may contain ambiguity codes
     - forward_table - maps unambiguous codon -> amino acid; a codon
       missing from it is treated as a stop codon
     - ambiguous_nucleotide_values - maps each letter to the string of
       letters it can stand for

    Returns a list of the distinct amino acids, in the order they are first
    met while expanding the codon.

    Raises KeyError if every expansion is a stop codon, and TranslationError
    if some expansions code and others are stop codons.  A letter missing
    from ambiguous_nucleotide_values also surfaces as a KeyError.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]
    # A real list (rather than dictionary keys) because callers index the
    # result.
    possible = []
    found_stop = False
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                try:
                    amino = forward_table[y1+y2+y3]
                except KeyError:
                    # If tripping over a stop codon
                    found_stop = True
                else:
                    if amino not in possible:
                        possible.append(amino)
    if found_stop:
        if possible:
            raise TranslationError("ambiguous codon '%s' codes " % codon \
                                   + "for both proteins and stop codons")
        # This is a true stop codon - tell the caller about it
        raise KeyError(codon)
    return possible
651
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extends a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UGA', 'UAA'] -> ['UGA', 'UAA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'
    (which could also mean 'TAA' or 'TGG').
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    Arguments:
     - codons - list of codon strings
     - ambiguous_nucleotide_values - maps each letter to the string of
       letters it can stand for, e.g. 'R' -> 'AG'

    Returns a new (longer) list of codon strings: the original codons
    followed by the added ambiguous codons in alphabetical order.
    """
    known = set(codons)

    #Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
    #For each codon position collect the letters all of whose meanings occur
    #at that position.  This alone would allow things like 'TRR' from
    #['TAG', 'TGA'], which we don't want to include, hence the check below.
    letters_at = []
    for position in range(3):
        seen = set(codon[position] for codon in codons)
        letters_at.append(sorted(
            letter for (letter, meanings)
            in ambiguous_nucleotide_values.items()
            if seen.issuperset(meanings)))

    answer = list(codons) #copy
    for c1 in letters_at[0]:
        for c2 in letters_at[1]:
            for c3 in letters_at[2]:
                ambig_codon = c1+c2+c3
                if ambig_codon in known:
                    continue
                #e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
                for codon in [y1+y2+y3
                              for y1 in ambiguous_nucleotide_values[c1]
                              for y2 in ambiguous_nucleotide_values[c2]
                              for y3 in ambiguous_nucleotide_values[c3]]:
                    if codon not in known:
                        #This ambiguous codon can stand for a codon that
                        #is not in the list, exclude it!
                        break
                else:
                    answer.append(ambig_codon)
    return answer
# Self-checks for list_ambiguous_codons(), run every time the module is
# imported.  They pin both which ambiguous codons get added and the order in
# which they are appended.
assert list_ambiguous_codons(['TGA', 'TAA'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(['TAG', 'TGA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(['TAG', 'TAA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(['UAG', 'UAA'],IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

# Forward translation is "onto", that is, any given codon always maps
# to the same protein, or it doesn't map at all.  Thus, I can build
# off of an existing table to produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
#  >>> t = CodonTable.ambiguous_dna_by_id[1]
#  >>> t.forward_table["AAT"]
#  'N'
#  >>> t.forward_table["GAT"]
#  'D'
#  >>> t.forward_table["RAT"]
#  'B'
#  >>> t.forward_table["YTA"]
#  'L'

class AmbiguousForwardTable:
    """Dictionary-like forward table that accepts ambiguous codons.

    Lookups are answered from the wrapped unambiguous forward table when
    possible; otherwise the codon is expanded and, if needed, the result is
    expressed as an ambiguous residue code.  Every outcome, failures
    included, is remembered in a per-instance cache.
    """
    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Wrap forward_table.

        Arguments:
         - forward_table - maps unambiguous codon -> amino acid
         - ambiguous_nucleotide - maps each nucleotide letter to the string
           of letters it can stand for
         - ambiguous_protein - maps each residue letter to the string of
           residues it can stand for
        """
        self.forward_table = forward_table

        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert ambiguous_protein: residue -> list of the codes which can
        # stand for it.  Collected in dictionaries first so a code is
        # recorded once even if its value repeats a residue.
        collected = {}
        for name, val in ambiguous_protein.items():
            for c in val:
                collected.setdefault(c, {})[name] = 1
        self._inverted = dict((c, list(terms))
                              for c, terms in collected.items())

        # codon -> amino acid, or the KeyError / TranslationError class
        # itself to remember a failed lookup
        self._cache = {}

    def get(self, codon, failobj = None):
        """Return the translation of codon, or failobj if it is a stop codon.

        Only KeyError is turned into failobj; TranslationError propagates.
        """
        try:
            return self.__getitem__(codon)
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Return the (possibly ambiguous) amino acid for codon.

        Raises KeyError for a stop codon and TranslationError when there is
        no unique translation.
        """
        try:
            x = self._cache[codon]
        except KeyError:
            pass
        else:
            if x is TranslationError:
                raise TranslationError(codon)   # no unique translation
            if x is KeyError:
                raise KeyError(codon)  # it's a stop codon
            return x
        try:
            x = self.forward_table[codon]
            self._cache[codon] = x
            return x
        except KeyError:
            pass

        # XXX Need to make part of this into a method which returns
        # a list of all possible encodings for a codon!
        try:
            # list() because the result is indexed below
            possible = list(list_possible_proteins(codon,
                                                   self.forward_table,
                                                   self.ambiguous_nucleotide))
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)  # stop codon
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # does not code
        assert len(possible) > 0, "unambiguous codons must code"

        # Hah!  Only one possible protein, so use it
        if len(possible) == 1:
            self._cache[codon] = possible[0]
            return possible[0]

        # See if there's an ambiguous protein encoding for the multiples.
        # Find residues which exist in every coding set.
        ambiguous_possible = {}
        for amino in possible:
            for term in self._inverted[amino]:
                ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1

        n = len(possible)
        candidates = [amino for amino, val in ambiguous_possible.items()
                      if val == n]

        # No amino acid encoding for the results
        if not candidates:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # no valid translation

        # All of these are valid, so choose one
        # To be unique, take the smallest ambiguity, then alphabetically
        # Can get this if "X" encodes for everything.
        table = self.ambiguous_protein
        x = min(candidates, key=lambda amino: (len(table[amino]), amino))
        self._cache[codon] = x
        return x
#Prepare the ambiguous tables for DNA, RNA and Generic (DNA or RNA)
# Each unambiguous table is wrapped in an AmbiguousCodonTable under the
# same key (table name or table id).
ambiguous_dna_by_name = {}
for key, val in unambiguous_dna_by_name.items():
    ambiguous_dna_by_name[key] = AmbiguousCodonTable(
        val,
        IUPAC.ambiguous_dna,
        IUPACData.ambiguous_dna_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)
ambiguous_dna_by_id = {}
for key, val in unambiguous_dna_by_id.items():
    ambiguous_dna_by_id[key] = AmbiguousCodonTable(
        val,
        IUPAC.ambiguous_dna,
        IUPACData.ambiguous_dna_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)

ambiguous_rna_by_name = {}
for key, val in unambiguous_rna_by_name.items():
    ambiguous_rna_by_name[key] = AmbiguousCodonTable(
        val,
        IUPAC.ambiguous_rna,
        IUPACData.ambiguous_rna_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)
ambiguous_rna_by_id = {}
for key, val in unambiguous_rna_by_id.items():
    ambiguous_rna_by_id[key] = AmbiguousCodonTable(
        val,
        IUPAC.ambiguous_rna,
        IUPACData.ambiguous_rna_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)

#The following isn't very elegant, but seems to work nicely.
# Work on a copy of the RNA ambiguity values so that adding "T" does not
# modify IUPACData.  dict(mapping) copies on Python 2 and Python 3 alike.
_merged_values = dict(IUPACData.ambiguous_rna_values)
_merged_values["T"] = "U"

for key, val in generic_by_name.items():
    ambiguous_generic_by_name[key] = AmbiguousCodonTable(
        val,
        Alphabet.NucleotideAlphabet(),
        _merged_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)

for key, val in generic_by_id.items():
    ambiguous_generic_by_id[key] = AmbiguousCodonTable(
        val,
        Alphabet.NucleotideAlphabet(),
        _merged_values,
        IUPAC.extended_protein,
        IUPACData.extended_protein_values)
# Do not leave the loop variables and the scratch dict in the module
# namespace.
del _merged_values
del key, val

#Basic sanity test,
for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    if n != 23:
        #For table 23, UUN = F, L or stop.
        assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"  # F or L
    #R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons
    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
    and "UGA" in unambiguous_rna_by_id[n].stop_codons:
        try:
            # The lookup is expected to raise KeyError.  If it returns
            # instead, show the unexpected translation before failing.
            # print(...) with one argument is valid on Python 2 and 3.
            print(ambiguous_dna_by_id[n].forward_table["TRA"])
            assert False, "Should be a stop only"
        except KeyError:
            pass
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
del n
# The same table must be reachable by id and by each of its names.
assert ambiguous_generic_by_id[1].stop_codons == \
       ambiguous_generic_by_name["Standard"].stop_codons
assert ambiguous_generic_by_id[4].stop_codons == \
       ambiguous_generic_by_name["SGC3"].stop_codons
assert ambiguous_generic_by_id[11].stop_codons == \
       ambiguous_generic_by_name["Bacterial"].stop_codons
assert ambiguous_generic_by_id[11].stop_codons == \
       ambiguous_generic_by_name["Plant Plastid"].stop_codons
assert ambiguous_generic_by_id[15].stop_codons == \
       ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons