1 """Utilities for working with FASTA-formatted sequences (DEPRECATED).
2
3 Classes:
4 Record - Holds FASTA sequence data.
5 Iterator - Iterates over sequence data in a FASTA file.
6 RecordParser - Parses FASTA sequence data into a Record object.
7 SequenceParser - Parses FASTA sequence data into a SeqRecord object.
8
9 For a long time this module was the most commonly used and best documented
10 FASTA parser in Biopython. However, we now recommend using Bio.SeqIO instead.
11 After being declared obsolete, Bio.Fasta has now been officially deprecated
12 (with a warning message when imported) and will be removed in a future
13 release.
14
15 If you are already using Bio.Fasta with the SequenceParser to get SeqRecord
16 objects, then you should be able to switch to the more recent Bio.SeqIO module
17 very easily as that too uses SeqRecord objects. For example,
18
19 from Bio import Fasta
20 handle = open("example.fas")
21 for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()):
22     print seq_record.description
23     print seq_record.seq
24 handle.close()
25
26 Using Bio.SeqIO instead this becomes:
27
28 from Bio import SeqIO
29 handle = open("example.fas")
30 for seq_record in SeqIO.parse(handle, "fasta"):
31     print seq_record.description
32     print seq_record.seq
33 handle.close()
34
35 Converting existing code which uses the RecordParser is a little more
36 complicated as the Bio.Fasta.Record object differs from the SeqRecord.
37
38 from Bio import Fasta
39 handle = open("example.fas")
40 for record in Fasta.Iterator(handle, Fasta.RecordParser()):
41     #record is a Bio.Fasta.Record object
42     print record.title #The full title line as a string
43     print record.sequence #The sequence as a string
44 handle.close()
45
46 Using Bio.SeqIO instead this becomes:
47
48 from Bio import SeqIO
49 handle = open("example.fas")
50 for seq_record in SeqIO.parse(handle, "fasta"):
51     print seq_record.description #The full title line as a string
52     print str(seq_record.seq) #The sequence as a string
53 handle.close()
54
55 Very old code may have used Bio.Fasta.index_file and Dictionary, which were
56 deprecated in Biopython 1.44 and removed in Biopython 1.46. These allowed
57 indexing of a FASTA file and access to the records with a dictionary-like
58 interface. Currently, using Bio.SeqIO.to_dict to create an in-memory
59 dictionary of SeqRecord objects is the best replacement, but for very large
60 files additional indexing support for Bio.SeqIO is being considered.
61 """
62 from Bio import Seq
63 from Bio import SeqRecord
64 from Bio import Alphabet
65
66 import warnings
# Import-time deprecation notice.  stacklevel=2 attributes the warning to the
# module that performs the import rather than to this file, so the message
# points users at the code they actually need to change.
warnings.warn('Bio.Fasta is deprecated. Please use the "fasta" support in '
              'Bio.SeqIO (or Bio.AlignIO) instead.', DeprecationWarning,
              stacklevel=2)
69
class Record:
    """Holds information from a FASTA record.

    Members:
    title - Title line ('>' character not included).
    sequence - The sequence.

    """

    def __init__(self, colwidth=60):
        """__init__(self, colwidth=60)

        Create a new Record.  colwidth specifies the number of residues
        to put on each line when generating FASTA format.

        """
        # A new record starts out empty; callers fill these attributes in.
        self.title = ''
        self.sequence = ''
        # Line width to use when the record is written out as FASTA text.
        self._colwidth = colwidth
88
99
class Iterator:
    """Returns one record at a time from a FASTA file.

    The handle is consumed line by line with readline().  Any text before
    the first line starting with '>' is discarded.  Each record then runs
    from its '>' title line up to (but not including) the next '>' line;
    lines inside a record that start with '#' are dropped as comments.
    """

    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - File-like object; only its readline() method is used.
        o parser - Optional object with a parse_string(text) method.  If
        given, next() returns parser.parse_string(record_text); if None,
        next() returns the record text itself.
        o debug - If true, diagnostic messages are printed while reading.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Skip anything before the first record, leaving its title line
        # (or '' at end of file) in the one-line lookahead buffer.
        while True:
            line = handle.readline()
            if not line or line[0] == ">":
                break
            if debug:
                print("Skipping: " + line)
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping once next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file.

        Returns None when there are no more records.  Otherwise returns the
        record text (title line and sequence lines, trailing whitespace
        stripped, joined with newlines), or the result of passing that text
        to the parser's parse_string() if a parser was supplied.
        """
        line = self._lookahead
        if not line:
            return None
        # The lookahead only ever holds '' or a title line.
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Remember the next record's title line (or '' at end of file).
        self._lookahead = line
        if self._debug:
            # lines[0] is the title line (still with its '>'); the remaining
            # entries are the sequence lines.  This previously referenced an
            # undefined name and raised NameError whenever debug was on.
            print("Debug: '%s' and '%s'" % (lines[0], "".join(lines[1:])))
        if self._parser is None:
            return "\n".join(lines)
        else:
            return self._parser.parse_string("\n".join(lines))
143
145 """Parses FASTA sequence data into a Fasta.Record object.
146 """
149
160
161 - def parse(self, handle):
163
165 """Parses FASTA sequence data into a SeqRecord object.
166 """
169 """Initialize a Scanner and Sequence Consumer.
170
171 Arguments:
172 o alphabet - The alphabet of the sequences to be parsed. If not
173 passed, this will be set as generic_alphabet.
174 o title2ids - A function that, when given the title of the FASTA
175 file (without the beginning >), will return the id, name and
176 description (in that order) for the record. If this is not given,
177 then the entire title line will be used as the description.
178 """
179 self.alphabet = alphabet
180 self.title2ids = title2ids
181
201
202 - def parse(self, handle):
204