1
2
3
4
5
6
7 import warnings
8 import numpy
9
10
11 from StructureBuilder import StructureBuilder
12 from PDBExceptions import PDBConstructionException, PDBConstructionWarning
13 from parse_pdb_header import _parse_pdb_header_list
14
15 __doc__="Parser for PDB files."
16
17
18
19
20
22 """
23 Parse a PDB file and return a Structure object.
24 """
25
26 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
27 """
28 The PDB parser call a number of standard methods in an aggregated
29 StructureBuilder object. Normally this object is instanciated by the
30 PDBParser object itself, but if the user provides his own StructureBuilder
31 object, the latter is used instead.
32
33 Arguments:
34 o PERMISSIVE - int, if this is 0 exceptions in constructing the
35 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
36 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
37 ARE DUE TO PROBLEMS IN THE PDB FILE!.
38 o structure_builder - an optional user implemented StructureBuilder class.
39 """
40 if structure_builder!=None:
41 self.structure_builder=structure_builder
42 else:
43 self.structure_builder=StructureBuilder()
44 self.header=None
45 self.trailer=None
46 self.line_counter=0
47 self.PERMISSIVE=PERMISSIVE
48
49
50
52 """Return the structure.
53
54 Arguments:
55 o id - string, the id that will be used for the structure
56 o file - name of the PDB file OR an open filehandle
57 """
58 self.header=None
59 self.trailer=None
60
61 self.structure_builder.init_structure(id)
62 if isinstance(file, basestring):
63 file=open(file)
64 self._parse(file.readlines())
65 self.structure_builder.set_header(self.header)
66
67 return self.structure_builder.get_structure()
68
70 "Return the header."
71 return self.header
72
74 "Return the trailer."
75 return self.trailer
76
77
78
79 - def _parse(self, header_coords_trailer):
85
87 "Get the header of the PDB file, return the rest."
88 structure_builder=self.structure_builder
89 for i in range(0, len(header_coords_trailer)):
90 structure_builder.set_line_counter(i+1)
91 line=header_coords_trailer[i]
92 record_type=line[0:6]
93 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
94 break
95 header=header_coords_trailer[0:i]
96
97 self.line_counter=i
98 coords_trailer=header_coords_trailer[i:]
99 header_dict=_parse_pdb_header_list(header)
100 return header_dict, coords_trailer
101
103 "Parse the atomic data in the PDB file."
104 local_line_counter=0
105 structure_builder=self.structure_builder
106 current_model_id=0
107
108 model_open=0
109 current_chain_id=None
110 current_segid=None
111 current_residue_id=None
112 current_resname=None
113 for i in range(0, len(coords_trailer)):
114 line=coords_trailer[i]
115 record_type=line[0:6]
116 global_line_counter=self.line_counter+local_line_counter+1
117 structure_builder.set_line_counter(global_line_counter)
118 if(record_type=='ATOM ' or record_type=='HETATM'):
119
120 if not model_open:
121 structure_builder.init_model(current_model_id)
122 current_model_id+=1
123 model_open=1
124 fullname=line[12:16]
125
126 split_list=fullname.split()
127 if len(split_list)!=1:
128
129
130 name=fullname
131 else:
132
133 name=split_list[0]
134 altloc=line[16:17]
135 resname=line[17:20]
136 chainid=line[21:22]
137 try:
138 serial_number=int(line[6:11])
139 except:
140 serial_number=0
141 resseq=int(line[22:26].split()[0])
142 icode=line[26:27]
143 if record_type=='HETATM':
144 if resname=="HOH" or resname=="WAT":
145 hetero_flag="W"
146 else:
147 hetero_flag="H"
148 else:
149 hetero_flag=" "
150 residue_id=(hetero_flag, resseq, icode)
151
152 try:
153 x=float(line[30:38])
154 y=float(line[38:46])
155 z=float(line[46:54])
156 except:
157
158
159 raise PDBContructionError("Invalid or missing coordinate(s) at line %i." \
160 % global_line_counter)
161 coord=numpy.array((x, y, z), 'f')
162
163 try:
164 occupancy=float(line[54:60])
165 except:
166 self._handle_PDB_exception("Invalid or missing occupancy",
167 global_line_counter)
168 occupancy = 0.0
169 try:
170 bfactor=float(line[60:66])
171 except:
172 self._handle_PDB_exception("Invalid or missing B factor",
173 global_line_counter)
174 bfactor = 0.0
175 segid=line[72:76]
176 element=line[76:78].strip()
177 if current_segid!=segid:
178 current_segid=segid
179 structure_builder.init_seg(current_segid)
180 if current_chain_id!=chainid:
181 current_chain_id=chainid
182 structure_builder.init_chain(current_chain_id)
183 current_residue_id=residue_id
184 current_resname=resname
185 try:
186 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
187 except PDBConstructionException, message:
188 self._handle_PDB_exception(message, global_line_counter)
189 elif current_residue_id!=residue_id or current_resname!=resname:
190 current_residue_id=residue_id
191 current_resname=resname
192 try:
193 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
194 except PDBConstructionException, message:
195 self._handle_PDB_exception(message, global_line_counter)
196
197 try:
198 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc,
199 fullname, serial_number, element)
200 except PDBConstructionException, message:
201 self._handle_PDB_exception(message, global_line_counter)
202 elif(record_type=='ANISOU'):
203 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
204
205 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f')
206 structure_builder.set_anisou(anisou_array)
207 elif(record_type=='MODEL '):
208 structure_builder.init_model(current_model_id)
209 current_model_id+=1
210 model_open=1
211 current_chain_id=None
212 current_residue_id=None
213 elif(record_type=='END ' or record_type=='CONECT'):
214
215 self.line_counter=self.line_counter+local_line_counter
216 return coords_trailer[local_line_counter:]
217 elif(record_type=='ENDMDL'):
218 model_open=0
219 current_chain_id=None
220 current_residue_id=None
221 elif(record_type=='SIGUIJ'):
222
223 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
224
225 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')
226 structure_builder.set_siguij(siguij_array)
227 elif(record_type=='SIGATM'):
228
229 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
230 sigatm_array=numpy.array(sigatm, 'f')
231 structure_builder.set_sigatm(sigatm_array)
232 local_line_counter=local_line_counter+1
233
234 self.line_counter=self.line_counter+local_line_counter
235 return []
236
238 """
239 This method catches an exception that occurs in the StructureBuilder
240 object (if PERMISSIVE==1), or raises it again, this time adding the
241 PDB line number to the error message.
242 """
243 message="%s at line %i." % (message, line_counter)
244 if self.PERMISSIVE:
245
246 warnings.warn("PDBConstructionException: %s\n"
247 "Exception ignored.\n"
248 "Some atoms or residues may be missing in the data structure."
249 % message, PDBConstructionWarning)
250 else:
251
252 raise PDBConstructionException(message)
253
254
255 if __name__=="__main__":
256
257 import sys
258
259 p=PDBParser(PERMISSIVE=1)
260
261 filename = sys.argv[1]
262 s=p.get_structure("scr", filename)
263
264 for m in s:
265 p=m.get_parent()
266 assert(p is s)
267 for c in m:
268 p=c.get_parent()
269 assert(p is m)
270 for r in c:
271 print r
272 p=r.get_parent()
273 assert(p is c)
274 for a in r:
275 p=a.get_parent()
276 if not p is r:
277 print p, r
278