1
2
3
4
5
6 from types import StringType
7
8 from Bio.Alphabet import ProteinAlphabet
9 from Bio.Seq import Seq
10 from Bio.SCOP.Raf import to_one_letter_code
11 from Bio.PDB.PDBExceptions import PDBException
12 from Bio.PDB.Residue import Residue, DisorderedResidue
13 from Vector import calc_dihedral, calc_angle
14
15 __doc__="""
16 Polypeptide related classes (construction and representation).
17
18 Example:
19
20 >>> ppb=PPBuilder()
21 >>> for pp in ppb.build_peptides(structure):
22 >>> print pp.get_sequence()
23 """
24
25 standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",
26 "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL",
27 "TRP", "TYR"]
28
29
30 aa1="ACDEFGHIKLMNPQRSTVWY"
31 aa3=standard_aa_names
32
33 d1_to_index={}
34 dindex_to_1={}
35 d3_to_index={}
36 dindex_to_3={}
37
38
39 for i in range(0, 20):
40 n1=aa1[i]
41 n3=aa3[i]
42 d1_to_index[n1]=i
43 dindex_to_1[i]=n1
44 d3_to_index[n3]=i
45 dindex_to_3[i]=n3
46
48 """
49 Index to corresponding one letter amino acid name.
50 For example: 0 to A.
51 """
52 return dindex_to_1[index]
53
55 """
56 One letter code to index.
57 For example: A to 0.
58 """
59 return d1_to_index[s]
60
62 """
63 Index to corresponding three letter amino acid name.
64 For example: 0 to ALA.
65 """
66 return dindex_to_3[i]
67
69 """
70 Three letter code to index.
71 For example: ALA to 0.
72 """
73 return d3_to_index[s]
74
76 """
77 Three letter code to one letter code.
78 For example: ALA to A.
79 """
80 i=d3_to_index[s]
81 return dindex_to_1[i]
82
84 """
85 One letter code to three letter code.
86 For example: A to ALA.
87 """
88 i=d1_to_index[s]
89 return dindex_to_3[i]
90
91 -def is_aa(residue, standard=0):
92 """
93 Return 1 if residue object/string is an amino acid.
94
95 @param residue: a L{Residue} object OR a three letter amino acid code
96 @type residue: L{Residue} or string
97
98 @param standard: flag to check for the 20 AA (default false)
99 @type standard: boolean
100 """
101 if not type(residue)==StringType:
102 residue=residue.get_resname()
103 residue=residue.upper()
104 if standard:
105 return d3_to_index.has_key(residue)
106 else:
107 return to_one_letter_code.has_key(residue)
108
109
111 """
112 A polypeptide is simply a list of L{Residue} objects.
113 """
115 """
116 @return: the list of C-alpha atoms
117 @rtype: [L{Atom}, L{Atom}, ...]
118 """
119 ca_list=[]
120 for res in self:
121 ca=res["CA"]
122 ca_list.append(ca)
123 return ca_list
124
126 """
127 Return the list of phi/psi dihedral angles
128 """
129 ppl=[]
130 lng=len(self)
131 for i in range(0, lng):
132 res=self[i]
133 try:
134 n=res['N'].get_vector()
135 ca=res['CA'].get_vector()
136 c=res['C'].get_vector()
137 except:
138
139
140 ppl.append((None, None))
141 res.xtra["PHI"]=None
142 res.xtra["PSI"]=None
143 continue
144
145 if i>0:
146 rp=self[i-1]
147 try:
148 cp=rp['C'].get_vector()
149 phi=calc_dihedral(cp, n, ca, c)
150 except:
151 phi=None
152 else:
153
154 phi=None
155
156 if i<(lng-1):
157 rn=self[i+1]
158 try:
159 nn=rn['N'].get_vector()
160 psi=calc_dihedral(n, ca, c, nn)
161 except:
162 psi=None
163 else:
164
165 psi=None
166 ppl.append((phi, psi))
167
168 res.xtra["PHI"]=phi
169 res.xtra["PSI"]=psi
170 return ppl
171
173 """
174 Return list of tau torsions angles for all 4 consecutive
175 Calpha atoms.
176 """
177 ca_list=self.get_ca_list()
178 tau_list=[]
179 for i in range(0, len(ca_list)-3):
180 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]]
181 vector_list=map(lambda a: a.get_vector(), atom_list)
182 v1, v2, v3, v4=vector_list
183 tau=calc_dihedral(v1, v2, v3, v4)
184 tau_list.append(tau)
185
186 res=ca_list[i+2].get_parent()
187 res.xtra["TAU"]=tau
188 return tau_list
189
191 """
192 Return list of theta angles for all 3 consecutive
193 Calpha atoms.
194 """
195 theta_list=[]
196 ca_list=self.get_ca_list()
197 for i in range(0, len(ca_list)-2):
198 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]]
199 vector_list=map(lambda a: a.get_vector(), atom_list)
200 v1, v2, v3=vector_list
201 theta=calc_angle(v1, v2, v3)
202 theta_list.append(theta)
203
204 res=ca_list[i+1].get_parent()
205 res.xtra["THETA"]=theta
206 return theta_list
207
225
227 """
228 Return <Polypeptide start=START end=END>, where START
229 and END are sequence identifiers of the outer residues.
230 """
231 start=self[0].get_id()[1]
232 end=self[-1].get_id()[1]
233 s="<Polypeptide start=%s end=%s>" % (start, end)
234 return s
235
237 """
238 Base class to extract polypeptides.
239 It checks if two consecutive residues in a chain
240 are connected. The connectivity test is implemented by a
241 subclass.
242 """
244 """
245 @param radius: distance
246 @type radius: float
247 """
248 self.radius=radius
249
251 "Check if the residue is an amino acid."
252 if is_aa(residue):
253 return 1
254 else:
255 if "CA" in residue.child_dict:
256
257
258
259 import warnings
260 warnings.warn("Assuming residue %s is an unknown modified "
261 "amino acid" % residue.get_resname())
262 return 1
263
264 return 0
265
267 """
268 Build and return a list of Polypeptide objects.
269
270 @param entity: polypeptides are searched for in this object
271 @type entity: L{Structure}, L{Model} or L{Chain}
272
273 @param aa_only: if 1, the residue needs to be a standard AA
274 @type aa_only: int
275 """
276 is_connected=self._is_connected
277 accept=self._accept
278 level=entity.get_level()
279
280 if level=="S":
281 model=entity[0]
282 chain_list=model.get_list()
283 elif level=="M":
284 chain_list=entity.get_list()
285 elif level=="C":
286 chain_list=[entity]
287 else:
288 raise PDBException("Entity should be Structure, Model or Chain.")
289 pp_list=[]
290 for chain in chain_list:
291 chain_it=iter(chain)
292 prev=chain_it.next()
293 pp=None
294 for next in chain_it:
295 if aa_only and not accept(prev):
296 prev=next
297 continue
298 if is_connected(prev, next):
299 if pp is None:
300 pp=Polypeptide()
301 pp.append(prev)
302 pp_list.append(pp)
303 pp.append(next)
304 else:
305 pp=None
306 prev=next
307 return pp_list
308
309
311 """
312 Use CA--CA distance to find polypeptides.
313 """
316
337
338
340 """
341 Use C--N distance to find polypeptides.
342 """
345
380
382 "Return 1 if distance between atoms<radius"
383 if (c-n)<self.radius:
384 return 1
385 else:
386 return 0
387
388
389 if __name__=="__main__":
390
391 import sys
392
393 from Bio.PDB.PDBParser import PDBParser
394
395 p=PDBParser(PERMISSIVE=1)
396
397 s=p.get_structure("scr", sys.argv[1])
398
399 ppb=PPBuilder()
400
401 print "C-N"
402 for pp in ppb.build_peptides(s):
403 print pp.get_sequence()
404 for pp in ppb.build_peptides(s[0]):
405 print pp.get_sequence()
406 for pp in ppb.build_peptides(s[0]["A"]):
407 print pp.get_sequence()
408
409 for pp in ppb.build_peptides(s):
410 for phi, psi in pp.get_phi_psi_list():
411 print phi, psi
412
413 ppb=CaPPBuilder()
414
415 print "CA-CA"
416 for pp in ppb.build_peptides(s):
417 print pp.get_sequence()
418 for pp in ppb.build_peptides(s[0]):
419 print pp.get_sequence()
420 for pp in ppb.build_peptides(s[0]["A"]):
421 print pp.get_sequence()
422