1
2
3
4
5
6 """
7 This module provides code to work with GenePop.
8
9 See http://wbiomed.curtin.edu.au/genepop/ , the format is documented
10 here: http://wbiomed.curtin.edu.au/genepop/help_input.html .
11
12 Classes:
13 Record Holds GenePop data.
14 RecordParser Parses a GenePop record (file) into a Record object.
15
16 _Scanner Scans a GenePop record.
17 _RecordConsumer Consumes GenePop data to a Record object.
18
19 Partially inspired by the MedLine code.
20
21 """
22 from copy import deepcopy
23 from types import *
24
25 from Bio import File
26 from Bio.ParserSupport import *
27
28
30 """Holds information from a GenePop record.
31
32 Members:
33 marker_len The marker length (2 or 3 digit code per allele).
34
35 comment_line Comment line.
36
37 loci_list List of loci names.
38
39 pop_list List of population names.
40
41 populations List of population data.
42
43 In most GenePop files, the population name cannot be trusted.
44 It is strongly recommended that populations are referred to by index.
45
46 populations has one element per population. Each element is itself
47 a list of individuals, each individual is a pair composed by individual
48 name and a list of alleles (2 per marker): Example
49 [
50 [
51 ('Ind1', [(1,2), (3,3), (200,201)]),
52 ('Ind2', [(2,None), (3,3), (None,None)]),
53 ],
54 [
55 ('Other1', [(1,1), (4,3), (200,200)]),
56 ]
57 ]
58
59
60 """
62 self.marker_len = 0
63 self.comment_line = ""
64 self.loci_list = []
65 self.pop_list = []
66 self.populations = []
67
69 """Returns (reconstructs) a GenePop textual representation.
70 """
71 rep = [self.comment_line + '\n']
72 rep.append('\n'.join(self.loci_list) + '\n')
73 for pop in self.populations:
74 rep.append('Pop\n')
75 for indiv in pop:
76 name, markers = indiv
77 rep.append(name)
78 rep.append(',')
79 for marker in markers:
80 rep.append(' ')
81 for al in marker:
82 if al == None:
83 al = '0'
84 aStr = str(al)
85 while len(aStr)<self.marker_len:
86 aStr = "".join(['0', aStr])
87 rep.append(aStr)
88 rep.append('\n')
89 return "".join(rep)
90
92 """Splits a GP record in a dictionary with 1 pop per entry.
93
94 Given a record with n pops and m loci returns a dictionary
95 of records (key pop_name) where each item is a record
96 with a single pop and m loci.
97
98 Parameters:
99 pop_names - Population names
100 """
101 gp_pops = {}
102 for i in range(len(self.populations)):
103 gp_pop = GenePop.Record()
104 gp_pop.marker_len = self.marker_len
105 gp_pop.comment_line = self.comment_line
106 gp_pop.loci_list = deepcopy(self.loci_list)
107 gp_pop.populations = [deepcopy(self.populations[i])]
108 gp_pops[pop_names[i]] = gp_pop
109 return gp_pops
110
112 """Splits a GP record in a dictionary with 1 locus per entry.
113
114 Given a record with n pops and m loci returns a dictionary
115 of records (key locus name) where each item is a record
116 with a single locus and n pops.
117 """
118 gp_loci = {}
119 for i in range(len(self.loci_list)):
120 gp_pop = GenePop.Record()
121 gp_pop.marker_len = self.marker_len
122 gp_pop.comment_line = self.comment_line
123 gp_pop.loci_list = [self.loci_list[i]]
124 gp_pop.populations = []
125 for pop in self.populations:
126 my_pop = []
127 for indiv in pop:
128 my_pop.append((indiv[0], [indiv[1][i]]))
129 gp_pop.populations.append(my_pop)
130 gp_loci[gp_pop.loci_list[0]] = gp_pop
131 return gp_loci
132
133
135 """Removes a population (by position).
136 """
137 del self.populations[pos]
138
140 """Removes a locus by position.
141 """
142 del self.loci_list[pos]
143 for pop in self.populations:
144 for indiv in pop:
145 name, loci = indiv
146 del loci[pos]
147
155
156
157
158
160 """Parses GenePop data into a Record object.
161
162 """
166
def parse(self, handle):
    """Scan ``handle`` and return what the consumer collected.

    The scanner held in ``self._scanner`` is fed the handle together with
    the consumer held in ``self._consumer``; once scanning is done the
    consumer's ``data`` attribute is returned.
    """
    scanner = self._scanner
    scanner.feed(handle, self._consumer)
    return self._consumer.data
170
172 """Parses a handle containing a GenePop file.
173 """
174 parser = RecordParser()
175 return parser.parse(handle)
176
178 """Scans a GenePop record.
179
180 There is only one record per file.
181
182 """
183
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Feed in a GenePop unit record for scanning.  handle is a file-like
    object that contains a GenePop record.  consumer is a Consumer
    object that will receive events as the record is scanned.

    Events are sent in this order: start_record, comment, loci_name (once
    per locus), then for every population start_pop followed by one
    individual call per individual, and finally end_record.  marker_len is
    sent once, just before the first individual.

    Scanning of individuals stops at the first blank line or at the end of
    the input, whichever comes first.

    Raises ValueError when no 'Pop' line is found before a blank line or
    the end of the input, when an individual line has no comma separating
    the name from the genotypes, when an individual has no genotypes, or
    when a genotype is not numeric.
    """
    if isinstance(handle, File.UndoHandle):
        uhandle = handle
    else:
        uhandle = File.UndoHandle(handle)

    consumer.start_record()

    # The first line of the record is a free-text comment.
    consumer.comment(uhandle.readline().rstrip())

    # Loci may be listed one per line, or several on a single line
    # separated by commas and/or blanks.  Commas are turned into blanks
    # (not dropped) so that "loc1,loc2" is not fused into one name, and
    # split() without argument avoids empty names on repeated blanks.
    sample_loci_line = uhandle.readline().rstrip()
    all_loci = sample_loci_line.replace(',', ' ').split()
    if all_loci:
        for locus in all_loci:
            consumer.loci_name(locus)
    else:
        # A line without any name still registers one (empty) locus,
        # as it always did.
        consumer.loci_name(sample_loci_line.replace(',', ''))

    # Every further line up to the first 'Pop' marker is a locus name.
    next_line = uhandle.readline().rstrip()
    while next_line.upper() != 'POP':
        if next_line == '':
            raise ValueError('No population data found, file probably not GenePop related')
        consumer.loci_name(next_line)
        next_line = uhandle.readline().rstrip()
    consumer.start_pop()

    # None until the allele width has been inferred from the first individual.
    marker_len = None
    line = uhandle.readline().rstrip()
    while line != '':
        if line.upper() == 'POP':
            consumer.start_pop()
        else:
            # Genotypes never contain a comma, so only the LAST comma
            # separates them from the name; names may contain commas.
            fields = line.rsplit(',', 1)
            if len(fields) != 2:
                raise ValueError('Individual line without a comma: %r' % line)
            indiv_name, marker_line = fields
            # Genotypes are separated by any run of blanks or tabs.
            markers = marker_line.split()
            if not markers:
                raise ValueError('Individual without genotype data: %r' % line)
            if marker_len is None:
                # 4 characters per genotype means 2 digits per allele,
                # anything else is taken as 3 digits per allele.
                if len(markers[0]) == 4:
                    marker_len = 2
                else:
                    marker_len = 3
                consumer.marker_len(marker_len)
            allele_list = []
            for marker in markers:
                allele_list.append((
                    int(marker[:marker_len]),
                    int(marker[marker_len:]),
                ))
            consumer.individual(indiv_name, allele_list)
        line = uhandle.readline().rstrip()
    consumer.end_record()
247
249 """Consumer that converts a GenePop record to a Record object.
250
251 Members:
252 data Record with GenePop data.
253
254 """
257
260
262 pops = self.data.populations
263 loci = self.data.loci_list
264 for pop_i in range(len(pops)):
265 self.data.pop_list.append(pops[pop_i][-1][0])
266 for indiv_i in range(len(pops[pop_i])):
267 for mk_i in range(len(loci)):
268 mk_orig = pops[pop_i][indiv_i][1][mk_i]
269 mk_real = []
270 for al in mk_orig:
271 if al == 0:
272 mk_real.append(None)
273 else:
274 mk_real.append(al)
275 pops[pop_i][indiv_i][1][mk_i] = tuple(mk_real)
276
279
282
285
287 self.current_pop = []
288 self.data.populations.append(self.current_pop)
289
291 self.current_pop.append((indiv_name, allele_list))
292