Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control GenePop. 
 10   
 11  """ 
 12   
 13  import os 
 14  import re 
 15  import shutil 
 16  import tempfile 
 17   
 18  from Bio.Application import AbstractCommandline, _Argument, _Option, generic_run 
 19   
20 -def _gp_float(tok):
21 """Gets a float from a token, if it fails, returns the string. 22 """ 23 try: 24 return float(tok) 25 except ValueError: 26 return str(tok)
27
28 -def _gp_int(tok):
29 """Gets a int from a token, if it fails, returns the string. 30 """ 31 try: 32 return int(tok) 33 except ValueError: 34 return str(tok)
35 36
37 -def _read_allele_freq_table(f):
38 l = f.readline() 39 while l.find(" --")==-1: 40 if l == "": 41 self.done = True 42 raise StopIteration 43 l = f.readline() 44 alleles = filter(lambda x: x != '', f.readline().rstrip().split(" ")) 45 alleles = map(lambda x: _gp_int(x), alleles) 46 l = f.readline().rstrip() 47 table = [] 48 while l != "": 49 line = filter(lambda x: x != '', l.split(" ")) 50 try: 51 table.append( 52 (line[0], 53 map(lambda x: _gp_float(x), line[1:-1]), 54 _gp_int(line[-1]))) 55 except ValueError: 56 table.append( 57 (line[0], 58 [None] * len(alleles), 59 0)) 60 l = f.readline().rstrip() 61 return alleles, table
62
63 -def _read_table(f, funs):
64 table = [] 65 l = f.readline().rstrip() 66 while l.find("---")==-1: 67 l = f.readline().rstrip() 68 l = f.readline().rstrip() 69 while l.find("===")==-1 and l.find("---")==-1 and l != "": 70 toks = filter(lambda x: x != "", l.split(" ")) 71 line = [] 72 for i in range(len(toks)): 73 try: 74 line.append(funs[i](toks[i])) 75 except ValueError: 76 line.append(toks[i]) # Could not cast 77 table.append(tuple(line)) 78 l = f.readline().rstrip() 79 return table
80
81 -def _read_triangle_matrix(f):
82 matrix = [] 83 l = f.readline().rstrip() 84 while l != "": 85 matrix.append( 86 map(lambda x: _gp_float(x), 87 filter(lambda y: y != "", l.split(" ")))) 88 l = f.readline().rstrip() 89 return matrix
90
91 -def _read_headed_triangle_matrix(f):
92 matrix = {} 93 header = f.readline().rstrip() 94 if header.find("---")>-1 or header.find("===")>-1: 95 header = f.readline().rstrip() 96 nlines = len(filter(lambda x:x != '', header.split(' '))) - 1 97 for line_pop in range(nlines): 98 l = f.readline().rstrip() 99 vals = filter(lambda x:x != '', l.split(' ')[1:]) 100 clean_vals = [] 101 for val in vals: 102 try: 103 clean_vals.append(_gp_float(val)) 104 except ValueError: 105 clean_vals.append(None) 106 for col_pop in range(len(clean_vals)): 107 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 108 return matrix
109
110 -def _hw_func(stream, is_locus, has_fisher = False):
111 l = stream.readline() 112 if is_locus: 113 hook = "Locus " 114 else: 115 hook = " Pop : " 116 while l != "": 117 if l.startswith(hook): 118 stream.readline() 119 stream.readline() 120 stream.readline() 121 table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str]) 122 #loci might mean pop if hook="Locus " 123 loci = {} 124 for entry in table: 125 if len(entry) < 3: 126 loci[entry[0]] = None 127 else: 128 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 129 if se == "-": se = None 130 loci[locus] = p, se, fis_wc, fis_rh, steps 131 return loci 132 l = stream.readline() 133 self.done = True 134 raise StopIteration 135
136 -class _FileIterator:
137 """Iterator which crawls over a stream of lines with a function. 138 139 The generator function is expected to yield a tuple, while 140 consuming input 141 """
142 - def __init__(self, func, stream, fname):
143 self.func = func 144 self.stream = stream 145 self.fname = fname 146 self.done = False
147
148 - def __iter__(self):
149 if self.done: 150 self.done = True 151 raise StopIteration 152 return self
153
154 - def next(self):
155 return self.func(self)
156
157 - def __del__(self):
158 self.stream.close() 159 os.remove(self.fname)
160
161 -class _GenePopCommandline(AbstractCommandline):
162 """ Command Line Wrapper for GenePop. 163 """
164 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
165 self.parameters = [ 166 _Argument(["command"], 167 ["INTEGER(.INTEGER)*"], 168 None, 169 True, 170 "GenePop option to be called"), 171 _Argument(["mode"], 172 ["Dont touch this"], 173 None, 174 True, 175 "Should allways be batch"), 176 _Argument(["input"], 177 ["input"], 178 None, 179 True, 180 "Input file"), 181 _Argument(["Dememorization"], 182 ["input"], 183 None, 184 False, 185 "Dememorization step"), 186 _Argument(["BatchNumber"], 187 ["input"], 188 None, 189 False, 190 "Number of MCMC batches"), 191 _Argument(["BatchLength"], 192 ["input"], 193 None, 194 False, 195 "Length of MCMC chains"), 196 _Argument(["HWtests"], 197 ["input"], 198 None, 199 False, 200 "Enumeration or MCMC"), 201 _Argument(["IsolBDstatistic"], 202 ["input"], 203 None, 204 False, 205 "IBD statistic (a or e)"), 206 _Argument(["MinimalDistance"], 207 ["input"], 208 None, 209 False, 210 "Minimal IBD distance"), 211 _Argument(["GeographicScale"], 212 ["input"], 213 None, 214 False, 215 "Log or Linear"), 216 ] 217 AbstractCommandline.__init__(self, cmd, **kwargs) 218 self.set_parameter("mode", "Mode=Batch")
219
220 - def set_menu(self, option_list):
221 """Sets the menu option. 222 223 Example set_menu([6,1]) = get all F statistics (menu 6.1) 224 """ 225 self.set_parameter("command", "MenuOptions="+ 226 ".".join(map(lambda x:str(x),option_list)))
227
228 - def set_input(self, fname):
229 """Sets the input file name. 230 """ 231 self.set_parameter("input", "InputFile="+fname)
232
233 -class GenePopController:
234 - def __init__(self, genepop_dir = None):
235 """Initializes the controller. 236 237 genepop_dir is the directory where GenePop is. 238 239 The binary should be called Genepop (capital G) 240 241 """ 242 self.controller = _GenePopCommandline(genepop_dir)
243
244 - def _remove_garbage(self, fname_out):
245 try: 246 if fname_out != None: os.remove(fname_out) 247 except OSError: 248 pass # safe 249 try: 250 os.remove("genepop.txt") 251 except OSError: 252 pass # safe 253 try: 254 os.remove("fichier.in") 255 except OSError: 256 pass # safe 257 try: 258 os.remove("cmdline.txt") 259 except OSError: 260 pass # safe
261
262 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
263 opts = {} 264 opts["Dememorization"]=dememorization 265 opts["BatchNumber"]=batches 266 opts["BatchLength"]=iterations 267 if enum_test != None: 268 if enum_test == True: 269 opts["HWtests"]="Enumeration" 270 else: 271 opts["HWtests"]="MCMC" 272 return opts
273
274 - def _run_genepop(self, extensions, option, fname, opts={}):
275 for extension in extensions: 276 self._remove_garbage(fname + extension) 277 self.controller.set_menu(option) 278 self.controller.set_input(fname) 279 for opt in opts: 280 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 281 ret, out, err = generic_run(self.controller) 282 self._remove_garbage(None) 283 if ret.return_code != 0: raise IOError("GenePop not found") 284 return ret, out, err
285 286
287 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 288 dememorization = 10000, batches = 20, iterations = 5000):
289 """Hardy-Weinberg test for heterozygote deficiency/excess. 290 291 Returns a population iterator containg 292 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 293 Some loci have a None if the info is not available 294 SE might be none (for enumerations) 295 """ 296 opts = self._get_opts(dememorization, batches, iterations, enum_test) 297 ret, out, err = self._run_genepop([ext], [1, type], fname, opts) 298 f = open(fname + ext) 299 def hw_func(self): 300 return _hw_func(self.stream, False)
301 return _FileIterator(hw_func, f, fname + ext)
302
303 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 304 dememorization = 10000, batches = 20, iterations = 5000):
305 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 306 307 Returns a triple with: 308 A list per population containg 309 (pop_name, P-val, SE, switches) 310 Some pops have a None if the info is not available 311 SE might be none (for enumerations) 312 A list per loci containg 313 (locus_name, P-val, SE, switches) 314 Some loci have a None if the info is not available 315 SE might be none (for enumerations) 316 Overall results (P-val, SE, switches) 317 318 """ 319 opts = self._get_opts(dememorization, batches, iterations, enum_test) 320 ret, out, err = self._run_genepop([ext], [1, type], fname, opts) 321 def hw_pop_func(self): 322 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
323 f1 = open(fname + ext) 324 l = f1.readline() 325 while l.find("by population") == -1: 326 l = f1.readline() 327 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 328 f2 = open(fname + ext) 329 l = f2.readline() 330 while l.find("by locus") == -1: 331 l = f2.readline() 332 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 333 f = open(fname + ext) 334 l = f.readline() 335 while l.find("all locus") == -1: 336 l = f.readline() 337 f.readline() 338 f.readline() 339 f.readline() 340 f.readline() 341 l = f.readline().rstrip() 342 p, se, switches = tuple(map(lambda x: _gp_float(x), 343 filter(lambda y: y != "",l.split(" ")))) 344 f.close() 345 return pop_p, loc_p, (p, se, switches) 346 347 #1.1
348 - def test_pop_hz_deficiency(self, fname, enum_test = True, 349 dememorization = 10000, batches = 20, iterations = 5000):
350 """Hardy-Weinberg test for heterozygote deficiency. 351 352 Returns a population iterator containg 353 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 354 Some loci have a None if the info is not available 355 SE might be none (for enumerations) 356 """ 357 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 358 dememorization, batches, iterations)
359 360 #1.2
361 - def test_pop_hz_excess(self, fname, enum_test = True, 362 dememorization = 10000, batches = 20, iterations = 5000):
363 """Hardy-Weinberg test for heterozygote deficiency. 364 365 Returns a population iterator containg 366 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 367 Some loci have a None if the info is not available 368 SE might be none (for enumerations) 369 """ 370 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 371 dememorization, batches, iterations)
372 373 #1.3 P file
374 - def test_pop_hw_prob(self, fname, enum_test = False, 375 dememorization = 10000, batches = 20, iterations = 5000):
376 """Hardy-Weinberg test based on probability. 377 378 Returns 2 iterators and a final tuple: 379 380 1. Returns a loci iterator containing 381 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 382 Some pops have a None if the info is not available 383 SE might be none (for enumerations) 384 c. Result of Fisher's test (Chi2, deg freedom, prob) 385 2. Returns a population iterator containg 386 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 387 Some loci have a None if the info is not available 388 SE might be none (for enumerations) 389 b. Result of Fisher's test (Chi2, deg freedom, prob) 390 3. (Chi2, deg freedom, prob) 391 """ 392 opts = self._get_opts(dememorization, batches, iterations, enum_test) 393 ret, out, err = self._run_genepop([ext], [1, 3], fname, opts) 394 def hw_prob_loci_func(self): 395 return _hw_func(self.stream, True, True)
396 def hw_prob_pop_func(self): 397 return _hw_func(self.stream, False, True) 398 shutil.copyfile(fname+".P", fname+".P2") 399 f1 = open(fname + ".P") 400 f2 = open(fname + ".P2") 401 return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2") 402 403 #1.4
404 - def test_global_hz_deficiency(self, fname, enum_test = True, 405 dememorization = 10000, batches = 20, iterations = 5000):
406 """Global Hardy-Weinberg test for heterozygote deficiency. 407 408 Returns a triple with: 409 An list per population containg 410 (pop_name, P-val, SE, switches) 411 Some pops have a None if the info is not available 412 SE might be none (for enumerations) 413 An list per loci containg 414 (locus_name, P-val, SE, switches) 415 Some loci have a None if the info is not available 416 SE might be none (for enumerations) 417 Overall results (P-val, SE, switches) 418 """ 419 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 420 dememorization, batches, iterations)
421 422 423 #1.5
424 - def test_global_hz_excess(self, fname, enum_test = True, 425 dememorization = 10000, batches = 20, iterations = 5000):
426 """Global Hardy-Weinberg test for heterozygote excess. 427 428 Returns a triple with: 429 An list per population containg 430 (pop_name, P-val, SE, switches) 431 Some pops have a None if the info is not available 432 SE might be none (for enumerations) 433 An list per loci containg 434 (locus_name, P-val, SE, switches) 435 Some loci have a None if the info is not available 436 SE might be none (for enumerations) 437 Overall results (P-val, SE, switches) 438 """ 439 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 440 dememorization, batches, iterations)
441 442 #2.1
443 - def test_ld(self, fname, 444 dememorization = 10000, batches = 20, iterations = 5000):
445 opts = self._get_opts(dememorization, batches, iterations) 446 ret, out, err = self._run_genepop([".DIS"], [2, 1], fname, opts) 447 def ld_pop_func(self): 448 l = self.stream.readline().rstrip() 449 if l == "": 450 self.done = True 451 raise StopIteration 452 toks = filter(lambda x: x != "", l.split(" ")) 453 pop, locus1, locus2 = toks[0], toks[1], toks[2] 454 if not hasattr(self, "start_locus1"): 455 start_locus1, start_locus2 = locus1, locus2 456 current_pop = -1 457 if locus1 == start_locus1 and locus2 == start_locus2: 458 currrent_pop += 1 459 if toks[3] == "No": 460 return current_pop, pop, (locus1, locus2), None 461 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 462 return current_pop, pop, (locus1, locus2), (p, se, switches)
463 def ld_func(self): 464 l = self.stream.readline().rstrip() 465 if l == "": 466 self.done = True 467 raise StopIteration 468 toks = filter(lambda x: x != "", l.split(" ")) 469 locus1, locus2 = toks[0], toks[2] 470 try: 471 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 472 except ValueError: 473 return (locus1, locus2), None 474 return (locus1, locus2), (chi2, df, p) 475 f1 = open(fname + ".DIS") 476 l = f1.readline() 477 while l.find("----")==-1: 478 l = f1.readline() 479 shutil.copyfile(fname + ".DIS", fname + ".DI2") 480 f2 = open(fname + ".DI2") 481 l = f2.readline() 482 while l.find("Locus pair")==-1: 483 l = f2.readline() 484 while l.find("----")==-1: 485 l = f2.readline() 486 return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2") 487 488 #2.2
489 - def create_contingency_tables(self, fname):
490 raise NotImplementedError
491 492 #3.1 PR/GE files
493 - def test_genic_diff_all(self, fname, 494 dememorization = 10000, batches = 20, iterations = 5000):
495 raise NotImplementedError
496 497 #3.2 PR2/GE2 files
498 - def test_genic_diff_pair(self, fname, 499 dememorization = 10000, batches = 20, iterations = 5000):
500 raise NotImplementedError
501 502 #3.3 G files
503 - def test_genotypic_diff_all(self, fname, 504 dememorization = 10000, batches = 20, iterations = 5000):
505 raise NotImplementedError
506 507 #3.4 2G2 files
508 - def test_genotypic_diff_pair(self, fname, 509 dememorization = 10000, batches = 20, iterations = 5000):
510 raise NotImplementedError
511 512 #4
513 - def estimate_nm(self, fname):
514 ret, out, err = self._run_genepop(["PRI"], [4], fname) 515 f = open(fname + ".PRI") 516 lines = f.readlines() # Small file, it is ok 517 f.close() 518 for line in lines: 519 m = re.search("Mean sample size: ([.0-9]+)", line) 520 if m != None: 521 mean_sample_size = _gp_float(m.group(1)) 522 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 523 if m != None: 524 mean_priv_alleles = _gp_float(m.group(1)) 525 m = re.search("N=10: ([.0-9]+)", line) 526 if m != None: 527 mig10 = _gp_float(m.group(1)) 528 m = re.search("N=25: ([.0-9]+)", line) 529 if m != None: 530 mig25 = _gp_float(m.group(1)) 531 m = re.search("N=50: ([.0-9]+)", line) 532 if m != None: 533 mig50 = _gp_float(m.group(1)) 534 m = re.search("for size= ([.0-9]+)", line) 535 if m != None: 536 mig_corrected = _gp_float(m.group(1)) 537 os.remove(fname + ".PRI") 538 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
539 540 #5.1
541 - def calc_allele_genotype_freqs(self, fname):
542 """Calculates allele and genotype frequencies per locus and per sample. 543 544 Parameters: 545 fname - file name 546 547 Returns tuple with 2 elements: 548 Population iterator with 549 population name 550 Locus dictionary with key = locus name and content tuple as 551 Genotype List with 552 (Allele1, Allele2, observed, expected) 553 (expected homozygotes, observed hm, 554 expected heterozygotes, observed ht) 555 Allele frequency/Fis dictionary with allele as key and 556 (count, frequency, Fis Weir & Cockerham) 557 Totals as a pair 558 count 559 Fis Weir & Cockerham, 560 Fis Robertson & Hill 561 Locus iterator with 562 Locus name 563 allele list 564 Population list with a triple 565 population name 566 list of allele frequencies in the same order as allele list above 567 number of genes 568 569 570 Will create a file called fname.INF 571 """ 572 ret, out, err = self._run_genepop(["INF"], [5,1], fname) 573 #First pass, general information 574 #num_loci = None 575 #num_pops = None 576 #f = open(fname + ".INF") 577 #l = f.readline() 578 #while (num_loci == None or num_pops == None) and l != '': 579 # m = re.search("Number of populations detected : ([0-9+])", l) 580 # if m != None: 581 # num_pops = _gp_int(m.group(1)) 582 # m = re.search("Number of loci detected : ([0-9+])", l) 583 # if m != None: 584 # num_loci = _gp_int(m.group(1)) 585 # l = f.readline() 586 #f.close() 587 def pop_parser(self): 588 if hasattr(self, "old_line"): 589 l = self.old_line 590 del self.old_line 591 else: 592 l = self.stream.readline() 593 loci_content = {} 594 while l<>'': 595 l = l.rstrip() 596 if l.find("Tables of allelic frequencies for each locus")>-1: 597 return self.curr_pop, loci_content 598 match = re.match(".*Pop: (.+) Locus: (.+)", l) 599 if match != None: 600 pop = match.group(1) 601 locus = match.group(2) 602 if not hasattr(self, "first_locus"): 603 self.first_locus = locus 604 if hasattr(self, "curr_pop"): 605 if self.first_locus == locus: 606 old_pop = self.curr_pop 607 #self.curr_pop = pop 608 self.old_line = l 609 del self.first_locus 610 del self.curr_pop 611 return old_pop, loci_content 612 self.curr_pop = pop 613 else: 614 l = self.stream.readline() 615 continue 616 geno_list = [] 617 l = self.stream.readline() 618 if l.find("No data")>-1: continue 619 620 while l.find("Genotypes Obs.")==-1: 621 l = self.stream.readline() 622 623 while l<>"\n": 624 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l) 625 if m2 != None: 626 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 627 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 628 else: 629 l = self.stream.readline() 630 continue 631 l = self.stream.readline() 632 633 while l.find("Expected number of ho")==-1: 634 l = self.stream.readline() 635 expHo = _gp_float(l[38:]) 636 l = self.stream.readline() 637 obsHo = _gp_int(l[38:]) 638 l = self.stream.readline() 639 expHe = _gp_float(l[38:]) 640 l = self.stream.readline() 641 obsHe = _gp_int(l[38:]) 642 l = self.stream.readline() 643 644 while l.find("Sample count")==-1: 645 l = self.stream.readline() 646 l = self.stream.readline() 647 freq_fis={} 648 overall_fis = None 649 while l.find("----")==-1: 650 vals = filter(lambda x: x!='', 651 l.rstrip().split(' ')) 652 if vals[0]=="Tot": 653 overall_fis = _gp_int(vals[1]), \ 654 _gp_float(vals[2]), _gp_float(vals[3]) 655 else: 656 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 657 _gp_float(vals[2]), _gp_float(vals[3]) 658 l = self.stream.readline() 659 loci_content[locus] = geno_list, \ 660 (expHo, obsHo, expHe, obsHe), \ 661 freq_fis, overall_fis 662 self.done = True 663 raise StopIteration
664 def locus_parser(self): 665 l = self.stream.readline() 666 while l != "": 667 l = l.rstrip() 668 match = re.match(" Locus: (.+)", l) 669 if match != None: 670 locus = match.group(1) 671 alleles, table = _read_allele_freq_table(self.stream) 672 return locus, alleles, table 673 l = self.stream.readline() 674 self.done = True 675 raise StopIteration 676 677 popf = open(fname + ".INF") 678 shutil.copyfile(fname + ".INF", fname + ".IN2") 679 locf = open(fname + ".IN2") 680 pop_iter = _FileIterator(pop_parser, popf, fname + ".INF") 681 locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2") 682 return (pop_iter, locus_iter) 683
684 - def _calc_diversities_fis(self, fname, ext):
685 ret, out, err = self._run_genepop([ext], [5,2], fname) 686 f = open(fname + ext) 687 l = f.readline() 688 while l<>"": 689 l = l.rstrip() 690 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 691 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 692 avg_Qintra = _read_table(f, [str, _gp_float]) 693 l = f.readline() 694 f.close() 695 def fis_func(self): 696 l = self.stream.readline() 697 while l != "": 698 l = l.rstrip() 699 m = re.search("Locus: (.+)", l) 700 if m != None: 701 locus = m.group(1) 702 self.stream.readline() 703 if self.stream.readline().find("No complete")>-1: return locus, None 704 self.stream.readline() 705 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 706 self.stream.readline() 707 avg_qinter, avg_fis = tuple(map (lambda x: _gp_float(x), 708 filter(lambda y:y != "", self.stream.readline().split(" ")))) 709 return locus, fis_table, avg_qinter, avg_fis 710 l = self.stream.readline() 711 self.done = True 712 raise StopIteration
713 dvf = open(fname + ext) 714 return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra 715 716 #5.2
717 - def calc_diversities_fis_with_identity(self, fname):
718 return self._calc_diversities_fis(fname, ".DIV")
719 720 #5.3
721 - def calc_diversities_fis_with_size(self, fname):
722 raise NotImplementedError
723 724 #6.1 Less genotype frequencies
725 - def calc_fst_all(self, fname):
726 """Executes GenePop and gets Fst/Fis/Fit (all populations) 727 728 Parameters: 729 fname - file name 730 731 Returns: 732 (multiLocusFis, multiLocusFst, multiLocus Fit), 733 Iterator of tuples 734 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 735 736 Will create a file called fname.FST . 737 738 This does not return the genotype frequencies. 739 740 """ 741 ret, out, err = self._run_genepop([".FST"], [6,1], fname) 742 f = open(fname + ".FST") 743 l = f.readline() 744 while l<>'': 745 if l.startswith(' All:'): 746 toks=filter(lambda x:x!="", l.rstrip().split(' ')) 747 try: 748 allFis = _gp_float(toks[1]) 749 except ValueError: 750 allFis = None 751 try: 752 allFst = _gp_float(toks[2]) 753 except ValueError: 754 allFst = None 755 try: 756 allFit = _gp_float(toks[3]) 757 except ValueError: 758 allFit = None 759 l = f.readline() 760 f.close() 761 f = open(fname + ".FST") 762 def proc(self): 763 if hasattr(self, "last_line"): 764 l = self.last_line 765 del self.last_line 766 else: 767 l = self.stream.readline() 768 locus = None 769 fis = None 770 fst = None 771 fit = None 772 qintra = None 773 qinter = None 774 while l<>'': 775 l = l.rstrip() 776 if l.startswith(' Locus:'): 777 if locus != None: 778 self.last_line = l 779 return locus, fis, fst, fit, qintra, qinter 780 else: 781 locus = l.split(':')[1].lstrip() 782 elif l.startswith('Fis^='): 783 fis = _gp_float(l.split(' ')[1]) 784 elif l.startswith('Fst^='): 785 fst = _gp_float(l.split(' ')[1]) 786 elif l.startswith('Fit^='): 787 fit = _gp_float(l.split(' ')[1]) 788 elif l.startswith('1-Qintra^='): 789 qintra = _gp_float(l.split(' ')[1]) 790 elif l.startswith('1-Qinter^='): 791 qinter = _gp_float(l.split(' ')[1]) 792 return locus, fis, fst, fit, qintra, qinter 793 l = self.stream.readline() 794 if locus != None: 795 return locus, fis, fst, fit, qintra, qinter 796 self.stream.close() 797 self.done = True 798 raise StopIteration
799 return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST") 800 801 #6.2
802 - def calc_fst_pair(self, fname):
803 ret, out, err = self._run_genepop([".ST2", ".MIG"], [6,2], fname) 804 f = open(fname + ".ST2") 805 l = f.readline() 806 while l<>"": 807 l = l.rstrip() 808 if l.startswith("Estimates for all loci"): 809 avg_fst = _read_headed_triangle_matrix(f) 810 l = f.readline() 811 f.close() 812 def loci_func(self): 813 l = self.stream.readline() 814 while l != "": 815 l = l.rstrip() 816 m = re.search(" Locus: (.+)", l) 817 if m != None: 818 locus = m.group(1) 819 matrix = _read_headed_triangle_matrix(self.stream) 820 return locus, matrix 821 l = self.stream.readline() 822 self.done = True 823 raise StopIteration
824 stf = open(fname + ".ST2") 825 os.remove(fname + ".MIG") 826 return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst 827 828 #6.3
829 - def calc_rho_all(self, fname):
830 raise NotImplementedError
831 832 #6.4
833 - def calc_rho_pair(self, fname):
834 raise NotImplementedError
835
836 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
837 """Calculates isolation by distance statistics 838 """ 839 ret, out, err = self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub], 840 fname, opts = { 841 "MinimalDistance" : min_dist, 842 "GeographicScale" : scale, 843 "IsolBDstatistic" : stat, 844 }) 845 f = open(fname + ".ISO") 846 f.readline() 847 f.readline() 848 f.readline() 849 f.readline() 850 estimate = _read_triangle_matrix(f) 851 f.readline() 852 f.readline() 853 distance = _read_triangle_matrix(f) 854 f.readline() 855 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 856 a = _gp_float(match.group(1)) 857 b = _gp_float(match.group(2)) 858 f.readline() 859 f.readline() 860 match = re.match(" b=(.+)", f.readline().rstrip()) 861 bb = _gp_float(match.group(1)) 862 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 863 bblow = _gp_float(match.group(1)) 864 bbhigh = _gp_float(match.group(2)) 865 f.close() 866 os.remove(fname + ".MIG") 867 os.remove(fname + ".GRA") 868 os.remove(fname + ".ISO") 869 return estimate, distance, (a, b), (bb, bblow, bbhigh)
870 871 #6.5
872 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
873 """Calculates isolation by distance statistics for diploid data. 874 875 See _calc_ibd for parameter details. 876 Note that each pop can only have a single individual and 877 the individual name has to be the sample coordinates. 878 """ 879 return self._calc_ibd(fname, 5, stat, scale, min_dist)
880 881 #6.6
882 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
883 """Calculates isolation by distance statistics for haploid data. 884 885 See _calc_ibd for parameter details. 886 Note that each pop can only have a single individual and 887 the individual name has to be the sample coordinates. 888 """ 889 return self._calc_ibd(fname, 6, stat, scale, min_dist)
890