1
2
3
4
5
6
7
8 """
9 This module provides code to work with the standalone version of
10 BLAST, either blastall, rpsblast or blastpgp, provided by the NCBI.
11 http://www.ncbi.nlm.nih.gov/BLAST/
12
13 Classes:
14 LowQualityBlastError Exception that indicates low quality query sequences.
15 BlastParser Parses output from blast.
16 BlastErrorParser Parses output and tries to diagnose possible errors.
17 PSIBlastParser Parses output from psi-blast.
18 Iterator Iterates over a file of blast results.
19
20 _Scanner Scans output from standalone BLAST.
21 _BlastConsumer Consumes output from blast.
22 _PSIBlastConsumer Consumes output from psi-blast.
23 _HeaderConsumer Consumes header information.
24 _DescriptionConsumer Consumes description information.
25 _AlignmentConsumer Consumes alignment information.
26 _HSPConsumer Consumes hsp information.
27 _DatabaseReportConsumer Consumes database report information.
28 _ParametersConsumer Consumes parameters information.
29
30 Functions:
31 blastall Execute blastall (OBSOLETE).
32 blastpgp Execute blastpgp (OBSOLETE).
33 rpsblast Execute rpsblast (OBSOLETE).
34
35 For calling the BLAST command line tools, we encourage you to use the
36 command line wrappers in Bio.Blast.Applications - the three functions
37 blastall, blastpgp and rpsblast are considered to be obsolete now, and
38 are likely to be deprecated and then removed in future releases.
39 """
40
41 import os
42 import re
43
44 from Bio import File
45 from Bio.ParserSupport import *
46 from Bio.Blast import Record
47 from Bio.Application import _escape_filename
48
50 """Error caused by running a low quality sequence through BLAST.
51
52 When low quality sequences (like GenBank entries containing only
53 stretches of a single nucleotide) are BLASTed, they will result in
54 BLAST generating an error and not being able to perform the BLAST
55 search. This error should be raised for the BLAST reports produced
56 in this case.
57 """
58 pass
59
61 """Error caused by running a short query sequence through BLAST.
62
63 If the query sequence is too short, BLAST outputs warnings and errors:
64 Searching[blastall] WARNING: [000.000] AT1G08320: SetUpBlastSearch failed.
65 [blastall] ERROR: [000.000] AT1G08320: Blast:
66 [blastall] ERROR: [000.000] AT1G08320: Blast: Query must be at least wordsize
67 done
68
69 This exception is raised when that condition is detected.
70
71 """
72 pass
73
74
76 """Scan BLAST output from blastall or blastpgp.
77
78 Tested with blastall and blastpgp v2.0.10, v2.0.11
79
80 Methods:
81 feed Feed data into the scanner.
82
83 """
84 - def feed(self, handle, consumer):
104
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149 consumer.start_header()
150
151 read_and_call(uhandle, consumer.version, contains='BLAST')
152 read_and_call_while(uhandle, consumer.noevent, blank=1)
153
154
155 attempt_read_and_call(uhandle, consumer.noevent, start="<pre>")
156
157
158 while attempt_read_and_call(uhandle,
159 consumer.reference, start='Reference'):
160
161
162 while 1:
163 line = uhandle.readline()
164 if is_blank_line(line):
165 consumer.noevent(line)
166 break
167 elif line.startswith("RID"):
168 break
169 else:
170
171 consumer.reference(line)
172
173
174 read_and_call_while(uhandle, consumer.noevent, blank=1)
175 attempt_read_and_call(uhandle, consumer.reference, start="RID:")
176 read_and_call_while(uhandle, consumer.noevent, blank=1)
177
178
179
180 if attempt_read_and_call(
181 uhandle, consumer.reference, start="Reference"):
182 read_and_call_until(uhandle, consumer.reference, blank=1)
183 read_and_call_while(uhandle, consumer.noevent, blank=1)
184
185
186 if attempt_read_and_call(
187 uhandle, consumer.reference, start="Reference"):
188 read_and_call_until(uhandle, consumer.reference, blank=1)
189 read_and_call_while(uhandle, consumer.noevent, blank=1)
190
191 line = uhandle.peekline()
192 assert line.strip() != ""
193 assert not line.startswith("RID:")
194 if line.startswith("Query="):
195
196
197
198 read_and_call(uhandle, consumer.query_info, start='Query=')
199 read_and_call_until(uhandle, consumer.query_info, blank=1)
200 read_and_call_while(uhandle, consumer.noevent, blank=1)
201
202
203 read_and_call_until(uhandle, consumer.database_info, end='total letters')
204 read_and_call(uhandle, consumer.database_info, contains='sequences')
205 read_and_call_while(uhandle, consumer.noevent, blank=1)
206 elif line.startswith("Database:"):
207
208 read_and_call_until(uhandle, consumer.database_info, end='total letters')
209 read_and_call(uhandle, consumer.database_info, contains='sequences')
210 read_and_call_while(uhandle, consumer.noevent, blank=1)
211
212
213
214
215 read_and_call(uhandle, consumer.query_info, start='Query=')
216
217 while True:
218 line = uhandle.peekline()
219 if not line.strip() : break
220 if "Score E" in line : break
221
222 read_and_call(uhandle, consumer.query_info)
223 read_and_call_while(uhandle, consumer.noevent, blank=1)
224 else:
225 raise ValueError("Invalid header?")
226
227 consumer.end_header()
228
246
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270 consumer.start_descriptions()
271
272
273
274 attempt_read_and_call(uhandle, consumer.noevent, start='Searching')
275
276
277
278 if not uhandle.peekline():
279 raise ValueError("Unexpected end of blast report. " + \
280 "Looks suspiciously like a PSI-BLAST crash.")
281
282
283
284
285
286
287
288
289
290 line = uhandle.peekline()
291 if line.find("ERROR:") != -1 or line.startswith("done"):
292 read_and_call_while(uhandle, consumer.noevent, contains="ERROR:")
293 read_and_call(uhandle, consumer.noevent, start="done")
294
295
296
297
298
299
300
301
302
303
304
305
306
307 read_and_call_while(uhandle, consumer.noevent, blank=1)
308
309 if attempt_read_and_call(uhandle, consumer.round, start='Results'):
310 read_and_call_while(uhandle, consumer.noevent, blank=1)
311
312
313
314
315
316
317
318
319 if not attempt_read_and_call(
320 uhandle, consumer.description_header,
321 has_re=re.compile(r'Score +E')):
322
323 attempt_read_and_call(uhandle, consumer.no_hits,
324 contains='No hits found')
325 try:
326 read_and_call_while(uhandle, consumer.noevent, blank=1)
327 except ValueError, err:
328 if str(err) != "Unexpected end of stream." : raise err
329
330 consumer.end_descriptions()
331
332 return
333
334
335 read_and_call(uhandle, consumer.description_header,
336 start='Sequences producing')
337
338
339 attempt_read_and_call(uhandle, consumer.model_sequences,
340 start='Sequences used in model')
341 read_and_call_while(uhandle, consumer.noevent, blank=1)
342
343
344
345
346 if safe_peekline(uhandle).startswith(" Database:"):
347 consumer.end_descriptions()
348
349 return
350
351
352
353 if not uhandle.peekline().startswith('Sequences not found'):
354 read_and_call_until(uhandle, consumer.description, blank=1)
355 read_and_call_while(uhandle, consumer.noevent, blank=1)
356
357
358
359
360
361 if attempt_read_and_call(uhandle, consumer.nonmodel_sequences,
362 start='Sequences not found'):
363
364 read_and_call_while(uhandle, consumer.noevent, blank=1)
365 l = safe_peekline(uhandle)
366
367
368
369
370 if not l.startswith('CONVERGED') and l[0] != '>' \
371 and not l.startswith('QUERY'):
372 read_and_call_until(uhandle, consumer.description, blank=1)
373 read_and_call_while(uhandle, consumer.noevent, blank=1)
374
375 attempt_read_and_call(uhandle, consumer.converged, start='CONVERGED')
376 read_and_call_while(uhandle, consumer.noevent, blank=1)
377
378 consumer.end_descriptions()
379
399
406
423
452
458
460
461
462
463
464
465
466 read_and_call(uhandle, consumer.score, start=' Score')
467 read_and_call(uhandle, consumer.identities, start=' Identities')
468
469 attempt_read_and_call(uhandle, consumer.strand, start = ' Strand')
470
471 attempt_read_and_call(uhandle, consumer.frame, start = ' Frame')
472 read_and_call(uhandle, consumer.noevent, blank=1)
473
475
476
477
478
479
480
481
482
483
484 while 1:
485
486 attempt_read_and_call(uhandle, consumer.noevent, start=' ')
487 read_and_call(uhandle, consumer.query, start='Query')
488 read_and_call(uhandle, consumer.align, start=' ')
489 read_and_call(uhandle, consumer.sbjct, start='Sbjct')
490 try:
491 read_and_call_while(uhandle, consumer.noevent, blank=1)
492 except ValueError, err:
493 if str(err) != "Unexpected end of stream." : raise err
494
495
496
497 break
498 line = safe_peekline(uhandle)
499
500 if not (line.startswith('Query') or line.startswith(' ')):
501 break
502
525
def _eof(self, uhandle):
    """Return True when no further line can be peeked from uhandle.

    uhandle is passed to safe_peekline.  A ValueError whose message is
    exactly "Unexpected end of stream." is treated as end of input and
    mapped to an empty line; any other ValueError propagates unchanged.
    """
    try:
        line = safe_peekline(uhandle)
    except ValueError as err:
        if str(err) != "Unexpected end of stream.":
            # Some other failure: re-raise it as-is (bare raise keeps the
            # original traceback, matching the handler used elsewhere in
            # this module).
            raise
        line = ""
    return not line
533
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561 if self._eof(uhandle) : return
562
563 consumer.start_database_report()
564
565
566
567
568 if attempt_read_and_call(uhandle, consumer.noevent, start=" Subset"):
569 read_and_call(uhandle, consumer.noevent, contains="letters")
570 read_and_call(uhandle, consumer.noevent, contains="sequences")
571 read_and_call(uhandle, consumer.noevent, start=" ")
572
573
574
575 while attempt_read_and_call(uhandle, consumer.database,
576 start=' Database'):
577
578
579
580 if not uhandle.peekline().strip() \
581 or uhandle.peekline().startswith("BLAST"):
582 consumer.end_database_report()
583 return
584
585
586 read_and_call_until(uhandle, consumer.database, start=' Posted')
587 read_and_call(uhandle, consumer.posted_date, start=' Posted')
588 read_and_call(uhandle, consumer.num_letters_in_database,
589 start=' Number of letters')
590 read_and_call(uhandle, consumer.num_sequences_in_database,
591 start=' Number of sequences')
592
593 attempt_read_and_call(uhandle, consumer.noevent, start=' ')
594
595 line = safe_readline(uhandle)
596 uhandle.saveline(line)
597 if line.find('Lambda') != -1:
598 break
599
600 read_and_call(uhandle, consumer.noevent, start='Lambda')
601 read_and_call(uhandle, consumer.ka_params)
602
603
604 attempt_read_and_call(uhandle, consumer.noevent, blank=1)
605
606
607 attempt_read_and_call(uhandle, consumer.gapped, start='Gapped')
608
609 if attempt_read_and_call(uhandle, consumer.noevent, start='Lambda'):
610 read_and_call(uhandle, consumer.ka_params_gap)
611
612
613
614
615 try:
616 read_and_call_while(uhandle, consumer.noevent, blank=1)
617 except ValueError, x:
618 if str(x) != "Unexpected end of stream.":
619 raise
620 consumer.end_database_report()
621
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680 if not uhandle.peekline().strip():
681 return
682
683
684
685 consumer.start_parameters()
686
687
688 attempt_read_and_call(uhandle, consumer.matrix, start='Matrix')
689
690 attempt_read_and_call(uhandle, consumer.gap_penalties, start='Gap')
691
692 attempt_read_and_call(uhandle, consumer.num_sequences,
693 start='Number of Sequences')
694 attempt_read_and_call(uhandle, consumer.num_hits,
695 start='Number of Hits')
696 attempt_read_and_call(uhandle, consumer.num_sequences,
697 start='Number of Sequences')
698 attempt_read_and_call(uhandle, consumer.num_extends,
699 start='Number of extensions')
700 attempt_read_and_call(uhandle, consumer.num_good_extends,
701 start='Number of successful')
702
703 attempt_read_and_call(uhandle, consumer.num_seqs_better_e,
704 start='Number of sequences')
705
706
707 if attempt_read_and_call(uhandle, consumer.hsps_no_gap,
708 start="Number of HSP's better"):
709
710 if attempt_read_and_call(uhandle, consumer.noevent,
711 start="Number of HSP's gapped:"):
712 read_and_call(uhandle, consumer.noevent,
713 start="Number of HSP's successfully")
714
715 attempt_read_and_call(uhandle, consumer.noevent,
716 start="Number of extra gapped extensions")
717 else:
718 read_and_call(uhandle, consumer.hsps_prelim_gapped,
719 start="Number of HSP's successfully")
720 read_and_call(uhandle, consumer.hsps_prelim_gap_attempted,
721 start="Number of HSP's that")
722 read_and_call(uhandle, consumer.hsps_gapped,
723 start="Number of HSP's gapped")
724
725 elif attempt_read_and_call(uhandle, consumer.noevent,
726 start="Number of HSP's gapped"):
727 read_and_call(uhandle, consumer.noevent,
728 start="Number of HSP's successfully")
729
730
731 attempt_read_and_call(uhandle, consumer.query_length,
732 has_re=re.compile(r"[Ll]ength of query"))
733
734 attempt_read_and_call(uhandle, consumer.database_length,
735 has_re=re.compile(r"[Ll]ength of \s*[Dd]atabase"))
736
737
738 attempt_read_and_call(uhandle, consumer.noevent,
739 start="Length adjustment")
740 attempt_read_and_call(uhandle, consumer.effective_hsp_length,
741 start='effective HSP')
742
743 attempt_read_and_call(
744 uhandle, consumer.effective_query_length,
745 has_re=re.compile(r'[Ee]ffective length of query'))
746
747
748 attempt_read_and_call(
749 uhandle, consumer.effective_database_length,
750 has_re=re.compile(r'[Ee]ffective length of \s*[Dd]atabase'))
751
752
753 attempt_read_and_call(
754 uhandle, consumer.effective_search_space,
755 has_re=re.compile(r'[Ee]ffective search space:'))
756
757 attempt_read_and_call(
758 uhandle, consumer.effective_search_space_used,
759 has_re=re.compile(r'[Ee]ffective search space used'))
760
761
762 attempt_read_and_call(uhandle, consumer.frameshift, start='frameshift')
763
764
765 attempt_read_and_call(uhandle, consumer.threshold, start='T')
766
767 attempt_read_and_call(uhandle, consumer.threshold, start='Neighboring words threshold')
768
769
770 attempt_read_and_call(uhandle, consumer.window_size, start='A')
771
772 attempt_read_and_call(uhandle, consumer.window_size, start='Window for multiple hits')
773
774
775 attempt_read_and_call(uhandle, consumer.dropoff_1st_pass, start='X1')
776
777 attempt_read_and_call(uhandle, consumer.gap_x_dropoff, start='X2')
778
779
780 attempt_read_and_call(uhandle, consumer.gap_x_dropoff_final,
781 start='X3')
782
783
784 attempt_read_and_call(uhandle, consumer.gap_trigger, start='S1')
785
786
787
788 if not is_blank_line(uhandle.peekline(), allow_spaces=1):
789 read_and_call(uhandle, consumer.blast_cutoff, start='S2')
790
791 consumer.end_parameters()
792
794 """Parses BLAST data into a Record.Blast object.
795
796 """
801
def parse(self, handle):
    """Scan handle and return the data collected by the consumer.

    handle is fed, together with this parser's consumer, to the
    parser's scanner; the consumer's data attribute is then returned.
    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return self._consumer.data
806
808 """Parses BLAST data into a Record.PSIBlast object.
809
810 """
815
def parse(self, handle):
    """Scan handle and return the data collected by the consumer.

    handle is fed, together with this parser's consumer, to the
    parser's scanner; the consumer's data attribute is then returned.
    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return self._consumer.data
820
824
826 c = line.split()
827 self._header.application = c[0]
828 self._header.version = c[1]
829 if len(c) > 2:
830
831
832 self._header.date = c[2][1:-1]
833
839
854
871
876
879 self._descriptions = []
880 self._model_sequences = []
881 self._nonmodel_sequences = []
882 self._converged = 0
883 self._type = None
884 self._roundnum = None
885
886 self.__has_n = 0
887
889 if line.startswith('Sequences producing'):
890 cols = line.split()
891 if cols[-1] == 'N':
892 self.__has_n = 1
893
895 dh = self._parse(line)
896 if self._type == 'model':
897 self._model_sequences.append(dh)
898 elif self._type == 'nonmodel':
899 self._nonmodel_sequences.append(dh)
900 else:
901 self._descriptions.append(dh)
902
905
907 self._type = 'nonmodel'
908
911
914
916 if not line.startswith('Results from round'):
917 raise ValueError("I didn't understand the round line\n%s" % line)
918 self._roundnum = _safe_int(line[18:].strip())
919
922
def _parse(self, description_line):
    """Turn one description line into a Record.Description.

    The line is split on whitespace.  When self.__has_n is set, the last
    three columns are read as score, e-value and number of alignments;
    otherwise the last two columns are score and e-value and the number
    of alignments is 1.  Everything before those columns (right-stripped)
    becomes the title.

    Raises ValueError if the line has fewer than three columns.
    """
    line = description_line
    dh = Record.Description()

    cols = line.split()
    if len(cols) < 3:
        raise ValueError(
            "Line does not appear to contain description:\n%s" % line)

    if self.__has_n:
        trailing = cols[-3:]
        raw_score, raw_e, raw_count = cols[-3], cols[-2], cols[-1]
    else:
        trailing = cols[-2:]
        raw_score, raw_e, raw_count = cols[-2], cols[-1], 1

    # Walk backwards through the trailing columns, each time searching only
    # the part of the line left of the previous hit, so that a value which
    # also occurs inside the title is not mistaken for the column itself.
    title_end = len(line)
    for token in reversed(trailing):
        title_end = line.rfind(token, 0, title_end)

    dh.title = line[:title_end].rstrip()
    dh.num_alignments = _safe_int(raw_count)
    dh.score = _safe_int(raw_score)
    dh.e = _safe_float(raw_e)
    return dh
955
957
958
959
960
964
966 if self._alignment.title:
967 self._alignment.title += " "
968 self._alignment.title += line.strip()
969
971
972 parts = line.replace(" ","").split("=")
973 assert len(parts)==2, "Unrecognised format length line"
974 self._alignment.length = parts[1]
975 self._alignment.length = _safe_int(self._alignment.length)
976
978
979 if line.startswith('QUERY') or line.startswith('blast_tmp'):
980
981
982
983
984
985 try:
986 name, start, seq, end = line.split()
987 except ValueError:
988 raise ValueError("I do not understand the line\n%s" % line)
989 self._start_index = line.index(start, len(name))
990 self._seq_index = line.index(seq,
991 self._start_index+len(start))
992
993 self._name_length = self._start_index - 1
994 self._start_length = self._seq_index - self._start_index - 1
995 self._seq_length = line.rfind(end) - self._seq_index - 1
996
997
998
999
1000
1001
1002
1003
1004
1005 name = line[:self._name_length]
1006 name = name.rstrip()
1007 start = line[self._start_index:self._start_index+self._start_length]
1008 start = start.rstrip()
1009 if start:
1010 start = _safe_int(start)
1011 end = line[self._seq_index+self._seq_length:].rstrip()
1012 if end:
1013 end = _safe_int(end)
1014 seq = line[self._seq_index:self._seq_index+self._seq_length].rstrip()
1015
1016 if len(seq) < self._seq_length:
1017 seq = seq + ' '*(self._seq_length-len(seq))
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 align = self._multiple_alignment.alignment
1037 align.append((name, start, seq, end))
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1087
1088 if self._alignment:
1089 self._alignment.title = self._alignment.title.rstrip()
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111 try:
1112 del self._seq_index
1113 del self._seq_length
1114 del self._start_index
1115 del self._start_length
1116 del self._name_length
1117 except AttributeError:
1118 pass
1119
1123
1125 self._hsp.bits, self._hsp.score = _re_search(
1126 r"Score =\s*([0-9.e+]+) bits \(([0-9]+)\)", line,
1127 "I could not find the score in line\n%s" % line)
1128 self._hsp.score = _safe_float(self._hsp.score)
1129 self._hsp.bits = _safe_float(self._hsp.bits)
1130
1131 x, y = _re_search(
1132 r"Expect\(?(\d*)\)? = +([0-9.e\-|\+]+)", line,
1133 "I could not find the expect in line\n%s" % line)
1134 if x:
1135 self._hsp.num_alignments = _safe_int(x)
1136 else:
1137 self._hsp.num_alignments = 1
1138 self._hsp.expect = _safe_float(y)
1139
1141 x, y = _re_search(
1142 r"Identities = (\d+)\/(\d+)", line,
1143 "I could not find the identities in line\n%s" % line)
1144 self._hsp.identities = _safe_int(x), _safe_int(y)
1145 self._hsp.align_length = _safe_int(y)
1146
1147 if line.find('Positives') != -1:
1148 x, y = _re_search(
1149 r"Positives = (\d+)\/(\d+)", line,
1150 "I could not find the positives in line\n%s" % line)
1151 self._hsp.positives = _safe_int(x), _safe_int(y)
1152 assert self._hsp.align_length == _safe_int(y)
1153
1154 if line.find('Gaps') != -1:
1155 x, y = _re_search(
1156 r"Gaps = (\d+)\/(\d+)", line,
1157 "I could not find the gaps in line\n%s" % line)
1158 self._hsp.gaps = _safe_int(x), _safe_int(y)
1159 assert self._hsp.align_length == _safe_int(y)
1160
1161
1163 self._hsp.strand = _re_search(
1164 r"Strand = (\w+) / (\w+)", line,
1165 "I could not find the strand in line\n%s" % line)
1166
1168
1169
1170
1171 if line.find('/') != -1:
1172 self._hsp.frame = _re_search(
1173 r"Frame = ([-+][123]) / ([-+][123])", line,
1174 "I could not find the frame in line\n%s" % line)
1175 else:
1176 self._hsp.frame = _re_search(
1177 r"Frame = ([-+][123])", line,
1178 "I could not find the frame in line\n%s" % line)
1179
1180
1181
1182
1183
1184
1185 _query_re = re.compile(r"Query(:?) \s*(\d+)\s*(.+) (\d+)")
1187 m = self._query_re.search(line)
1188 if m is None:
1189 raise ValueError("I could not find the query in line\n%s" % line)
1190
1191
1192
1193 colon, start, seq, end = m.groups()
1194 self._hsp.query = self._hsp.query + seq
1195 if self._hsp.query_start is None:
1196 self._hsp.query_start = _safe_int(start)
1197
1198
1199
1200 self._hsp.query_end = _safe_int(end)
1201
1202
1203 self._query_start_index = m.start(3)
1204 self._query_len = len(seq)
1205
1207 seq = line[self._query_start_index:].rstrip()
1208 if len(seq) < self._query_len:
1209
1210 seq = seq + ' ' * (self._query_len-len(seq))
1211 elif len(seq) < self._query_len:
1212 raise ValueError("Match is longer than the query in line\n%s" \
1213 % line)
1214 self._hsp.match = self._hsp.match + seq
1215
1216
1217
1218 _sbjct_re = re.compile(r"Sbjct(:?) \s*(\d+)\s*(.+) (\d+)")
1220 m = self._sbjct_re.search(line)
1221 if m is None:
1222 raise ValueError("I could not find the sbjct in line\n%s" % line)
1223 colon, start, seq, end = m.groups()
1224
1225
1226
1227
1228 if not seq.strip():
1229 seq = ' ' * self._query_len
1230 self._hsp.sbjct = self._hsp.sbjct + seq
1231 if self._hsp.sbjct_start is None:
1232 self._hsp.sbjct_start = _safe_int(start)
1233
1234 self._hsp.sbjct_end = _safe_int(end)
1235 if len(seq) != self._query_len:
1236 raise ValueError( \
1237 "QUERY and SBJCT sequence lengths don't match in line\n%s" \
1238 % line)
1239
1240 del self._query_start_index
1241 del self._query_len
1242
1245
1247
1250
1252 m = re.search(r"Database: (.+)$", line)
1253 if m:
1254 self._dr.database_name.append(m.group(1))
1255 elif self._dr.database_name:
1256
1257 self._dr.database_name[-1] = "%s%s" % (self._dr.database_name[-1],
1258 line.strip())
1259
def posted_date(self, line):
    """Append the date found on a 'Posted date:' line to the report.

    The text after 'Posted date:' is extracted with _re_search and
    appended to self._dr.posted_date.  The third argument is passed to
    _re_search as its error text (presumably used when the pattern does
    not match -- confirm against _re_search).
    """
    dates = self._dr.posted_date
    failure = "I could not find the posted date in line\n%s" % line
    found = _re_search(r"Posted date:\s*(.+)$", line, failure)
    dates.append(found)
1264
1269
1274
1278
1281
1285
1288
1292
1295
1300
1302 if line.find('1st pass') != -1:
1303 x, = _get_cols(line, (-4,), ncols=11, expected={2:"Hits"})
1304 self._params.num_hits = _safe_int(x)
1305 else:
1306 x, = _get_cols(line, (-1,), ncols=6, expected={2:"Hits"})
1307 self._params.num_hits = _safe_int(x)
1308
1310 if line.find('1st pass') != -1:
1311 x, = _get_cols(line, (-4,), ncols=9, expected={2:"Sequences:"})
1312 self._params.num_sequences = _safe_int(x)
1313 else:
1314 x, = _get_cols(line, (-1,), ncols=4, expected={2:"Sequences:"})
1315 self._params.num_sequences = _safe_int(x)
1316
1318 if line.find('1st pass') != -1:
1319 x, = _get_cols(line, (-4,), ncols=9, expected={2:"extensions:"})
1320 self._params.num_extends = _safe_int(x)
1321 else:
1322 x, = _get_cols(line, (-1,), ncols=4, expected={2:"extensions:"})
1323 self._params.num_extends = _safe_int(x)
1324
1326 if line.find('1st pass') != -1:
1327 x, = _get_cols(line, (-4,), ncols=10, expected={3:"extensions:"})
1328 self._params.num_good_extends = _safe_int(x)
1329 else:
1330 x, = _get_cols(line, (-1,), ncols=5, expected={3:"extensions:"})
1331 self._params.num_good_extends = _safe_int(x)
1332
1338
1343
1349
1355
1360
1365
1370
1376
1382
1388
1394
1400
1404
1406 if line[:2] == "T:":
1407
1408 self._params.threshold, = _get_cols(
1409 line, (1,), ncols=2, expected={0:"T:"})
1410 elif line[:28] == "Neighboring words threshold:":
1411 self._params.threshold, = _get_cols(
1412 line, (3,), ncols=4, expected={0:"Neighboring", 1:"words", 2:"threshold:"})
1413 else:
1414 raise ValueError("Unrecognised threshold line:\n%s" % line)
1415 self._params.threshold = _safe_int(self._params.threshold)
1416
1418 if line[:2] == "A:":
1419 self._params.window_size, = _get_cols(
1420 line, (1,), ncols=2, expected={0:"A:"})
1421 elif line[:25] == "Window for multiple hits:":
1422 self._params.window_size, = _get_cols(
1423 line, (4,), ncols=5, expected={0:"Window", 2:"multiple", 3:"hits:"})
1424 else:
1425 raise ValueError("Unrecognised window size line:\n%s" % line)
1426 self._params.window_size = _safe_int(self._params.window_size)
1427
1433
1439
1445
1451
1457
1460
1461
1462 -class _BlastConsumer(AbstractConsumer,
1463 _HeaderConsumer,
1464 _DescriptionConsumer,
1465 _AlignmentConsumer,
1466 _HSPConsumer,
1467 _DatabaseReportConsumer,
1468 _ParametersConsumer
1469 ):
1470
1471
1472
1473
1474
1475
1476
1477
1478
1481
1483
1484 raise ValueError("This consumer doesn't handle PSI-BLAST data")
1485
1489
1493
1495 self.data.descriptions = self._descriptions
1496
1498 _AlignmentConsumer.end_alignment(self)
1499 if self._alignment.hsps:
1500 self.data.alignments.append(self._alignment)
1501 if self._multiple_alignment.alignment:
1502 self.data.multiple_alignment = self._multiple_alignment
1503
1505 _HSPConsumer.end_hsp(self)
1506 try:
1507 self._alignment.hsps.append(self._hsp)
1508 except AttributeError:
1509 raise ValueError("Found an HSP before an alignment")
1510
1514
1518
1519 -class _PSIBlastConsumer(AbstractConsumer,
1520 _HeaderConsumer,
1521 _DescriptionConsumer,
1522 _AlignmentConsumer,
1523 _HSPConsumer,
1524 _DatabaseReportConsumer,
1525 _ParametersConsumer
1526 ):
1529
1533
1537
1542
1544 _DescriptionConsumer.end_descriptions(self)
1545 self._round.number = self._roundnum
1546 if self._descriptions:
1547 self._round.new_seqs.extend(self._descriptions)
1548 self._round.reused_seqs.extend(self._model_sequences)
1549 self._round.new_seqs.extend(self._nonmodel_sequences)
1550 if self._converged:
1551 self.data.converged = 1
1552
1554 _AlignmentConsumer.end_alignment(self)
1555 if self._alignment.hsps:
1556 self._round.alignments.append(self._alignment)
1557 if self._multiple_alignment:
1558 self._round.multiple_alignment = self._multiple_alignment
1559
1561 _HSPConsumer.end_hsp(self)
1562 try:
1563 self._alignment.hsps.append(self._hsp)
1564 except AttributeError:
1565 raise ValueError("Found an HSP before an alignment")
1566
1570
1574
1576 """Iterates over a file of multiple BLAST results.
1577
1578 Methods:
1579 next Return the next record from the stream, or None.
1580
1581 """
def __init__(self, handle, parser=None):
    """Create a new iterator over the records readable from handle.

    handle is a file-like object; it must provide a readline attribute
    and is wrapped in a File.UndoHandle.  parser is an optional Parser
    object to change the results into another form.  If set to None,
    then the raw contents of the file will be returned.

    Raises ValueError if handle has no readline attribute.
    """
    try:
        # Only the presence of the attribute is checked; it is not called.
        getattr(handle, "readline")
    except AttributeError:
        complaint = "I expected a file handle or file-like object, got %s"
        raise ValueError(complaint % type(handle))
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
    # Starts empty; no header lines have been stored yet.
    self._header = []
1599
1601 """next(self) -> object
1602
1603 Return the next Blast record from the file. If no more records,
1604 return None.
1605
1606 """
1607 lines = []
1608 query = False
1609 while 1:
1610 line = self._uhandle.readline()
1611 if not line:
1612 break
1613
1614 if lines and (line.startswith('BLAST')
1615 or line.startswith('BLAST', 1)
1616 or line.startswith('<?xml ')):
1617 self._uhandle.saveline(line)
1618 break
1619
1620 if line.startswith("Query="):
1621 if not query:
1622 if not self._header:
1623 self._header = lines[:]
1624 query = True
1625 else:
1626
1627 self._uhandle.saveline(line)
1628 break
1629 lines.append(line)
1630
1631 if query and "BLAST" not in lines[0]:
1632
1633
1634
1635
1636
1637
1638 lines = self._header + lines
1639
1640 if not lines:
1641 return None
1642
1643 data = ''.join(lines)
1644 if self._parser is not None:
1645 return self._parser.parse(File.StringHandle(data))
1646 return data
1647
1649 return iter(self.next, None)
1650
def blastall(blastcmd, program, database, infile, align_view='7', **keywds):
    """Execute and retrieve data from standalone BLASTALL as handles (OBSOLETE).

    NOTE - This function is obsolete, you are encouraged to use the command
    line wrapper Bio.Blast.Applications.BlastallCommandline instead.

    Execute and retrieve data from blastall.  blastcmd is the command
    used to launch the 'blastall' executable.  program is the blast program
    to use, e.g. 'blastp', 'blastn', etc.  database is the path to the database
    to search against.  infile is the path to the file containing
    the sequence to search with.

    The return values are two handles, for standard output and standard error.

    You may pass more parameters to **keywds to change the behavior of
    the search.  Otherwise, optional values will be chosen by blastall.
    The Blast output is by default in XML format.  Use the align_view keyword
    for output in a different format.

    Every keyword is looked up in the option table below, so a keyword
    that is not listed here raises KeyError.  Keyword values are converted
    with str() before being handed to the command line object.

        Scoring
    matrix              Matrix to use.
    gap_open            Gap open penalty.
    gap_extend          Gap extension penalty.
    nuc_match           Nucleotide match reward.  (BLASTN)
    nuc_mismatch        Nucleotide mismatch penalty.  (BLASTN)
    query_genetic_code  Genetic code for Query.
    db_genetic_code     Genetic code for database.  (TBLAST[NX])

        Algorithm
    gapped              Whether to do a gapped alignment. T/F (not for TBLASTX)
    expectation         Expectation value cutoff.
    wordsize            Word size.
    strands             Query strands to search against database.([T]BLAST[NX])
    keep_hits           Number of best hits from a region to keep.
    xdrop               Dropoff value (bits) for gapped alignments.
    hit_extend          Threshold for extending hits.
    region_length       Length of region used to judge hits.
    db_length           Effective database length.
    search_length       Effective length of search space.

        Processing
    filter              Filter query sequence for low complexity (with SEG)? T/F
    believe_query       Believe the query defline.  T/F
    restrict_gi         Restrict search to these GI's.
    nprocessors         Number of processors to use.
    oldengine           Force use of old engine T/F

        Formatting
    html                Produce HTML output?  T/F
    descriptions        Number of one-line descriptions.
    alignments          Number of alignments.
    align_view          Alignment view.  Integer 0-11,
                        passed as a string or integer.
    show_gi             Show GI's in deflines?  T/F
    seqalign_file       seqalign file to output.
    outfile             Output file for report.  Filename to write to, if
                        omitted standard output is used (which you can access
                        from the returned handles).
    """

    # The keyword arguments are checked before anything is put on the
    # command line.
    _security_check_parameters(keywds)

    # Maps the Python-level argument names to blastall option switches.
    att2param = {
        'matrix' : '-M',
        'gap_open' : '-G',
        'gap_extend' : '-E',
        'nuc_match' : '-r',
        'nuc_mismatch' : '-q',
        'query_genetic_code' : '-Q',
        'db_genetic_code' : '-D',

        'gapped' : '-g',
        'expectation' : '-e',
        'wordsize' : '-W',
        'strands' : '-S',
        'keep_hits' : '-K',
        'xdrop' : '-X',
        'hit_extend' : '-f',
        'region_length' : '-L',
        'db_length' : '-z',
        'search_length' : '-Y',

        'program' : '-p',
        'database' : '-d',
        'infile' : '-i',
        'filter' : '-F',
        'believe_query' : '-J',
        'restrict_gi' : '-l',
        'nprocessors' : '-a',
        'oldengine' : '-V',

        'html' : '-T',
        'descriptions' : '-v',
        'alignments' : '-b',
        'align_view' : '-m',
        'show_gi' : '-I',
        'seqalign_file' : '-O',
        'outfile' : '-o',
        }
    # Absolute import: the implicit relative form ("from Applications
    # import ...") only resolves on Python 2.
    from Bio.Blast.Applications import BlastallCommandline
    cline = BlastallCommandline(blastcmd)
    cline.set_parameter(att2param['program'], program)
    cline.set_parameter(att2param['database'], database)
    cline.set_parameter(att2param['infile'], infile)
    cline.set_parameter(att2param['align_view'], str(align_view))
    # items() exists on both Python 2 and Python 3 dictionaries, unlike
    # iteritems().
    for key, value in keywds.items():
        cline.set_parameter(att2param[key], str(value))
    return _invoke_blast(cline)
1759
1760
def blastpgp(blastcmd, database, infile, align_view='7', **keywds):
    """Execute and retrieve data from standalone BLASTPGP as handles (OBSOLETE).

    NOTE - This function is obsolete, you are encouraged to use the command
    line wrapper Bio.Blast.Applications.BlastpgpCommandline instead.

    Execute and retrieve data from blastpgp.  blastcmd is the command
    used to launch the 'blastpgp' executable.  database is the path to the
    database to search against.  infile is the path to the file containing
    the sequence to search with.

    The return values are two handles, for standard output and standard error.

    You may pass more parameters to **keywds to change the behavior of
    the search.  Otherwise, optional values will be chosen by blastpgp.
    The Blast output is by default in XML format. Use the align_view keyword
    for output in a different format.

    Scoring
        matrix              Matrix to use.
        gap_open            Gap open penalty.
        gap_extend          Gap extension penalty.
        window_size         Multiple hits window size.
        npasses             Number of passes.
        passes              Hits/passes.  Integer 0-2.

    Algorithm
        gapped              Whether to do a gapped alignment.  T/F
        expectation         Expectation value cutoff.
        wordsize            Word size.
        keep_hits           Number of best hits from a region to keep.
        xdrop               Dropoff value (bits) for gapped alignments.
        hit_extend          Threshold for extending hits.
        region_length       Length of region used to judge hits.
        db_length           Effective database length.
        search_length       Effective length of search space.
        nbits_gapping       Number of bits to trigger gapping.
        pseudocounts        Pseudocounts constants for multiple passes.
        xdrop_final         X dropoff for final gapped alignment.
        xdrop_extension     Dropoff for blast extensions.
        model_threshold     E-value threshold to include in multipass model.
        required_start      Start of required region in query.
        required_end        End of required region in query.

    Processing
        XXX should document default values
        program             The blast program to use. (PHI-BLAST)
        filter              Filter query sequence for low complexity (with SEG)?  T/F
        believe_query       Believe the query defline?  T/F
        nprocessors         Number of processors to use.

    Formatting
        html                Produce HTML output?  T/F
        descriptions        Number of one-line descriptions.
        alignments          Number of alignments.
        align_view          Alignment view.  Integer 0-11,
                            passed as a string or integer.
        show_gi             Show GI's in deflines?  T/F
        seqalign_file       seqalign file to output.
        align_outfile       Output file for alignment.  Filename to write to, if
                            omitted standard output is used (which you can access
                            from the returned handles).
        checkpoint_outfile  Output file for PSI-BLAST checkpointing.
        restart_infile      Input file for PSI-BLAST restart.
        hit_infile          Hit file for PHI-BLAST.
        matrix_outfile      Output file for PSI-BLAST matrix in ASCII.
        align_infile        Input alignment file for PSI-BLAST restart.

    Raises ValueError if a keyword value looks like an attempt to inject a
    shell command, or if the executable named by blastcmd does not exist.
    Raises KeyError if a keyword is not one of the names listed above.
    """

    # Refuse values that try to smuggle extra shell commands into the call.
    _security_check_parameters(keywds)

    # Maps the keyword names accepted here onto blastpgp's switches.
    att2param = {
        'matrix' : '-M',
        'gap_open' : '-G',
        'gap_extend' : '-E',
        'window_size' : '-A',
        'npasses' : '-j',
        'passes' : '-P',

        'gapped' : '-g',
        'expectation' : '-e',
        'wordsize' : '-W',
        'keep_hits' : '-K',
        'xdrop' : '-X',
        'hit_extend' : '-f',
        'region_length' : '-L',
        # Lower case -z is the effective database length; this used to be
        # '-Z', which collided with xdrop_final below and so silently set
        # the wrong option.
        'db_length' : '-z',
        'search_length' : '-Y',
        'nbits_gapping' : '-N',
        'pseudocounts' : '-c',
        'xdrop_final' : '-Z',
        'xdrop_extension' : '-y',
        'model_threshold' : '-h',
        'required_start' : '-S',
        'required_end' : '-H',

        'program' : '-p',
        'database' : '-d',
        'infile' : '-i',
        'filter' : '-F',
        'believe_query' : '-J',
        'nprocessors' : '-a',

        'html' : '-T',
        'descriptions' : '-v',
        'alignments' : '-b',
        'align_view' : '-m',
        'show_gi' : '-I',
        'seqalign_file' : '-O',
        'align_outfile' : '-o',
        'checkpoint_outfile' : '-C',
        'restart_infile' : '-R',
        'hit_infile' : '-k',
        'matrix_outfile' : '-Q',
        'align_infile' : '-B',
        }
    # Imported here rather than at module level, as in the original code;
    # spelled as an absolute import so it does not rely on implicit
    # relative imports.
    from Bio.Blast.Applications import BlastpgpCommandline
    cline = BlastpgpCommandline(blastcmd)
    cline.set_parameter(att2param['database'], database)
    cline.set_parameter(att2param['infile'], infile)
    cline.set_parameter(att2param['align_view'], str(align_view))
    # Unknown keywords fail with KeyError on the table lookup.
    for key, value in keywds.items():
        cline.set_parameter(att2param[key], str(value))
    return _invoke_blast(cline)
1888
1889
def rpsblast(blastcmd, database, infile, align_view="7", **keywds):
    """Run standalone RPS-BLAST and return its output handles (OBSOLETE).

    NOTE - This function is obsolete, you are encouraged to use the command
    line wrapper Bio.Blast.Applications.RpsBlastCommandline instead.

    blastcmd is the command used to launch the 'rpsblast' executable,
    database is the path to the database to search against and infile is
    the path to the file containing the sequence to search with.

    Two handles are returned, one for standard output and one for
    standard error.

    Extra search settings can be supplied through **keywds; anything not
    given is left for rpsblast itself to choose.

    Please note that this function will give XML output by default, by
    setting align_view to seven (i.e. command line option -m 7).
    You should use the NCBIXML.parse() function to read the resulting output.
    This is because NCBIStandalone.BlastParser() does not understand the
    plain text output format from rpsblast.

    WARNING - The following text and associated parameter handling has not
    received extensive testing.  Please report any errors we might have made...

    Algorithm/Scoring
        gapped              Whether to do a gapped alignment.  T/F
        multihit            0 for multiple hit (default), 1 for single hit
        expectation         Expectation value cutoff.
        range_restriction   Range restriction on query sequence
                            (Format: start,stop) blastp only
                            0 in 'start' refers to the beginning of the sequence
                            0 in 'stop' refers to the end of the sequence
                            Default = 0,0
        xdrop               Dropoff value (bits) for gapped alignments.
        xdrop_final         X dropoff for final gapped alignment (in bits).
        xdrop_extension     Dropoff for blast extensions (in bits).
        search_length       Effective length of search space.
        nbits_gapping       Number of bits to trigger gapping.
        protein             Query sequence is protein.  T/F
        db_length           Effective database length.

    Processing
        filter              Filter query sequence for low complexity?  T/F
        case_filter         Use lower case filtering of FASTA sequence T/F, default F
        believe_query       Believe the query defline.  T/F
        nprocessors         Number of processors to use.
        logfile             Name of log file to use, default rpsblast.log

    Formatting
        html                Produce HTML output?  T/F
        descriptions        Number of one-line descriptions.
        alignments          Number of alignments.
        align_view          Alignment view.  Integer 0-11,
                            passed as a string or integer.
        show_gi             Show GI's in deflines?  T/F
        seqalign_file       seqalign file to output.
        align_outfile       Output file for alignment.  Filename to write to, if
                            omitted standard output is used (which you can access
                            from the returned handles).
    """

    # Values that look like shell command injection are rejected up front.
    _security_check_parameters(keywds)

    # Keyword name -> rpsblast command line switch.
    flag_for = {
        'multihit' : '-P',
        'gapped' : '-g',
        'expectation' : '-e',
        'range_restriction' : '-L',
        'xdrop' : '-X',
        'xdrop_final' : '-Z',
        'xdrop_extension' : '-y',
        'search_length' : '-Y',
        'nbits_gapping' : '-N',
        'protein' : '-p',
        'db_length' : '-z',

        'database' : '-d',
        'infile' : '-i',
        'filter' : '-F',
        'case_filter' : '-U',
        'believe_query' : '-J',
        'nprocessors' : '-a',
        'logfile' : '-l',

        'html' : '-T',
        'descriptions' : '-v',
        'alignments' : '-b',
        'align_view' : '-m',
        'show_gi' : '-I',
        'seqalign_file' : '-O',
        'align_outfile' : '-o',
        }

    from Applications import RpsBlastCommandline
    command = RpsBlastCommandline(blastcmd)
    set_option = command.set_parameter

    # The three explicit arguments always go onto the command line.
    set_option(flag_for['database'], database)
    set_option(flag_for['infile'], infile)
    set_option(flag_for['align_view'], str(align_view))

    # Everything else is optional; an unrecognised name raises KeyError.
    for name in keywds:
        set_option(flag_for[name], str(keywds[name]))

    return _invoke_blast(command)
1991
1992
1994 m = re.search(regex, line)
1995 if not m:
1996 raise ValueError(error_msg)
1997 return m.groups()
1998
1999 -def _get_cols(line, cols_to_get, ncols=None, expected={}):
2000 cols = line.split()
2001
2002
2003 if ncols is not None and len(cols) != ncols:
2004 raise ValueError("I expected %d columns (got %d) in line\n%s" \
2005 % (ncols, len(cols), line))
2006
2007
2008 for k in expected.keys():
2009 if cols[k] != expected[k]:
2010 raise ValueError("I expected '%s' in column %d in line\n%s" \
2011 % (expected[k], k, line))
2012
2013
2014 results = []
2015 for c in cols_to_get:
2016 results.append(cols[c])
2017 return tuple(results)
2018
2020 try:
2021 return int(str)
2022 except ValueError:
2023
2024
2025 str = str.replace(',', '')
2026 try:
2027
2028 return int(str)
2029 except ValueError:
2030 pass
2031
2032
2033 return long(float(str))
2034
2036
2037
2038
2039
2040
2041 if str and str[0] in ['E', 'e']:
2042 str = '1' + str
2043 try:
2044 return float(str)
2045 except ValueError:
2046
2047 str = str.replace(',', '')
2048
2049 return float(str)
2050
2051
2053 """Start BLAST and returns handles for stdout and stderr (PRIVATE).
2054
2055 Expects a command line wrapper object from Bio.Blast.Applications
2056 """
2057 import subprocess, sys
2058 blast_cmd = cline.program_name
2059 if not os.path.exists(blast_cmd):
2060 raise ValueError("BLAST executable does not exist at %s" % blast_cmd)
2061
2062
2063
2064
2065 blast_process = subprocess.Popen(str(cline),
2066 stdin=subprocess.PIPE,
2067 stdout=subprocess.PIPE,
2068 stderr=subprocess.PIPE,
2069 shell=(sys.platform!="win32"))
2070 blast_process.stdin.close()
2071 return blast_process.stdout, blast_process.stderr
2072
2073
2075 """Look for any attempt to insert a command into a parameter.
2076
2077 e.g. blastall(..., matrix='IDENTITY -F 0; rm -rf /etc/passwd')
2078
2079 Looks for ";" or "&&" in the strings (Unix and Windows syntax
2080 for appending a command line), or ">", "<" or "|" (redirection)
2081 and if any are found raises an exception.
2082 """
2083 for key, value in param_dict.iteritems():
2084 str_value = str(value)
2085 for bad_str in [";", "&&", ">", "<", "|"]:
2086 if bad_str in str_value:
2087 raise ValueError("Rejecting suspicious argument for %s" % key)
2088
2099
2101 """Attempt to catch and diagnose BLAST errors while parsing.
2102
2103 This utilizes the BlastParser module but adds an additional layer
2104 of complexity on top of it by attempting to diagnose ValueErrors
2105 that may actually indicate problems during BLAST parsing.
2106
2107 Current BLAST problems this detects are:
2108 o LowQualityBlastError - When BLASTing really low quality sequences
2109 (ie. some GenBank entries which are just short stretches of a single
2110 nucleotide), BLAST will report an error with the sequence and be
2111 unable to search with this. This will lead to a badly formatted
2112 BLAST report that the parsers choke on. The parser will convert the
2113 ValueError to a LowQualityBlastError and attempt to provide useful
2114 information.
2115
2116 """
def __init__(self, bad_report_handle=None):
    """Set up a parser that tries to recognise BLAST failures.

    Arguments:
    o bad_report_handle - Optional handle that reports which fail to
    parse are written to, for instance to collect them all in one file.
    When it is left as None the failing reports are not saved.
    """
    # Scanner/consumer pair that does the actual parsing work.
    self._scanner = _Scanner()
    self._consumer = _BlastErrorConsumer()

    self._bad_report_handle = bad_report_handle
2131
def parse(self, handle):
    """Parse the report on handle, trying to explain any failure.

    The whole handle is read into memory first so that the same text can
    be scanned, saved and inspected again.  Returns the data collected by
    the consumer.  If scanning raises ValueError the report is written to
    the bad report handle (when one was given), the text is checked for
    a known failure, and otherwise the original ValueError is re-raised.
    """
    report_text = handle.read()
    consumer = self._consumer

    try:
        self._scanner.feed(File.StringHandle(report_text), consumer)
    except ValueError:
        # Keep a copy of the offending report if the caller asked for one.
        sink = self._bad_report_handle
        if sink:
            sink.write(report_text)

        # This raises a more specific error when it recognises the problem.
        self._diagnose_error(File.StringHandle(report_text), consumer.data)

        # Not recognised, so pass along the error the scanner gave us.
        raise
    else:
        return consumer.data
2153
2155 """Attempt to diagnose an error in the passed handle.
2156
2157 Arguments:
2158 o handle - The handle potentially containing the error
2159 o data_record - The data record partially created by the consumer.
2160 """
2161 line = handle.readline()
2162
2163 while line:
2164
2165
2166
2167 if line.startswith('Searchingdone'):
2168 raise LowQualityBlastError("Blast failure occured on query: ",
2169 data_record.query)
2170 line = handle.readline()
2171