1
2
3
4
5
6
7 """Definitions for interacting with BLAST related applications.
8
9 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
10
11 - FastacmdCommandline
12 - BlastallCommandline
13 - BlastpgpCommandline
14 - RpsBlastCommandline
15
16 Wrappers for the new NCBI BLAST+ tools (written in C++):
17
18 - NcbiblastpCommandline - Protein-Protein BLAST
19 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
20 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
21 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
22 - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
23 - NcbipsiblastCommandline - Position-Specific Initiated BLAST
24 - NcbirpsblastCommandline - Reverse Position Specific BLAST
25 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
26
27 """
28 from Bio.Application import _Option, AbstractCommandline, _Switch
29
31 """Create a commandline for the fastacmd program from NCBI (OBSOLETE).
32
33 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the fastacmd options and hand over to AbstractCommandline.

    cmd is the program name (default "fastacmd"); any other keyword
    arguments are forwarded unchanged to AbstractCommandline.__init__.
    """
    database_option = _Option(["-d", "database"], ["input"], None, 1,
                              "The database to retrieve from.")
    search_option = _Option(["-s", "search_string"], ["input"], None, 1,
                            "The id to search for.")
    self.parameters = [database_option, search_option]
    AbstractCommandline.__init__(self, cmd, **kwargs)
43
44
46 """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).
47
48 This is provided for subclassing, it deals with shared options
49 common to all the BLAST tools (blastall, rpsblast, blastpgp).
50 """
# Options shared by the classic BLAST wrappers; a command name is mandatory.
assert cmd is not None
shared_options = [
    _Switch(["--help", "help"], ["input"],
            "Print USAGE, DESCRIPTION and ARGUMENTS description; "
            "ignore other arguments."),
    _Option(["-d", "database"], ["input"], None, 1,
            "The database to BLAST against.", False),
    _Option(["-i", "infile"], ["input", "file"], None, 1,
            "The sequence to search with.", False),
    _Option(["-e", "expectation"], ["input"], None, 0,
            "Expectation value cutoff.", False),
    _Option(["-m", "align_view"], ["input"], None, 0,
            "Alignment view. Integer 0-11. Use 7 for XML output.",
            False),
    _Option(["-o", "align_outfile", "outfile"], ["output", "file"], None, 0,
            "Output file for alignment.", False),
    _Option(["-y", "xdrop_extension"], ["input"], None, 0,
            "Dropoff for blast extensions.", False),
    _Option(["-F", "filter"], ["input"], None, 0,
            "Filter query sequence with SEG? T/F", False),
    _Option(["-X", "xdrop"], ["input"], None, 0,
            "Dropoff value (bits) for gapped alignments."),
    _Option(["-I", "show_gi"], ["input"], None, 0,
            "Show GI's in deflines? T/F", False),
    _Option(["-J", "believe_query"], ["input"], None, 0,
            "Believe the query defline? T/F", False),
    _Option(["-Z", "xdrop_final"], ["input"], None, 0,
            "X dropoff for final gapped alignment.", False),
    _Option(["-z", "db_length"], ["input"], None, 0,
            "Effective database length.", False),
    _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
            "seqalign file to output.", False),
    _Option(["-v", "descriptions"], ["input"], None, 0,
            "Number of one-line descriptions.", False),
    _Option(["-b", "alignments"], ["input"], None, 0,
            "Number of alignments.", False),
    _Option(["-Y", "search_length"], ["input"], None, 0,
            "Effective length of search space (use zero for the "
            "real size).", False),
    _Option(["-T", "html"], ["input"], None, 0,
            "Produce HTML output? T/F", False),
    _Option(["-U", "case_filter"], ["input"], None, 0,
            "Use lower case filtering of FASTA sequence? T/F", False),
    _Option(["-a", "nprocessors"], ["input"], None, 0,
            "Number of processors to use.", False),
    _Option(["-g", "gapped"], ["input"], None, 0,
            "Whether to do a gapped alignment. T/F", False),
]
try:
    # Shared options go first, followed by whatever is already set.
    merged_options = shared_options + self.parameters
except AttributeError:
    # Nothing was set on the instance yet, so use the shared options alone.
    merged_options = shared_options
self.parameters = merged_options
AbstractCommandline.__init__(self, cmd, **kwargs)
108
114
115
117 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
118
119 This is provided for subclassing, it deals with shared options
120 common to all the blastall and blastpgp tools (but not rpsblast).
121 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the options shared by blastall and blastpgp, then chain up.

    cmd must not be None.  The options declared here are placed in front
    of any self.parameters already present on the instance, after which
    _BlastCommandLine.__init__ is called with cmd and the keyword
    arguments.
    """
    assert cmd is not None
    common_options = [
        _Option(["-G", "gap_open"], ["input"], None, 0,
                "Gap open penalty", False),
        _Option(["-E", "gap_extend"], ["input"], None, 0,
                "Gap extension penalty", False),
        _Option(["-A", "window_size"], ["input"], None, 0,
                "Multiple hits window size", False),
        _Option(["-f", "hit_extend"], ["input"], None, 0,
                "Threshold for extending hits.", False),
        _Option(["-K", "keep_hits"], ["input"], None, 0,
                " Number of best hits from a region to keep.", False),
        _Option(["-W", "wordsize"], ["input"], None, 0,
                "Word size", False),
        _Option(["-P", "passes"], ["input"], None, 0,
                "Hits/passes. Integer 0-2. "
                "0 for multiple hit, 1 for single hit "
                "(does not apply to blastn)", False),
    ]
    try:
        # Common options first, then those already set on the instance.
        merged_options = common_options + self.parameters
    except AttributeError:
        # No parameters on the instance yet.
        merged_options = common_options
    self.parameters = merged_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
149
150
152 """Create a commandline for the blastall program from NCBI (OBSOLETE).
153
154 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
155 are replacing blastall with separate tools blastn, blastp, blastx, tblastn
156 and tblastx.
157
158 Like blastall, this wrapper is now obsolete, and will be deprecated and
159 removed in a future release of Biopython.
160
161 >>> from Bio.Blast.Applications import BlastallCommandline
162 >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
163 ... database="nr", expectation=0.001)
164 >>> cline
165 BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
166 >>> print cline
167 blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
168
169 You would typically run the command line with the Python subprocess module,
170 as described in the Biopython tutorial.
171 """
172
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall specific options, then chain up.

    cmd is the program name (default "blastall"); the keyword arguments
    are forwarded to _BlastAllOrPgpCommandLine.__init__, which is called
    after self.parameters has been assigned.
    """
    blastall_options = [
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use (e.g. blastp, blastn).", False),
        _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                "Penalty for a nucleotide mismatch (blastn only).", False),
        _Option(["-r", "nuc_match"], ["input"], None, 0,
                "Reward for a nucleotide match (blastn only).", False),
        _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                "Query Genetic code to use.", False),
        _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                "DB Genetic code (for tblast[nx] only).", False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix to use", False),
        _Option(["-S", "strands"], ["input"], None, 0,
                "Query strands to search against database (for blast[nx], "
                "and tblastx). 3 is both, 1 is top, 2 is bottom.", False),
        _Option(["-l", "restrict_gi"], ["input"], None, 0,
                "Restrict search of database to list of GI's.", False),
        _Option(["-R"], ["input", "file"], None, 0,
                "PSI-TBLASTN checkpoint input file.", False),
        _Option(["-n", "megablast"], ["input"], None, 0,
                "MegaBlast search T/F.", False),
        _Option(["-L", "region_length", "range_restriction"], ["input"],
                None, 0,
                "Location on query sequence (string format start,end).\n"
                "\n"
                "In older versions of BLAST, -L set the length of region\n"
                "used to judge hits (see -K parameter).", False),
        _Option(["-w"], ["input"], None, 0,
                "Frame shift penalty (OOF algorithm for blastx).", False),
        _Option(["-t"], ["input"], None, 0,
                "Length of the largest intron allowed in a translated "
                "nucleotide sequence when linking multiple distinct "
                "alignments. (0 invokes default behavior; a negative value "
                "disables linking.)", False),
        _Option(["-B"], ["input"], None, 0,
                "Number of concatenated queries, for blastn and tblastn.",
                False),
        _Option(["-V", "oldengine"], ["input"], None, 0,
                "Force use of the legacy BLAST engine.", False),
        _Option(["-C"], ["input"], None, 0,
                "Use composition-based statistics for tblastn:\n"
                "D or d: default (equivalent to F)\n"
                "0 or F or f: no composition-based statistics\n"
                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                "2: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, conditioned on sequence properties\n"
                "3: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, unconditionally\n"
                "For programs other than tblastn, must either be absent or be\n"
                "D, F or 0.", False),
        _Option(["-s"], ["input"], None, 0,
                "Compute locally optimal Smith-Waterman alignments (This "
                "option is only available for gapped tblastn.) T/F", False),
    ]
    self.parameters = blastall_options
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
235
236
238 """Create a commandline for the blastpgp program from NCBI (OBSOLETE).
239
240 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
241 are replacing blastpgp with a renamed tool psiblast. This module provides
242 NcbipsiblastCommandline as a wrapper for the new tool psiblast.
243
244 Like blastpgp (and blastall), this wrapper is now obsolete, and will be
245 deprecated and removed in a future release of Biopython.
246
247 >>> from Bio.Blast.Applications import BlastpgpCommandline
248 >>> cline = BlastpgpCommandline(help=True)
249 >>> cline
250 BlastpgpCommandline(cmd='blastpgp', help=True)
251 >>> print cline
252 blastpgp --help
253
254 You would typically run the command line with the Python subprocess module,
255 as described in the Biopython tutorial.
256 """
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp specific options, then chain up.

    cmd is the program name (default "blastpgp"); the keyword arguments
    are forwarded to _BlastAllOrPgpCommandLine.__init__, which is called
    after self.parameters has been assigned.
    """
    blastpgp_options = [
        _Option(["-C", "checkpoint_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST checkpointing.", False),
        _Option(["-R", "restart_infile"], ["input", "file"], None, 0,
                "Input file for PSI-BLAST restart.", False),
        _Option(["-k", "hit_infile"], ["input", "file"], None, 0,
                "Hit file for PHI-BLAST.", False),
        _Option(["-Q", "matrix_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST matrix in ASCII.", False),
        _Option(["-B", "align_infile"], ["input", "file"], None, 0,
                "Input alignment file for PSI-BLAST restart.", False),
        _Option(["-S", "required_start"], ["input"], None, 0,
                "Start of required region in query.", False),
        _Option(["-H", "required_end"], ["input"], None, 0,
                "End of required region in query.", False),
        _Option(["-j", "npasses"], ["input"], None, 0,
                "Number of passes", False),
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.", False),
        _Option(["-c", "pseudocounts"], ["input"], None, 0,
                "Pseudocounts constants for multiple passes.", False),
        _Option(["-h", "model_threshold"], ["input"], None, 0,
                "E-value threshold to include in multipass model.", False),
        _Option(["-L", "region_length"], ["input"], None, 0,
                "Cost to decline alignment (disabled when zero).", False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix (string, default BLOSUM62).", False),
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use "
                "(e.g blastpgp, patseedp or seedp).", False),
    ]
    self.parameters = blastpgp_options
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
290
291
293 """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).
294
295 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
296 are replacing the old rpsblast with a new version of the same name plus a
297 second tool rpstblastn, both taking different command line arguments. This
298 module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
299 wrappers for the new tools.
300
301 Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
302 be deprecated and removed in a future release of Biopython.
303
304 >>> from Bio.Blast.Applications import RpsBlastCommandline
305 >>> cline = RpsBlastCommandline(help=True)
306 >>> cline
307 RpsBlastCommandline(cmd='rpsblast', help=True)
308 >>> print cline
309 rpsblast --help
310
311 You would typically run the command line with the Python subprocess module,
312 as described in the Biopython tutorial.
313 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the classic rpsblast specific options, then chain up.

    cmd is the program name (default "rpsblast"); the keyword arguments
    are forwarded to _BlastCommandLine.__init__, which is called after
    self.parameters has been assigned.
    """
    rpsblast_options = [
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.", False),
        _Option(["-P", "multihit"], ["input"], None, 0,
                "0 for multiple hit, 1 for single hit", False),
        _Option(["-l", "logfile"], ["output", "file"], None, 0,
                "Logfile name.", False),
        _Option(["-p", "protein"], ["input"], None, 0,
                "Query sequence is protein. T/F", False),
        _Option(["-L", "range_restriction"], ["input"], None, 0,
                "Location on query sequence (string format start,end).",
                False),
    ]
    self.parameters = rpsblast_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
332
333
335 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
336
337 This is provided for subclassing, it deals with shared options
338 common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
339 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the options shared by the BLAST+ wrappers, then chain up.

    cmd must not be None.  The options declared here are placed in front
    of any self.parameters already present on the instance, after which
    AbstractCommandline.__init__ is called with cmd and the keyword
    arguments.
    """
    assert cmd is not None
    extra_parameters = [
        # Help and version switches
        _Switch(["-h", "h"], ["input"],
                "Print USAGE and DESCRIPTION; ignore other arguments."),
        _Switch(["-help", "help"], ["input"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                "ignore other arguments."),
        _Switch(["-version", "version"], ["input"],
                "Print version number; ignore other arguments."),
        # Query
        _Option(["-query", "query"], ["input", "file"], None, 0,
                "The sequence to search with.", False),
        _Option(["-query_loc", "query_loc"], ["input"], None, 0,
                "Location on the query sequence (Format: start-stop)", False),
        # Database, output and general search settings
        _Option(["-db", "db"], ["input"], None, 0,
                "The database to BLAST against.", False),
        _Option(["-out", "out"], ["output", "file"], None, 0,
                "Output file for alignment.", False),
        _Option(["-evalue", "evalue"], ["input"], None, 0,
                "Expectation value cutoff.", False),
        _Option(["-word_size", "word_size"], ["input"], None, 0,
                "Word size for wordfinder algorithm.\n"
                "\n"
                "Integer. Minimum 2.", False),
        # Output formatting
        _Option(["-outfmt", "outfmt"], ["input"], None, 0,
                "Alignment view. Integer 0-10. Use 5 for XML output "
                "(differs from classic BLAST which used 7 for XML).",
                False),
        _Switch(["-show_gis", "show_gis"], ["input"],
                "Show NCBI GIs in deflines?"),
        _Option(["-num_descriptions", "num_descriptions"], ["input"], None, 0,
                "Number of database sequences to show one-line descriptions for.\n"
                "\n"
                "Integer argument (at least zero). Default is 500.\n"
                "See also num_alignments.", False),
        # The help text used to refer to num_alignments itself; it now
        # describes alignments and cross-references num_descriptions.
        _Option(["-num_alignments", "num_alignments"], ["input"], None, 0,
                "Number of database sequences to show alignments for.\n"
                "\n"
                "Integer argument (at least zero). Default is 200.\n"
                "See also num_descriptions.", False),
        _Switch(["-html", "html"], ["input"],
                "Produce HTML output? See also the outfmt option."),
        # Query filtering
        _Switch(["-lcase_masking", "lcase_masking"], ["input"],
                "Use lower case filtering in query and subject sequence(s)?"),
        # Restricting the search
        _Option(["-gilist", "gilist"], ["input", "file"], None, 0,
                "Restrict search of database to list of GI's.\n"
                "\n"
                "Incompatible with: negative_gilist, remote, subject, subject_loc",
                False),
        _Option(["-negative_gilist", "negative_gilist"], ["input", "file"], None, 0,
                "Restrict search of database to everything except the listed GIs.\n"
                "\n"
                "Incompatible with: gilist, remote, subject, subject_loc",
                False),
        _Option(["-entrez_query", "entrez_query"], ["input"], None, 0,
                "Restrict search with the given Entrez query (requires remote).",
                False),
        _Option(["-max_target_seqs", "max_target_seqs"], ["input"], None, 0,
                "Maximum number of aligned sequences to keep.\n"
                "\n"
                "Integer argument (at least one).", False),
        # Statistics
        _Option(["-dbsize", "dbsize"], ["input"], None, 0,
                "Effective length of the database (integer)", False),
        _Option(["-searchsp", "searchsp"], ["input"], None, 0,
                "Effective length of the search space (integer)", False),
        # Extension
        _Option(["-xdrop_ungap", "xdrop_ungap"], ["input"], None, 0,
                "X-dropoff value (in bits) for ungapped extensions. Float.",
                False),
        _Option(["-xdrop_gap", "xdrop_gap"], ["input"], None, 0,
                "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                False),
        _Option(["-xdrop_gap_final", "xdrop_gap_final"], ["input"], None, 0,
                "X-dropoff value (in bits) for final gapped alignment. Float.",
                False),
        _Option(["-window_size", "window_size"], ["input"], None, 0,
                "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                False),
        # Search strategy
        _Option(["-import_search_strategy", "import_search_strategy"],
                ["input", "file"], None, 0,
                "Search strategy to use.\n"
                "\n"
                "Incompatible with: export_search_strategy", False),
        _Option(["-export_search_strategy", "export_search_strategy"],
                ["output", "file"], None, 0,
                "File name to record the search strategy used.\n"
                "\n"
                "Incompatible with: import_search_strategy", False),
        # Miscellaneous
        _Switch(["-parse_deflines", "parse_deflines"], ["input"],
                "Should the query and subject defline(s) be parsed?"),
        _Option(["-num_threads", "num_threads"], ["input"], None, 0,
                "Number of threads to use in the BLAST search.\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote", False),
        _Switch(["-remote", "remote"], ["input"],
                "Execute search remotely?\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."),
    ]
    try:
        # Shared options first, then those already set on the instance.
        self.parameters = extra_parameters + self.parameters
    except AttributeError:
        # No parameters on the instance yet.
        self.parameters = extra_parameters
    AbstractCommandline.__init__(self, cmd, **kwargs)
459
# Pairs of options that must not be supplied together.
incompatibles = {
    "remote": ["gilist", "negative_gilist", "num_threads"],
    "import_search_strategy": ["export_search_strategy"],
    "gilist": ["negative_gilist"],
}
for option, clashes in incompatibles.items():
    if not self._get_parameter(option):
        # Option not set, so nothing can clash with it.
        continue
    for other in clashes:
        if self._get_parameter(other):
            raise ValueError("Options %s and %s are incompatible."
                             % (option, other))
# entrez_query is only accepted together with remote.
if self.entrez_query and not self.remote:
    raise ValueError("Option entrez_query requires remote option.")
AbstractCommandline._validate(self)
473
475 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
476
477 This is provided for subclassing, it deals with shared options
478 common to all the BLAST tools supporting two-sequence BLAST
479 (blastn, psiblast, etc) but not rpsblast or rpstblastn.
480 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the two-sequence BLAST+ options, then chain up.

    cmd must not be None.  The options declared here are placed in front
    of any self.parameters already present on the instance, after which
    _NcbiblastCommandline.__init__ is called with cmd and the keyword
    arguments.
    """
    assert cmd is not None
    two_seq_options = [
        # Gap costs
        _Option(["-gapopen", "gapopen"], ["input"], None, 0,
                "Cost to open a gap (integer).", False),
        _Option(["-gapextend", "gapextend"], ["input"], None, 0,
                "Cost to extend a gap (integer).", False),
        # Subject sequence(s) instead of a database
        _Option(["-subject", "subject"], ["input", "file"], None, 0,
                "Subject sequence(s) to search.\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist.\n"
                "See also subject_loc.", False),
        _Option(["-subject_loc", "subject_loc"], ["input"], None, 0,
                "Location on the subject sequence (Format: start-stop)\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist, remote.\n"
                "See also subject.", False),
        # Hit selection
        _Option(["-culling_limit", "culling_limit"], ["input"], None, 0,
                "Hit culling limit (integer).\n"
                "\n"
                "If the query range of a hit is enveloped by that of at least this many\n"
                "higher-scoring hits, delete the hit.\n"
                "\n"
                "Incompatible with: best_hit_overhang, best_hit_score_edge.",
                False),
        _Option(["-best_hit_overhang", "best_hit_overhang"], ["input"], None, 0,
                "Best Hit algorithm overhang value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.", False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"], ["input"], None, 0,
                "Best Hit algorithm score edge value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.", False),
    ]
    try:
        # Two-sequence options first, then those already on the instance.
        merged_options = two_seq_options + self.parameters
    except AttributeError:
        # No parameters on the instance yet.
        merged_options = two_seq_options
    self.parameters = merged_options
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
528
529
# Pairs of options that must not be supplied together.  Each entry of a
# value list has to be a single option name, because every entry is passed
# on its own to self._get_parameter().
incompatibles = {
    "subject_loc": ["db", "gilist", "negative_gilist", "remote"],
    "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
    "subject": ["db", "gilist", "negative_gilist"],
}
for a in incompatibles:
    if self._get_parameter(a):
        for b in incompatibles[a]:
            if self._get_parameter(b):
                raise ValueError("Options %s and %s are incompatible."
                                 % (a, b))
# Finish with the checks of the shared BLAST+ base class.
_NcbiblastCommandline._validate(self)
541
543 """Create a commandline for the NCBI BLAST+ program blastp (for proteins).
544
545 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
546 replaced the old blastall tool with separate tools for each of the searches.
547 This wrapper therefore replaces BlastallCommandline with option -p blastp.
548
549 >>> from Bio.Blast.Applications import NcbiblastpCommandline
550 >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
551 ... evalue=0.001, remote=True, ungapped=True)
552 >>> cline
553 NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
554 >>> print cline
555 blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
556
557 You would typically run the command line with the Python subprocess module,
558 as described in the Biopython tutorial.
559 """
def __init__(self, cmd="blastp", **kwargs):
    """Declare the blastp specific options, then chain up.

    cmd is the program name (default "blastp"); the keyword arguments are
    forwarded to _Ncbiblast2SeqCommandline.__init__, which is called
    after self.parameters has been assigned.
    """
    self.parameters = [
        # General search options
        _Option(["-task", "task"], ["input"],
                lambda value: value in ["blastp", "blastp-short"], 0,
                "Task to execute (string, blastp (default) or blastp-short).",
                False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)", False),
        # Compare against an explicit list of whole values.  A substring
        # test against "0Ft2TtDd" rejected the documented value "f",
        # accepted fragments such as "" or "Ft", and raised TypeError for
        # integers; str() lets 0 and 2 be given as integers too.
        _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                lambda value: str(value) in ["0", "F", "f",
                                             "2", "T", "t", "D", "d"], 0,
                "Use composition-based statistics (string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score adjustment as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.", False),
        # Query filtering
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to disable.\n'
                'Default is "12 2.2 2.5"', False),
        # Restricting the search
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        # Extension
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
        # Miscellaneous
        _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                "Compute locally optimal Smith-Waterman alignments?"),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
600
610
611
613 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).
614
615 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
616 replaced the old blastall tool with separate tools for each of the searches.
617 This wrapper therefore replaces BlastallCommandline with option -p blastn.
618
619 For example, to run a search against the "nt" nucleotide database using the
620 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
621 cut off of 0.001, saving the output to a file in XML format:
622
623 >>> from Bio.Blast.Applications import NcbiblastnCommandline
624 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
625 ... evalue=0.001, out="m_cold.xml", outfmt=5)
626 >>> cline
627 NcbiblastnCommandline(cmd='blastn', query='m_cold.fasta', db='nt', out='m_cold.xml', evalue=0.001, outfmt=5, strand='plus')
628 >>> print cline
629 blastn -query m_cold.fasta -db nt -out m_cold.xml -evalue 0.001 -outfmt 5 -strand plus
630
631 You would typically run the command line with the Python subprocess module,
632 as described in the Biopython tutorial.
633 """
def __init__(self, cmd="blastn", **kwargs):
    """Declare the blastn specific options, then chain up.

    cmd is the program name (default "blastn"); the keyword arguments are
    forwarded to _Ncbiblast2SeqCommandline.__init__, which is called
    after self.parameters has been assigned.
    """
    blastn_options = [
        # Input query
        _Option(["-strand", "strand"], ["input"],
                lambda strand: strand in ["both", "minus", "plus"], 0,
                'Query strand(s) to search against database/subject.\n'
                '\n'
                'Values allowed are "both" (default), "minus", "plus".',
                False),
        # General search options
        _Option(["-task", "task"], ["input"],
                lambda task: task in ['blastn', 'blastn-short',
                                      'dc-megablast', 'megablast',
                                      'vecscreen'], 0,
                "Task to execute (string, default 'megablast')\n"
                "\n"
                "Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'\n"
                "(the default), or 'vecscreen'.", False),
        _Option(["-penalty", "penalty"], ["input"], None, 0,
                "Penalty for a nucleotide mismatch (integer, at most zero).",
                False),
        _Option(["-reward", "reward"], ["input"], None, 0,
                "Reward for a nucleotide match (integer, at least zero).",
                False),
        # MegaBLAST indexing
        _Option(["-index_name", "index_name"], ["input"], None, 0,
                "MegaBLAST database index name.", False),
        # Query filtering
        _Option(["-dust", "dust"], ["input"], None, 0,
                "Filter query sequence with DUST (string).\n"
                "\n"
                "Format: 'yes', 'level window linker', or 'no' to disable.\n"
                "Default = '20 64 1'.\n", False),
        _Option(["-filtering_db", "filtering_db"], ["input"], None, 0,
                "BLAST database containing filtering elements (i.e. repeats).",
                False),
        _Option(["-window_masker_taxid", "window_masker_taxid"], ["input"], None, 0,
                "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                False),
        _Option(["-window_masker_db", "window_masker_db"], ["input"], None, 0,
                "Enable WindowMasker filtering using this repeats database (string).",
                False),
        # Restricting the search
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        _Option(["-perc_identity", "perc_identity"], ["input"], None, 0,
                "Percent identity (real, 0 to 100 inclusive).", False),
        # Discontiguous MegaBLAST
        _Option(["-template_type", "template_type"], ["input"],
                lambda kind: kind in ['coding', 'coding_and_optimal',
                                      'optimal'], 0,
                "Discontiguous MegaBLAST template type (string).\n"
                "\n"
                "Allowed values: 'coding', 'coding_and_optimal' or 'optimal'\n"
                "Requires: template_length.", False),
        _Option(["-template_length", "template_length"], ["input"],
                lambda length: length in [16, 18, 21, '16', '18', '21'], 0,
                "Discontiguous MegaBLAST template length (integer).\n"
                "\n"
                "Allowed values: 16, 18, 21\n"
                "\n"
                "Requires: template_type.", False),
        # Extension
        _Switch(["-no_greedy", "no_greedy"], ["input"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], ["input"], None, 0,
                "Minimum raw gapped score to keep an alignment in the "
                "preliminary gapped and traceback stages (integer).", False),
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
    ]
    self.parameters = blastn_options
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
704
# Pairs of options that must not be supplied together.
incompatibles = {"db_soft_mask": ["subject", "subject_loc"]}
for a in incompatibles:
    if self._get_parameter(a):
        for b in incompatibles[a]:
            if self._get_parameter(b):
                raise ValueError("Options %s and %s are incompatible."
                                 % (a, b))
# template_type and template_length must be given together or not at all.
if (self.template_type and not self.template_length) \
or (self.template_length and not self.template_type):
    raise ValueError("Options template_type and template_length "
                     "require each other.")
# Finish with the checks of the two-sequence base class.
_Ncbiblast2SeqCommandline._validate(self)
717
718
720 """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).
721
722 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
723 replaced the old blastall tool with separate tools for each of the searches.
724 This wrapper therefore replaces BlastallCommandline with option -p blastx.
725
726 >>> from Bio.Blast.Applications import NcbiblastxCommandline
727 >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
728 >>> cline
729 NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
730 >>> print cline
731 blastx -query m_cold.fasta -db nr -evalue 0.001
732
733 You would typically run the command line with the Python subprocess module,
734 as described in the Biopython tutorial.
735 """
def __init__(self, cmd="blastx", **kwargs):
    """Declare the blastx specific options, then chain up.

    cmd is the program name (default "blastx"); the keyword arguments are
    forwarded to _Ncbiblast2SeqCommandline.__init__, which is called
    after self.parameters has been assigned.
    """
    blastx_options = [
        # Input query
        _Option(["-strand", "strand"], ["input"],
                lambda strand: strand in ["both", "minus", "plus"], 0,
                'Query strand(s) to search against database/subject.\n'
                '\n'
                'Values allowed are "both" (default), "minus", "plus".',
                False),
        _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                "Genetic code to use to translate query\n"
                "\n"
                "Integer. Default is one.", False),
        # General search options
        _Option(["-frame_shift_penalty", "frame_shift_penalty"], ["input"], None, 0,
                "Frame shift penalty (integer, at least 1, default ignored).",
                False),
        _Option(["-max_intron_length", "max_intron_length"], ["input"], None, 0,
                "Maximum intron length (integer).\n"
                "\n"
                "Length of the largest intron allowed in a translated nucleotide\n"
                "sequence when linking multiple distinct alignments (a negative\n"
                "value disables linking). Default zero.", False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)", False),
        # Query filtering
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to disable.\n'
                'Default is "12 2.2 2.5', False),
        # Restricting the search
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        # Extension
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
    ]
    self.parameters = blastx_options
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
780
790
791
793 """Wrapper for the NCBI BLAST+ program tblastn.
794
795 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
796 replaced the old blastall tool with separate tools for each of the searches.
797 This wrapper therefore replaces BlastallCommandline with option -p tblastn.
798
799 >>> from Bio.Blast.Applications import NcbitblastnCommandline
800 >>> cline = NcbitblastnCommandline(help=True)
801 >>> cline
802 NcbitblastnCommandline(cmd='tblastn', help=True)
803 >>> print cline
804 tblastn -help
805
806 You would typically run the command line with the Python subprocess module,
807 as described in the Biopython tutorial.
808 """
809 - def __init__(self, cmd="tblastn", **kwargs):
810 self.parameters = [ \
811
812 _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
813 """Genetic code to use to translate query
814
815 Integer. Default is one.""", False),
816 _Option(["-frame_shift_penalty", "frame_shift_penalty"], ["input"], None, 0,
817 "Frame shift penalty (integer, at least 1, default ignored).", False),
818 _Option(["-max_intron_length", "max_intron_length"], ["input"], None, 0,
819 """Maximum intron length (integer).
820
821 Length of the largest intron allowed in a translated nucleotide
822 sequence when linking multiple distinct alignments (a negative
823 value disables linking). Default zero.""", False),
824 _Option(["-matrix", "matrix"], ["input"], None, 0,
825 "Scoring matrix name (default BLOSUM62).", False),
826 _Option(["-threshold", "threshold"], ["input"], None, 0,
827 "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
828 _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
829 lambda value : value in "0Ft12TtDd3", 0,
830 """Use composition-based statistics (string, default 2, i.e. True).
831
832 0, F or f: no composition-based statistics
833 1: Composition-based statistics as in NAR 29:2994-3005, 2001
834 2, T or t, D or d : Composition-based score adjustment as in
835 Bioinformatics 21:902-911, 2005, conditioned on sequence properties
836 3: Composition-based score adjustment as in Bioinformatics 21:902-911,
837 2005, unconditionally
838
839 Note that only tblastn supports values of 1 and 3.""", False),
840
841 _Option(["-seg", "seg"], ["input"], None, 0,
842 """Filter query sequence with SEG (string).
843
844 Format: "yes", "window locut hicut", or "no" to disable.
845 Default is "12 2.2 2.5""", False),
846
847 _Switch(["-ungapped", "ungapped"], ["input"],
848 "Perform ungapped alignment only?"),
849
850 _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
851 "Compute locally optimal Smith-Waterman alignments?"),
852
853 _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
854 """PSI-BLAST checkpoint file
855
856 Incompatible with: remote, query""", False),
857 ]
858 _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
859
869
870
872 """Wrapper for the NCBI BLAST+ program tblastx.
873
874 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
875 replaced the old blastall tool with separate tools for each of the searches.
876 This wrapper therefore replaces BlastallCommandline with option -p tblastx.
877
878 >>> from Bio.Blast.Applications import NcbitblastxCommandline
879 >>> cline = NcbitblastxCommandline(help=True)
880 >>> cline
881 NcbitblastxCommandline(cmd='tblastx', help=True)
882 >>> print cline
883 tblastx -help
884
885 You would typically run the command line with the Python subprocess module,
886 as described in the Biopython tutorial.
887 """
888 - def __init__(self, cmd="tblastx", **kwargs):
889 self.parameters = [ \
890
891 _Option(["-strand", "strand"], ["input"],
892 lambda value : value in ["both", "minus", "plus"],0,
893 """Query strand(s) to search against database/subject.
894
895 Values allowed are "both" (default), "minus", "plus".""", False),
896
897 _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
898 """Genetic code to use to translate query
899
900 Integer. Default is one.""", False),
901
902 _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
903 """Genetic code to use to translate query
904
905 Integer. Default is one.""", False),
906 _Option(["-max_intron_length", "max_intron_length"], ["input"], None, 0,
907 """Maximum intron length (integer).
908
909 Length of the largest intron allowed in a translated nucleotide
910 sequence when linking multiple distinct alignments (a negative
911 value disables linking). Default zero.""", False),
912 _Option(["-matrix", "matrix"], ["input"], None, 0,
913 "Scoring matrix name (default BLOSUM62).", False),
914 _Option(["-threshold", "threshold"], ["input"], None, 0,
915 "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
916
917 _Option(["-seg", "seg"], ["input"], None, 0,
918 """Filter query sequence with SEG (string).
919
920 Format: "yes", "window locut hicut", or "no" to disable.
921 Default is "12 2.2 2.5""", False),
922 ]
923 _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
924
925
927 if self.remote and self.in_pssm:
928 raise ValueError("The remote option cannot be used with in_pssm")
929 if self.query and self.in_pssm:
930 raise ValueError("The query option cannot be used with in_pssm")
931 _Ncbiblast2SeqCommandline._validate(self)
932
933
935 """Wrapper for the NCBI BLAST+ program psiblast.
936
937 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
938 replaced the old blastpgp tool with a similar tool psiblast. This wrapper
939 therefore replaces BlastpgpCommandline, the wrapper for blastpgp.
940
941 >>> from Bio.Blast.Applications import NcbipsiblastCommandline
942 >>> cline = NcbipsiblastCommandline(help=True)
943 >>> cline
944 NcbipsiblastCommandline(cmd='psiblast', help=True)
945 >>> print cline
946 psiblast -help
947
948 You would typically run the command line with the Python subprocess module,
949 as described in the Biopython tutorial.
950 """
951 - def __init__(self, cmd="psiblast", **kwargs):
952 self.parameters = [ \
953
954 _Option(["-matrix", "matrix"], ["input"], None, 0,
955 "Scoring matrix name (default BLOSUM62).", False),
956 _Option(["-threshold", "threshold"], ["input"], None, 0,
957 "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
958 _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
959 lambda value : value in "0Ft2TtDd", 0,
960 """Use composition-based statistics (string, default 2, i.e. True).
961
962 0, F or f: no composition-based statistics
963 2, T or t, D or d : Composition-based score adjustment as in
964 Bioinformatics 21:902-911, 2005, conditioned on sequence properties
965
966 Note that tblastn also supports values of 1 and 3.""", False),
967
968 _Option(["-seg", "seg"], ["input"], None, 0,
969 """Filter query sequence with SEG (string).
970
971 Format: "yes", "window locut hicut", or "no" to disable.
972 Default is "12 2.2 2.5""", False),
973
974 _Option(["-gap_trigger", "gap_trigger"], ["input"], None, 0,
975 "Number of bits to trigger gapping (float, default 22)", False),
976
977 _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
978 "Compute locally optimal Smith-Waterman alignments?"),
979
980 _Option(["-num_iterations", "num_iterations"], ["input"], None, 0,
981 """Number of iterations to perform, integer
982
983 Integer of at least one. Default is one.
984 Incompatible with: remote""", False),
985 _Option(["-out_pssm", "out_pssm"], ["output", "file"], None, 0,
986 "File name to store checkpoint file", False),
987 _Option(["-out_ascii_pssm", "out_ascii_pssm"], ["output", "file"], None, 0,
988 "File name to store ASCII version of PSSM", False),
989 _Option(["-in_msa", "in_msa"], ["input", "file"], None, 0,
990 """File name of multiple sequence alignment to restart PSI-BLAST
991
992 Incompatible with: in_pssm, query""", False),
993 _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
994 """PSI-BLAST checkpoint file
995
996 Incompatible with: in_msa, query, phi_pattern""", False),
997
998 _Option(["-pseudocount", "pseudocount"], ["input"], None, 0,
999 """Pseudo-count value used when constructing PSSM
1000
1001 Integer. Default is zero.""", False),
1002 _Option(["-inclusion_ethresh", "inclusion_ethresh"], ["input"], None, 0,
1003 """E-value inclusion threshold for pairwise alignments
1004
1005 Float. Default is 0.002.""", False),
1006
1007 _Option(["-phi_pattern", "phi_pattern"], ["input", "file"], None, 0,
1008 """File name containing pattern to search
1009
1010 Incompatible with: in_pssm""", False),
1011 ]
1012 _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
1013
1015 incompatibles = {"num_iterations":["remote"],
1016 "in_msa":["in_pssm", "query"],
1017 "in_pssm":["in_msa","query","phi_pattern"]}
1018 for a in incompatibles:
1019 if self._get_parameter(a):
1020 for b in incompatibles[a]:
1021 if self._get_parameter(b):
1022 raise ValueError("Options %s and %s are incompatible." \
1023 % (a,b))
1024 _Ncbiblast2SeqCommandline._validate(self)
1025
1026
1028 """Wrapper for the NCBI BLAST+ program rpsblast.
1029
1030 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1031 replaced the old rpsblast tool with a similar tool of the same name. This
1032 wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.
1033
1034 >>> from Bio.Blast.Applications import NcbirpsblastCommandline
1035 >>> cline = NcbirpsblastCommandline(help=True)
1036 >>> cline
1037 NcbirpsblastCommandline(cmd='rpsblast', help=True)
1038 >>> print cline
1039 rpsblast -help
1040
1041 You would typically run the command line with the Python subprocess module,
1042 as described in the Biopython tutorial.
1043 """
1044 - def __init__(self, cmd="rpsblast", **kwargs):
1045 self.parameters = [ \
1046
1047 _Option(["-seg", "seg"], ["input"], None, 0,
1048 """Filter query sequence with SEG (string).
1049
1050 Format: "yes", "window locut hicut", or "no" to disable.
1051 Default is "12 2.2 2.5""", False),
1052 ]
1053 _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1054
1055
1057 """Wrapper for the NCBI BLAST+ program rpstblastn.
1058
1059 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1060 replaced the old rpsblast tool with a similar tool of the same name, and a
1061 separate tool rpstblastn for Translated Reverse Position Specific BLAST.
1062
1063 >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
1064 >>> cline = NcbirpstblastnCommandline(help=True)
1065 >>> cline
1066 NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
1067 >>> print cline
1068 rpstblastn -help
1069
1070 You would typically run the command line with the Python subprocess module,
1071 as described in the Biopython tutorial.
1072 """
1073 - def __init__(self, cmd="rpstblastn", **kwargs):
1074 self.parameters = [ \
1075
1076 _Option(["-strand", "strand"], ["input"],
1077 lambda value : value in ["both", "minus", "plus"],0,
1078 """Query strand(s) to search against database/subject.
1079
1080 Values allowed are "both" (default), "minus", "plus".""", False),
1081
1082 _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
1083 """Genetic code to use to translate query
1084
1085 Integer. Default is one.""", False),
1086
1087 _Option(["-seg", "seg"], ["input"], None, 0,
1088 """Filter query sequence with SEG (string).
1089
1090 Format: "yes", "window locut hicut", or "no" to disable.
1091 Default is "12 2.2 2.5""", False),
1092
1093 _Switch(["-ungapped", "ungapped"], ["input"],
1094 "Perform ungapped alignment only?"),
1095 ]
1096 _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1097
1098
1100 """Run the Bio.Blast.Applications module's doctests."""
1101 import doctest
1102 doctest.testmod(verbose=1)
1103
if __name__ == "__main__":
    # Executed as a script rather than imported: run this module's doctests.
    _test()
1107