Package Bio :: Package Application
[hide private]
[frames | no frames]

Source Code for Package Bio.Application

  1  # Copyright 2001-2004 Brad Chapman. 
  2  # Revisions copyright 2009 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """General mechanisms to access applications in Biopython. 
  8   
  9  This module is not intended for direct use (any more). It provides 
 10  the basic objects for our command line wrappers such as: 
 11   
 12   - Bio.Align.Applications 
 13   - Bio.Blast.Applications 
 14   - Bio.Emboss.Applications 
 15   - Bio.Sequencing.Applications 
 16   
 17  """ 
 18  import os, sys 
 19  import StringIO 
 20  import subprocess 
 21   
 22  from Bio import File 
 23   
def generic_run(commandline):
    """Run an application with the given commandline (DEPRECATED).

    The argument is a pre-built command line object (something derived
    from AbstractCommandline); it is converted with str() and executed as
    a child process.

    Returns a three-tuple:

     - an ApplicationResult built from the command line object and the
       child's return code,
     - a File.UndoHandle wrapping the captured standard output,
     - a File.UndoHandle wrapping the captured standard error.

    WARNING - The complete program output is read into memory.  This may
    be an issue when the program writes a large amount of data to
    standard output.

    NOTE - This function is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    We now recommend you invoke subprocess directly, using
    str(commandline) to turn an AbstractCommandline wrapper into a
    command line string.  This will give you full control of the tool's
    input and output as well.
    """
    import warnings
    warnings.warn("Bio.Application.generic_run and the associated "
                  "Bio.Application.ApplicationResult are deprecated. "
                  "Please use the built in Python module subprocess "
                  "instead, as described in the Biopython Tutorial.",
                  DeprecationWarning)
    # The shell is used everywhere except on Windows.
    use_shell = sys.platform != "win32"
    # No input is piped to the child, but the standard input pipe is set
    # up anyway as a work around for a python bug when this is called
    # from a Windows GUI program, see http://bugs.python.org/issue1124861
    process = subprocess.Popen(str(commandline),
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=use_shell)
    # .communicate() rather than .wait(), which might deadlock
    # (see Bug 2804/2806).
    captured_stdout, captured_stderr = process.communicate()
    result = ApplicationResult(commandline, process.returncode)
    stdout_handle = File.UndoHandle(StringIO.StringIO(captured_stdout))
    stderr_handle = File.UndoHandle(StringIO.StringIO(captured_stderr))
    return result, stdout_handle, stderr_handle
64
class ApplicationResult:
    """Make results of a program available through a standard interface (DEPRECATED).

    This tries to pick up output information available from the program
    and make it available programmatically.  Right now the only results
    handled are output files: every parameter of the command line object
    whose param_types contain both "file" and "output", and which has
    been set, is recorded under its last ("human readable") name.

    NOTE - This class is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    """

    def __init__(self, application_cl, return_code):
        """Initialize with the commandline from the program.

        Arguments:

         - application_cl -- the command line object; its ``parameters``
           attribute is scanned for output file parameters.
         - return_code -- the return code of the application, exposed
           unchanged as the ``return_code`` attribute.
        """
        import warnings
        warnings.warn("Bio.Application.ApplicationResult and the "
                      "associated function Bio.Application.generic_run "
                      "are deprecated. Please use the built in Python "
                      "module subprocess instead, as described in the "
                      "Biopython Tutorial", DeprecationWarning)
        self._cl = application_cl

        # provide the return code of the application
        self.return_code = return_code

        # get the application dependent results we can provide
        # right now the only results we handle are output files
        self._results = {}

        for parameter in self._cl.parameters:
            if "file" in parameter.param_types and \
               "output" in parameter.param_types:
                if parameter.is_set:
                    self._results[parameter.names[-1]] = parameter.value

    def get_result(self, output_name):
        """Retrieve result information for the given output.

        Supports any of the defined parameter aliases (assuming the
        parameter is defined as an output).

        Raises KeyError if the name matches no recorded result, either
        directly or through an alias.
        """
        try:
            return self._results[output_name]
        except KeyError:
            pass
        # Not stored under that name - try the aliases...
        for parameter in self._cl.parameters:
            if output_name in parameter.names:
                # Results are keyed by the last name of the parameter; this
                # lookup raises KeyError if that parameter recorded nothing.
                return self._results[parameter.names[-1]]
        # No, really was a key error:
        raise KeyError(output_name)

    def available_results(self):
        """Retrieve a sorted list of all available result names."""
        # sorted() works whether keys() gives a list or a view.
        return sorted(self._results)
120
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline.asequence = "asis:ACCCGGGCGCGGT"
    >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
    >>> cline.outfile = "temp_water.txt"
    >>> print(cline)
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> cline
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call (e.g. using the subprocess module).
    """

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        Arguments:

         - cmd -- stored as ``program_name``; it is the first word of the
           generated command line.
         - kwargs -- each key is passed with its value to set_parameter,
           so it must be a name of one of ``self.parameters``.

        Raises AttributeError if the subclass did not define
        ``self.parameters`` first, and ValueError if a parameter name is
        used twice or a keyword argument names no parameter.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should be an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments.  It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Factories rather than bare lambdas in the loop: each call binds
        # its own ``name``, avoiding binding-versus-assignment confusion.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The property is published under the last name only.
            name = p.names[-1]
            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value. Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            # Properties only work as class attributes, hence the class.
            setattr(self.__class__, name, prop)
        # items() rather than iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses
        may override it.
        """
        for p in self.parameters:
            # Check for missing required parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])
            # Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        Calls _validate first, so a ValueError is raised if a required
        parameter is missing.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        # Each parameter's string form carries its own trailing space.
        pieces = [str(parameter) for parameter in self.parameters
                  if parameter.is_set]
        commandline = ("%s " % self.program_name) + "".join(pieces)
        return commandline.strip()  # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%r" % (self.__class__.__name__, self.program_name)
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    # Switches carry no value, only their on/off state.
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%r" \
                              % (parameter.names[-1], parameter.value)
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch the result is its ``is_set`` flag, otherwise the
        parameter's ``value``.  Raises ValueError for an unknown name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError for an unknown name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Only reset ``value`` where it exists.  Switch objects
                # deliberately have no such attribute (their __str__
                # asserts this), so creating one here would make a switch
                # that is cleared and later re-enabled fail that assertion.
                if hasattr(parameter, "value"):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value = None):
        """Set a commandline option for a program.

        For a switch, ``value`` is interpreted as a boolean (a warning is
        issued for None, which counts as false).  For any other parameter
        a value other than None is first passed through the parameter's
        checker function, then stored and the parameter marked as set.

        Raises ValueError for an unknown name or a rejected value.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean. None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name,
                                          parameter.checker_function)
                    parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error.  Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.  Nothing is checked when
        'check_function' is None.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print(cline)
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']:  # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter
379 380
class _AbstractParameter:
    """A class to hold information about a parameter for a commandline.

    Do not use this directly, instead use one of the subclasses.  Both
    methods defined here only raise NotImplementedError.
    """

    def __init__(self):
        """Refuse direct instantiation; always raises NotImplementedError."""
        raise NotImplementedError

    def __str__(self):
        """Always raises NotImplementedError in this base class."""
        raise NotImplementedError
391
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of strings describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'. Note that if 'file' is included,
    these argument values will automatically be escaped if the filename
    contains spaces.

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o description -- a description of the option.

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names=None, types=None, checker_function=None,
                 is_required=False, description="", equate=True):
        """Store the option's description; it starts out unset.

        ``names`` and ``types`` default to a new empty list for each
        instance.  (A literal ``[]`` default would be one list object
        shared by every instance created without these arguments.)
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.checker_function = checker_function
        self.description = description
        self.equate = equate
        self.is_required = is_required

        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # Note: Before equate was handled explicitly, the old
        # code would do either "--name " or "--name=value ",
        # or " -name " or " -name value ".  This choice is
        # now made explicitly when setting up the option.
        if self.value is None:
            return "%s " % self.names[0]
        if "file" in self.param_types:
            v = _escape_filename(self.value)
        else:
            v = str(self.value)
        if self.equate:
            return "%s=%s " % (self.names[0], v)
        else:
            return "%s %s " % (self.names[0], v)
463
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of strings describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    o is_required -- always False for a switch.

    NOTE - There is no value attribute, see is_set instead.
    """

    def __init__(self, names=None, types=None, description=""):
        """Store the switch's description; it starts out unset.

        ``names`` and ``types`` default to a new empty list for each
        instance.  (A literal ``[]`` default would be one list object
        shared by every instance created without these arguments.)
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.description = description
        self.is_set = False
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        This is the first name plus a trailing space when the switch is
        set, and an empty string otherwise.
        """
        # A switch must never have been given a value attribute.
        assert not hasattr(self, "value")
        if self.is_set:
            return "%s " % self.names[0]
        else:
            return ""
507
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, only the value itself is written to the command
    line, without any of the names in front of it.
    """

    def __init__(self, names=None, types=None, checker_function=None,
                 is_required=False, description=""):
        """Store the argument's description; it starts out unset.

        ``names`` and ``types`` default to a new empty list for each
        instance.  (A literal ``[]`` default would be one list object
        shared by every instance created without these arguments.)
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.checker_function = checker_function
        self.description = description
        self.is_required = is_required
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the argument value followed by a trailing space.

        With no value this is a single space.
        """
        if self.value is None:
            return " "
        else:
            return "%s " % self.value
526
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    A name without any space is returned untouched, as is a name which
    already starts and ends with a double quote:

    >>> print(_escape_filename('example with spaces'))
    "example with spaces"
    >>> print(_escape_filename('"example with spaces"'))
    "example with spaces"
    >>> print(_escape_filename('example'))
    example
    """
    # Plain quoting was chosen as it works on Windows, Mac OS X etc.
    # (Asking Windows for the DOS style 8.3 short name was considered,
    # but such names are not portable between machines or even folders.)
    needs_quotes = " " in filename and not (
        filename.startswith('"') and filename.endswith('"'))
    return '"%s"' % filename if needs_quotes else filename
558
def _test():
    """Run the Bio.Application module's doctests."""
    from doctest import testmod
    testmod(verbose=1)
if __name__ == "__main__":
    # Executed as a script: run the doctests
    _test()