Package Bio :: Module File
[hide private]
[frames] | no frames]

Source Code for Module Bio.File

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8   
  9  Classes: 
 10   
 11  UndoHandle     File object decorator with support for undo-like operations. 
 12   
 13  StringHandle   Wraps a file object around a string. 
 14   
 15  SGMLHandle     File object that automatically strips SGML tags from data 
 16                 (DEPRECATED). 
 17   
 18  SGMLStripper   Object that strips SGML.  This is now considered OBSOLETE, and 
 19                 is likely to be deprecated in a future release of Biopython, 
 20                 and later removed. 
 21   
 22  """ 
 23  import os 
 24  import StringIO 
 25  import sgmllib 
 26   
class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Wraps a file-like handle and keeps a list of saved lines.  Saved lines
    are returned (most recently saved first) before any new data is read
    from the wrapped handle.  Saves lines in a LIFO fashion.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute that is not defined here is looked up on the wrapped
    handle.

    """

    def __init__(self, handle):
        """Wrap handle, a file-like object, starting with no saved lines."""
        self._handle = handle
        self._saved = []

    def __iter__(self):
        """Return self; the handle is its own line iterator."""
        return self

    def next(self):
        """Return the next line, raising StopIteration when none is left."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Same method under the name the Python 3 iterator protocol looks for.
    __next__ = next

    def readlines(self, *args, **keywds):
        """Return the saved lines followed by the wrapped handle's lines.

        All arguments are passed on to the wrapped handle's readlines().
        The saved lines are consumed.
        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the first saved line, or read one from the wrapped handle.

        The arguments are only used when reading from the wrapped handle;
        a saved line is always returned whole.
        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, taking saved lines first.

        A negative size or None means "read everything": all saved lines
        followed by the rest of the wrapped handle.
        """
        if size is None or size < 0:
            # Every "read it all" request must drain the saved lines first,
            # otherwise they would be skipped and returned out of order later.
            saved = "".join(self._saved)
            self._saved[:] = []
            return saved + self._handle.read(-1)

        chunks = []
        while size > 0 and self._saved:
            first = self._saved[0]
            if len(first) <= size:
                # The whole saved line fits in what is still requested.
                size -= len(first)
                chunks.append(self._saved.pop(0))
            else:
                # Only part of the line is wanted; keep the remainder saved.
                chunks.append(first[:size])
                self._saved[0] = first[size:]
                size = 0
        # size is now whatever is still owed by the wrapped handle (maybe 0).
        return "".join(chunks) + self._handle.read(size)

    def saveline(self, line):
        """Save line so it is the next one returned; empty lines are ignored."""
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        # Push the line back so the next read sees it again.
        self.saveline(line)
        return line

    def tell(self):
        """Return the wrapped handle's position minus the saved lines' length."""
        pending = sum(len(line) for line in self._saved)
        return self._handle.tell() - pending

    def seek(self, *args):
        """Discard the saved lines and seek on the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        if attr == "_handle":
            # _handle itself is missing (instance built without __init__);
            # looking it up again below would recurse without end.
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def __enter__(self):
        """Return self for use in a with statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the wrapped handle when the with block ends."""
        self._handle.close()


# StringHandle is an alias for StringIO.StringIO, which wraps a file
# object around a string.
# I could make this faster by using cStringIO.
# However, cStringIO (in v1.52) does not implement the
# readlines method.
StringHandle = StringIO.StringIO

class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data (DEPRECATED).

    This module is now considered to be obsolete, and is likely to be
    deprecated in a future release of Biopython, and later removed.

    Data returned by read, readline and readlines is passed through an
    SGMLStripper first.  Any other attribute is looked up on the wrapped
    handle.
    """

    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.

        Emits a DeprecationWarning.

        """
        import warnings
        warnings.warn("Bio.File.SGMLHandle is deprecated, and will be removed"
                      " in a future release of Biopython. If you want to"
                      " continue to use this code, please get in contact via"
                      " the mailing lists to avoid its permanent removal from"
                      " Biopython.", DeprecationWarning)
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the data without tags."""
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the wrapped handle and return it without tags."""
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read lines from the wrapped handle and return them without tags."""
        lines = self._handle.readlines(*args, **keywds)
        # Strip each line itself (not the builtin ``str`` type).
        return [self._stripper.strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        if attr == "_handle":
            # _handle itself is missing (instance built without __init__);
            # looking it up again below would recurse without end.
            raise AttributeError(attr)
        return getattr(self._handle, attr)
153 -class SGMLStripper:
154 - class MyParser(sgmllib.SGMLParser):
155 - def __init__(self):
156 sgmllib.SGMLParser.__init__(self) 157 self.data = ''
158 - def handle_data(self, data):
159 self.data = self.data + data
160
161 - def __init__(self):
162 self._parser = SGMLStripper.MyParser()
163
164 - def strip(self, str):
165 """S.strip(str) -> string 166 167 Strip the SGML tags from str. 168 169 """ 170 if not str: # empty string, don't do anything. 171 return '' 172 # I need to make sure that I don't return an empty string if 173 # the buffer is not empty. This can happen if there's a newline 174 # character embedded within a tag. Thus, I'll first check to 175 # see if the last character is a newline. If it is, and it's stripped 176 # away, I'll add it back. 177 is_newline = str[-1] in ['\n', '\r'] 178 179 self._parser.data = '' # clear the parser's data (don't reset) 180 self._parser.feed(str) 181 if self._parser.data: 182 str = self._parser.data 183 elif is_newline: 184 str = '\n' 185 else: 186 str = '' 187 return str
188