1 """This module contains code to access EZRetrieve (DEPRECATED).
2
3 This module is now deprecated, and will be removed in a future release of
4 Biopython.
5
6 This is a very simple interface to the EZRetrieve website described in:
7
8 Zhang, H., Ramanathan, Y., Soteropoulos, P., Recce, M., and Tolias, P.P. (2002).
9 EZ-Retrieve: A web-server for batch retrieval of coordinate-specified human
10 DNA sequences and underscoring putative transcription factor-binding sites.
11 Nucl. Acids. Res. 2002 30: e121.
12 http://dx.doi.org/10.1093/nar/gnf120
13
14 Functions:
15 retrieve_single Retrieve a single sequence from EZRetrieve.
16 parse_single Parse the results from EZRetrieve into FASTA format.
17 """
18
19 import warnings
# Runs at import time: tell anyone importing this module that it is
# scheduled for removal and how to object.
warnings.warn(
    message=(
        "Bio.EZRetrieve is deprecated, and will be removed in a future "
        "release of Biopython. If you want to continue to use this "
        "code, please get in contact with the Biopython developers "
        "via the mailing lists to avoid its permanent removal from "
        "Biopython."
    ),
    category=DeprecationWarning,
)
25
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from EZRetrieve.

    Sends one POST request to the EZRetrieve ``single_r_run.jsp`` CGI.

    Arguments:
    id             Identifier to look up; converted with str().
    from_, to      Range to retrieve; converted with str() and sent as the
                   "from" and "to" form fields (presumably sequence
                   coordinates -- confirm against the EZRetrieve paper).
    retrieve_by    Kind of identifier, case-insensitive: "GenBank"
                   (default), "UniGene", "LocusLink" or "IMAGE".
    organism       Organism code, case-sensitive: "Hs" (default), "Mm"
                   or "Rn".
    parse_results  If true (default), return the FASTA-formatted string
                   produced by parse_single; otherwise return the raw
                   response body exactly as read from the server.

    Raises AssertionError if organism or retrieve_by is not one of the
    supported values; the check runs before any network access.
    """
    # Function-scope import, as in the rest of this module.  The helpers
    # moved to urllib.parse / urllib.request in Python 3.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlencode, urlopen

    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"

    # Form values expected by the CGI.  They are all sent as text on the
    # wire, so keep them uniformly as strings.
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {
        "genbank": "0",
        "unigene": "1",
        "locuslink": "2",
        "image": "3",
    }

    # Raise explicitly instead of using ``assert``: assert statements are
    # stripped under ``python -O``, which would turn bad input into a
    # confusing KeyError further down.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError(
            "Unknown organism %r; expected one of %s"
            % (organism, ", ".join(sorted(org2value))))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError(
            "Unknown retrieve_by %r; expected one of %s"
            % (retrieve_by, ", ".join(sorted(acctype2value))))

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    # urlencode output is pure ASCII; Python 3's urlopen requires the POST
    # body as bytes (on Python 2 this encode is a no-op).
    options = urlencode(params).encode("ascii")

    handle = urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Always release the HTTP connection, even if parsing fails.
        handle.close()
54
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence.

    handle is a file-like object whose read() returns the HTML page sent
    back by EZRetrieve (text, or bytes as returned by Python 3's urlopen).

    The record is taken from the first "<b>>" marker up to the next
    "<br><br>" (or the end of the page if that terminator is missing).
    HTML tags are removed and every run of whitespace becomes a single
    newline; the result always ends with exactly one newline.

    May raise an AssertionError if there was a problem retrieving the
    sequence: either the page contains an "Error: " message (which becomes
    the exception text) or no sequence marker can be found.
    """
    import re

    results = handle.read()
    # Python 3 HTTP responses yield bytes; the searches below need text.
    # On Python 2 ``bytes is str`` so this branch is never taken.
    # NOTE(review): the server's charset is not visible here; UTF-8 with
    # replacement is an assumption that is safe for ASCII sequence data.
    if isinstance(results, bytes) and not isinstance(results, str):
        results = results.decode("utf-8", "replace")

    # Tags are matched case-insensitively directly on the original text,
    # so match offsets are always valid slice positions in ``results``.
    line_break = re.compile(r"<br>", re.IGNORECASE)
    record_start = re.compile(r"<b>>", re.IGNORECASE)
    record_end = re.compile(r"<br><br>", re.IGNORECASE)

    error_at = results.find("Error: ")
    if error_at >= 0:
        # The message normally ends at the next <br>; if there is none,
        # report the rest of the page instead of failing with ValueError.
        found = line_break.search(results, error_at)
        error_end = found.start() if found else len(results)
        raise AssertionError(results[error_at:error_end].strip())

    found = record_start.search(results)
    if found is None:
        # Explicit raise: a plain ``assert`` would vanish under ``-O``.
        raise AssertionError("Couldn't find sequence.")
    start = found.start()

    # Without the terminator, keep everything to the end of the page.  A
    # str.find() result of -1 used as a slice bound would silently drop
    # the final character.
    found = record_end.search(results, start)
    stop = found.start() if found else len(results)

    seqdata = results[start:stop]
    seqdata = re.sub(r"<[^>]*>", "", seqdata)   # strip HTML tags
    seqdata = re.sub(r"\s+", "\n", seqdata)     # one token per line
    return seqdata.strip() + "\n"
79