"""
This module contains the HTTP fetcher interface and several implementations.
"""

__all__ = ['fetch', 'getDefaultFetcher', 'setDefaultFetcher', 'HTTPResponse',
           'HTTPFetcher', 'createHTTPFetcher', 'HTTPFetchingError',
           'HTTPError']

import urllib2
import time
import cStringIO
import sys

import openid
import openid.urinorm

# httplib2 is optional; if it is available, HTTPLib2Fetcher can be used
# for fetching with HTTP caching support.
try:
    import httplib2
except ImportError:
    httplib2 = None

# pycurl is optional; if it is available, CurlHTTPFetcher is preferred.
try:
    import pycurl
except ImportError:
    pycurl = None

USER_AGENT = "python-openid/%s (%s)" % (openid.__version__, sys.platform)

def fetch(url, body=None, headers=None):
    """Invoke the fetch method on the default fetcher. Most users
    should need only this method.

    @raises Exception: any exceptions that may be raised by the default fetcher
    """
    fetcher = getDefaultFetcher()
    return fetcher.fetch(url, body, headers)

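# Illustrative usage of the module-level fetch() convenience function
# (the URL is a placeholder). HTTP error responses such as 404 or 500 are
# returned as HTTPResponse objects rather than raised as exceptions.
#
#     response = fetch('http://example.com/')
#     if response.status == 200:
#         print response.final_url, len(response.body)
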
def createHTTPFetcher():
    """Create a default HTTP fetcher instance

    prefers Curl to urllib2."""
    if pycurl is None:
        fetcher = Urllib2Fetcher()
    else:
        fetcher = CurlHTTPFetcher()

    return fetcher

# Contains the currently set HTTP fetcher. If it is set to None, the
# library will call createHTTPFetcher() to set it. Do not access this
# variable outside of this module.
_default_fetcher = None

def getDefaultFetcher():
    """Return the default fetcher instance.
    If no fetcher has been set, a default fetcher is created.

    @return: the default fetcher
    @rtype: HTTPFetcher
    """
    global _default_fetcher

    if _default_fetcher is None:
        setDefaultFetcher(createHTTPFetcher())

    return _default_fetcher

def setDefaultFetcher(fetcher, wrap_exceptions=True):
    """Set the default fetcher

    @param fetcher: The fetcher to use as the default HTTP fetcher
    @type fetcher: HTTPFetcher

    @param wrap_exceptions: Whether to wrap exceptions thrown by the
        fetcher with HTTPFetchingError so that they may be caught more
        easily. By default, exceptions will be wrapped. In general,
        unwrapped fetchers are useful for debugging of fetching errors
        or if your fetcher raises well-known exceptions that you would
        like to catch.
    @type wrap_exceptions: bool
    """
    global _default_fetcher
    if fetcher is None or not wrap_exceptions:
        _default_fetcher = fetcher
    else:
        _default_fetcher = ExceptionWrappingFetcher(fetcher)

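# Illustrative sketch of installing a different default fetcher. Passing
# wrap_exceptions=False leaves the fetcher's own exceptions unwrapped,
# which can be handy while debugging fetch failures.
#
#     setDefaultFetcher(Urllib2Fetcher(), wrap_exceptions=False)
#     assert not usingCurl()
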
def usingCurl():
    """Whether the currently set HTTP fetcher is a Curl HTTP fetcher."""
    return isinstance(getDefaultFetcher(), CurlHTTPFetcher)

class HTTPResponse(object):
    """The result of a fetch, as returned by C{L{HTTPFetcher.fetch}}

    @ivar final_url: The URL of the response, after following any redirects
    @ivar status: The HTTP status code of the response
    @ivar headers: A dictionary of the response headers
    @ivar body: The body of the response
    """
    headers = None
    status = None
    body = None
    final_url = None

    def __init__(self, final_url=None, status=None, headers=None, body=None):
        self.final_url = final_url
        self.status = status
        self.headers = headers
        self.body = body

    def __repr__(self):
        return "<%s status %s for %s>" % (self.__class__.__name__,
                                          self.status,
                                          self.final_url)

class HTTPFetcher(object):
    """
    This class is the interface for openid HTTP fetchers. This
    interface is only important if you need to write a new fetcher for
    some reason.
    """

    def fetch(self, url, body=None, headers=None):
        """
        This performs an HTTP POST or GET, following redirects along
        the way. If a body is specified, then the request will be a
        POST. Otherwise, it will be a GET.

        @param url: The URL to fetch
        @type url: str

        @param body: The request body for a POST request, or None for a GET
        @type body: str or NoneType

        @param headers: HTTP headers to include with the request
        @type headers: {str:str}

        @return: An object representing the server's HTTP response. If
            there are network or protocol errors, an exception will be
            raised. HTTP error responses, like 404 or 500, do not
            cause exceptions.

        @rtype: L{HTTPResponse}

        @raise Exception: Different implementations will raise
            different errors based on the underlying HTTP library.
        """
        raise NotImplementedError

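# A minimal sketch of a custom fetcher, assuming the third-party `requests`
# library is available (it is not a dependency of this module); shown only
# to illustrate the HTTPFetcher interface.
#
#     import requests
#
#     class RequestsFetcher(HTTPFetcher):
#         def fetch(self, url, body=None, headers=None):
#             if body is None:
#                 r = requests.get(url, headers=headers)
#             else:
#                 r = requests.post(url, data=body, headers=headers)
#             return HTTPResponse(final_url=r.url, status=r.status_code,
#                                 headers=dict(r.headers), body=r.content)
#
# An instance could then be installed with setDefaultFetcher(RequestsFetcher()).
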
def _allowedURL(url):
    return url.startswith('http://') or url.startswith('https://')

class HTTPFetchingError(Exception):
    """Exception that is wrapped around all exceptions that are raised
    by the underlying fetcher when using the ExceptionWrappingFetcher

    @ivar why: The exception that caused this exception
    """
    def __init__(self, why=None):
        Exception.__init__(self, why)
        self.why = why

class ExceptionWrappingFetcher(HTTPFetcher):
    """Fetcher that wraps another fetcher, causing all exceptions it
    raises to be wrapped in C{L{HTTPFetchingError}}

    @cvar uncaught_exceptions: Exceptions that should be exposed to the
        user if they are raised by the fetch call
    """

    uncaught_exceptions = (SystemExit, KeyboardInterrupt, MemoryError)

    def __init__(self, fetcher):
        self.fetcher = fetcher

    def fetch(self, *args, **kwargs):
        try:
            return self.fetcher.fetch(*args, **kwargs)
        except self.uncaught_exceptions:
            raise
        except:
            exc_cls, exc_inst = sys.exc_info()[:2]
            if exc_inst is None:
                # string exceptions
                exc_inst = exc_cls

            raise HTTPFetchingError(why=exc_inst)

class Urllib2Fetcher(HTTPFetcher):
    """An C{L{HTTPFetcher}} that uses urllib2.
    """
    def fetch(self, url, body=None, headers=None):
        if not _allowedURL(url):
            raise ValueError('Bad URL scheme: %r' % (url,))

        if headers is None:
            headers = {}

        headers.setdefault(
            'User-Agent',
            "%s Python-urllib/%s" % (USER_AGENT, urllib2.__version__,))

        req = urllib2.Request(url, data=body, headers=headers)
        try:
            f = urllib2.urlopen(req)
            try:
                return self._makeResponse(f)
            finally:
                f.close()
        except urllib2.HTTPError, why:
            try:
                return self._makeResponse(why)
            finally:
                why.close()

    def _makeResponse(self, urllib2_response):
        resp = HTTPResponse()
        resp.body = urllib2_response.read()
        resp.final_url = urllib2_response.geturl()
        resp.headers = dict(urllib2_response.info().items())

        if hasattr(urllib2_response, 'code'):
            resp.status = urllib2_response.code
        else:
            resp.status = 200

        return resp

class HTTPError(HTTPFetchingError):
    """
    This exception is raised by the C{L{CurlHTTPFetcher}} when it
    encounters an exceptional situation fetching a URL.
    """
    pass

class CurlHTTPFetcher(HTTPFetcher):
    """
    An C{L{HTTPFetcher}} that uses pycurl for fetching.
    See U{http://pycurl.sourceforge.net/}.
    """
    ALLOWED_TIME = 20  # seconds allowed for the whole fetch, including redirects

    def __init__(self):
        HTTPFetcher.__init__(self)
        if pycurl is None:
            raise RuntimeError('Cannot find pycurl library')

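    # _parseHeaders (below) turns the raw header block captured by pycurl's
    # HEADERFUNCTION into a dict with lower-cased header names. For example,
    # the (illustrative) captured data
    #
    #     "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n"
    #
    # parses to {'content-type': 'text/html'}; the status line is discarded
    # and only the header fields are kept.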
    def _parseHeaders(self, header_file):
        header_file.seek(0)

        # Remove the status line from the beginning of the input
        unused_http_status_line = header_file.readline()
        lines = [line.strip() for line in header_file]

        # and the blank line from the end
        empty_line = lines.pop()
        if empty_line:
            raise HTTPError("No blank line at end of headers: %r" % (empty_line,))

        headers = {}
        for line in lines:
            try:
                name, value = line.split(':', 1)
            except ValueError:
                raise HTTPError(
                    "Malformed HTTP header line in response: %r" % (line,))

            value = value.strip()

            # HTTP header names are case-insensitive
            name = name.lower()
            headers[name] = value

        return headers

    def _checkURL(self, url):
        # Subclasses may override this to enforce a stricter URL policy.
        return _allowedURL(url)

    def fetch(self, url, body=None, headers=None):
        stop = int(time.time()) + self.ALLOWED_TIME
        off = self.ALLOWED_TIME

        if headers is None:
            headers = {}

        headers.setdefault('User-Agent',
                           "%s %s" % (USER_AGENT, pycurl.version,))

        header_list = []
        for header_name, header_value in headers.iteritems():
            header_list.append('%s: %s' % (header_name, header_value))

        c = pycurl.Curl()
        try:
            c.setopt(pycurl.NOSIGNAL, 1)

            if header_list:
                c.setopt(pycurl.HTTPHEADER, header_list)

            # Presence of a body indicates that we should do a POST
            if body is not None:
                c.setopt(pycurl.POST, 1)
                c.setopt(pycurl.POSTFIELDS, body)

            while off > 0:
                if not self._checkURL(url):
                    raise HTTPError("Fetching URL not allowed: %r" % (url,))

                data = cStringIO.StringIO()
                response_header_data = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, data.write)
                c.setopt(pycurl.HEADERFUNCTION, response_header_data.write)
                c.setopt(pycurl.TIMEOUT, off)
                c.setopt(pycurl.URL, openid.urinorm.urinorm(url))

                c.perform()

                response_headers = self._parseHeaders(response_header_data)
                code = c.getinfo(pycurl.RESPONSE_CODE)
                if code in [301, 302, 303, 307]:
                    url = response_headers.get('location')
                    if url is None:
                        raise HTTPError(
                            'Redirect (%s) returned without a location' % code)

                    # Redirects are always GETs
                    c.setopt(pycurl.POST, 0)

                else:
                    resp = HTTPResponse()
                    resp.headers = response_headers
                    resp.status = code
                    resp.final_url = url
                    resp.body = data.getvalue()
                    return resp

                off = stop - int(time.time())

            raise HTTPError("Timed out fetching: %r" % (url,))
        finally:
            c.close()

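# Illustrative use of CurlHTTPFetcher directly (requires pycurl; the URL is
# a placeholder). ALLOWED_TIME bounds the whole fetch, redirects included;
# on expiry an HTTPError is raised instead of returning a response.
#
#     fetcher = CurlHTTPFetcher()
#     try:
#         response = fetcher.fetch('http://example.com/')
#     except HTTPError, why:
#         print 'fetch failed:', why
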
class HTTPLib2Fetcher(HTTPFetcher):
    """A fetcher that uses C{httplib2} for performing HTTP
    requests. This implementation supports HTTP caching.

    @see: http://bitworking.org/projects/httplib2/
    """

    def __init__(self, cache=None):
        """@param cache: An object suitable for use as an C{httplib2}
            cache. If a string is passed, it is assumed to be a
            directory name.
        """
        if httplib2 is None:
            raise RuntimeError('Cannot find httplib2 library. '
                               'See http://bitworking.org/projects/httplib2/')

        super(HTTPLib2Fetcher, self).__init__()

        # The httplib2 object that performs the actual HTTP requests
        self.httplib2 = httplib2.Http(cache)

        # Have httplib2 raise exceptions for errors, like the other
        # fetchers do, instead of encoding them as status codes.
        self.httplib2.force_exception_to_status_code = False

    def fetch(self, url, body=None, headers=None):
        """Perform an HTTP request

        @raises Exception: Any exception that can be raised by httplib2

        @see: C{L{HTTPFetcher.fetch}}
        """
        if body:
            method = 'POST'
        else:
            method = 'GET'

        # httplib2 does not check that the URL's scheme is HTTP(S),
        # so do it here.
        if not (url.startswith('http://') or url.startswith('https://')):
            raise ValueError('URL is not a HTTP URL: %r' % (url,))

        httplib2_response, content = self.httplib2.request(
            url, method, body=body, headers=headers)

        # httplib2 records the final URL of the request in the
        # 'content-location' header. Error responses may not include it,
        # in which case fall back to the URL that was requested.
        try:
            final_url = httplib2_response['content-location']
        except KeyError:
            # No redirects should have occurred
            assert not httplib2_response.previous

            # A successful response should always carry content-location
            assert httplib2_response.status != 200
            final_url = url

        return HTTPResponse(
            body=content,
            final_url=final_url,
            headers=dict(httplib2_response.items()),
            status=httplib2_response.status,
            )

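# Illustrative setup of HTTPLib2Fetcher as the default fetcher with an
# on-disk cache (requires httplib2; the cache directory name below is
# arbitrary).
#
#     fetcher = HTTPLib2Fetcher(cache='/tmp/openid-http-cache')
#     setDefaultFetcher(fetcher)
#     response = fetch('http://example.com/')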