#!/usr/bin/env python # Copyright 1999 by eGroups, Inc. # # All Rights Reserved # # Permission to use, copy, modify, and distribute this software and # its documentation for any purpose and without fee is hereby # granted, provided that the above copyright notice appear in all # copies and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # eGroups not be used in advertising or publicity pertaining to # distribution of the software without specific, written prior # permission. # # EGROUPS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, # INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN # NO EVENT SHALL EGROUPS BE LIABLE FOR ANY SPECIAL, INDIRECT OR # CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS # OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. VERSION_STRING = '$Id: //depot/main/findmail/src/coroutine/corourl.py#7 $' import os import sys import string import exceptions import coro import urlparse import socket # # functions for fetching a url, that can be used in a coroutine # HTTP_VERSION = 'HTTP/1.0' HTTP_PORT = 80 CLIENT_VERSION = '1.0' # sentinel value USE_DEFAULT_TIMEOUT = -1 def geturl(url, timeout=None, connect_timeout=None): (scheme, host, path, param, query, frag) = urlparse.urlparse (url) if scheme != 'http': raise NameError, 'Invalid url <%s>, only http supported' % (scheme) request = urlparse.urlunparse(('', '', path, param, query, frag)) h = HTTP(host, timeout=timeout, connect_timeout=connect_timeout) h.putrequest ('GET', request) h.putheader ('Host', host) h.putheader ('User-Agent', 'coroutine_http/' + CLIENT_VERSION) h.endheaders () errcode, errmsg, headers, body = h.getreply() if errcode == 200: return body else: return None # # Each instance of the HTTP class is a single 1.0 connection # class HTTP_Error (exceptions.Exception): pass class HTTP: def __init__(self, host = None, port = None, timeout=None, connect_timeout=None): self._send_buffer = '' self._recv_buffer = '' self._socket = None self._timeout = timeout self._connect_timeout = connect_timeout if host is not None: self.connect(host, port) return None def _send(self): if self._socket is None: raise HTTP_Error, "uninitialized connection." while len(self._send_buffer): try: send_size = self._socket.send(self._send_buffer) except socket.error, why: raise HTTP_Error, 'socket error while sending: ' + str(why[0]) except coro.TimeoutError, why: raise HTTP_Error, 'timeout error while sending: ' + str(why[0]) if send_size: self._send_buffer = self._send_buffer[send_size:] else: raise HTTP_Error, 'socket error while sending' return None def _recv(self): # # read data until receiving an # recv_data = [] while 1: try: data_str = self._socket.recv(8192) except socket.error, why: raise HTTP_Error, 'socket error while recving: ' + str(why[0]) except coro.TimeoutError, why: raise HTTP_Error, 'timeout error while recving: ' + str(why[0]) if len(data_str): recv_data.append(data_str) else: break self._recv_buffer = string.join(recv_data, '') return None def send(self, buffer_str): self._send_buffer = self._send_buffer + buffer_str return None def connect(self, host, port=None, timeout=USE_DEFAULT_TIMEOUT, connect_timeout=USE_DEFAULT_TIMEOUT): if port is None: # # now port specified, determine if it is part of the host name. # colon = string.find(host, ':') if colon > -1: port = host[colon+1:] host = host[:colon] try: port = string.atoi(port) except: raise coro.CoroutineSocketError, "nonnumeric port" else: port = HTTP_PORT # use init values if no timeouts have been passed in if timeout == USE_DEFAULT_TIMEOUT: timeout = self._timeout if connect_timeout == USE_DEFAULT_TIMEOUT: connect_timeout = self._connect_timeout self._socket = coro.make_socket(socket.AF_INET, socket.SOCK_STREAM, timeout=timeout, connect_timeout=connect_timeout) try: self._socket.connect((host, port)) except coro.TimeoutError, why: raise HTTP_Error, 'timeout error while connecting: ' + str(why[0]) def close(self): if self._socket is not None: self._socket.close() self._socket = None self._send_buffer = '' self._recv_buffer = '' return None def putrequest(self, request, selector = '/'): if len(self._send_buffer): raise HTTP_Error, "http request must come before http headers" str = "%s %s %s\r\n" % (request, selector, HTTP_VERSION) self.send(str) return None def putheader(self, header, content): str = "%s: %s\r\n" % (header, content) self.send(str) return None def endheaders(self): str = "\r\n" self._send_buffer = self._send_buffer + str return None def _error(self, code = -1, msg = '', header = [], body = ''): return code, msg, header, body def getreply(self): try: self._send() self._recv() except HTTP_Error, error: return self._error(msg = error) # # break out the rest of the message # try: [ver, code, rest] = string.split(self._recv_buffer, None, 2) except ValueError: return self._error() if ver[:5] != 'HTTP/': return self._error() errcode = string.atoi(code) try: [msg, rest] = string.split(rest, "\n", 1) except ValueError: return self._error(code = errcode) else: errmsg = string.strip(msg) # We have to be careful here, some servers don't properly # end lines with \r\n lines = string.split (rest, '\n') header = [] for x in range (len(lines)): line = string.strip (lines[x]) if not line: body = string.join (lines[x+1:], '\n') break else: header.append (line) return errcode, errmsg, header, body if __name__ == '__main__': def fetch (url): filename = url[string.rfind(url, '/')+1:] print 'starting %s...' % filename open (filename, 'wb').write (geturl (url)) print 'finished with %s' % filename import sys urls = sys.argv[1:] for url in sys.argv[1:]: coro.spawn (fetch, url) coro.event_loop (30.0)