# Revision identifier string for this module (CVS keyword format).
cvs_id = "$Id: URLFetch.py,v 1.3 2004/04/22 18:41:17 jpakaste Exp $"

import httplib_async
import asyncore
import urlparse
import base64
import gzip
try:
    from cStringIO import StringIO
except:
    from StringIO import StringIO
from error import log, logtb, logparam
import straw
import time

def is_ip(host):
    """Tell whether *host* is written as a dotted-quad IPv4 address.

    The string must consist of exactly four dot-separated fields, each of
    which converts with int() to a value in the range 0..255.

    Returns 1 when it does, 0 otherwise.
    """
    octets = host.split(".")
    if len(octets) != 4:
        return 0
    try:
        numbers = [int(octet) for octet in octets]
    except (ValueError, TypeError):
        # at least one field is not an integer
        return 0
    for number in numbers:
        if number < 0 or number > 255:
            return 0
    return 1

class RequestSchemeException(Exception):
    """Raised when a request is made for a URI whose scheme is not supported.

    Attributes:
        scheme -- the scheme string that was rejected
    """
    def __init__(self, scheme):
        # Initialise the base class so that args is a proper tuple and
        # str(exception) names the offending scheme instead of being empty.
        Exception.__init__(self, "Unsupported URI scheme: %r" % (scheme,))
        self.scheme = scheme

class Request:
    """Plain record holding everything known about one HTTP fetch.

    Every field is optional and defaults to None; the constructor simply
    stores each keyword argument on the attribute of the same name.
    """
    def __init__(self, host=None, port=None, path=None, ip=None,
                 headers=None, user=None, password=None, priority=None,
                 consumer=None, uri=None):
        # what to fetch
        self.uri = uri
        self.host, self.port, self.path = host, port, path
        self.ip = ip
        # how to fetch it
        self.headers = headers
        self.user, self.password = user, password
        # scheduling and result delivery
        self.priority = priority
        self.consumer = consumer

class ConnectionManager:
    """Queue HTTP requests by priority and run them through asyncore.

    request() parses a URI into a Request and either queues it directly or
    first starts a host name lookup.  poll() must be called repeatedly: it
    starts queued requests while connection slots are free, drives the
    lookup manager and asyncore, and times out slow downloads.
    """
    def __init__(self):
        # Pending Request objects, kept sorted by ascending priority value.
        self._queue = []

    def request(self, uri, consumer, headers=None, user=None, password=None, priority=straw.NetworkConstants.PRIORITY_DEFAULT):
        """Schedule a GET of *uri* whose outcome is reported to *consumer*.

        uri      -- absolute http URI; surrounding whitespace is stripped
        consumer -- object notified through its http_* callbacks
        headers  -- optional mapping of extra request headers; it is copied,
                    so the caller's mapping is never modified
        user, password -- optional credentials for HTTP basic authentication
        priority -- queue priority; requests with a lower value run first

        Raises RequestSchemeException if the URI scheme is not "http".
        A malformed host name is reported through consumer.http_failed().
        """
        uri = uri.strip()
        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)
        if scheme != "http":
            raise RequestSchemeException(scheme)
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80 # default port

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query

        # Headers are modified later on (authorization, user agent, ...).
        # Work on a private copy so those additions cannot leak into the
        # caller's mapping or into other requests.
        if headers is None:
            headers = {}
        else:
            headers = dict(headers)

        req = Request(host = host, port = port, path = path, headers = headers,
                      user = user, password = password, priority = priority, consumer = consumer, uri = uri)

        # no lookup necessary if host is an ip address.
        # if we are using a proxy, let it handle the lookup.
        # Exactly one of the three branches runs, so a request is never
        # queued more than once.
        if is_ip(host):
            req.ip = host
            self._queue_request(req)
        elif straw.Config.get_instance().proxy_config.use:
            self._queue_request(req)
        else:
            self._lookup(req)

    def _lookup(self, req):
        """Start a name lookup for req.host; _request_resolved gets the result.

        A host name the lookup manager rejects is reported to the request's
        consumer instead of being raised.
        """
        import sys
        try:
            straw.LookupManager.get_instance().lookup(
                req.host, self._request_resolved, req)
        except straw.LookupManager.NameFormatException:
            # sys.exc_info() is used to fetch the exception so that this
            # handler parses on every Python version.
            req.consumer.http_failed(sys.exc_info()[1])

    def _queue_request(self, req):
        """Insert *req* into the queue, keeping it ordered by priority.

        The request goes in front of the first queued request with a
        strictly greater priority value, so requests of equal priority
        stay in arrival order.
        """
        i = 0
        while i < len(self._queue):
            if self._queue[i].priority > req.priority:
                self._queue.insert(i, req)
                return
            i += 1
        # nothing queued has a greater priority value: goes last
        self._queue.append(req)

    def _request_resolved(self, host, ip, req):
        """Lookup callback: queue *req* with its address, or report failure."""
        if ip is not None and ip != "":
            req.ip = ip
            self._queue_request(req)
        else:
            req.consumer.http_failed("Host name lookup failed")

    def poll(self, timeout=0.1):
        """Run one round of network activity.

        timeout -- time budget, shared between the lookup manager poll and
                   asyncore.poll

        Returns a non-zero value while there are queued requests or open
        sockets, i.e. while the caller should keep polling.
        """
        config = straw.Config.get_instance()
        # activate up to MAX_CONNECTIONS channels
        while self._queue and len(asyncore.socket_map) < straw.NetworkConstants.MAX_CONNECTIONS and not config.proxy_config.is_waiting:
            req = self._queue.pop(0)
            # has the user switched off the proxy after this request was queued
            if (not config.proxy_config.use) and (not req.ip):
                self._lookup(req)
            else:
                self.do_request(req)
        # keep the network running
        lookup_manager = straw.LookupManager.get_instance()
        now = time.time()
        lookup_manager.poll(timeout)
        # asyncore only gets what is left of the time budget
        timeout -= (time.time() - now)
        if timeout > 0.0:
            asyncore.poll(timeout)
        # time out stuck consumers
        self.time_out_consumers()
        # return non-zero if we should keep polling
        return len(self._queue) or len(asyncore.socket_map)

    def time_out_consumers(self):
        """Abort every download running longer than MAX_DOWNLOAD_TIME."""
        now = time.time()
        # Iterate over a snapshot: time_exceeded() closes the connection,
        # which may change socket_map while we are looping.
        for obj in list(asyncore.socket_map.values()):
            pc = obj.consumer
            if now - pc.start_time > straw.NetworkConstants.MAX_DOWNLOAD_TIME:
                pc.time_exceeded()

    def do_request(self, req):
        """Add authentication headers to *req* and start fetching it.

        Returns the ProxyConsumer created for the request.
        """
        proxy = None
        pc = straw.Config.get_instance().proxy_config
        if req.headers is None:
            req.headers = {}
        if pc.use:
            proxy = pc.ip
            if pc.use_authentication:
                req.headers['Proxy-Authorization'] = self._basic_auth(pc.user, pc.password)
        if req.user and req.password:
            req.headers['Authorization'] = self._basic_auth(req.user, req.password)
        return ProxyConsumer(req, proxy=(proxy, pc.port))

    def _basic_auth(self, user, password):
        """Return the HTTP basic authentication header value for the pair."""
        token = base64.encodestring('%s:%s' % (user, password))
        # encodestring() breaks its output into lines; a header value must
        # not contain newlines, so remove all of them, not only the last.
        return 'Basic %s' % token.replace('\n', '')

# Module-level ConnectionManager instance.  ProxyConsumer.http_redirect()
# uses it to issue the follow-up request for a redirect.
connection_manager = ConnectionManager()

class Consumer:
    """Base class for receivers of HTTPConnection_async callbacks.

    The constructor creates the connection and registers this object as its
    callback target.  Subclasses must override finished_callback(),
    http_redirect() and feed().
    """
    def __init__(self, host, ip, port, proxy=None):
        self.connection = httplib_async.HTTPConnection_async(host, ip, port, self, proxy=proxy)

    def request(self, path, headers=None, user=None, password=None):
        """Send a GET request for *path* with the given *headers*.

        user and password are accepted but not used by this method.
        """
        if headers is None:
            headers = {}
        self.connection.request("GET", path, headers)
        self.connection.execute()

    def finished_callback(self):
        """Called when the response is complete.  Must be overridden."""
        raise NotImplementedError

    def http_failed(self, exception):
        """Called by HTTPConnection_async when connection failed with exception"""
        raise exception

    def http_header(self, status, header):
        """Called by HTTPConnection_async with status and header"""
        # A response declaring an empty body is complete right away.
        if header.getheader('content-length') == '0':
            self.finished_callback()

    def http_redirect(self, location, permanent = 0):
        """Called by HTTPConnection_async with the new location in case of 301 or 302"""
        raise NotImplementedError

    def feed(self, data):
        """Called by HTTPConnection_async with (part of the) data, after http_header"""
        raise NotImplementedError

class ProxyConsumer(Consumer):
    """Fetch one Request and pass the outcome on to the request's consumer.

    The response body is collected in memory.  When the response is
    complete, the real consumer (req.consumer) gets one http_results()
    call; on any failure it gets one http_failed() call instead.  The
    finished flag makes sure that only one of the two is delivered.
    """
    def __init__(self, req, proxy=None):
        self.consumer = req.consumer
        self.data = ""
        self.header = None
        self.status = None
        self.req = req
        self.path = req.path
        self.finished = 0
        # Recorded before any network activity so that
        # ConnectionManager.time_out_consumers() can always read it.
        self.start_time = time.time()
        Consumer.__init__(self, req.host, req.ip, req.port, proxy)
        if req.headers is None:
            req.headers = {}
        req.headers['Accept-encoding'] = 'gzip'
        req.headers['User-agent'] = 'Straw/%s' % straw.VERSION
        self.request(req.path, req.headers, req.user, req.password)

    def _content_length(self):
        """Return the Content-Length header as an int.

        Returns None when no header has been received yet, when the field
        is absent, or when its value is not a number.
        """
        if not self.header:
            return None
        value = self.header.getheader('content-length')
        if value is None:
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def finished_callback(self):
        """Deliver the collected response to the consumer (only once).

        A gzip encoded body is decompressed first.  If it cannot be
        decompressed the consumer gets http_failed() instead of
        http_results().
        """
        import sys
        if self.finished:
            return
        self.finished = 1
        if self.header and self.header.getheader('content-encoding') == 'gzip':
            try:
                self.data = gzip.GzipFile(fileobj = StringIO(self.data)).read()
            except Exception:
                # Whatever went wrong while decoding, the download is
                # unusable.  finished is already set, so report straight
                # to the consumer rather than through self.http_failed().
                self.consumer.http_failed(sys.exc_info()[1])
                return
        self.consumer.http_results(self.status, self.header, self.data)

    def http_failed(self, exception):
        """Report a failure to the consumer unless a result was already sent."""
        if not self.finished:
            self.finished = 1
            self.consumer.http_failed(exception)

    def http_header(self, status, header):
        """Store status and header, then apply the base class handling."""
        self.status = status
        self.header = header
        Consumer.http_header(self, status, header)

    def http_redirect(self, location, permanent = 0):
        """Follow a redirect by issuing a new request for *location*.

        A location that does not itself parse with the scheme "http" is
        resolved against the URI of the original request.  For a permanent
        redirect the consumer is told the new location first.  If the
        resulting location still has an unsupported scheme, the consumer
        gets http_failed().
        """
        import sys
        assert type(location) == type(''), "Invalid redirect"
        if urlparse.urlparse(location)[0] != 'http':
            location = urlparse.urljoin(self.req.uri, location)
        if permanent:
            self.consumer.http_permanent_redirect(location)

        try:
            connection_manager.request(location, self.consumer, self.req.headers,
                                       self.req.user, self.req.password)
        except RequestSchemeException:
            # Raising out of this callback would leave the consumer
            # without any notification.
            self.http_failed(sys.exc_info()[1])

    def feed(self, data):
        """Collect a piece of the body; finish or abort when a limit is hit."""
        self.data += data
        datalength = len(self.data)
        cl = self._content_length()
        if cl is not None and datalength >= cl:
            # everything announced by the server has arrived
            self.connection.close()
            self.finished_callback()
        elif datalength >= straw.NetworkConstants.MAX_DOWNLOAD_SIZE:
            self.connection.close()
            self.http_failed("Maximum download file size exceeded")

    def http_close(self):
        """Handle the connection being closed.

        Fewer bytes than the announced Content-Length count as a failure;
        anything else completes the request.
        """
        cl = self._content_length()
        if cl is not None and len(self.data) < cl:
            msg = "Feed is empty."
            self.http_failed(msg)
        else:
            self.finished_callback()

    def time_exceeded(self):
        """Abort the download because it took too long."""
        self.connection.close()
        self.http_failed("Maximum download time exceeded")
