# Written by John Hoffman
# Modified by Cameron Dale
# see LICENSE.txt for license information
#
# $Id: zurllib.py 106 2007-06-13 22:55:12Z camrdale-guest $

"""A high-level fetcher for WWW data, similar to the urllib module.

@type VERSION: C{string}
@var VERSION: the User-Agent header to send on all connections
@type MAX_REDIRECTS: C{int}
@var MAX_REDIRECTS: the maximum number of redirects to follow

"""

from httplib import HTTPConnection, HTTPSConnection, HTTPException
from urlparse import urlparse
from bencode import bdecode
import socket
from gzip import GzipFile
from StringIO import StringIO
from urllib import quote, unquote
from __init__ import product_name, version_short

# Sent as the User-Agent header on every request: the product name and the
# short version string joined by a slash.
VERSION = product_name+'/'+version_short
# Upper bound on the number of open attempts (the initial request plus any
# redirects) made for a single URL before giving up.
MAX_REDIRECTS = 10


class btHTTPcon(HTTPConnection):
    """Attempt to add automatic connection timeout to HTTPConnection."""
    
    def connect(self):
        """Connect to the server, then set a 30 second socket timeout.
        
        The timeout is set only after the connection has been established,
        so it applies to later operations on the socket, not to the connect
        attempt itself. Setting it is best effort: a socket that cannot
        take a timeout is left as it is.
        
        """
        
        HTTPConnection.connect(self)
        try:
            self.sock.settimeout(30)
        except (AttributeError, socket.error):
            # The socket object has no settimeout(), or refused the call;
            # carry on without a timeout rather than failing the request.
            pass

class btHTTPScon(HTTPSConnection):
    """Attempt to add automatic connection timeout to HTTPSConnection."""

    def connect(self):
        """Connect to the server, then set a 30 second socket timeout.
        
        The timeout is set only after the connection has been established,
        so it applies to later operations on the socket, not to the connect
        attempt itself. Setting it is best effort: a socket object that
        cannot take a timeout is left as it is.
        
        """
        
        HTTPSConnection.connect(self)
        try:
            self.sock.settimeout(30)
        except (AttributeError, socket.error):
            # The (possibly wrapped) socket object has no settimeout(), or
            # refused the call; carry on without a timeout.
            pass

class urlopen:
    """Opens a URL for reading.
    
    The request is made when the instance is created. Redirects (301 and
    302) are followed, and gzipped responses are optionally decompressed.
    
    @type tries: C{int}
    @ivar tries: the number of attempts to open it so far
    @type error_return: C{string}
    @ivar error_return: the bencoded returned data if an error occurred
    @type connection: L{btHTTPcon} or L{btHTTPScon}
    @ivar connection: the connection to the server
    @type response: C{httplib.HTTPResponse}
    @ivar response: the response from the server
    @type ungzip: C{boolean}
    @ivar ungzip: whether to ungzip any gzipped data that is received
    
    """
    
    def __init__(self, url, ungzip = True):
        """Initialize the instance and call the open method.
        
        @type url: C{string}
        @param url: the URL to open
        @type ungzip: C{boolean}
        @param ungzip: whether to ungzip any gzipped data that is received
            (optional, defaults to True)
        @raise IOError: if there was a problem with the URL, or if the 
            server returned an error
        
        """
        
        self.tries = 0
        # These must be set before _open() runs: on an error status _open()
        # reads the body (which consults self.ungzip) and may store it in
        # self.error_return, which must not be reset afterwards.
        self.error_return = None
        self.ungzip = ungzip
        self._open(url.strip())

    def _open(self, url):
        """Open a connection and request the URL, saving the response.
        
        A 301 or 302 response is followed by calling this method again with
        the redirect target. Any other non-200 response raises an error,
        unless its body is a bencoded dictionary with a 'failure reason'
        key, in which case the raw body is kept in L{error_return} for
        L{read} to return.
        
        @type url: C{string}
        @param url: the URL to open
        @raise IOError: if there was a problem with the URL, or if the 
            server returned an error
        
        """
        
        self.tries += 1
        if self.tries > MAX_REDIRECTS:
            raise IOError('http error', 500,
                          "Internal Server Error: Redirect Recursion")
        (scheme, netloc, path, pars, query, fragment) = urlparse(url)
        if scheme not in ('http', 'https'):
            raise IOError('url error', 'unknown url type', scheme, url)
        # Rebuild the request target; a URL with no path requests the root.
        target = path or '/'
        if pars:
            target += ';'+pars
        if query:
            target += '?'+query
        # The fragment is deliberately left out of the request.
        try:
            if scheme == 'http':
                self.connection = btHTTPcon(netloc)
            else:
                self.connection = btHTTPScon(netloc)
            self.connection.request('GET', target, None,
                                { 'User-Agent': VERSION,
                                  'Accept-Encoding': 'gzip' } )
            self.response = self.connection.getresponse()
        except HTTPException as e:
            raise IOError('http error', str(e))
        status = self.response.status
        if status in (301, 302):
            location = self.response.getheader('Location')
            self._close_quietly()
            if not location:
                # Nothing to follow; report it as an HTTP error instead of
                # failing later while parsing a missing URL.
                raise IOError('http error', status,
                              'redirect without a Location header')
            self._open(location)
            return
        if status != 200:
            reason = self.response.reason
            try:
                data = self._read()
                d = bdecode(data)
                if isinstance(d, dict) and 'failure reason' in d:
                    self.error_return = data
                    return
            except Exception:
                # The body was unreadable or not bencoded; fall through and
                # report the HTTP status instead.
                pass
            # The caller never receives this object when the constructor
            # raises, so release the connection here.
            self._close_quietly()
            raise IOError('http error', status, reason)

    def _close_quietly(self):
        """Close the current connection, ignoring any error in doing so."""
        try:
            self.connection.close()
        except Exception:
            pass

    def read(self):
        """Read the response data from the previous request.
        
        @rtype: C{string}
        @return: the response, or the error if an error occurred
        @raise IOError: if the response was marked as gzipped but could
            not be decompressed
        
        """
        if self.error_return:
            return self.error_return
        return self._read()

    def _read(self):
        """Read the response data and maybe decompress it.
        
        The data is decompressed only if L{ungzip} is set and either the
        Content-Type or the Content-Encoding header mentions gzip.
        
        @rtype: C{string}
        @return: the processed response data
        @raise IOError: if the data could not be decompressed
        
        """
        
        data = self.response.read()
        if not self.ungzip:
            return data
        content_type = self.response.getheader('Content-Type', '')
        content_encoding = self.response.getheader('Content-Encoding', '')
        if 'gzip' in content_type or 'gzip' in content_encoding:
            try:
                data = GzipFile(fileobj = StringIO(data)).read()
            except Exception:
                raise IOError('http error', 'got corrupt response')
        return data

    def close(self):
        """Closes the connection to the server."""
        self.connection.close()
