# Written by Bram Cohen
# multitracker extensions by John Hoffman
# Modified by Cameron Dale
# see LICENSE.txt for license information
#
# $Id: makemetafile.py 261 2007-08-17 04:22:48Z camrdale-guest $

"""Create a torrent file or data structure.

@type logger: C{logging.Logger}
@var logger: the logger to send all log messages to for this module
@type defaults: C{list} of (C{string}, unknown, C{string})
@var defaults: the default configuration variables, including descriptions
@type default_piece_len_exp: C{int}
@var default_piece_len_exp: the exponent of the default piece size to use
@type ignore: C{list} of C{string}
@var ignore: file names to ignore when creating torrents

"""

from os.path import getsize, split, join, abspath, isdir
from os import listdir
from sha import sha
from copy import copy
from string import strip
from DebTorrent.bencode import bencode
from btformats import check_info
from threading import Event, Thread
from time import time
from traceback import print_exc
from DebTorrent.zurllib import urlopen
from gzip import GzipFile
from bz2 import decompress
from StringIO import StringIO
from re import subn
import binascii, logging
# Determine the encoding used for file names on this system.  ENCODING may
# end up as None; uniconvert() and getpieces() fall back to ASCII then.
try:
    from sys import getfilesystemencoding
    ENCODING = getfilesystemencoding()
except ImportError:
    # Only a missing sys.getfilesystemencoding should trigger the fallback;
    # a bare except would also hide unrelated errors.
    from sys import getdefaultencoding
    ENCODING = getdefaultencoding()

# Logger that receives every log message emitted by this module.
logger = logging.getLogger('DebTorrent.BT1.makemetafile')

# Configuration options, each as a (name, default value, description) tuple.
defaults = [
    ('announce_list', '',
        'a list of announce URLs - explained below'),
    ('deb_mirrors', '',
        'a list of mirror URLs - explained below'),
    ('piece_size_pow2', 0,
        "which power of 2 to set the piece size to (0 = automatic)"),
    ('comment', '',
        "optional human-readable comment to put in .dtorrent"),
    ('filesystem_encoding', '',
        "optional specification for filesystem encoding " +
        "(set automatically in recent Python versions)"),
    ('target', '',
        "optional target file for the torrent"),
    ('pieces_file', '', 'the file that contains the sub-package piece information'),
    ('separate_all', 0, 'create a separate torrent for the architecture:all packages'),
    ]

# Piece size exponent used by make_meta_file() when the parameters do not
# contain 'piece_size_pow2' (i.e. pieces of 2 ** 18 bytes).
default_piece_len_exp = 18

# Directory entries with these names are skipped by subfiles() and
# completedir().
ignore = ['core', 'CVS']

def print_announcelist_details():
    """Print the help text for the announce_list and deb_mirrors options.
    
    The text is written to standard output, one line per entry of the
    table below.
    
    """
    
    help_lines = (
        '    announce_list = optional list of redundant/backup tracker URLs, in the format:',
        '           url[,url...][|url[,url...]...]',
        '                where URLs separated by commas are all tried first',
        '                before the next group of URLs separated by the pipe is checked.',
        "                If none is given, it is assumed you don't want one in the metafile.",
        '                If announce_list is given, clients which support it',
        '                will ignore the <announce> value.',
        '           Examples:',
        '                http://tracker1.com|http://tracker2.com|http://tracker3.com',
        '                     (tries trackers 1-3 in order)',
        '                http://tracker1.com,http://tracker2.com,http://tracker3.com',
        '                     (tries trackers 1-3 in a randomly selected order)',
        '                http://tracker1.com|http://backup1.com,http://backup2.com',
        '                     (tries tracker 1 first, then tries between the 2 backups randomly)',
        '',
        '    deb_mirrors = optional list of mirror URLs, in the format:',
        '            url[|url...]',
        )
    for text in help_lines:
        print (text)
    
def uniconvertl(l, e):
    """Convert a list of strings to Unicode.
    
    @type l: C{list} of C{string}
    @param l: the strings (path components) to convert to unicode
    @type e: C{string}
    @param e: the encoding to use for converting the input data
    @rtype: C{list} of C{string}
    @return: the converted strings encoded in UTF-8
    @raise UnicodeError: if a conversion error occurs, the message contains
        the components joined into a single path
    
    """
    
    try:
        return [uniconvert(s, e) for s in l]
    except UnicodeError:
        # The components must be unpacked: os.path.join() called with the
        # list as its only argument hands the list back unchanged, and
        # concatenating that to the message raises a TypeError that would
        # mask the real conversion error.
        raise UnicodeError('bad filename: '+join(*l))

def uniconvert(s, e = None):
    """Re-encode a string from a given encoding to UTF-8.
    
    @type s: C{string}
    @param s: the string to convert
    @type e: C{string}
    @param e: the encoding the input data is in
        (optional, defaults to the module's ENCODING value, or ASCII if
        that is empty as well)
    @rtype: C{string}
    @return: the converted string encoded in UTF-8
    @raise UnicodeError: if the input cannot be decoded with the encoding
    
    """
    
    # First non-empty choice wins: the argument, the detected file system
    # encoding, then plain ASCII
    codec = e or ENCODING or 'ascii'
    
    try:
        decoded = unicode(s, codec)
    except UnicodeError:
        raise UnicodeError('bad filename: '+s)
    return decoded.encode('utf-8')

def convert_all(f):
    """Produce the architecture:all variant of a name.
    
    Every 'binary-<arch>' component is rewritten to 'binary-all'.  If the
    name has no such component, '-all' is inserted before the last
    extension, and if there is no extension either it is appended.
    
    @type f: C{string}
    @param f: the string to search and replace the architecture in
    @rtype: C{string}
    @return: the converted string
    
    """
    
    # Preferred: swap the architecture of every binary-<arch> component
    replaced, hits = subn(r'binary-[a-zA-Z0-9]+([^a-zA-Z0-9]?)', r'binary-all\1', f)
    if hits:
        return replaced

    # Next best: put '-all' in front of the extension
    replaced, hits = subn(r'\.([^.]*)$', r'-all.\1', f)
    if hits:
        return replaced

    # Last resort: add '-all' to the end
    return f + '-all'

def make_meta_file(file, url, params = {}, progress = lambda x: None):
    """Create the torrent files from a Packages file.
    
    The torrent is written to params['target'] if that is set, otherwise to
    the input file name with '.dtorrent' appended.  The internal torrent name
    is always taken from the input file name.  If a separate architecture:all
    torrent is produced it is written next to the first one, using the name
    returned by convert_all().
    
    @type file: C{string}
    @param file: the Packages file to parse to create the torrent
    @type url: C{string}
    @param url: the announce address to use
    @type params: C{dictionary}
    @param params: the command-line parameters to use; every key is
        optional (the dictionary is not modified)
    @type progress: C{method}
    @param progress: report the progress of the creation
    
    """
    
    piece_len_exp = params.get('piece_size_pow2', default_piece_len_exp)

    # Derive the torrent name from the input file unconditionally; it used
    # to be left unbound whenever an explicit target was given, which made
    # the create_file() calls below fail.
    a, b = split(file)
    if b == '':
        name = a
        f = a + '.dtorrent'
    else:
        name = b
        f = join(a, b + '.dtorrent')
    if params.get('target'):
        f = params['target']
            
    if piece_len_exp == 0:  # automatic
        size = calcsize(file)
        if   size > 8*1024*1024*1024:   # > 8 gig =
            piece_len_exp = 21          #   2 meg pieces
        elif size > 2*1024*1024*1024:   # > 2 gig =
            piece_len_exp = 20          #   1 meg pieces
        elif size > 512*1024*1024:      # > 512M =
            piece_len_exp = 19          #   512K pieces
        elif size > 64*1024*1024:       # > 64M =
            piece_len_exp = 18          #   256K pieces
        elif size > 16*1024*1024:       # > 16M =
            piece_len_exp = 17          #   128K pieces
        elif size > 4*1024*1024:        # > 4M =
            piece_len_exp = 16          #   64K pieces
        else:                           # < 4M =
            piece_len_exp = 15          #   32K pieces
    piece_length = 2 ** piece_len_exp

    # None lets the conversion routines pick the encoding themselves
    encoding = params.get('filesystem_encoding')

    # Missing options fall back to the same defaults makeinfo() declares
    (info, info_all) = makeinfo(file, piece_length, encoding, progress, 
                                params.get('separate_all', 0),
                                params.get('pieces_file', ''))

    if info:
        create_file(f, info, url, uniconvert(name, encoding), params)
        
    if info_all:
        create_file(convert_all(f), info_all, url, uniconvert(convert_all(name), encoding), params)
        
def create_file(f, info, url, name, params):
    """Actually write the torrent data to a file.
    
    The metainfo is validated with check_info() and fully bencoded before
    the output file is opened, so a failure in either step does not leave
    an empty or truncated file behind.
    
    @type f: C{string}
    @param f: the file name to write
    @type info: C{dictionary}
    @param info: the torrent data to write
    @type url: C{string}
    @param url: the announce address for the torrent
    @type name: C{string}
    @param name: the internal name of the torrent
    @type params: C{dictionary}
    @param params: the command-line parameters; 'comment', 'announce_list',
        'real_announce_list', 'deb_mirrors' and 'real_deb_mirrors' are used
        if present
   
    """
    
    check_info(info)
    data = {'info': info, 'announce': strip(url), 
        'name': name,
        'creation date': long(time())}
    
    if params.get('comment'):
        data['comment'] = params['comment']
        
    if 'real_announce_list' in params:    # shortcut for progs calling in from outside
        data['announce-list'] = params['real_announce_list']
    elif params.get('announce_list'):
        # Tiers are separated by '|', the URLs within a tier by ','
        data['announce-list'] = [tier.split(',')
                                 for tier in params['announce_list'].split('|')]
        
    if 'real_deb_mirrors' in params:    # shortcut for progs calling in from outside
        data['deb_mirrors'] = params['real_deb_mirrors']
    elif params.get('deb_mirrors'):
        data['deb_mirrors'] = params['deb_mirrors'].split('|')
        
    encoded = bencode(data)
    h = open(f, 'wb')
    try:
        h.write(encoded)
    finally:
        # Release the handle even if the write fails
        h.close()

def calcsize(file):
    """Calculate the size of a file/directory.
    
    @type file: C{string}
    @param file: the file/directory to calculate the size of
    @rtype: C{long}
    @return: the size of the file/directory
    
    """
    
    if not isdir(file):
        return getsize(file)
    total = 0L
    for s in subfiles(abspath(file)):
        total += getsize(s[1])
    return total

def getsubpieces(file, pieces_file = ''):
    """Retrieve the sub-package piece information for the Packages file.
    
    The data comes from the given pieces file or, if none is given and the
    Packages file name contains a 'dists' component, from an extrapieces
    URL built from that name.  A failure to obtain the data is logged and
    results in an empty dictionary.
    
    @type file: C{string}
    @param file: the Packages file name to retrieve piece information for
    @type pieces_file: C{string}
    @param pieces_file: the file that contains the piece information
        (optional, defaults to retrieving the info from the web)
    @rtype: C{dictionary}
    @return: the piece info, keys are the file names, values are tuples of 
        a list of piece SHA1 hashes and a list of piece sizes

    """
    
    pieces = {}
    piece_url = ''
    
    if pieces_file:
        try:
            f = open(pieces_file)
        except Exception:
            logger.exception('sub-pieces file not found: '+pieces_file)
            return {}
    elif 'dists' in file.split('_'):
        try:
            parts = file.split('_')
            dists = parts.index('dists')
            # Replace the suite aliases found after 'dists' by code names
            for (suite, codename) in (('stable', 'etch'),
                                      ('testing', 'lenny'),
                                      ('unstable', 'sid')):
                try:
                    parts[parts.index(suite, dists)] = codename
                except ValueError:
                    # list.index() found no such suite in the name
                    pass
            piece_url = 'http://merkel.debian.org/~ajt/extrapieces/dists_'
            piece_url += '_'.join(parts[dists+1:])
            if piece_url.endswith('.gz'):
                piece_url = piece_url[:-3]
            if piece_url.endswith('.bz2'):
                piece_url = piece_url[:-4]
            piece_url += '-extrapieces.gz'
            piece_file = urlopen(piece_url)
            try:
                piece_data = piece_file.read()
            finally:
                # Closing is best effort, but is attempted even if the
                # read fails
                try:
                    piece_file.close()
                except Exception:
                    pass
            f = piece_data.split('\n')
        except Exception:
            logger.exception('sub-pieces URL not working: '+piece_url)
            return {}
    else:
        logger.warning('unable to find sub-pieces data')
        return {}

    try:
        filename, hashes, sizes = None, [], []
        in_pieces = False
        for line in f:
            line = line.rstrip()
            if line == "":
                # A blank line ends the stanza, keep it if it is complete
                if filename and hashes and sizes:
                    pieces[filename] = (hashes, sizes)
                filename, hashes, sizes = None, [], []
                in_pieces = False
            elif in_pieces and line[:1] == " ":
                # Continuation line of SHA1-Pieces: 40 hex digits and a size
                hashes.append(binascii.a2b_hex(line[1:41]))
                sizes.append(int(line[42:]))
            else:
                # Any other field ends the piece list; a new SHA1-Pieces
                # header starts one
                in_pieces = (line == "SHA1-Pieces:")
                if line[:9] == "Filename:":
                    filename = line[10:]

        # Keep a final stanza that is not followed by a blank line
        if filename and hashes and sizes:
            pieces[filename] = (hashes, sizes)
    finally:
        # f is either an open file or a list of lines; only the former has
        # something to close, and it must not leak if parsing fails
        close = getattr(f, 'close', None)
        if close is not None:
            try:
                close()
            except Exception:
                pass

    logger.info('successfully retrieved sub-piece data for '+str(len(pieces))+' files')

    return pieces

def getpieces(f, encoding = None, progress = lambda x: None, separate_all = 0, sub_pieces = {}):
    """Extract the piece information from the Packages file.
    
    A package stanza is used only if it has non-empty Filename, Size and
    SHA1 fields.  Packages found in sub_pieces contribute their sub-package
    pieces, all others contribute a single piece covering the whole file.
    
    @type f: C{iterable}
    @param f: the already opened file or file data as a list of strings
    @type encoding: C{string}
    @param encoding: the encoding to use for the file names
        (optional, defaults to the default encoding, or ASCII)
    @type progress: C{method}
    @param progress: the method to call with updates on the progress
        (optional, defaults to not printing progress updates)
    @type separate_all: C{boolean}
    @param separate_all: whether to separate the architecture:all packages into
        a separate torrent (optional, defaults to False)
    @type sub_pieces: C{dictionary}
    @param sub_pieces: the sub-package piece info, keys are the file names,
        values are tuples of a list of piece SHA1 hashes and a list of piece
        sizes (optional, defaults to not using sub-package pieces);
        it is only read, never modified
    @rtype: (C{dictionary}, C{dictionary})
    @return: the two torrents, the second is the architecture:all one, if that
        was requested, otherwise it is None
    
    """
    
    if not encoding:
        encoding = ENCODING
    if not encoding:
        encoding = 'ascii'
    
    # Index 0 collects the main torrent, index 1 the architecture:all one
    pieces = ([], [])
    lengths = ([], [])
    fs = ([], [])
    packages = [0, 0]

    def add_package(size, filename, sha1, arch):
        """Add one parsed stanza to the torrent it belongs to, if complete."""
        if not (size and filename and sha1):
            return

        # Check which torrent to add the info to
        which = 0
        if separate_all and arch == 'all':
            which = 1

        if filename in sub_pieces:
            lengths[which].extend(sub_pieces[filename][1])
            pieces[which].extend(sub_pieces[filename][0])
        else:
            lengths[which].append(size)
            pieces[which].append(sha1)

        # Break the file name up into its path components
        path = []
        rest = filename
        while rest:
            head, tail = split(rest)
            if head == rest:
                # Only a root ('/') is left, which split() cannot shorten
                # any further; stop instead of looping forever
                break
            path.insert(0, tail)
            rest = head
        fs[which].append({'length': size, 'path': uniconvertl(path, encoding)})
        packages[which] += 1
        progress(packages[0] + packages[1])

    size = filename = sha1 = arch = None
    for line in f:
        line = line.rstrip()
        if line == "":
            # A blank line ends the current stanza
            add_package(size, filename, sha1, arch)
            size = filename = sha1 = arch = None
        elif line[:9] == "Filename:":
            filename = line[10:]
        elif line[:5] == "Size:":
            size = long(line[6:])
        elif line[:5] == "SHA1:":
            sha1 = binascii.a2b_hex(line[6:])
        elif line[:13] == "Architecture:":
            arch = line[14:]

    # Keep a final stanza that is not followed by a blank line
    add_package(size, filename, sha1, arch)

    info = None
    info_all = None
    if packages[0] > 0:
        info = {'pieces': ''.join(pieces[0]), 'piece lengths': lengths[0], 'files': fs[0]}
        logger.info('got metainfo for torrent of '+str(len(pieces[0]))+
                    ' pieces for '+str(len(fs[0]))+' files')
    if packages[1] > 0:
        info_all = {'pieces': ''.join(pieces[1]), 'piece lengths': lengths[1], 'files': fs[1]}
        # Report the counts of the architecture:all torrent itself
        logger.info('got metainfo for torrent of '+str(len(pieces[1]))+
                    ' pieces for '+str(len(fs[1]))+' files')

    return (info, info_all)

def makeinfo(file, piece_length, encoding, progress, separate_all = 0, pieces_file = ''):
    """Create the torrent info dictionaries from a Packages file on disk.
    
    The sub-package piece data is retrieved with getsubpieces() and the
    Packages file is then parsed by getpieces().
    
    @type file: C{string}
    @param file: the file name of the Packages file to make into a torrent
    @type piece_length: C{int}
    @param piece_length: not used
    @type encoding: C{string}
    @param encoding: the encoding to use for the file names
    @type progress: C{method}
    @param progress: the method to call with updates on the progress
    @type separate_all: C{boolean}
    @param separate_all: whether to separate the architecture:all packages into
        a separate torrent (optional, defaults to False)
    @type pieces_file: C{string}
    @param pieces_file: the file that contains the piece information
        (optional, defaults to retrieving the info from the web)
    @rtype: (C{dictionary}, C{dictionary})
    @return: the two torrents, the second is the architecture:all one, if that
        was requested, otherwise it is None
    
    """

    sub_pieces = getsubpieces(file, pieces_file)

    f = open(abspath(file))
    try:
        return getpieces(f, encoding, progress, separate_all = separate_all, sub_pieces = sub_pieces)
    finally:
        # Do not leak the handle if parsing the file fails
        f.close()

def subfiles(d):
    """Collect all the files found below a starting point.
    
    The tree is walked with an explicit stack, so results appear in the
    order entries are popped from it.  Directory entries listed in the
    module's ignore list, or whose name starts with a dot, are skipped.
    A starting point that is not a directory is returned as the only result,
    with an empty path list.
    
    @type d: C{string}
    @param d: the top-level directory to start at
    @rtype: C{list} of (C{list} of C{string}, C{string})
    @return: all the files found in the directory, both as a path list
        relative to the starting point and as a joined file name
    
    """
    
    found = []
    pending = [([], d)]
    while pending:
        (parts, location) = pending.pop()
        if not isdir(location):
            found.append((parts, location))
            continue
        for entry in listdir(location):
            if entry in ignore or entry[:1] == '.':
                continue
            # Concatenation builds a new list, the parent's is untouched
            pending.append((parts + [entry], join(location, entry)))
    return found


def completedir(dir, url, params = {}, vc = lambda x: None, fc = lambda x: None):
    """Create a torrent for each file in a directory.
    
    Does not recurse into sub-directories.  Entries that are torrents
    themselves, that already have a torrent next to them, that are named in
    the module's ignore list or that start with a dot are skipped.
    
    @type dir: C{string}
    @param dir: the directory to find files in
    @type url: C{string}
    @param url: the announce address to use for the torrents
    @type params: C{dictionary}
    @param params: the configuration options (optional, defaults to an
        empty dictionary); if 'target' is set it is used as the directory
        to write the torrents to.  The dictionary is not modified.
    @type vc: C{method}
    @param vc: progress report while the torrent generation is underway
    @type fc: C{method}
    @param fc: progress report when a new torrent generation is started
    
    """
    
    files = listdir(dir)
    files.sort()
    ext = '.dtorrent'
    target = params.get('target', '')

    togen = []
    for f in files:
        if not f.endswith(ext) and (f + ext) not in files:
            togen.append(join(dir, f))
        
    for i in togen:
        fc(i)
        try:
            t = split(i)[-1]
            if t not in ignore and t[0] != '.':
                file_params = params
                if target != '':
                    # Set the per-file target on a copy, so the caller's
                    # dictionary (or the shared default) is left untouched
                    file_params = copy(params)
                    file_params['target'] = join(target,t+ext)
                make_meta_file(i, url, file_params, progress = vc)
        except ValueError:
            print_exc()

class TorrentCreator:
    """Create a torrent metainfo from a downloaded Packages file (threaded).
    
    The work is done in a separate thread started by the constructor; the
    results are handed to the callback through the scheduling function.
    
    @type path: C{list} of C{string}
    @ivar path: the path of the file, starting with the mirror name
    @type data: C{string}
    @ivar data: the downloaded Packages file, emptied once it was processed
    @type callback: C{method}
    @ivar callback: the method to call with each torrent that was created
    @type sched: C{method}
    @ivar sched: the method to call to schedule future invocation of a function
    @type separate_all: C{int}
    @ivar separate_all: which torrents to create (see L{_create})
    @type name: C{string}
    @ivar name: the path without its last element, joined by underscores
    @type responses: C{list} of (C{dictionary}, C{string})
    @ivar responses: the created torrents and their names, waiting to be
        passed to the callback
    
    """
    
    def __init__(self, path, data, callback, sched, separate_all = 0):
        """Process a downloaded Packages file and start the torrent making thread.
        
        @type path: C{list} of C{string}
        @param path: the path of the file to download, starting with the mirror name
        @type data: C{string}
        @param data: the downloaded Packages file
        @type callback: C{method}
        @param callback: the method to call with the torrent when it has been created
        @type sched: C{method}
        @param sched: the method to call to schedule future invocation of a function
        @type separate_all: C{boolean}
        @param separate_all: whether to separate the architecture:all packages into
            a separate torrent (optional, defaults to False)
        
        """

        self.path = path
        self.data = data
        self.callback = callback
        self.sched = sched
        self.separate_all = separate_all
        self.name = '_'.join(self.path[:-1])
        self.responses = []

        # Create and start the thread to create the torrent metainfo
        logger.debug('starting thread to create torrent for: '+self.name)
        rq = Thread(target = self._create, name = 'TorrentCreator('+self.name+')')
        rq.setDaemon(False)
        rq.start()
    
    def _create(self):
        """Process a downloaded Packages file and start a torrent.
        
        The main torrent is queued when separate_all is 0, 2 or 3, the
        architecture:all torrent when it is 1 or 3.  In all cases
        L{_finished} is scheduled once the processing is done.
        
        """

        h = []
        try:
            # Decompress the data
            if self.path[-1].endswith('.gz'):
                f = GzipFile(fileobj = StringIO(self.data))
                try:
                    self.data = f.read()
                finally:
                    f.close()
            elif self.path[-1].endswith('.bz2'):
                self.data = decompress(self.data)
            
            # An explicit check, as an assert is removed when running
            # with optimization enabled
            if self.data[:8] != "Package:":
                raise ValueError('data does not start with a Package field')
            h = self.data.split('\n')
            self.data = ''
        except Exception:
            logger.warning('Packages file is not in the correct format')
            self.data = ''
            del h[:]
            self.sched(self._finished)
            return

        logger.debug('Packages file successfully decompressed')
        sub_pieces = getsubpieces('_'.join(self.path))

        (info, info_all) = getpieces(h, separate_all = self.separate_all, sub_pieces = sub_pieces)
        del h[:]
        
        # Everything in the path before 'dists' identifies the mirror
        mirror = []
        if self.path.count('dists'):
            mirror.append('http://' + '/'.join(self.path[:self.path.index('dists')]) + '/')
            
        name = self.name
        if info and self.separate_all in (0, 2, 3):
            response = {'info': info,
                        'name': uniconvert(name)}
            if mirror:
                response['deb_mirrors'] = mirror
            self.responses.append((response, name))

        name = convert_all(self.name)
        if info_all and self.separate_all in (1, 3):
            # The architecture:all torrent must carry its own info, not the
            # main torrent's
            response = {'info': info_all,
                        'name': uniconvert(name)}
            if mirror:
                response['deb_mirrors'] = mirror
            self.responses.append((response, name))
        
        self.sched(self._finished)

    def _finished(self):
        """Wrap up the creation and call the callback function.
        
        The callback is called once per created torrent with the response,
        the torrent name and the original path; the queue is then emptied.
        
        """
        
        for (response, name) in self.responses:
            self.callback(response, name, self.path)
        
        del self.responses[:]
