# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""The functionality for parsing a cvsps dump file"""

import time


class _PatchSet(object):
    """One patchset entry read from a CVSPS dump file.

    Instances are plain records: the parser fills in the fields as it
    encounters the corresponding lines. Only ``num`` is known at
    construction time; every other field starts out as None.
    """

    # Fixed attribute set; instances carry no per-instance __dict__.
    __slots__ = [
        'num',
        'ancestor_branch',
        'author',
        'branch',
        'date',
        'log',
        'members',
        'tag',
        'timestamp',
        'time_offset',
        ]

    def __init__(self, num):
        """Create an empty patchset identified by the number ``num``."""
        # Blank out every declared field first, then record the number.
        for field in _PatchSet.__slots__:
            setattr(self, field, None)
        self.num = num

    def __str__(self):
        summary = (self.num, self.branch, self.author, self.date)
        return 'Patchset(%s, %s, %s, %s)' % summary


class Parser(object):
    """Parse the output of cvsps.

    The dump is consumed line by line. ``_state`` records which part of a
    patchset is currently being read (header fields, the free-form log text,
    or the list of member files) so that multi-line sections can be handled.
    Finished patchsets are accumulated and returned by ``parse()``.
    """

    STATE_GENERAL = None
    STATE_DASHES = 'dashes'
    STATE_LOG = 'log'
    STATE_MEMBERS = 'members'

    PREFIX_ANCESTOR_BRANCH = 'Ancestor branch: '
    PREFIX_AUTHOR = 'Author: '
    PREFIX_BRANCH = 'Branch: '
    PREFIX_DASHES = '---------------------'
    PREFIX_DATE = 'Date: '
    PREFIX_LOG = 'Log:'
    PREFIX_MEMBERS = 'Members:'
    PREFIX_PATCHSET = 'PatchSet '
    PREFIX_TAG = 'Tag: '

    def __init__(self, cvsps_dump_file, encoding=None):
        """Prepare to parse a dump.

        :param cvsps_dump_file: Any iterable yielding the lines of the dump
            (for example an open file object).
        :param encoding: Encoding used to decode byte strings. A false value
            selects 'iso-8859-1'.
        """
        self._cvsps_dump_file = cvsps_dump_file

        # 'iso-8859-1' is the default because it is the most common encoding
        # and every byte value has a mapping in it. 'utf-8' would be another
        # candidate, but some byte sequences are not valid utf-8.
        self._encoding = encoding
        if not self._encoding:
            self._encoding = 'iso-8859-1'

        self._state = self.STATE_GENERAL
        self._patchset = None
        self._lastpatch = None
        self._pb = None

        self._patchsets = []

        # Keep a dict of strings that have been parsed, so duplicated strings
        # share a single object and use less memory.
        self._string_cache = {}
        self._cache_hits = 0

    def _cache(self, s):
        """Return the shared copy of ``s``, remembering it on first sight.

        Every lookup that finds an existing entry is counted in
        ``_cache_hits``, which ``parse()`` reports through ``pb.note``.
        """
        try:
            shared = self._string_cache[s]
        except KeyError:
            self._string_cache[s] = s
            return s
        self._cache_hits += 1
        return shared

    def _decode(self, value):
        """Return ``value`` as text, decoding it only if it is a byte string.

        Text strings are returned unchanged, so the handlers work whether the
        lines they receive are byte strings or already-decoded text.
        """
        if isinstance(value, bytes):
            return value.decode(self._encoding)
        return value

    def _native_line(self, line):
        """Return ``line`` in a form comparable with the PREFIX_* constants.

        Where ``bytes`` and ``str`` are distinct types (Python 3), a bytes
        line cannot be matched against the str prefixes, so it is decoded
        once here. Where ``bytes`` is ``str`` (Python 2) the line is returned
        untouched and only selected fields are decoded later.
        """
        if bytes is not str and isinstance(line, bytes):
            return line.decode(self._encoding)
        return line

    def _handle_ancestor_branch(self, line):
        """Handle an 'Ancestor branch:' line"""
        assert line.startswith(self.PREFIX_ANCESTOR_BRANCH)
        assert self._patchset is not None
        ancestor = line[len(self.PREFIX_ANCESTOR_BRANCH):].rstrip()
        self._patchset.ancestor_branch = self._cache(ancestor)

    def _handle_author(self, line):
        """Handle an 'Author:' line; the author name is stored as text."""
        assert line.startswith(self.PREFIX_AUTHOR)
        assert self._patchset is not None
        author = line[len(self.PREFIX_AUTHOR):].rstrip()
        self._patchset.author = self._cache(self._decode(author))

    def _handle_branch(self, line):
        """Handle a 'Branch:' line"""
        assert line.startswith(self.PREFIX_BRANCH)
        assert self._patchset is not None
        branch = line[len(self.PREFIX_BRANCH):].rstrip()
        self._patchset.branch = self._cache(branch)

    def _handle_dashes(self, line):
        """Handle a separator line made up entirely of dashes."""
        line = line.strip()
        # Make sure the line is all dashes
        assert line == '-'*len(line)
        # A separator is only expected before the first patchset or after a
        # Members: section.
        assert self._state in (None, self.STATE_MEMBERS)
        self._state = self.STATE_DASHES

    def _handle_date(self, line):
        """Handle a 'Date:' line.

        Stores the raw date string, a numeric timestamp rounded to three
        decimals, and a time offset of 0 on the current patchset.
        """
        assert line.startswith(self.PREFIX_DATE)
        assert self._patchset is not None
        date = line[len(self.PREFIX_DATE):].rstrip()
        # XXX: These should all use UTC for the conversion, how to do that?
        # NOTE: time.mktime interprets the tuple as local time, so the
        # timestamp depends on the timezone of the running process.
        time_tuple = time.strptime(date, '%Y/%m/%d %H:%M:%S')
        timestamp = round(time.mktime(time_tuple), 3)

        self._patchset.date = self._cache(date)
        self._patchset.timestamp = timestamp
        self._patchset.time_offset = 0

    def _handle_log(self, line):
        """Handle a 'Log:' line"""
        assert self._patchset is not None
        # Log: just starts the log information on the next few lines
        self._state = self.STATE_LOG
        # We shouldn't have seen Log: before.
        assert self._patchset.log is None
        self._patchset.log = []

    def _handle_log_line(self, line):
        """Append one line of log text (as text) to the current patchset."""
        assert self._patchset is not None
        assert self._patchset.log is not None
        self._patchset.log.append(self._decode(line))

    def _handle_members(self, line):
        """Handle a 'Members:' line"""
        assert self._patchset is not None
        # Members: just starts the member list on the next few lines
        self._state = self.STATE_MEMBERS
        # We shouldn't have seen Members: before.
        assert self._patchset.members is None
        self._patchset.members = []

    def _handle_member_line(self, line):
        """Handle a Members: entry.

        The entry is a tab, a file name, ':' and 'old->new' revisions. The
        file name is everything up to the last ':'; only the revision after
        '->' is kept. Lines containing no ':' are ignored.
        """
        assert line.startswith('\t')
        assert self._patchset is not None
        assert self._patchset.members is not None
        if ':' not in line:
            return
        fname, version = line[1:].rsplit(':', 1)
        fname = self._cache(fname)
        versions = version.split('->')
        assert len(versions) == 2
        version = self._cache(versions[-1].strip())
        self._patchset.members.append((fname, version))

    def _handle_patchset(self, line):
        """Handle a 'PatchSet N' line, which starts a new patchset."""
        assert line.startswith(self.PREFIX_PATCHSET)

        if self._patchset is not None:
            # Process the old patchset
            self._patchsets.append(self._patchset)
            self._patchset = None
            if self._pb is not None:
                self._pb.update('reading patchsets', 0, len(self._patchsets))

        # A PatchSet line must directly follow a dashes separator.
        assert self._state in (self.STATE_DASHES,)

        # Reset the state for a new PatchSet
        patchset_num = int(line[len(self.PREFIX_PATCHSET):])
        self._patchset = _PatchSet(patchset_num)
        self._state = self.STATE_GENERAL

    def _handle_tag(self, line):
        """Handle a 'Tag:' line; the literal '(none)' is stored as None."""
        assert line.startswith(self.PREFIX_TAG)
        assert self._patchset is not None
        tag = line[len(self.PREFIX_TAG):].rstrip()
        if tag == '(none)':
            self._patchset.tag = None
        else:
            self._patchset.tag = self._cache(tag)

    def parse(self, pb=None):
        """Parse the dump file and return the list of patchsets.

        :param pb: Optional progress object; only its ``update`` and ``note``
            methods are called.
        :return: The list of patchsets in the order they were read. If an
            earlier call already produced patchsets, that list is returned
            and the file is not read again.
        :raises AssertionError: If a line matches none of the known prefixes,
            or a handler finds the line in an unexpected position.
        """
        if self._patchsets:
            return self._patchsets

        self._pb = pb

        self._string_cache.clear()

        # No prefix is a prefix of another, so the order is not significant.
        handlers = (
            (self.PREFIX_ANCESTOR_BRANCH, self._handle_ancestor_branch),
            (self.PREFIX_AUTHOR, self._handle_author),
            (self.PREFIX_BRANCH, self._handle_branch),
            (self.PREFIX_DASHES, self._handle_dashes),
            (self.PREFIX_DATE, self._handle_date),
            (self.PREFIX_LOG, self._handle_log),
            (self.PREFIX_MEMBERS, self._handle_members),
            (self.PREFIX_PATCHSET, self._handle_patchset),
            (self.PREFIX_TAG, self._handle_tag),
        )
        for line in self._cvsps_dump_file:
            line = self._native_line(line)

            # first, check current state, if we are in log or members,
            # then we need to handle multiline entries

            if self._state == self.STATE_LOG:
                # end of log is indicated by an empty line, followed
                # by 'Members:'
                if (self._patchset.log[-1:] == ['\n']
                    and line.startswith(self.PREFIX_MEMBERS)):
                    # Removing the blank trailing line
                    self._patchset.log.pop()
                    # And continue on to the standard handler
                else:
                    self._handle_log_line(line)
                    continue

            elif self._state == self.STATE_MEMBERS:
                # All member lines start with a tab
                if line.startswith('\t'):
                    self._handle_member_line(line)
                    continue

            # Blank lines are ignored
            if not line or line == '\n':
                continue

            for prefix, handler in handlers:
                if line.startswith(prefix):
                    handler(line)
                    break
            else:
                # Raised explicitly (rather than ``assert False``) so that
                # unrecognised input is still rejected when Python runs
                # with assertions disabled.
                raise AssertionError('Failed to process: %r' % (line,))

        # We've processed all the lines
        if self._patchset is not None:
            self._patchsets.append(self._patchset)
            self._patchset = None
        if self._pb is not None:
            self._pb.update('reading patchsets', 0, len(self._patchsets))

            self._pb.note('Read %s patchsets (string cache hits: %s, total: %s)',
                len(self._patchsets), self._cache_hits,
                len(self._string_cache))

        # The cache is only needed while parsing; the parsed strings stay
        # alive through the patchsets that reference them.
        self._string_cache.clear()
        self._cache_hits = 0

        self._pb = None
        return self._patchsets

