from __future__ import with_statement
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Conversion to EPUB.
'''
import sys, textwrap, re, os, uuid
from itertools import cycle
from calibre.utils.config import Config, StringConfig
from calibre.utils.zipfile import ZipFile, ZIP_STORED
from calibre.ebooks.html import config as common_config, tostring
from lxml import etree

class DefaultProfile(object):
    '''
    Device profile used when no device specific restrictions apply.

    Other profiles subclass this one and override individual attributes.
    '''

    # No practical limit. sys.maxint only exists on Python 2, so fall back to
    # sys.maxsize on interpreters that lack it (the value is unchanged on
    # Python 2, where the first operand is always truthy).
    # NOTE(review): presumably the maximum size of a single HTML flow -- confirm
    # the unit against the code that consumes flow_size.
    flow_size            = getattr(sys, 'maxint', None) or sys.maxsize
    # None means that no screen size is specified for this profile.
    screen_size          = None
    # Either False or a compiled regular expression (see subclasses).
    remove_special_chars = False
    remove_object_tags   = False
    
class PRS505(DefaultProfile):
    '''
    Profile selected by the name 'PRS505' (presumably the Sony PRS-505
    reader -- confirm). Overrides every attribute of the default profile.
    '''

    # NOTE(review): looks like (width, height) of the screen -- confirm units.
    screen_size          = (590, 765)
    # NOTE(review): presumably an upper bound on the size of one HTML flow.
    flow_size            = 270000
    remove_object_tags   = True
    # Matches ZERO WIDTH SPACE (U+200B) and SOFT HYPHEN (U+00AD).
    remove_special_chars = re.compile(u'[\u200b\u00ad]')
        

# Maps the value accepted by the --profile option to the profile class.
# The key for the device independent profile is the *string* 'None'.
PROFILES = {
    'PRS505': PRS505,
    'None': DefaultProfile,
}

def rules(stylesheets):
    '''
    Generate every style rule found in `stylesheets`.

    Entries of `stylesheets` that have no ``cssText`` attribute are skipped.
    The remaining entries are iterated and each contained rule whose ``type``
    equals its own ``STYLE_RULE`` constant is yielded, in order.
    '''
    for sheet in stylesheets:
        if not hasattr(sheet, 'cssText'):
            # Not a parsed stylesheet object, nothing to iterate over
            continue
        for rule in sheet:
            if rule.type == rule.STYLE_RULE:
                yield rule

def decrypt_font(key, path):
    '''
    De-obfuscate the font file at `path`, rewriting it in place.

    Only the first 1024 bytes of the file are changed: each of them is XORed
    with the next value taken from `key`, which is repeated cyclically. The
    remainder of the file is written back untouched. As XOR is its own
    inverse, calling this function twice with the same key restores the
    original file.

    :param key: Non-empty sequence of integers in the range 0-255.
    :param path: Path to the file to be rewritten.
    '''
    # Read everything up front and close the handle before the same path is
    # reopened for writing.
    with open(path, 'rb') as f:
        raw = f.read()
    # A bytearray yields integers when indexed on both Python 2 (>= 2.6) and
    # Python 3, so no chr()/ord() round trips are needed.
    header = bytearray(raw[:1024])
    keystream = cycle(key)
    for i in range(len(header)):
        header[i] ^= next(keystream)
    with open(path, 'wb') as f:
        f.write(bytes(header))
        f.write(raw[1024:])

def process_encryption(encfile, opf):
    '''
    De-obfuscate the fonts listed in the encryption file of an extracted EPUB.

    The key is derived from the first ``urn:uuid:...`` identifier found in the
    OPF file (the 16 raw bytes of that UUID). Every ``EncryptionMethod``
    element in `encfile` must use the algorithm
    ``http://ns.adobe.com/pdf/enc#RC``; the file named by the accompanying
    ``CipherReference`` is then passed to :func:`decrypt_font` if it exists.

    :param encfile: Path to the encryption XML file. Referenced URIs are
                    resolved relative to the parent of its directory.
    :param opf: Path to the OPF file to read the UUID from.
    :return: True if the encryption file was processed completely. False if
             an entry uses a different algorithm (entries before it may
             already have been decrypted) or if an error occurred, in which
             case the traceback is printed.
    '''
    key = None
    with open(opf, 'rb') as f:
        raw_opf = f.read()
    m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', raw_opf)
    if m:
        key = m.group(1)
        key = list(map(ord, uuid.UUID(key).bytes))
    try:
        base = os.path.abspath(os.path.join(os.path.dirname(encfile), '..'))
        # join(base, '') appends exactly one trailing separator
        base_prefix = os.path.join(base, '')
        root = etree.parse(encfile)
        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
            algorithm = em.get('Algorithm', '')
            if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
                return False
            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
            uri = cr.get('URI')
            path = os.path.abspath(os.path.join(base, *uri.split('/')))
            if not path.startswith(base_prefix):
                # The URI comes from the (untrusted) book. Never rewrite
                # files that live outside the extracted container.
                continue
            if os.path.exists(path):
                decrypt_font(key, path)
        return True
    except Exception:
        # Best effort: report the problem and signal failure to the caller,
        # but let KeyboardInterrupt/SystemExit propagate.
        import traceback
        traceback.print_exc()
    return False

def initialize_container(path_to_container, opf_name='metadata.opf'):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
    CONTAINER='''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="%s" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''%opf_name
    zf = ZipFile(path_to_container, 'w')
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0700)
    zf.writestr('META-INF/container.xml', CONTAINER)
    return zf

def config(defaults=None, name='epub'):
    '''
    Build the option set that controls the conversion to EPUB.

    The options returned by ``common_config()`` are merged in first, with
    ``output`` and ``zip`` removed (``output`` is re-added with an EPUB
    specific help text). EPUB specific options are then added, organized in
    the groups "structure detection", "toc", "page layout" and "debug".

    :param defaults: If None, a ``Config`` object called `name` is created,
                     otherwise a ``StringConfig`` built from `defaults`.
                     NOTE(review): presumably `defaults` is a serialized
                     set of option values -- confirm in calibre.utils.config.
    :param name: Name passed to ``Config``. Unused when `defaults` is given.
    :return: The populated configuration object.
    '''
    desc = _('Options to control the conversion to EPUB')
    if defaults is None:
        c = Config(name, desc)
    else:
        c = StringConfig(defaults, desc)
    
    # Start from the options shared with the HTML conversion code and replace
    # the ones that do not apply to EPUB output.
    c.update(common_config())
    c.remove_opt('output')
    c.remove_opt('zip')
    
    c.add_opt('output', ['-o', '--output'], default=None,
             help=_('The output EPUB file. If not specified, it is '
                    'derived from the input file name.'))
    c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
              help=_('Profile of the target device this EPUB is meant for. '
                     'Set to None to create a device independent EPUB. '
                     'The profile is used for device specific restrictions '
                     'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
    c.add_opt('override_css', ['--override-css'], default=None,
              help=_('Either the path to a CSS stylesheet or raw CSS. '
                     'This CSS will override any existing CSS '
                     'declarations in the source files.'))
    # Options for detecting chapters, covers and page boundaries
    structure = c.add_group('structure detection', 
                            _('Control auto-detection of document structure.'))
    structure('chapter', ['--chapter'], 
              default="//*[re:match(name(), 'h[1-2]') and "
              "re:test(., 'chapter|book|section|part', 'i')] | "
              "//*[@class = 'chapter']",
            help=_('''\
An XPath expression to detect chapter titles. The default is to consider <h1> or
<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as 
well as any tags that have class="chapter". 
The expression used must evaluate to a list of elements. To disable chapter detection,
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
help on using this feature.
''').replace('\n', ' '))
    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
              default='pagebreak', 
              help=_('Specify how to mark detected chapters. A value of '
                     '"pagebreak" will insert page breaks before chapters. '
                     'A value of "rule" will insert a line before chapters. '
                     'A value of "none" will disable chapter marking and a '
                     'value of "both" will use both page breaks and lines '
                     'to mark chapters.'))
    structure('cover', ['--cover'], default=None,
              help=_('Path to the cover to be used for this book'))
    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
              action='store_true',
              help=_('Use the cover detected from the source file in preference '
                     'to the specified cover.'))
    structure('remove_first_image', ['--remove-first-image'], default=False,
              help=_('Remove the first image from the input ebook. Useful if '
                     'the first image in the source file is a cover and you '
                     'are specifying an external cover.'))
    structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
              help=_('Turn off splitting at page breaks. Normally, input files '
                     'are automatically split at every page break into '
                     'two files. This gives an output ebook that can be parsed '
                     'faster and with less resources. However, splitting is '
                     'slow and if your source file contains a very large '
                     'number of page breaks, you should turn off splitting '
                     'on page breaks.'))
    structure('page', ['--page'], default=None,
              help=_('XPath expression to detect page boundaries for building '
                     'a custom pagination map, as used by AdobeDE. Default is '
                     'not to build an explicit pagination map.'))
    structure('page_names', ['--page-names'], default=None,
              help=_('XPath expression to find the name of each page in the '
                     'pagination map relative to its boundary element. '
                     'Default is to number all pages staring with 1.'))
    # Options for generating the Table of Contents
    toc = c.add_group('toc', 
        _('''\
Control the automatic generation of a Table of Contents. If an OPF file is detected
and it specifies a Table of Contents, then that will be used rather than trying
to auto-generate a Table of Contents.
''').replace('\n', ' '))
    toc('max_toc_links', ['--max-toc-links'], default=50, 
        help=_('Maximum number of links to insert into the TOC. Set to 0 '
               'to disable. Default is: %default. Links are only added to the '
               'TOC if less than the --toc-threshold number of chapters were detected.'))
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
        help=_("Don't add auto-detected chapters to the Table of Contents."))
    toc('toc_threshold', ['--toc-threshold'], default=6,
        help=_('If fewer than this number of chapters is detected, then links '
               'are added to the Table of Contents. Default: %default'))
    toc('level1_toc', ['--level1-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level one. If this is specified, '
               'it takes precedence over other forms of auto-detection.'))
    toc('level2_toc', ['--level2-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level two. Each entry is added '
               'under the previous level one entry.'))
    toc('level3_toc', ['--level3-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level three. Each entry is added '
               'under the previous level two entry.'))
    toc('from_ncx', ['--from-ncx'], default=None,
        help=_('Path to a .ncx file that contains the table of contents to use '
               'for this ebook. The NCX file should contain links relative to '
               'the directory it is placed in. See '
               'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
               'an overview of the NCX format.'))
    toc('use_auto_toc', ['--use-auto-toc'], default=False,
        help=_('Normally, if the source file already has a Table of Contents, '
               'it is used in preference to the autodetected one. '
               'With this option, the autodetected one is always used.'))
    
    # Options for margins, fonts and other layout related settings
    layout = c.add_group('page layout', _('Control page layout'))
    layout('margin_top', ['--margin-top'], default=5.0, 
           help=_('Set the top margin in pts. Default is %default'))
    layout('margin_bottom', ['--margin-bottom'], default=5.0, 
           help=_('Set the bottom margin in pts. Default is %default'))
    layout('margin_left', ['--margin-left'], default=5.0, 
           help=_('Set the left margin in pts. Default is %default'))
    layout('margin_right', ['--margin-right'], default=5.0, 
           help=_('Set the right margin in pts. Default is %default'))
    # Note that the option is stored as base_font_size2 although the command
    # line switch is --base-font-size.
    layout('base_font_size2', ['--base-font-size'], default=12.0,
           help=_('The base font size in pts. Default is %defaultpt. '
                  'Set to 0 to disable rescaling of fonts.'))
    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
           help=_('Remove spacing between paragraphs. '
                  'Also sets a indent on paragraphs of 1.5em. '
                  'You can override this by adding p {text-indent: 0cm} to '
                  '--override-css. Spacing removal will not work if the source '
                  'file forces inter-paragraph spacing.'))
    layout('no_justification', ['--no-justification'], default=False,
           help=_('Do not force text to be justified in output.'))
    layout('linearize_tables', ['--linearize-tables'], default=False,
           help=_('Remove table markup, converting it into paragraphs. '
                  'This is useful if your source file uses a table to manage layout.'))
    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
           help=_('Preserve the HTML tag structure while splitting large HTML files. '
                  'This is only neccessary if the HTML files contain CSS that '
                  'uses sibling selectors. Enabling this greatly slows down '
                  'processing of large HTML files.'))
    
    # Debugging aids, added directly to the 'debug' group
    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
              help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
              help=_('Print generated NCX file to stdout'))
    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', 
              default=False,
              help=_('Keep intermediate files during processing by html2epub'))
    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
              help=_('Extract the contents of the produced EPUB file to the '
                     'specified directory.'))
    return c
