########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Rdf/_4rdf.py,v 1.29 2004/09/08 09:11:07 mbrown Exp $
"""
Implementation of '4rdf' command
(functions defined here are used by the Ft.Lib.CommandLine framework)

Copyright 2004 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import os, sys

from Ft.Lib import CloseStream
from Ft.Lib.CommandLine.CommandLineUtil import SourceArgToInputSource
from Ft.Lib.Uri import BASIC_RESOLVER
from Ft.Rdf import __version__
from Ft.Rdf import Model, RDF_MS_BASE, RDF_SCHEMA_BASE, DAML_OIL_NS
from Ft.Rdf.Serializers.Dom import Serializer as DomSerializer
from Ft.Rdf.Serializers.NTriples import Serializer as NTSerializer
from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint
from Ft.Xml.InputSource import DefaultFactory

# Rebind the module docstring to the user-facing description; this value is
# what RdfCommandLineApp.__init__ hands to the CommandLineApp constructor.
__doc__ = """4RDF command-line application
  Note: This is a general-purpose RDF tool. If you wish to query a 4Suite
  repository, you may find '4ss rdf' more convenient."""

# Name the program was invoked under; Run() interpolates it into the
# "Use ... -h to get help" hint when no RDF source is given.
INVOKED_AS = sys.argv[0]

def Run(options, args):
    dump = options.get('dump')
    full = options.get('full-uris')
    serialize = options.get('serialize')
    driver = options.get('driver')
    dbname = options.get('dbname')
    suri = options.get('source-uri', BASIC_RESOLVER.generate())
    source_args = args.get('source', [])
    in_format = options.get('input-format', 'rdfxml')

    #print (dump, full, serialize, driver, dbname, source_args)

    #Make dump the default action given "4rdf file.rdf"
    if source_args and not (driver or serialize):
        dump = 1

    if source_args and (dump or serialize):
        driver = "Memory"

    # FIXME: move this to option/arg validation func
    if not driver:
        raise SystemExit("You must specify a source for the RDF to be processed, either by filename,\nor by database driver and name.\nUse %s -h to get help"%INVOKED_AS)

    try:
        driver = "Ft.Rdf.Drivers." + driver
        driver_mod = __import__(driver, {}, {}, ["*"])
    except:
        raise SystemExit("Unknown driver '%s'" % driver)

    if driver_mod.ExistsDb(dbname):
        db = driver_mod.GetDb(dbname)
    else:
        db = driver_mod.CreateDb(dbname)

    db.begin()
    m = Model.Model(db)

    if in_format == "rdfxml":
        reader = NonvalidatingReader
        serializer = DomSerializer()
        for source_arg in source_args:
            try:
                source_isrc = SourceArgToInputSource(source_arg, DefaultFactory)
                doc = reader.parse(source_isrc)
                CloseStream(source_isrc, quiet=True)
                suri = suri or source_isrc.uri
                serializer.deserialize(m, doc, suri)
            except Exception, e:
                sys.stderr.write(str(e)+'\n')
                sys.stderr.flush()
                return

    elif in_format == "ntriples":
        serializer = NTSerializer()
        for source_arg in source_args:
            try:
                source_isrc = SourceArgToInputSource(source_arg, DefaultFactory)
                suri = suri or source_isrc.uri
                serializer.deserialize(m, source_isrc, suri)
                CloseStream(source_isrc, quiet=True)
            except Exception, e:
                sys.stderr.write(str(e)+'\n')
                sys.stderr.flush()
                return

    if dump:
        stmts = m.complete(None, None, None)
        sys.stderr.write('The following is a list of remaining tuples, each in the form' \
              ' "subject, predicate, object".  If a resource has an rdfs:label,' \
              ' it is presented with this label between angle brackets.\n')
        sys.stderr.flush()
        print "["
        for stmt in stmts:
            if full:
                subj = stmt.subject
                pred = stmt.predicate
                obj = stmt.object
            else:
                subj = CheckLabel(m, stmt.subject)
                pred = CheckLabel(m, stmt.predicate)
                obj = CheckLabel(m, stmt.object)
            print (u'("%s", "%s", "%s"),'%(subj, pred, obj)).encode('utf-8')
        print "]"

    if serialize == "rdfxml":
        serializer = DomSerializer()
        outdoc = serializer.serialize(m)
        PrettyPrint(outdoc, stream=sys.stdout)

    elif serialize == "ntriples":
        serializer = NTSerializer()
        serializer.serialize(m, stream=sys.stdout)

    db.commit()
    return


# Cache of display labels keyed by resource URI, consulted and extended by
# CheckLabel(). It is pre-seeded with abbreviations for well-known RDF,
# RDF Schema and DAML+OIL terms. CheckLabel() stores None for a resource
# that was looked up and found to have no rdfs:label.
g_labelCache = {
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#type': '<rdf:type>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#value': '<rdf:value>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject': '<rdf:subject>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate': '<rdf:predicate>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#object': '<rdf:object>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property': '<rdf:Property>',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement': '<rdf:Statement>',
    'http://www.w3.org/2000/01/rdf-schema#label': '<rdfs:label>',
    'http://www.w3.org/2000/01/rdf-schema#comment': '<rdfs:comment>',
    'http://www.w3.org/2000/01/rdf-schema#Class': '<rdfs:Class>',
    'http://www.w3.org/2000/01/rdf-schema#subClassOf': '<rdfs:subClassOf>',
    'http://www.w3.org/2000/01/rdf-schema#subPropertyOf': '<rdfs:subPropertyOf>',
    'http://www.w3.org/2000/01/rdf-schema#isDefinedBy': '<rdfs:isDefinedBy>',
    'http://www.w3.org/2000/01/rdf-schema#domain': '<rdfs:domain>',
    'http://www.w3.org/2000/01/rdf-schema#range': '<rdfs:range>',
    'http://www.w3.org/2000/01/rdf-schema#Literal': '<rdfs:Literal>',
    'http://www.w3.org/2000/01/rdf-schema#Resource': '<rdfs:Resource>',

    DAML_OIL_NS+'Class': '<daml:Class>',
    DAML_OIL_NS+'List': '<daml:List>',
    DAML_OIL_NS+'first': '<daml:first>',
    DAML_OIL_NS+'rest': '<daml:rest>',
    DAML_OIL_NS+'nil': '<daml:nil>',
    DAML_OIL_NS+'Property': '<daml:Property>',
    DAML_OIL_NS+'onProperty': '<daml:onProperty>',
    DAML_OIL_NS+'Ontology': '<daml:Ontology>',
    DAML_OIL_NS+'cardinality': '<daml:cardinality>',
    DAML_OIL_NS+'cardinalityQ': '<daml:cardinalityQ>',
    DAML_OIL_NS+'complementOf': '<daml:complementOf>',
    DAML_OIL_NS+'Disjoint': '<daml:Disjoint>',
    DAML_OIL_NS+'disjointUnionOf': '<daml:disjointUnionOf>',
    DAML_OIL_NS+'disjointWith': '<daml:disjointWith>',
    DAML_OIL_NS+'domain': '<daml:domain>',
    DAML_OIL_NS+'equivalentTo': '<daml:equivalentTo>',
    DAML_OIL_NS+'hasClass': '<daml:hasClass>',
    DAML_OIL_NS+'hasClassQ': '<daml:hasClassQ>',
    DAML_OIL_NS+'hasValue': '<daml:hasValue>',
    DAML_OIL_NS+'imports': '<daml:imports>',
    DAML_OIL_NS+'maxCardinality': '<daml:maxCardinality>',
    DAML_OIL_NS+'maxCardinalityQ': '<daml:maxCardinalityQ>',
    DAML_OIL_NS+'minCardinality': '<daml:minCardinality>',
    DAML_OIL_NS+'minCardinalityQ': '<daml:minCardinalityQ>',
    DAML_OIL_NS+'oneOf': '<daml:oneOf>',
    DAML_OIL_NS+'Restriction': '<daml:Restriction>',
    DAML_OIL_NS+'sameClassAs': '<daml:sameClassAs>',
    DAML_OIL_NS+'samePropertyAs': '<daml:samePropertyAs>',
    DAML_OIL_NS+'subClassOf': '<daml:subClassOf>',
    DAML_OIL_NS+'subPropertyOf': '<daml:subPropertyOf>',
    DAML_OIL_NS+'toClass': '<daml:toClass>',
    DAML_OIL_NS+'TransitiveProperty': '<daml:TransitiveProperty>',
    # Was misspelled "UnanmiguousProperty", which can never match the
    # actual DAML+OIL term and so was never abbreviated.
    DAML_OIL_NS+'UnambiguousProperty': '<daml:UnambiguousProperty>',
    DAML_OIL_NS+'unionOf': '<daml:unionOf>',
    DAML_OIL_NS+'UniqueProperty': '<daml:UniqueProperty>',
    DAML_OIL_NS+'versionInfo': '<daml:versionInfo>',
    }

# Blank-node label -> sequence number, used by CheckLabel() to print
# stable "ANON-n" names
g_anonMapping = {}
# Next sequence number to hand out for a previously unseen blank node
g_anonCounter = 1

def CheckLabel(model, text):
    """
    Return a display form for the resource or literal 'text'.

    - A blank-node label (as judged by model.isBnodeLabel) becomes
      "ANON-n", where n is assigned on first sight and reused afterwards.
    - Otherwise, if the model holds an rdfs:label statement for 'text',
      the object of the first such statement is returned between angle
      brackets.
    - Otherwise 'text' is returned unchanged.

    Label lookups, including negative ones (stored as None), are remembered
    in the module-level g_labelCache.
    """
    global g_anonCounter

    if model.isBnodeLabel(text):
        if text not in g_anonMapping:
            # First encounter: allocate the next sequence number
            g_anonMapping[text] = g_anonCounter
            g_anonCounter += 1
        return "ANON-%i" % g_anonMapping[text]

    if text not in g_labelCache:
        # Not seen before: ask the model once and remember the outcome
        found = model.complete(text, RDF_SCHEMA_BASE + "label", None)
        if found:
            g_labelCache[text] = "<%s>"%found[0].object
        else:
            g_labelCache[text] = None

    cached = g_labelCache[text]
    if cached is None:
        return text
    return cached


from Ft.Lib.CommandLine import Options, CommandLineApp, Arguments, Command

class RdfCommandLineApp(CommandLineApp.CommandLineApp):

    def __init__(self):
        CommandLineApp.CommandLineApp.__init__(
            self,
            '4rdf',
            '4RDF version %s' % __version__,
            __doc__,
            [],
            ourOptions = Options.Options([
                Options.Option('V',
                               'version',
                               'Display program version and exit',
                               ),
                Options.Option('d',
                               'dump',
                               'Dump the resulting triples from the specified file or database in Python '\
                               'list-of-tuple form to standard output (no persistent '\
                               'changes are made). This is the default action if no driver is specified',
                               ),
                #http://lists.w3.org/Archives/Public/w3c-rdfcore-wg/2001May/0264
                Options.Option(None,
                               'input-format=FORMAT',
                               'The format of the input file, can be "rdfxml" (the default) or "ntriples"'),
                #Options.Option('p',
                #               'dump-p',
                #               'Dump the resulting triples from the specified file or database '\
                #               'in the W3C\'s prolog form to standard output (no persistent '\
                #               'changes are made)',
                #               ),
                Options.Option('s',
                               'serialize=FORMAT',
                               'Serialize the RDF from the given file or database '\
                               'to standard output (no persistent '\
                               'changes are made). Uses the given format, "rdfxml" or "ntriples"',
                               ),
                Options.Option('f',
                               'full-uris',
                               'Do not abbreviate URIs according to RDF labels and built-in abbreviations',
                               ),
                Options.Option(None,
                               'driver=DRIVER',
                               'Use the given 4RDF backend driver',
                               ),
                Options.Option(None,
                               'dbname=DBNAME',
                               'Update the RDF model database '\
                               'with the given name',
                               ),
                Options.Option(None,
                               'source-uri=URI',
                               'Override the URI used as the base of the document during deserialization'
                               ),
                ]),

            enableShowCommands = 0
            )

        self.function = Run
        self.arguments = [
            Arguments.ZeroOrMoreArgument('source',
                                         'The URI of an RDF/XML or N-Triples document, or "-" to indicate standard input.',
                                        str),
            ]


    def validate_options(self, options):
        if options.has_key('version'):
            print '4RDF, from 4Suite %s' % __version__
            return
        else:
            return Command.Command.validate_options(self, options)


    def validate_arguments(self, args):
        return Command.Command.validate_arguments(self, args)
