#!/usr/bin/env python
# Copyright (C) 2009, Parrot Foundation
# $Id: parrot-fuzzer 39413 2009-06-05 23:19:30Z chromatic $

from fusil.application    import Application
from fusil.process.watch  import WatchProcess
from fusil.process.create import CreateProcess
from fusil.process.stdout import WatchStdout
from fusil.project_agent  import ProjectAgent
from fusil.process.tools  import locateProgram
from fusil.write_code     import WriteCode
from optparse             import OptionGroup
import re
import string
import random

'''

=head1 Name

parrot-fuzzer

=head1 Description

This is a fuzzer for Parrot, written in Python using the fusil library.  It
attempts to break Parrot by generating calls to random PIR opcodes.

=head1 Dependencies

This script requires Python 2.5+ to run.  The fusil
L<http://fusil.hachoir.org/trac> and python-ptrace
L<http://python-ptrace.hachoir.org/trac> libraries are also required.

=head1 Running

Short version: C<sudo ./tools/dev/parrot-fuzzer>

C<parrot-fuzzer> is run like any other fusil-based fuzzer.  Fusil likes to be
run as root.  This is so that the child process in which Parrot runs can be put
in a more restricted environment, limiting potential damage.

fusil assumes the existence of a C<fusil> user and group.  Parrot runs as this
user/group as part of its restricted environment.  Passing C<--unsafe> allows
it to run as the current user.  Although it is not likely that this will cause
any damage to your system, it is possible.

C<parrot-fuzzer> needs access to Parrot's source code in order to figure out
which PMCs and ops are available.  It assumes that it's running in the root dir
of Parrot's source code.  You can use a different dir via
C<--parrot_root=/some/other/path>.

=head1 Options

=over 4

=item C<--parrot_root=/path/to/parrot>

Specify the path to the root of Parrot's source dir.  By default, this is the
current dir.

=item C<--runcore=--some-runcore>

Specify which runcore to use when running Parrot.  The default is the C<slow>
core, given as C<--slow-core>.  This option corresponds directly to Parrot's
C<--runcore> option.  Other
runcores include C<fast>, C<jit>, C<cgoto>, C<cgp>, C<cgp-jit>, and C<switch>.
See Parrot's help for more details.

=item C<--ignore_blacklist>

Some PMCs and opcodes are known to cause false positives or results of limited
value.  These are blacklisted by default.  Using C<--ignore_blacklist> causes
the fuzzer to use all available PMCs and opcodes, even those known to behave
badly during testing.

=item C<--instructions=10>

Generate this number of instructions during each test run.  The default is 3.
Note that a larger number such as 20 does not necessarily result in more
failures.

=back

=head1 License

This program is distributed under the same license as Parrot itself.

=cut

'''

class ParrotFuzzer(Application):
    """Fusil application for fuzzing Parrot.

    Declares the fuzzer-specific command-line options and creates the
    agents (Parrot process, PIR generator, watchers) for the project.
    """

    # Base name of the dir where temp files and successful results will be
    # stored.
    NAME="parrot-fuzz"

    def createFuzzerOptions(self, parser):
        """Return the "Parrot fuzzer" option group built on *parser*."""
        group = OptionGroup(parser, "Parrot fuzzer")
        # (flag, add_option keyword arguments), registered in this order.
        option_table = (
            ("--parrot_root", dict(
                help="Parrot program path (default: .)",
                type="str",
                default=".")),
            ("--runcore", dict(
                help="Run Parrot with the specified runcore (default: --slow-core)",
                type="str",
                default="--slow-core")),
            ("--instructions", dict(
                help="Generate this many instructions per test run (default: 3)",
                type="int",
                default="3")),
            ("--ignore_blacklist", dict(
                help="Use opcodes and PMCs known to cause bad or questionable results (default: use blacklists)",
                action="store_true",
                default=False)),
        )
        for flag, settings in option_table:
            group.add_option(flag, **settings)
        return group


    def setupProject(self):
        """Create the agents that make up one fuzzing project.

        The parrot executable is looked up under --parrot_root and is run
        with the chosen runcore flag plus a "<fuzzy.pir>" placeholder
        argument.  A PIR generator is created alongside it, and the process
        and its stdout are watched.
        """
        opts         = self.options
        root_dir     = opts.parrot_root
        core_flag    = opts.runcore
        parrot_bin   = locateProgram(root_dir + "/parrot")
        command      = [parrot_bin, core_flag, "<fuzzy.pir>"]
        parrot_proc  = ParrotProcess(self.project, command)
        # Not used again in this method; it is created for what its
        # constructor does (presumably registering the agent with the
        # project -- confirm against fusil's ProjectAgent).
        generator    = PirGenerator(self.project, opts)
        WatchProcess(parrot_proc)
        WatchStdout(parrot_proc)

class PirGenerator(ProjectAgent, WriteCode):
    """Agent that writes a random PIR program at the start of each session.

    The program consists of a fixed preamble that installs a catch-all
    exception handler, the setup code for the generated arguments, a body of
    randomly chosen opfunc calls and a fixed postamble.
    """

    def __init__(self, project, options):
        """Create the opfunc/argument generators from the parsed *options*."""
        root  = options.parrot_root
        count = options.instructions
        skip  = options.ignore_blacklist

        self.parrot_root       = root
        self.instruction_count = count
        self.ignore_blacklist  = skip
        self.opfunc_gen        = OpfuncGenerator()
        self.arg_gen           = ArgGenerator(root, skip)

        self.opfunc_gen.populateOpfuncList(root, skip)

        ProjectAgent.__init__(self, project, "pir_source")
        WriteCode.__init__(self)

    def generatePir(self, filename):
        """Generate a random PIR program and write it to *filename*."""

        self.pir_body     = ''
        self.pir_preamble = """
.sub main
    $P0 = new ['ExceptionHandler']
    set_addr $P0, catchall
    push_eh $P0   #pokemon: gotta catch 'em all
"""
        self.pir_postamble = """
catchall:
    #Don't do anything with exceptions: we're hoping for a segfault or similar.
.end
"""
        # Number of instructions to generate.  Strangely, a low number like 3
        # seems to generate slightly more faults than a high number like 20.
        wanted = self.instruction_count
        self.pir_body += "".join(["    #generating ", str(wanted), " instructions\n"])

        self.createFile(filename)
        generator = self.arg_gen

        # Pick the opfuncs to call.
        calls = [OpfuncCall(*self.opfunc_gen.getOpfunc()) for _ in range(wanted)]

        # Tell the argument generator how many args of each type are needed
        # across all of the chosen calls.
        for kind in ('s', 'p', 'i', 'n', 'sc', 'ic', 'nc'):
            needed = sum(call.getArgCount(kind) for call in calls)
            generator.setArgCount(kind, needed)

        # Generate the args, adding any supporting code to the preamble.
        producers = (
            generator.generateStringArgs,
            generator.generatePMCArgs,
            generator.generateIntArgs,
            generator.generateNumArgs,
            generator.generateStringConstArgs,
            generator.generateIntConstArgs,
            generator.generateNumConstArgs,
        )
        for produce in producers:
            self.pir_preamble += produce()

        # Put the args into the opfunc calls and collect the call text.
        for call in calls:
            for position in range(call.getTotalArgCount()):
                kind = call.getArgType(position)
                call.setArgVal(position, generator.getArgVal(kind))
            self.pir_body += call.getOpfuncCall()

        # Write the code.
        for section in (self.pir_preamble, self.pir_body, self.pir_postamble):
            self.write(0, section)
        self.close()

    def on_session_start(self):
        """Generate fuzzy.pir for the new session and send a 'pir_source' message."""
        pir_path = self.session().createFilename('fuzzy.pir')
        self.generatePir(pir_path)
        self.send('pir_source', pir_path)

#Representation of a call to an opfunc, including values of arguments
#Note that arguments are literal, e.g. '$P0', '"foo"', etc
class OpfuncCall:
    """A single call to an opfunc together with the values of its arguments.

    The signature is the underscore-separated list of argument types that
    follows the short name in the op's long name (e.g. ``i_i_ic`` for
    ``add_i_i_ic``); an empty signature means the op takes no arguments.
    Argument values are literal PIR text such as ``$P0`` or ``"foo"``.
    """

    def __init__(self, name, sig):
        """Record the op *name* and split *sig* into per-argument types."""
        self.name = name
        if sig == '':
            self.long_name = name
            self.arg_types = []
        else:
            self.long_name = name + '_' + sig
            # str.split works on every Python version; the string-module
            # function used previously no longer exists in Python 3.
            self.arg_types = sig.split('_')
        # One (initially empty) value slot per argument.
        self.arg_vals = [''] * len(self.arg_types)
        self.total_arg_count = len(self.arg_types)

    def getLongName(self):
        """Return the long name: the short name plus '_' and the signature."""
        return self.long_name

    def getArgCount(self, arg):
        """Return how many arguments of type *arg* this call takes."""
        return self.arg_types.count(arg)

    def getTotalArgCount(self):
        """Return the total number of arguments this call takes."""
        return self.total_arg_count

    def getArgType(self, n):
        """Return the type of argument number *n* (0-based)."""
        return self.arg_types[n]

    def setArgVal(self, n, arg_val):
        """Set the literal PIR text used for argument number *n* (0-based)."""
        self.arg_vals[n] = arg_val

    def getOpfuncCall(self):
        """Return the PIR text of the call.

        The text is a comment line holding the long name followed by the
        instruction itself: the short name and the comma-separated argument
        values.  It starts and ends with a newline.
        """
        opfunc_call = '\n    #' + self.long_name + '\n    ' + self.name
        if self.arg_vals:
            # Joining avoids stripping commas that belong to the last value.
            opfunc_call += ' ' + ', '.join(self.arg_vals)
        return opfunc_call + '\n'

class ArgGenerator:
    """Generates pools of argument values for opfunc calls, one per arg type.

    Arg types are 's', 'p', 'i', 'n' (string, PMC, integer and number
    registers) and 'sc', 'ic', 'nc' (string, integer and number constants).
    Usage: call setArgCount() for each type, then the generate*Args()
    methods, then getArgVal() to draw values from the generated pools.

    Each generate*Args() method returns the PIR code needed to set up its
    values (empty for constants) and stores the usable literals in
    self.args[arg_type].
    """

    # Classes of value a random number is drawn from.
    _ALL_MAGNITUDES = ('neg_many', 'neg_one', 'zero', 'pos_one', 'pos_many')
    # Negative numbers and zero mess up control flow-related ops, so integer
    # constants are restricted to positive values.
    _POSITIVE_MAGNITUDES = ('pos_one', 'pos_many')

    def __init__(self, parrot_root, ignore_blacklist):
        """Create the generator and load the list of available PMC types."""
        # Per-instance state: as class attributes these dicts would be
        # shared (and mutated) by every ArgGenerator instance.
        self.arg_counts = {}
        self.args = {}
        self.pmc_gen = PMCTypeGenerator()
        self.pmc_gen.populatePMCList(parrot_root, ignore_blacklist)

    def setArgCount(self, arg_type, count):
        """Set how many values of *arg_type* the next generation produces."""
        self.arg_counts[arg_type] = count

    def getArgVal(self, arg_type):
        """Return a randomly chosen, previously generated value of *arg_type*."""
        return random.choice(self.args[arg_type])

    def _randomInt(self, magnitudes):
        """Return a random integer from one of the given magnitude classes."""
        magnitude = random.choice(magnitudes)
        if magnitude == 'neg_many':
            return random.randint(-999999, -2)
        if magnitude == 'pos_many':
            return random.randint(2, 999999)
        return {'neg_one': -1, 'zero': 0, 'pos_one': 1}[magnitude]

    def _randomNum(self, magnitudes):
        """Return a random float from one of the given magnitude classes."""
        magnitude = random.choice(magnitudes)
        if magnitude == 'neg_many':
            return (random.random() * -999999) - 1
        if magnitude == 'pos_many':
            return (random.random() * 999999) + 1
        return {'neg_one': -1.0, 'zero': 0.0, 'pos_one': 1.0}[magnitude]

    def _fillRegisters(self, arg_type, sigil, make_rhs):
        """Assign make_rhs() to registers <sigil>0..N-1; return the PIR code."""
        registers = []
        lines = []
        for n in range(self.arg_counts[arg_type]):
            register = sigil + str(n)
            lines.append("    " + register + " = " + make_rhs() + "\n")
            registers.append(register)
        self.args[arg_type] = registers
        return "".join(lines)

    def _fillConstants(self, arg_type, make_literal):
        """Store N literals from make_literal(); constants need no PIR setup."""
        self.args[arg_type] = [make_literal()
                               for n in range(self.arg_counts[arg_type])]
        return ""

    def generateStringArgs(self):
        """Fill $S registers with random strings; return the PIR setup code."""
        return self._fillRegisters(
            's', '$S', lambda: '"' + self.getString() + '"')

    def generatePMCArgs(self):
        """Fill $P registers with new PMCs of random types; return the PIR."""
        return self._fillRegisters(
            'p', '$P', lambda: "new ['" + self.pmc_gen.getPMCType() + "']")

    def generateIntArgs(self):
        """Fill $I registers with random integers; return the PIR setup code."""
        return self._fillRegisters(
            'i', '$I', lambda: str(self._randomInt(self._ALL_MAGNITUDES)))

    def generateNumArgs(self):
        """Fill $N registers with random floats; return the PIR setup code."""
        return self._fillRegisters(
            'n', '$N', lambda: str(self._randomNum(self._ALL_MAGNITUDES)))

    def generateStringConstArgs(self):
        """Generate quoted random string constants; returns an empty string."""
        return self._fillConstants(
            'sc', lambda: '"' + self.getString() + '"')

    def generateIntConstArgs(self):
        """Generate positive integer constants; returns an empty string."""
        return self._fillConstants(
            'ic', lambda: str(self._randomInt(self._POSITIVE_MAGNITUDES)))

    def generateNumConstArgs(self):
        """Generate random float constants; returns an empty string."""
        return self._fillConstants(
            'nc', lambda: str(self._randomNum(self._ALL_MAGNITUDES)))

    def getString(self):
        """Return a random string that is safe inside a double-quoted literal.

        Up to 10 characters are drawn; double quotes and backslashes are
        backslash-escaped and newlines/carriage returns are dropped, so the
        result may hold fewer characters than were drawn.
        """
        # string.printable already contains punctuation and whitespace, so
        # adding them again makes those characters more likely to be picked.
        chars = string.printable + string.punctuation + string.whitespace
        substitutions = {'"': '\\"', '\\': '\\\\', '\n': '', '\r': ''}
        pieces = []
        for m in range(random.randint(0, 10)):
            char = random.choice(chars)
            pieces.append(substitutions.get(char, char))
        return "".join(pieces)

class PMCTypeGenerator:
    """Supplies random PMC type names read from Parrot's lib/Parrot/PMC.pm."""

    # PMC types that are skipped unless the blacklist is being ignored.
    pmc_blacklist = [
            'Packfile',
            'PackfileAnnotation',
            'PackfileAnnotationKeys',
            'PackfileAnnotations',
            'PackfileConstantTable',
            'PackfileDirectory',
            'PackfileFixupEntry',
            'PackfileFixupTable',
            'PackfileRawSegment',
            'PackfileSegment',
            ]

    def __init__(self):
        # Per-instance list: as a class attribute it would be shared by, and
        # keep growing across, every PMCTypeGenerator instance.
        self.pmc_list = []

    def populatePMCList(self, parrot_root, ignore_blacklist):
        """Append the PMC type names found under *parrot_root* to pmc_list.

        Reads <parrot_root>/lib/Parrot/PMC.pm and picks up every entry of
        the form "<tab>Name => 123,".  Blacklisted names are skipped unless
        *ignore_blacklist* is true.  Raises IOError if the file cannot be
        opened.
        """
        pmc_pm = parrot_root + "/lib/Parrot/PMC.pm"
        # Capturing the name directly also handles a final line that has no
        # trailing newline.
        entry = re.compile(r'\t([a-zA-Z]+) => [0-9]+,')
        pmc_f = open(pmc_pm, 'r')
        # try/finally rather than "with": the script supports Python 2.5,
        # where "with" needs a __future__ import.
        try:
            for line in pmc_f:
                found = entry.search(line)
                if not found:
                    continue
                pmc_name = found.group(1)
                if ignore_blacklist or pmc_name not in self.pmc_blacklist:
                    self.pmc_list.append(pmc_name)
        finally:
            pmc_f.close()

    def getPMCType(self):
        """Return a random PMC type name; pmc_list must not be empty."""
        return random.choice(self.pmc_list)


class OpfuncGenerator:
    """Supplies random opfuncs read from Parrot's generated src/ops/core_ops.c.

    Each entry of opfunc_list is a [short_name, sig] pair, where sig is the
    underscore-separated argument signature ('' for ops without arguments).
    """

    # Ops that are skipped unless the blacklist is being ignored.
    opfunc_blacklist = [
            'branch_cs', # TT# 470 - known to segfault
            'check_events', #only for testing
            'check_events__', #not for direct use
            'clears', #clearing all [SPIN] registers isn't useful
            'clearp',
            'cleari',
            'clearn',
            'cpu_ret',
            'debug',
            'debug_break',
            'debug_init',
            'debug_load',
            'debug_print',
            'die',
            'enternative',
            'exit',
            'gc_debug',
            'if',
            'pic_callr__',
            'pic_get_params__',
            'pic_infix__',
            'pic_inline_sub__',
            'pic_set_returns__',
            'pin',
            'pop_eh',
            'prederef__',
            'profile',
            'push_eh',
            'returncc',
            'rethrow',
            'runinterp',
            'setn_ind',
            'sets_ind',
            'seti_ind',
            'setp_ind',
            'sleep',
            'tailcall',
            'trace',
            'trap',
            'unless',
            'unpin',
            'wrapper__',
            'yield',
            ]

    def __init__(self):
        # Per-instance list: as a class attribute it would be shared by, and
        # keep growing across, every OpfuncGenerator instance.
        self.opfunc_list = []

    def populateOpfuncList(self, parrot_root, ignore_blacklist):
        """Append the usable opfuncs found under *parrot_root* to opfunc_list.

        Reads <parrot_root>/src/ops/core_ops.c.  Every line mentioning
        PARROT_INLINE_OP or PARROT_FUNCTION_OP is expected to be followed by
        a line holding the quoted short name and a line holding the quoted
        long name.  Ops whose signature uses pc or k* arguments are skipped,
        as are blacklisted ops unless *ignore_blacklist* is true.  Raises
        IOError if the file cannot be opened.
        """
        ops_c = parrot_root + "/src/ops/core_ops.c"
        ops_f = open(ops_c, 'r')
        # try/finally rather than "with": the script supports Python 2.5,
        # where "with" needs a __future__ import.
        try:
            lines = ops_f.readlines()
        finally:
            ops_f.close()

        quoted = re.compile(r'"([^"]*)"')
        #XXX: don't know how to handle these args
        unsupported = re.compile('(pc|k|ki|kc|kic)')

        #This is a moderately fragile hack that relies on the specific
        #format of some generated code.  Expect breakage.
        index = 0
        while index < len(lines):
            line = lines[index]
            index += 1
            if 'PARROT_INLINE_OP' not in line and 'PARROT_FUNCTION_OP' not in line:
                continue
            if index + 1 >= len(lines):
                # Truncated entry at the end of the file: nothing to read.
                break
            short_match = quoted.search(lines[index])
            long_match = quoted.search(lines[index + 1])
            # The two name lines are consumed whether or not they parse.
            index += 2
            if not (short_match and long_match):
                # Not in the expected format; an unparsed line would only
                # produce a bogus op name, so skip the entry.
                continue
            short_name = short_match.group(1)
            long_name = long_match.group(1)

            if long_name == short_name:
                sig = ''
            elif long_name.startswith(short_name + '_'):
                # Strip only the leading "<short_name>_" prefix.
                sig = long_name[len(short_name) + 1:]
            else:
                sig = long_name

            if unsupported.search(sig):
                continue
            if ignore_blacklist or short_name not in self.opfunc_blacklist:
                self.opfunc_list.append([short_name, sig])

    def getOpfunc(self):
        """Return a random [short_name, sig] pair; the list must not be empty."""
        return random.choice(self.opfunc_list)

class ParrotProcess(CreateProcess):
    """Process agent that runs Parrot on each generated PIR file."""

    def on_pir_source(self, filename):
        """Handle a 'pir_source' message: run Parrot on *filename*."""
        # The command line is created as [parrot, runcore, "<fuzzy.pir>"].
        # Replace the trailing placeholder rather than a fixed index, so the
        # runcore flag is not overwritten by the file name.
        # NOTE(review): assumes cmdline.arguments mirrors the list passed to
        # the constructor -- confirm against fusil's CreateProcess.
        self.cmdline.arguments[-1] = filename
        self.createProcess()

# Script entry point: build the fuzzer application and hand control to it.
if __name__ == "__main__":
    fuzzer = ParrotFuzzer()
    fuzzer.main()
