# Examples of user-defined filters. Edit and run with -f filters.txt.
# The examples below are self-explanatory. Notice the use of the predefined
#  variables ($PASS, $FAIL, $MATCH, $RECORD, and $CHROM, $POS as used in the
#  DP4 example below) and methods (error).


# In this example, a minimum value of AF1=0.01 is required
{
    # Allele-frequency floor: a record fails when its INFO/AF1 value is
    # numerically below 0.01 and passes otherwise.
    tag  => 'INFO/AF1',              # The VCF tag to apply this filter on
    name => 'MinAF',                 # The filter ID
    desc => 'Minimum AF1 [0.01]',    # Description for the VCF header
    test => sub {
        # $MATCH holds the value of the tag named above.
        if ( $MATCH < 0.01 ) { return $FAIL; }
        return $PASS;
    },
},


# Filter all indels (presence of INDEL tag is tested)
{
    # Unconditional filter: the test never inspects $MATCH, it fails every
    # record it is invoked for.
    name     => 'Indel',
    desc     => 'INDEL tag present',
    tag      => 'INFO/INDEL',
    apply_to => 'indels',    # Can be one of SNPs, indels, all. Default: [All]
    test     => sub {
        return $FAIL;
    },
},


# Only loci with enough reads supporting the variant will pass the filter
{
    # Flags SNPs whose variant is supported by too few reads.
    #
    # INFO/DP4 is parsed as four comma-separated counts. Following the
    # samtools/bcftools definition of DP4 these are: ref-forward, ref-reverse,
    # alt-forward, alt-reverse. The record FAILS (i.e. gets the 'FewAlts'
    # filter) when 10% of the reference-supporting reads still outnumber the
    # alternate-supporting reads; otherwise it PASSES.
    tag      => 'INFO/DP4',
    name     => 'FewAlts',
    desc     => 'Too few reads supporting the variant',
    apply_to => 'SNPs',
    test     => sub {
        if ( $MATCH !~ /^([^,]+),([^,]+),([^,]+),(.+)$/ )
        {
            # error() is supplied by the calling program; presumably it
            # aborts, so the captures below are only read after a match.
            error("Could not parse INFO/DP4: $CHROM:$POS [$MATCH]");
        }

        # Copy the captures immediately so later regex use cannot clobber them.
        my $ref_reads = $1 + $2;
        my $alt_reads = $3 + $4;

        # Too few alternate reads relative to the reference: apply the filter.
        if ( 0.1*$ref_reads > $alt_reads ) { return $FAIL; }
        return $PASS;
    },
},


# Example of filtering based on the genotype columns (per-sample FORMAT/PL values)
{
    # Fails a SNP as soon as one entry of FORMAT/PL has its second value
    # strictly smaller than both its first and its third value.
    # NOTE(review): for a biallelic site the VCF PL ordering makes the second
    # value the heterozygous likelihood (lower = more likely) - confirm that
    # multi-allelic sites are not expected here.
    tag      => 'FORMAT/PL',
    name     => 'NoHets',
    desc     => 'Inbred homozygous mouse, no hets expected',
    apply_to => 'SNPs',
    test     => sub {
        # $MATCH is dereferenced as an array of comma-separated PL strings.
        for my $sample_pl (@$MATCH)
        {
            my @likelihoods = split(/,/,$sample_pl);
            my ($first, $second, $third) = @likelihoods[0 .. 2];

            next unless $second < $first;
            return $FAIL if $second < $third;
        }
        return $PASS;
    },
},


# These two examples show that the VCF output line can be edited. (Thanks to Shane McCarthy)
{
    # Masks the genotype of every sample whose FORMAT/DP is below 2.
    # The record itself always passes; only the output line is edited.
    #
    # Diploid genotypes, whether unphased (0/1) or phased (0|1), are replaced
    # by "./."; anything else is replaced by ".".
    tag      => 'FORMAT/DP',
    name     => 'MinSampleDP',
    desc     => 'Genotypes set to . for samples with DP < 2',
    apply_to => 'all',
    test     => sub {
        # $i is incremented before use, so the first value of @$MATCH is
        # paired with $$RECORD[9] (the first sample column of a VCF line).
        my $i = 8;
        for my $dp (@$MATCH)
        {
            $i++;
            next unless ($dp<2);

            # Limit -1 keeps trailing empty subfields so that the re-joined
            # column has the same number of fields as before.
            my @format = split(/:/,$$RECORD[$i],-1);

            # The first subfield is overwritten; this assumes GT comes first
            # in FORMAT, as the VCF specification requires when GT is present.
            $format[0] = $format[0] =~ /[\/|]/ ? "./." : ".";
            $$RECORD[$i] = join(":",@format);
        }
        return $PASS;
    },
},
{
    # Masks the genotype of every sample whose FORMAT/GQ is below 2.
    # The record itself always passes; only the output line is edited.
    #
    # Diploid genotypes, whether unphased (0/1) or phased (0|1), are replaced
    # by "./."; anything else is replaced by ".".
    tag      => 'FORMAT/GQ',
    name     => 'MinSampleGQ',
    desc     => 'Genotypes set to . for samples with GQ < 2',
    apply_to => 'all',
    test     => sub {
        # $i is incremented before use, so the first value of @$MATCH is
        # paired with $$RECORD[9] (the first sample column of a VCF line).
        my $i = 8;
        for my $gq (@$MATCH)
        {
            $i++;
            next unless ($gq<2);

            # Limit -1 keeps trailing empty subfields so that the re-joined
            # column has the same number of fields as before.
            my @format = split(/:/,$$RECORD[$i],-1);

            # The first subfield is overwritten; this assumes GT comes first
            # in FORMAT, as the VCF specification requires when GT is present.
            $format[0] = $format[0] =~ /[\/|]/ ? "./." : ".";
            $$RECORD[$i] = join(":",@format);
        }
        return $PASS;
    },
},

