#!/usr/bin/perl

use strict;
use warnings FATAL => 'all';

# This is mk-duplicate-key-checker, a program to analyze MySQL tables for
# duplicated or redundant indexes and foreign key constraints.
# 
# This program is copyright (c) 2007 Baron Schwartz.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

# ###########################################################################
# This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
# ###########################################################################

# ###########################################################################
# DSNParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package DSNParser;

sub new {
   my ( $class, @opts ) = @_;
   my $self = {
      opts => {
         D => {
            desc => 'Database to use',
            dsn  => 'database',
            copy => 1,
         },
         F => {
            desc => 'Only read default options from the given file',
            dsn  => 'mysql_read_default_file',
            copy => 1,
         },
         h => {
            desc => 'Connect to host',
            dsn  => 'host',
            copy => 1,
         },
         p => {
            desc => 'Password to use when connecting',
            dsn  => 'password',
            copy => 1,
         },
         P => {
            desc => 'Port number to use for connection',
            dsn  => 'port',
            copy => 1,
         },
         S => {
            desc => 'Socket file to use for connection',
            dsn  => 'mysql_socket',
            copy => 1,
         },
         u => {
            desc => 'User for login if not current user',
            dsn  => 'user',
            copy => 1,
         },
      },
   };
   foreach my $opt ( @opts ) {
      $self->{opts}->{$opt->{key}} = { desc => $opt->{desc}, copy => $opt->{copy} };
   }
   return bless $self, $class;
}

sub prop {
   my ( $self, $prop, $value ) = @_;
   if ( @_ > 2 ) {
      $self->{$prop} = $value;
   }
   return $self->{$prop};
}

sub parse {
   my ( $self, $dsn, $prev, $defaults ) = @_;
   return unless $dsn;
   $prev     ||= {};
   $defaults ||= {};
   my %vals;
   my %opts = %{$self->{opts}};
   if ( $dsn !~ m/=/ && $self->prop('autokey') ) {
      $vals{ $self->prop('autokey') } = $dsn;
   }
   else {
      my %hash = map { m/^(.)=(.*)$/g } split(/,/, $dsn);
      foreach my $key ( keys %opts ) {
         $vals{$key} = $hash{$key};
         if ( !defined $vals{$key} && defined $prev->{$key} && $opts{$key}->{copy} ) {
            $vals{$key} = $prev->{$key};
         }
         if ( !defined $vals{$key} ) {
            $vals{$key} = $defaults->{$key};
         }
      }
      foreach my $key ( keys %hash ) {
         die "Unrecognized DSN part '$key' in '$dsn'\n"
            unless exists $opts{$key};
      }
   }
   if ( (my $required = $self->prop('required')) ) {
      foreach my $key ( keys %$required ) {
         die "Missing DSN part '$key' in '$dsn'\n" unless $vals{$key};
      }
   }
   return \%vals;
}

sub as_string {
   my ( $self, $dsn ) = @_;
   return $dsn unless ref $dsn;
   return join(',', map { "$_=$dsn->{$_}" } grep { defined $dsn->{$_} } sort keys %$dsn );
}

sub usage {
   my ( $self ) = @_;
   my $usage
      = "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n"
      . "  KEY  COPY  MEANING\n"
      . "  ===  ====  =============================================\n";
   my %opts = %{$self->{opts}};
   foreach my $key ( sort keys %opts ) {
      $usage .= "  $key    "
             .  ($opts{$key}->{copy} ? 'yes   ' : 'no    ')
             .  ($opts{$key}->{desc} || '[No description]')
             . "\n";
   }
   if ( (my $key = $self->prop('autokey')) ) {
      $usage .= "  If the DSN is a bareword, the word is treated as the '$key' key.\n";
   }
   return $usage;
}

sub get_cxn_params {
   my ( $self, $info ) = @_;
   my $dsn;
   my %opts = %{$self->{opts}};
   my $driver = $self->prop('dbidriver') || '';
   if ( $driver eq 'Pg' ) {
      $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
                     grep { defined $info->{$_} }
                     qw(h P));
   }
   else {
      $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
                     grep { defined $info->{$_} }
                     qw(F h P S))
         . ';mysql_read_default_group=mysql';
   }
   return ($dsn, $info->{u}, $info->{p});
}

1;

# ###########################################################################
# End DSNParser package
# ###########################################################################

# ###########################################################################
# OptionParser package 1178
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package OptionParser;

use Getopt::Long;
use List::Util qw(max);
use English qw(-no_match_vars);

sub new {
   my ( $class, @opts ) = @_;
   my %key_seen;
   my %long_seen;
   my %key_for;
   my %defaults;
   my @mutex;
   my @atleast1;
   my %long_for;
   my %disables;
   my %copyfrom;
   unshift @opts,
      { s => 'help',    d => 'Show this help message' },
      { s => 'version', d => 'Output version information and exit' };
   foreach my $opt ( @opts ) {
      if ( ref $opt ) {
         my ( $long, $short ) = $opt->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         $opt->{k} = $short || $long;
         $key_for{$long} = $opt->{k};
         $long_for{$opt->{k}} = $long;
         $long_for{$long} = $long;
         $opt->{l} = $long;
         die "Duplicate option $opt->{k}" if $key_seen{$opt->{k}}++;
         die "Duplicate long option $opt->{l}" if $long_seen{$opt->{l}}++;
         $opt->{t} = $short;
         $opt->{n} = $opt->{s} =~ m/!/;
         $opt->{g} ||= 'o';
         if ( (my ($y) = $opt->{s} =~ m/=([mdHhAaz])/) ) {
            $opt->{y} = $y;
            $opt->{s} =~ s/=./=s/;
         }
         $opt->{r} = $opt->{d} =~ m/required/;
         if ( (my ($def) = $opt->{d} =~ m/default(?: ([^)]+))?/) ) {
            $defaults{$opt->{k}} = defined $def ? $def : 1;
         }
         if ( (my ($dis) = $opt->{d} =~ m/(disables .*)/) ) {
            $disables{$opt->{k}} = [ $class->get_participants($dis) ];
         }
      }
      else { # It's an instruction.

         if ( $opt =~ m/at least one|mutually exclusive|one and only one/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $long_for{$_};
               } $class->get_participants($opt);
            if ( $opt =~ m/mutually exclusive|one and only one/ ) {
               push @mutex, \@participants;
            }
            if ( $opt =~ m/at least one|one and only one/ ) {
               push @atleast1, \@participants;
            }
         }
         elsif ( $opt =~ m/default to/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $key_for{$_};
               } $class->get_participants($opt);
            $copyfrom{$participants[0]} = $participants[1];
         }

      }
   }

   foreach my $dis ( keys %disables ) {
      $disables{$dis} = [ map {
            die "No such option '$_' while processing $dis" unless $long_for{$_};
            $long_for{$_};
         } @{$disables{$dis}} ];
   }

   return bless {
      specs => [ grep { ref $_ } @opts ],
      notes => [],
      instr => [ grep { !ref $_ } @opts ],
      mutex => \@mutex,
      defaults => \%defaults,
      long_for => \%long_for,
      atleast1 => \@atleast1,
      disables => \%disables,
      key_for  => \%key_for,
      copyfrom => \%copyfrom,
      strict   => 1,
      groups   => [ { k => 'o', d => 'Options' } ],
   }, $class;
}

sub get_participants {
   my ( $self, $str ) = @_;
   my @participants;
   foreach my $thing ( $str =~ m/(--?[\w-]+)/g ) {
      if ( (my ($long) = $thing =~ m/--(.+)/) ) {
         push @participants, $long;
      }
      else {
         foreach my $short ( $thing =~ m/([^-])/g ) {
            push @participants, $short;
         }
      }
   }
   return @participants;
}

sub parse {
   my ( $self, %defaults ) = @_;
   my @specs = @{$self->{specs}};
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);

   my %opt_seen;
   my %vals = %{$self->{defaults}};
   @vals{keys %defaults} = values %defaults;
   foreach my $spec ( @specs ) {
      $vals{$spec->{k}} = undef unless defined $vals{$spec->{k}};
      $opt_seen{$spec->{k}} = 1;
   }

   foreach my $key ( keys %defaults ) {
      die "Cannot set default for non-existent option '$key'\n"
         unless $opt_seen{$key};
   }

   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions( map { $_->{s} => \$vals{$_->{k}} } @specs )
      or $self->error('Error parsing options');

   if ( $vals{version} ) {
      my $prog = $self->prog;
      printf("%s  Ver %s Distrib %s Changeset %s\n",
         $prog, $main::VERSION, $main::DISTRIB, $main::SVN_REV);
      exit(0);
   }

   if ( @ARGV && $self->{strict} ) {
      $self->error("Unrecognized command-line options @ARGV");
   }

   foreach my $dis ( grep { defined $vals{$_} } keys %{$self->{disables}} ) {
      my @disses = map { $self->{key_for}->{$_} } @{$self->{disables}->{$dis}};
      @vals{@disses} = map { undef } @disses;
   }

   foreach my $spec ( grep { $_->{r} } @specs ) {
      if ( !defined $vals{$spec->{k}} ) {
         $self->error("Required option --$spec->{l} must be specified");
      }
   }

   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$mutex;
      if ( @set > 1 ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$mutex}[ 0 .. scalar(@$mutex) - 2] );
         $note .= " and --$self->{long_for}->{$mutex->[-1]}"
               . " are mutually exclusive.";
         $self->error($note);
      }
   }

   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$required;
      if ( !@set ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$required}[ 0 .. scalar(@$required) - 2] );
         $note .= " or --$self->{long_for}->{$required->[-1]}";
         $self->error("Specify at least one of $note");
      }
   }

   foreach my $spec ( grep { $_->{y} && defined $vals{$_->{k}} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'm' ) {
         my ( $num, $suffix ) = $val =~ m/(\d+)([smhd])$/;
         if ( $suffix ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $vals{$spec->{k}} = $val;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
      elsif ( $spec->{y} eq 'd' ) {
         my $from_key = $self->{copyfrom}->{$spec->{k}};
         my $default = {};
         if ( $from_key ) {
            $default = $self->{dsn}->parse($self->{dsn}->as_string($vals{$from_key}));
         }
         $vals{$spec->{k}} = $self->{dsn}->parse($val, $default);
      }
      elsif ( $spec->{y} eq 'z' ) {
         my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
         if ( defined $num ) {
            if ( $factor ) {
               $num *= $factor_for{$factor};
            }
            $vals{$spec->{k}} = ($pre || '') . $num;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
   }

   foreach my $spec ( grep { $_->{y} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'H' || (defined $val && $spec->{y} eq 'h') ) {
         $vals{$spec->{k}} = { map { $_ => 1 } split(',', ($val || '')) };
      }
      elsif ( $spec->{y} eq 'A' || (defined $val && $spec->{y} eq 'a') ) {
         $vals{$spec->{k}} = [ split(',', ($val || '')) ];
      }
   }

   return %vals;
}

sub error {
   my ( $self, $note ) = @_;
   $self->{__error__} = 1;
   push @{$self->{notes}}, $note;
}

sub prog {
   (my $prog) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
   return $prog || $PROGRAM_NAME;
}

sub prompt {
   my ( $self ) = @_;
   my $prog   = $self->prog;
   my $prompt = $self->{prompt} || '<options>';
   return "Usage: $prog $prompt\n";
}

sub descr {
   my ( $self ) = @_;
   my $prog = $self->prog;
   my $descr  = $prog . ' ' . ($self->{descr} || '')
          . "  For more details, please use the --help option, "
          . "or try 'perldoc $prog' for complete documentation.";
   $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g);
   $descr =~ s/ +$//mg;
   return $descr;
}

sub usage_or_errors {
   my ( $self, %opts ) = @_;
   if ( $opts{help} ) {
      print $self->usage(%opts);
      exit(0);
   }
   elsif ( $self->{__error__} ) {
      print $self->errors();
      exit(0);
   }
}

sub errors {
   my ( $self ) = @_;
   my $usage = $self->prompt() . "\n";
   if ( (my @notes = @{$self->{notes}}) ) {
      $usage .= join("\n  * ", 'Errors in command-line arguments:', @notes) . "\n";
   }
   return $usage . "\n" . $self->descr();
}

sub usage {
   my ( $self, %vals ) = @_;
   my @specs = @{$self->{specs}};

   my $maxl = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @specs);

   my $maxs = max(0,
      map { length($_->{l}) + ($_->{n} ? 4 : 0)}
      grep { $_->{t} } @specs);

   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->prompt();
   foreach my $g ( @{$self->{groups}} ) {
      $usage .= "\n$g->{d}:\n";
      foreach my $spec ( sort { $a->{l} cmp $b->{l} } grep { $_->{g} eq $g->{k} } @specs ) {
         my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
         my $short = $spec->{t};
         my $desc  = $spec->{d};
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
         $desc =~ s/ +$//mg;
         if ( $short ) {
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
         }
         else {
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
         }
      }
   }

   if ( (my @instr = @{$self->{instr}}) ) {
      $usage .= join("\n", map { "  $_" } @instr) . "\n";
   }
   if ( $self->{dsn} ) {
      $usage .= "\n" . $self->{dsn}->usage();
   }
   $usage .= "\nOptions and values after processing arguments:\n";
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @specs ) {
      my $val   = $vals{$spec->{k}};
      my $type  = $spec->{y} || '';
      my $bool  = $spec->{s} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      $val      = $bool                     ? ( $val ? 'TRUE' : 'FALSE' )
                : !defined $val             ? '(No value)'
                : $type eq 'd'              ? $self->{dsn}->as_string($val)
                : $type =~ m/H|h/           ? join(',', sort keys %$val)
                : $type =~ m/A|a/           ? join(',', @$val)
                :                             $val;
      $usage .= sprintf("  --%-${lcol}s  %s\n", $spec->{l}, $val);
   }
   return $usage;
}

sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt;
   my $response;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      chomp($response = <STDIN>);
      Term::ReadKey::ReadMode('normal');
      print "\n";
   };
   if ( $EVAL_ERROR ) {
      die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
   }
   return $response;
}

sub groups {
   my ( $self, @groups ) = @_;
   push @{$self->{groups}}, @groups;
}

1;

# ###########################################################################
# End OptionParser package
# ###########################################################################

# TODO: merge with TableParser.pm
package IndexChecker;

use List::Util qw(min);

sub new {
   bless {}, shift;
}

sub find_engine {
   my ( $self, $ddl, $opts ) = @_;
   my ( $engine ) = $ddl =~ m/\) (?:ENGINE|TYPE)=(\w+)/;
   return $engine || undef;
}

# The general format of a key is
# [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`).
sub find_keys {
   my ( $self, $ddl, $opts ) = @_;

   # Find and filter the indexes.
   my @indexes = 
      grep { $_ !~ m/FOREIGN/ }
      $ddl =~ m/((?:\w+ )?KEY .+\))/mg;

   # Make allowances for HASH bugs in SHOW CREATE TABLE.  A non-MEMORY table
   # will report its index as USING HASH even when this is not supported.  The
   # true type should be BTREE.  See http://bugs.mysql.com/bug.php?id=22632
   my $engine = $self->find_engine($ddl);
   if ( $engine !~ m/MEMORY|HEAP/ ) {
      @indexes = map { $_ =~ s/USING HASH/USING BTREE/; $_; } @indexes;
   }

   my @keys = map {
      my ( $struct, $cols ) = $_ =~ m/(?:USING (\w+))? \((.+)\)/;
      my ( $special ) = $_ =~ m/(FULLTEXT|SPATIAL)/;
      $struct = $struct || $special || 'BTREE';
      my ( $name ) = $_ =~ m/KEY `(.*?)` \(/;

      # MySQL pre-4.1 supports only HASH indexes.
      if ( $opts->{version} lt '004001000' && $engine =~ m/HEAP|MEMORY/i ) {
         $struct = 'HASH';
      }

      {
         struct   => $struct,
         cols     => $cols,
         name     => $name || 'PRIMARY',
      }
   } @indexes;
   return \@keys;
}

sub find_fks {
   my ( $self, $ddl, $opts ) = @_;

   my @fks = $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg;

   my @result = map {
      my ( $name ) = $_ =~ m/CONSTRAINT `(.*?)`/;
      my ( $fkcols ) = $_ =~ m/\(([^\)]+)\)/;
      my ( $cols )   = $_ =~ m/REFERENCES.*?\(([^\)]+)\)/;
      my ( $parent ) = $_ =~ m/REFERENCES (\S+) /;
      if ( $parent !~ m/\./ ) {
         $parent = "`$opts->{database}`.$parent";
      }
      {  name   => $name,
         parent => $parent,
         cols   => $cols,
         fkcols => $fkcols,
      };
   } @fks;
   return \@result;
}

sub find_duplicate_keys {
   my ( $self, $keys, $opts ) = @_;
   my @keys = @$keys;
   my %seen; # Avoid outputting a key more than once.
   my @result;

   foreach my $i ( 0..$#keys - 1 ) {
      foreach my $j ( $i+1..$#keys ) {
         my $i_cols = $keys[$i]->{cols};
         my $j_cols = $keys[$j]->{cols};
         my $len    = min(length($i_cols), length($j_cols));
         if ( $opts->{ignore_order} ) {
            $i_cols = join(',', sort(split(/`/, $i_cols)));
            $j_cols = join(',', sort(split(/`/, $j_cols)));
         }
         if ( (($keys[$i]->{struct} eq $keys[$j]->{struct}) || $opts->{ignore_type})
            && substr($i_cols, 0, $len) eq substr($j_cols, 0, $len))
         {
            push @result, $keys[$i] unless $seen{$i}++;
            push @result, $keys[$j] unless $seen{$j}++;
         }
      }
   }

   # If the key ends with a prefix of the primary key, it's a duplicate.
   if ( $opts->{clustered} && $opts->{engine} =~ m/^(?:InnoDB|solidDB)$/ ) {
      my $i = 0;
      my $found = 0;
      while ( $i < @keys ) {
         if ( $keys[$i]->{name} eq 'PRIMARY' ) {
            $found = 1;
            last;
         }
         $i++;
      }
      if ( $found ) {
         my $pkcols = $keys[$i]->{cols};
         KEY:
         foreach my $j ( 0..$#keys ) {
            next KEY if $i == $j;
            my $suffix = $keys[$j]->{cols};
            SUFFIX:
            while ( $suffix =~ s/`[^`]+`,// ) {
               my $len = min(length($pkcols), length($suffix));
               if ( (($keys[$i]->{struct} eq $keys[$j]->{struct}) || $opts->{ignore_type})
                  && substr($suffix, 0, $len) eq substr($pkcols, 0, $len))
               {
                  push @result, $keys[$i] unless $seen{$i}++;
                  push @result, $keys[$j] unless $seen{$j}++;
                  last SUFFIX;
               }
            }
         }
      }
   }

   return \@result;
}

sub find_duplicate_fks {
   my ( $self, $fks, $opts ) = @_;
   my @fks = @$fks;
   my %seen; # Avoid outputting a fk more than once.
   my @result;
   foreach my $i ( 0..$#fks - 1 ) {
      foreach my $j ( $i+1..$#fks ) {
         # A foreign key is a duplicate no matter what order the columns are in, so
         # re-order them alphabetically so they can be compared.
         my $i_cols = join(', ', map { "`$_`" } sort($fks[$i]->{cols} =~ m/`([^`]+)`/g));
         my $j_cols = join(', ', map { "`$_`" } sort($fks[$j]->{cols} =~ m/`([^`]+)`/g));
         my $i_fkcols = join(', ', map { "`$_`" } sort($fks[$i]->{fkcols} =~ m/`([^`]+)`/g));
         my $j_fkcols = join(', ', map { "`$_`" } sort($fks[$j]->{fkcols} =~ m/`([^`]+)`/g));
         if ( $fks[$i]->{parent} eq $fks[$j]->{parent}
               && $i_cols eq $j_cols
               && $i_fkcols eq $j_fkcols
         ) {
            push @result, $fks[$i] unless $seen{$i}++;
            push @result, $fks[$j] unless $seen{$j}++;
         }
      }
   }
   return \@result;
}

# ###########################################################################
# And now for the "program".
# ###########################################################################
package main;

use DBI;
use English qw(-no_match_vars);
use Getopt::Long;
use List::Util qw(max);

our $VERSION = '1.1.2';
our $DISTRIB = '1316';
our $SVN_REV = sprintf("%d", q$Revision: 1308 $ =~ m/(\d+)/g || 0);

$OUTPUT_AUTOFLUSH = 1;

if ( !caller ) {

   # ############################################################################
   # Get configuration information.
   # ############################################################################

   my @opt_spec = (
      { s => 'allatonce|s',       d => 'Print only once, instead of one DB at a time' },
      { s => 'allstruct|a',       d => 'Compare indexes with different structs (BTREE, HASH, etc)' },
      { s => 'askpass',           d => 'Prompt for password for connections' },
      { s => 'clustered|c',       d => 'PK columns appended to secondary key is duplicate' },
      { s => 'databases|d=h',     d => 'Check only this comma-separated list of databases' },
      { s => 'defaults-file|F=s', d => 'Only read mysql options from the given file' },
      { s => 'function|f=s',      d => 'Do f=foreign keys, k=keys; default fk' },
      { s => 'host|h=s',          d => 'Connect to host' },
      { s => 'ignoredb|g=H',      d => 'Ignore this comma-separated list of databases' },
      { s => 'ignoreorder',       d => 'Ignore index order so KEY(a,b) duplicates KEY(b,a)' },
      { s => 'ignoretbl|n=H',     d => 'Ignore this comma-separated list of tables' },
      { s => 'password|p=s',      d => 'Password to use when connecting' },
      { s => 'port|P=i',          d => 'Port number to use for connection' },
      { s => 'socket|S=s',        d => 'Socket file to use for connection' },
      { s => 'tab|b',             d => 'Output separated with tabs' },
      { s => 'tables|t=h',        d => 'Check only this comma-separated list of tables' },
      { s => 'user|u=s',          d => 'User for login if not current user' },
      { s => 'verbose|v',         d => 'Output everything, not just dupes' },
   );

   my $opt_parser = OptionParser->new(@opt_spec);
   $opt_parser->{prompt} = '<options>';
   $opt_parser->{descr}  = q{examines MySQL tables for duplicate or redundant }
                         . q{indexes and foreign keys.  Connection options }
                         . q{are read from MySQL option files.};
   my %opts = $opt_parser->parse();
   $opt_parser->usage_or_errors(%opts);

   # ############################################################################
   # Get ready to do the main work.
   # ############################################################################

   # Connect to the database
   if ( !defined $opts{p} && $opts{askpass} ) {
      $opts{p} = OptionParser::prompt_noecho("Enter password: ");
   }

   my $dp = new DSNParser;
   my $dbh = DBI->connect(
      $dp->get_cxn_params(\%opts),
      { AutoCommit => 1, RaiseError => 1, PrintError => 0 } );

   my @databases = @{$dbh->selectcol_arrayref('SHOW DATABASES')};
   my @whole_batch;
   my $exit_code = 0;
   my $version = sprintf('%03d%03d%03d', $dbh->{mysql_serverinfo} =~ m/(\d+)/g);

   my $ic = new IndexChecker;
   my $ic_opts = {
      ignore_type  => $opts{a},
      ignore_order => $opts{ignoreorder},
      clustered    => $opts{c},
   };

   DATABASE:
   foreach my $database ( @databases ) {

      # Ignore databases as instructed.  Also ignore INFORMATION_SCHEMA and skip
      # databases caused by lost+found directories created in the root of ext3
      # filesystems; they are not really databases.
      # TODO: modularize this.
      next DATABASE if
         ( $opts{d} && !exists($opts{d}->{$database}) )
         || $database =~ m/^(information_schema|lost\+found)$/mi
         || exists $opts{g}->{$database};

      my @tables = @{$dbh->selectcol_arrayref('SHOW TABLES FROM `' . $database .  '`')};
      next DATABASE unless @tables;

      my %info_for;

      TABLE:
      foreach my $table ( @tables ) {
         next TABLE if
            ( $opts{t} && !exists($opts{t}->{$table}) )
            || exists $opts{n}->{$table};

         $exit_code = 1;
         my $ddl;
         eval {
            $ddl = ($dbh->selectrow_array("SHOW CREATE TABLE `$database`.`$table`"))[1];
            $exit_code = 0;
         };
         next TABLE if !$ddl;
         my $engine = $ic->find_engine($ddl) || next TABLE;

         my $keys = $opts{f} =~ m/k/ ? $ic->find_keys($ddl, {version => $version }) : [];
         my $fks  = $opts{f} =~ m/f/ ? $ic->find_fks($ddl, {database => $database}) : [];

         if ( @$keys || @$fks ) {
            $info_for{$table} = {
               database => $database,
               table    => $table,
               engine   => $engine,
               keys     => $keys,
               fks      => $fks,
            };
         }
      }

      my @to_print;
      foreach my $table ( sort keys %info_for ) {
         my $hash   = $info_for{$table};
         my $engine = $hash->{engine};

         # Prepare indexes
         if ( $opts{f} =~ m/k/ ) {
            if ( $opts{v} ) { # Print all
               push @to_print, map { make_hash($hash, 'KEY', $_) }
                  @{$hash->{keys}};
            }
            else { # Find duplicate/redundant by prefix matching.
               push @to_print, map { make_hash($hash, 'KEY', $_) }
                  @{$ic->find_duplicate_keys($hash->{keys}, { engine => $engine, %$ic_opts})};
            }
         }

         # Prepare foreign keys
         if ( $opts{f} =~ m/f/ ) {
            if ( $opts{v} ) { # Print all
               push @to_print, map { make_hash($hash, 'FK', $_) }
                  @{$hash->{fks}};
            }
            else { # Otherwise output duplicates.
               push @to_print, map { make_hash($hash, 'FK', $_) }
                  @{$ic->find_duplicate_fks($hash->{fks})};
            }
         }
      }

      next DATABASE unless @to_print;

      if ( $opts{s} ) {
         push @whole_batch, @to_print;
      }
      else {
         print_batch(@to_print);
      }
   }

   if ( @whole_batch && $opts{s} ) {
      print_batch(@whole_batch);
   }

   exit($exit_code);

   # ############################################################################
   # Subroutines
   # ############################################################################

   sub make_hash {
      my ( $info, $type, $hash ) = @_;
      return {
         database => $info->{database},
         table    => $info->{table},
         engine   => $info->{engine},
         name     => $hash->{name},
         type     => $type,
         struct   => $hash->{struct} || 'NULL',
         parent   => $hash->{parent} || 'NULL',
         cols     => $hash->{cols},
      };
   }

   sub print_batch {
      my ( @batch ) = @_;

      my $hdr;
      if ( $opts{b} ) {
         $hdr = ( "%s\t" x 8 ) . "\n";
      }
      else {
         my $max_idx  = max(6, map { length($_->{name}) } @batch);
         my $max_tbl  = max(5, map { length($_->{table}) } @batch);
         my $max_db   = max(8, map { length($_->{database}) } @batch);
         my $max_par  = max(6, map { length($_->{parent} || '') } @batch);
         $hdr         = "%-${max_db}s %-${max_tbl}s %-6s %-${max_idx}s %-4s %-8s %-${max_par}s %s\n";
      }

      printf($hdr, qw(DATABASE TABLE ENGINE OBJECT TYPE STRUCT PARENT COLUMNS));
      foreach my $thing ( @batch ) {
         printf($hdr, @{$thing}{qw(database table engine name type struct parent cols)});
      }

   }

}

1; # Because this is a module as well as a script.

# ############################################################################
# Documentation
# ############################################################################

=pod

=head1 NAME

mk-duplicate-key-checker - Find possible duplicate indexes and foreign keys on
MySQL tables.

=head1 DESCRIPTION

This program examines the output of SHOW CREATE TABLE on MySQL tables, and if
it finds indexes that cover the same columns as another index in the same
order, or cover an exact leftmost prefix of another index, it prints out
the suspicious indexes.  By default, indexes must be of the same type, so a
BTREE index is not a duplicate of a FULLTEXT index, even if they have the same
colums.  You can override this.

It also looks for duplicate foreign keys.  A duplicate foreign key covers the
same columns as another in the same table, and references the same parent
table.

=head1 OPTIONS

=over

=item --allatonce

Prints everything it finds in one chunk.  The default is to print a database at
a time.

=item --allstruct

Compare indexes with different structures.  By default this is disabled, because
a BTREE index that covers the same columns as a FULLTEXT index is not really a
duplicate, for example.

=item --askpass

Prompt for password for connections.

=item --clustered

Detects when a suffix of a secondary key is a leftmost prefix of the primary
key, and treats it as a duplicate key.  Only detects this condition on storage
engines whose primary keys are clustered (currently InnoDB and solidDB).

Clustered storage engines append the primary key columns to the leaf nodes of
all secondary keys anyway, so you might consider it redundant to have them
appear in the internal nodes as well.  Of course, you may also want them in the
internal nodes, because just having them at the leaf nodes won't help for some
queries.  It does help for covering index queries, however.

Here's an example of a key that is considered redundant with this option:

  PRIMARY KEY  (`a`)
  KEY `b` (`b`,`a`)

=item --databases

A comma-separated list of databases to examine.

=item --defaults-file

Only read default options from the given file.

=item --function

What to check: 'f' is foreign keys, 'k' is indexes.  The default is to check
both.

=item --help

Displays a help message.

=item --host

Connect to host.

=item --ignoredb

A comma-separated list of databases to ignore.

=item --ignoretype

Ignore column ordering, so an index on columns (a,b) is considered a duplicate
of an index on columns (b,a).

=item --ignoretbl

A comma-separated list of tables to ignore.

=item --password

Password to use when connecting.

=item --port

Port number to use for connection.

=item --socket

Socket file to use for connection.

=item --tab

Print output separated with tabs, instead of whitespace-aligned.  See
L<"OUTPUT"> for details.

=item --tables

A comma-separated list of tables to check.

=item --user

User for login if not current user.

=item --verbose

Output all keys and/or foreign keys found, not just redundant ones.

=item --version

Output version information and exit.

=back

=head1 OUTPUT

Output is to STDOUT, one line per server and table, with header lines for each
database.  I tried to make the output easy to process with awk.  For this reason
columns are always present.  If there's no value, the script prints 'NULL'.
Output is sorted by database and table.

The columns in the output are as follows.

=over

=item DATABASE

The database the table is in.

=item TABLE

The table name.

=item ENGINE

The table's storage engine.

=item OBJECT

The index or constraint's name, e.g. `tbl_ibfk_3` (the default InnoDB name for
the third foreign key on a table named tbl).

=item TYPE

'KEY' for indexes, 'FK' for foreign keys.

=item STRUCT

The type of index: BTREE, FULLTEXT, HASH etc.  By default MySQL's indexes are
BTREE in most cases.  This does not apply to foreign keys.

=item PARENT

The parent table to which the foreign key constraint refers.  This does not
apply to indexes.

=item COLUMNS

The columns included in the index or foreign key constraint.  For indexes,
this column list is output verbatim, as shown in SHOW CREATE TABLE.  For
foreign keys, the columns are ordered so string comparison can find
duplicates, since column order in a foreign key is immaterial.

=back

=head1 SYSTEM REQUIREMENTS

You need the following Perl modules: DBI and DBD::mysql.

=head1 BUGS

Please use the Sourceforge bug tracker, forums, and mailing lists to request
support or report bugs: L<http://sourceforge.net/projects/maatkit/>.

=head1 LICENSE

This program is copyright (c) 2007 Baron Schwartz.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 AUTHOR

Baron Schwartz.

=head1 VERSION

This manual page documents Ver 1.1.2 Distrib 1316 $Revision: 1308 $.

=cut
