#!/usr/bin/perl

# This program checksums MySQL tables efficiently on one or more servers.
#
# This program is copyright (c) 2007 Baron Schwartz.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

use strict;
use warnings FATAL => 'all';

# ###########################################################################
# TableParser package 1259
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package TableParser;

# Constructor.  The parser keeps no state, so the object is an empty hash
# blessed into the invoking class.
sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}

# Parses the output of SHOW CREATE TABLE into a hashref describing the table.
#
# Arguments:
#   $ddl   either the CREATE TABLE text, or an arrayref [ $kind, $text ]; when
#          $kind is anything other than 'table' the result is just
#          { engine => 'VIEW' }.
#   $opts  optional hashref; only mysql_version (a zero-padded string such as
#          '004001000') is read, to force HASH indexes on old HEAP tables.
#
# Returns a hashref with these keys:
#   cols, col_posn, is_col            column names, their positions, a lookup
#   defs, type_for                    raw definition and type per column
#   null_cols, is_nullable            columns not declared NOT NULL
#   numeric_cols, is_numeric          columns whose type looks numeric
#   is_autoinc                        1/0 per column
#   keys                              per-index info (cols, type, unique, ...)
#   engine                            storage engine named in the DDL
#
# Dies if the DDL is not back-quoted CREATE TABLE text, or if a column's type
# cannot be determined.
sub parse {
   my ( $self, $ddl, $opts ) = @_;

   if ( ref $ddl eq 'ARRAY' ) {
      if ( $ddl->[0] eq 'table' ) {
         $ddl = $ddl->[1];
      }
      else {
         return {
            engine => 'VIEW',
         };
      }
   }

   if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
      die "Cannot parse table definition; is ANSI quoting enabled or SQL_QUOTE_SHOW_CREATE disabled?";
   }

   my ( $engine ) = $ddl =~ m/\) (?:ENGINE|TYPE)=(\w+)/;
   # The engine is matched against patterns below; use an empty string when
   # the DDL has no ENGINE/TYPE clause so those matches cannot trip a fatal
   # "uninitialized value" warning.  The returned 'engine' stays as parsed.
   my $engine_name = defined $engine ? $engine : '';

   # Column definitions are the lines that begin with whitespace + backtick.
   my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
   # Take only the FIRST back-quoted identifier of each definition.  A global
   # match would also pick up identifiers quoted inside e.g. a COMMENT and
   # leave @cols out of step with @defs.
   my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;

   my %def_for;
   @def_for{@cols} = @defs;

   my (@nums, @null);
   my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
   foreach my $col ( @cols ) {
      my $def = $def_for{$col};
      my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
      die "Can't determine column type for $def" unless $type;
      $type_for{$col} = $type;
      if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
         push @nums, $col;
         $is_numeric{$col} = 1;
      }
      if ( $def !~ m/NOT NULL/ ) {
         push @null, $col;
         $is_nullable{$col} = 1;
      }
      $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
   }

   my %keys;
   foreach my $key ( $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm ) {

      # Only MEMORY/HEAP tables keep a declared HASH index; for every other
      # engine the definition is treated as BTREE.
      if ( $engine_name !~ m/MEMORY|HEAP/ ) {
         $key =~ s/USING HASH/USING BTREE/;
      }

      my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
      my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
      $type = $type || $special || 'BTREE';
      if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
         && $engine_name =~ m/HEAP|MEMORY/i )
      {
         $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
      }

      my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
      my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
      # Splitting on backticks leaves the names plus ''/',' separators; keep
      # only the pieces that contain something other than a comma.
      my @cols   = grep { m/[^,]/ } split('`', $cols);
      $name      =~ s/`//g;

      $keys{$name} = {
         colnames    => $cols,
         cols        => \@cols,
         unique      => $unique,
         is_col      => { map { $_ => 1 } @cols },
         is_nullable => scalar(grep { $is_nullable{$_} } @cols),
         type        => $type,
      };
   }

   return {
      cols           => \@cols,
      col_posn       => { map { $cols[$_] => $_ } 0..$#cols },
      is_col         => { map { $_ => 1 } @cols },
      null_cols      => \@null,
      is_nullable    => \%is_nullable,
      is_autoinc     => \%is_autoinc,
      keys           => \%keys,
      defs           => \%def_for,
      numeric_cols   => \@nums,
      is_numeric     => \%is_numeric,
      engine         => $engine,
      type_for       => \%type_for,
   };
}

1;

# ###########################################################################
# End TableParser package
# ###########################################################################

# ###########################################################################
# TableChecksum package 1284
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package TableChecksum;

use English qw(-no_match_vars);
use POSIX qw(ceil);
use List::Util qw(min max);

# The checksum algorithms this package knows about.  For each algorithm:
#   pref  sort key used by best_algorithm() to order the candidates
#         (ascending, so the lowest value is tried first)
#   hash  the flag returned by is_hash_algorithm() for that algorithm
our %ALGOS = (
   CHECKSUM => { pref => 0, hash => 0 },
   ACCUM    => { pref => 1, hash => 1 },
   BIT_XOR  => { pref => 2, hash => 1 },
);

# Constructor.  Returns an empty hash blessed into the invoking class.
sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}

# Picks the checksum algorithm to use.
# Named arguments read here: algorithm (the caller's preference, optional),
# vp (object with a version_ge() method), dbh, where, chunk, replicate, count.
# Returns the requested algorithm when it is still usable, otherwise the
# most-preferred remaining one.  Dies if the requested name is not in %ALGOS.
sub best_algorithm {
   my ( $self, %opts ) = @_;
   my $wanted = $opts{algorithm};
   my $vp     = $opts{vp};
   my $dbh    = $opts{dbh};

   die "Invalid checksum algorithm $wanted"
      if $wanted && !$ALGOS{$wanted};

   my %ruled_out;

   # CHECKSUM TABLE works on whole tables only, cannot feed INSERT..SELECT,
   # and does not exist before 4.1.1.
   my $no_checksum = $opts{where}
                  || $opts{chunk}
                  || $opts{replicate}
                  || !$vp->version_ge($dbh, '4.1.1');
   $ruled_out{CHECKSUM} = 1 if $no_checksum;

   $ruled_out{BIT_XOR} = 1 unless $vp->version_ge($dbh, '4.1.1');

   my @candidates =
      grep { !$ruled_out{$_} }
      sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} }
      keys %ALGOS;

   # Honor the caller's choice whenever it survived the filtering above.
   return $wanted
      if $wanted && grep { $_ eq $wanted } @candidates;

   # A row count was asked for: avoid CHECKSUM if anything else is available.
   if ( $opts{count} ) {
      my @without_checksum = grep { $_ ne 'CHECKSUM' } @candidates;
      @candidates = @without_checksum if @without_checksum;
   }

   return $candidates[0];
}

# Returns the 'hash' flag recorded in %ALGOS for $algorithm, or a false value
# when the algorithm is unknown.
sub is_hash_algorithm {
   my ( $self, $algorithm ) = @_;
   my $info = $ALGOS{$algorithm};
   return $info unless $info;
   return $info->{hash};
}

# Finds a hash function the server accepts.
# Named arguments: dbh (handle with a do() method) and func (optional
# function name to try first).  Candidates are tried in order -- the caller's
# function, then SHA1, then MD5 -- by running SELECT <func>('test-string').
# Returns the first name that works; dies with the collected reasons when
# none does.
sub choose_hash_func {
   my ( $self, %opts ) = @_;
   my @candidates = ( ($opts{func} ? $opts{func} : ()), qw(SHA1 MD5) );

   my ($result, $error);
   while ( @candidates && !$result ) {
      my $func;
      eval {
         $func = shift @candidates;
         $opts{dbh}->do("SELECT $func('test-string')");
         $result = $func;
      };
      next unless $EVAL_ERROR;
      # Keep only the server's reason, without the "at FILE line N" trailer.
      if ( $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
         $error .= qq{$func cannot be used because "$1"\n};
      }
   }

   die $error unless $result;
   return $result;
}

# Searches for the slice index at which make_xor_slices() can assign the
# @crc user variable and still reproduce the plain result of the hash
# function.  Named arguments: dbh and func.
# Returns that slice index, or undef if no tried position gives a match.
sub optimize_xor {
   my ( $self, %opts ) = @_;
   my $dbh  = $opts{dbh};
   my $func = $opts{func};

   my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
   my $hash_len = length($unsliced);
   my $crc_wid  = $hash_len < 16 ? 16 : $hash_len;

   my $sliced    = '';
   my $opt_slice = 0;
   my $start     = 1;

   # Try successive 16-character positions until the sliced result equals
   # the non-sliced one or the positions run out.
   while ( 1 ) {
      $dbh->do('SET @crc := "", @cnt := 0');
      my $slices = $self->make_xor_slices(
         query     => "\@crc := $func('a')",
         crc_wid   => $crc_wid,
         opt_slice => $opt_slice,
      );

      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
      $sliced = ($dbh->selectrow_array($sql))[0];
      last if $sliced eq $unsliced;

      $start += 16;
      ++$opt_slice;
      last unless $start < $crc_wid;
   }

   return $sliced eq $unsliced ? $opt_slice : undef;
}

# Builds the comma-separated SQL expressions that BIT_XOR a hex checksum in
# pieces of at most 16 characters.
# Named arguments:
#   query      expression producing the checksum
#   crc_wid    width of the checksum in characters
#   opt_slice  optional index of the slice in which @crc is assigned from
#              the query; the other slices then read @crc.  Without it (or
#              when it is out of range) every slice evaluates the query.
sub make_xor_slices {
   my ( $self, %opts ) = @_;
   my $query     = $opts{query};
   my $crc_wid   = $opts{crc_wid};
   my $opt_slice = $opts{opt_slice};

   my @slices;
   my $start = 1;
   while ( $start <= $crc_wid ) {
      my $remaining = $crc_wid - $start + 1;
      my $len       = $remaining > 16 ? 16 : $remaining;
      push @slices, join('',
         "LPAD(CONV(BIT_XOR(",
         "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))",
         ", 10, 16), $len, '0')",
      );
      $start += 16;
   }

   if ( defined $opt_slice && $opt_slice < @slices ) {
      $slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
   }
   else {
      foreach my $slice ( @slices ) {
         $slice =~ s/\@crc/$query/;
      }
   }

   return join(', ', @slices);
}

# Builds the SQL expression that checksums a single row.
# Named arguments:
#   table      parsed table hashref (cols, type_for and null_cols are read)
#   quoter     object whose quote() method quotes an identifier
#   func       name of the hash function to wrap around the row
#   cols       optional arrayref restricting which columns are included
#   sep        optional CONCAT_WS separator; quotes are removed and '#' is
#              used when nothing is left
#   precision  optional digits to ROUND float/double columns to
# Columns always appear in table order.  When any included column is
# nullable, a CONCAT of ISNULL() flags is appended as an extra element.
sub make_row_checksum {
   my ( $self, %args ) = @_;
   my $table  = $args{table};
   my $quoter = $args{quoter};
   my $func   = $args{func};

   my $sep = $args{sep} || '#';
   $sep =~ s/'//g;
   $sep = '#' unless $sep;

   my $wanted = $args{cols} ? $args{cols} : $table->{cols};
   my %is_wanted;
   @is_wanted{@$wanted} = (1) x @$wanted;

   my @exprs;
   foreach my $col ( @{$table->{cols}} ) {
      next unless $is_wanted{$col};
      my $type = $table->{type_for}->{$col};
      my $expr = $quoter->quote($col);
      if ( $type eq 'timestamp' ) {
         $expr .= ' + 0';
      }
      elsif ( $type =~ m/float|double/ && $args{precision} ) {
         $expr = "ROUND($expr, $args{precision})";
      }
      push @exprs, $expr;
   }

   my @null_flags;
   foreach my $col ( @{$table->{null_cols}} ) {
      next unless $is_wanted{$col};
      push @null_flags, 'ISNULL(' . $quoter->quote($col) . ')';
   }
   if ( @null_flags ) {
      push @exprs, "CONCAT(" . join(', ', @null_flags) . ")";
   }

   return "$func($exprs[0])" unless @exprs > 1;
   return "$func(CONCAT_WS('$sep', " . join(', ', @exprs) . '))';
}

# Builds the SQL statement that checksums a table.
# Named arguments read here: dbname, tblname, quoter, algorithm, func,
# crc_wid and replicate; the complete argument hash is also forwarded to
# make_row_checksum() and make_xor_slices().
# The statement carries /*progress_comment*/ and /*WHERE*/ markers.  With
# 'replicate' set it becomes a REPLACE ... SELECT into that table with four
# placeholders.  Dies when the algorithm is missing or not in %ALGOS.
sub make_checksum_query {
   my ( $self, %args ) = @_;
   my $algorithm = $args{algorithm};
   die "Invalid or missing checksum algorithm"
      unless $algorithm && $ALGOS{$algorithm};

   my $quoter  = $args{quoter};
   my @tbl_ref = @args{ qw(dbname tblname) };

   # CHECKSUM TABLE needs no row expression at all.
   return "CHECKSUM TABLE " . $quoter->quote(@tbl_ref)
      if $algorithm eq 'CHECKSUM';

   my $expr = $self->make_row_checksum(%args);

   my $crc_col;
   if ( $algorithm eq 'BIT_XOR' ) {
      my $slices = $self->make_xor_slices( query => $expr, %args );
      $crc_col = "LOWER(CONCAT($slices)) AS crc ";
   }
   else {
      my ( $func, $crc_wid ) = @args{ qw(func crc_wid) };
      # Accumulate: each row's hash is chained onto the running @crc, which
      # is prefixed with a zero-padded row counter.
      $crc_col = join('',
         "RIGHT(MAX(",
         "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), ",
         "$func(CONCAT(\@crc, $expr)))",
         "), $crc_wid) AS crc ",
      );
   }

   my $head = $args{replicate}
      ? "REPLACE /*progress_comment*/ INTO $args{replicate} "
         . "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
         . "SELECT ?, ?, ?, ?, COUNT(*) AS cnt, "
      : "SELECT /*progress_comment*/ COUNT(*) AS cnt, ";

   return $head . $crc_col . "FROM " . $quoter->quote(@tbl_ref) . "/*WHERE*/";
}

# Reports checksum differences recorded on one server, then optionally
# recurses to that server's slaves.
#
# Arguments:
#   $args   hashref; these keys are read:
#             dbh              open handle to use instead of connecting
#             dsn              parsed DSN of the server to check
#             dsn_parser       object providing get_cxn_params(), as_string()
#                              and parse()
#             server_ids_seen  hashref of server IDs already visited
#             table            name of the checksum table to query
#             callback         coderef called as ($dsn, @rows) when any rows
#                              differ
#             recurse          maximum recursion depth; undef means no limit
#   $level  current recursion depth (defaults to 0)
#
# Prints a message and returns when the server cannot be reached, has no
# server ID, reports an ID other than the one its master listed, or has
# already been visited.
sub check_server {
   my ( $self, $args, $level ) = @_;
   $level ||= 0;

   my $dbh;
   eval {
      # The attributes hashref must go to DBI->connect itself, after the
      # (dsn, user, password) list from get_cxn_params().  RaiseError makes a
      # failed connect die, so it is caught and reported just below.
      $dbh = $args->{dbh} || DBI->connect(
         $args->{dsn_parser}->get_cxn_params($args->{dsn}),
         { RaiseError => 1, PrintError => 0, AutoCommit => 1 });
   };
   if ( $EVAL_ERROR ) {
      print "Cannot connect to "
         . $args->{dsn_parser}->as_string($args->{dsn}), "\n";
      return;
   }

   my ($id) = $dbh->selectrow_array('SELECT @@SERVER_ID');
   my $master_thinks_i_am = $args->{dsn}->{server_id};
   # The post-increment also records this server as seen.
   if ( !defined $id
       || ( defined $master_thinks_i_am && $master_thinks_i_am != $id )
       || $args->{server_ids_seen}->{$id}++
   ) {
      print "Skipping "
         . $args->{dsn_parser}->as_string($args->{dsn}), "\n";
      return;
   }

   # Select the chunks whose count or checksum differs from the master's,
   # treating "NULL on one side only" as a difference.
   (my $sql = <<"   EOF") =~ s/^      //gm;
      SELECT db, tbl, chunk, boundaries,
         COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
         COALESCE(
            this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
            0
         ) AS crc_diff 
      FROM $args->{table}
      WHERE master_cnt <> this_cnt OR master_crc <> this_crc 
      OR ISNULL(master_crc) <> ISNULL(this_crc)
   EOF

   my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
   if ( @$diffs ) {
      $args->{callback}->($args->{dsn}, @$diffs);
   }

   if ( !defined $args->{recurse} || $level < $args->{recurse} ) {

      my @slaves =
         grep { $_->{master_id} == $id } # Only my own slaves.
         map  {                          # Convert each to all-lowercase keys.
            my %hash;
            @hash{ map { lc $_ } keys %$_ } = values %$_;
            \%hash;
         }
         @{$dbh->selectall_arrayref("SHOW SLAVE HOSTS", { Slice => {} })};

      foreach my $slave ( @slaves ) {
         # Settings not given for the slave are inherited from this server's
         # DSN by the parser.
         my $dsn = $args->{dsn_parser}->parse(
             "h=$slave->{host},P=$slave->{port}", $args->{dsn});
         $dsn->{server_id} = $slave->{server_id};
         # dbh is cleared so the recursive call opens its own connection.
         $self->check_server( { %$args, dsn => $dsn, dbh => undef }, $level + 1 );
      }
   }

   return;
}

1;

# ###########################################################################
# End TableChecksum package
# ###########################################################################

# ###########################################################################
# OptionParser package 1178
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package OptionParser;

use Getopt::Long;
use List::Util qw(max);
use English qw(-no_match_vars);

# Builds an OptionParser from a list of option specifications and instructions.
# Each element of @opts is either a hashref (an option spec) or a plain string
# (an instruction).  A spec supplies:
#    s  Getopt::Long-style specification, e.g. 'long|s=s' or 'flag!'
#    d  human-readable description
#    g  optional group key (set to 'o' when absent)
# and gets these keys derived from s and d and stored back into it:
#    k  key used for the option's value: the short form if given, else the long
#    l  long option name
#    t  short option name, if any
#    n  true if the specification contains '!'
#    y  custom type letter (one of m d H h A a z) when the specification has
#       '=<letter>'; the specification is then rewritten to '=s'
#    r  true if the description contains 'required'
# Specs for 'help' and 'version' are always prepended.  Dies on a duplicate
# key or long name, or when an instruction or a 'disables' clause names an
# option that does not exist.
sub new {
   my ( $class, @opts ) = @_;
   my %key_seen;
   my %long_seen;
   my %key_for;
   my %defaults;
   my @mutex;
   my @atleast1;
   my %long_for;
   my %disables;
   my %copyfrom;
   unshift @opts,
      { s => 'help',    d => 'Show this help message' },
      { s => 'version', d => 'Output version information and exit' };
   foreach my $opt ( @opts ) {
      if ( ref $opt ) {
         # Split 'long|short...' into its two names; the short part stops at
         # the first '!', '+' or '='.
         my ( $long, $short ) = $opt->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         $opt->{k} = $short || $long;
         # %key_for maps long name => key; %long_for maps both the key and the
         # long name => long name.
         $key_for{$long} = $opt->{k};
         $long_for{$opt->{k}} = $long;
         $long_for{$long} = $long;
         $opt->{l} = $long;
         die "Duplicate option $opt->{k}" if $key_seen{$opt->{k}}++;
         die "Duplicate long option $opt->{l}" if $long_seen{$opt->{l}}++;
         $opt->{t} = $short;
         $opt->{n} = $opt->{s} =~ m/!/;
         $opt->{g} ||= 'o';
         if ( (my ($y) = $opt->{s} =~ m/=([mdHhAaz])/) ) {
            $opt->{y} = $y;
            $opt->{s} =~ s/=./=s/;
         }
         $opt->{r} = $opt->{d} =~ m/required/;
         # 'default VALUE' in the description sets the default to VALUE (text
         # up to the next ')'); a bare 'default' sets it to 1.
         if ( (my ($def) = $opt->{d} =~ m/default(?: ([^)]+))?/) ) {
            $defaults{$opt->{k}} = defined $def ? $def : 1;
         }
         # 'disables --x ...' in the description lists options this one turns
         # off; the names are resolved after the loop.
         if ( (my ($dis) = $opt->{d} =~ m/(disables .*)/) ) {
            $disables{$opt->{k}} = [ $class->get_participants($dis) ];
         }
      }
      else { # It's an instruction.

         if ( $opt =~ m/at least one|mutually exclusive|one and only one/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $long_for{$_};
               } $class->get_participants($opt);
            # 'one and only one' lands in both lists.
            if ( $opt =~ m/mutually exclusive|one and only one/ ) {
               push @mutex, \@participants;
            }
            if ( $opt =~ m/at least one|one and only one/ ) {
               push @atleast1, \@participants;
            }
         }
         elsif ( $opt =~ m/default to/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $key_for{$_};
               } $class->get_participants($opt);
            # The first option named takes its default from the second.
            $copyfrom{$participants[0]} = $participants[1];
         }

      }
   }

   # Convert every name collected from a 'disables' clause to its long name,
   # now that all options are known.
   foreach my $dis ( keys %disables ) {
      $disables{$dis} = [ map {
            die "No such option '$_' while processing $dis" unless $long_for{$_};
            $long_for{$_};
         } @{$disables{$dis}} ];
   }

   return bless {
      specs => [ grep { ref $_ } @opts ],
      notes => [],
      instr => [ grep { !ref $_ } @opts ],
      mutex => \@mutex,
      defaults => \%defaults,
      long_for => \%long_for,
      atleast1 => \@atleast1,
      disables => \%disables,
      key_for  => \%key_for,
      copyfrom => \%copyfrom,
      strict   => 1,
      groups   => [ { k => 'o', d => 'Options' } ],
   }, $class;
}

# Extracts the option names mentioned in a string.  A token starting with
# '--' contributes one long name; any other dash-prefixed token contributes
# each of its non-dash characters as a separate short name.
sub get_participants {
   my ( $self, $str ) = @_;
   my @found;
   foreach my $token ( $str =~ m/(--?[\w-]+)/g ) {
      if ( $token =~ m/--(.+)/ ) {
         push @found, $1;
         next;
      }
      push @found, $token =~ m/([^-])/g;
   }
   return @found;
}

# Parses the command line (via Getopt::Long) and returns a hash of option
# key => value.
# %defaults overrides the defaults taken from the option descriptions; it is
# fatal to name an option that does not exist.  Validation problems are not
# fatal: they are recorded with error().  When --version is given, version
# information is printed and the program exits.
sub parse {
   my ( $self, %defaults ) = @_;
   my @specs = @{$self->{specs}};
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);

   # Start from the built-in defaults, apply the caller's, and make sure
   # every option has an entry (possibly undef).
   my %opt_seen;
   my %vals = %{$self->{defaults}};
   @vals{keys %defaults} = values %defaults;
   foreach my $spec ( @specs ) {
      $vals{$spec->{k}} = undef unless defined $vals{$spec->{k}};
      $opt_seen{$spec->{k}} = 1;
   }

   foreach my $key ( keys %defaults ) {
      die "Cannot set default for non-existent option '$key'\n"
         unless $opt_seen{$key};
   }

   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions( map { $_->{s} => \$vals{$_->{k}} } @specs )
      or $self->error('Error parsing options');

   if ( $vals{version} ) {
      my $prog = $self->prog;
      printf("%s  Ver %s Distrib %s Changeset %s\n",
         $prog, $main::VERSION, $main::DISTRIB, $main::SVN_REV);
      exit(0);
   }

   # Anything left in @ARGV was not consumed as an option.
   if ( @ARGV && $self->{strict} ) {
      $self->error("Unrecognized command-line options @ARGV");
   }

   # An option that has a value switches off (sets to undef) every option
   # it disables.
   foreach my $dis ( grep { defined $vals{$_} } keys %{$self->{disables}} ) {
      my @disses = map { $self->{key_for}->{$_} } @{$self->{disables}->{$dis}};
      @vals{@disses} = map { undef } @disses;
   }

   foreach my $spec ( grep { $_->{r} } @specs ) {
      if ( !defined $vals{$spec->{k}} ) {
         $self->error("Required option --$spec->{l} must be specified");
      }
   }

   # At most one option of each mutually exclusive set may have a value.
   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$mutex;
      if ( @set > 1 ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$mutex}[ 0 .. scalar(@$mutex) - 2] );
         $note .= " and --$self->{long_for}->{$mutex->[-1]}"
               . " are mutually exclusive.";
         $self->error($note);
      }
   }

   # At least one option of each such set must have a value.
   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$required;
      if ( !@set ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$required}[ 0 .. scalar(@$required) - 2] );
         $note .= " or --$self->{long_for}->{$required->[-1]}";
         $self->error("Specify at least one of $note");
      }
   }

   # Convert the values of typed options that were given.
   foreach my $spec ( grep { $_->{y} && defined $vals{$_->{k}} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'm' ) {
         # Time value: digits followed by s, m, h or d, converted to seconds.
         my ( $num, $suffix ) = $val =~ m/(\d+)([smhd])$/;
         if ( $suffix ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $vals{$spec->{k}} = $val;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
      elsif ( $spec->{y} eq 'd' ) {
         # DSN value, parsed by the object in $self->{dsn}.  If a 'default to'
         # instruction names another option for this one, that option's value
         # is passed along as the second argument of the DSN parse.
         my $from_key = $self->{copyfrom}->{$spec->{k}};
         my $default = {};
         if ( $from_key ) {
            $default = $self->{dsn}->parse($self->{dsn}->as_string($vals{$from_key}));
         }
         $vals{$spec->{k}} = $self->{dsn}->parse($val, $default);
      }
      elsif ( $spec->{y} eq 'z' ) {
         # Size value: optional sign, digits, optional k/M/G multiplier.
         my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
         if ( defined $num ) {
            if ( $factor ) {
               $num *= $factor_for{$factor};
            }
            $vals{$spec->{k}} = ($pre || '') . $num;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
   }

   # Comma-separated lists become a hashref (types H/h) or an arrayref
   # (types A/a).  The upper-case types are converted even when no value was
   # given; the lower-case ones only when a value is defined.
   foreach my $spec ( grep { $_->{y} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'H' || (defined $val && $spec->{y} eq 'h') ) {
         $vals{$spec->{k}} = { map { $_ => 1 } split(',', ($val || '')) };
      }
      elsif ( $spec->{y} eq 'A' || (defined $val && $spec->{y} eq 'a') ) {
         $vals{$spec->{k}} = [ split(',', ($val || '')) ];
      }
   }

   return %vals;
}

# Records a problem with the command line: raises the __error__ flag and
# appends the message to the list of notes.
sub error {
   my $self = shift;
   my ( $note ) = @_;
   $self->{__error__} = 1;
   my $notes = $self->{notes} ||= [];
   push @$notes, $note;
}

# Returns the trailing run of letters, dots and dashes of the program name,
# or the full program name when it does not end in such a run.
sub prog {
   return $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/ ? $1 : $PROGRAM_NAME;
}

# Returns the one-line "Usage:" banner: the program name followed by the
# configured prompt text, or '<options>' when none is set.
sub prompt {
   my ( $self ) = @_;
   my $name = $self->prog;
   my $args = $self->{prompt} || '<options>';
   return join(' ', 'Usage:', $name, $args) . "\n";
}

# Returns the program description -- the program name, the configured
# description and a pointer to --help and perldoc -- wrapped into lines of
# at most 80 characters with trailing spaces removed.
sub descr {
   my ( $self ) = @_;
   my $prog = $self->prog;
   my $text = join('',
      $prog, ' ', ($self->{descr} || ''),
      "  For more details, please use the --help option, ",
      "or try 'perldoc $prog' for complete documentation.",
   );
   # Greedily take up to 80 characters that end at whitespace or at the end
   # of the text.
   my @lines   = $text =~ m/(.{0,80})(?:\s+|$)/g;
   my $wrapped = join("\n", @lines);
   $wrapped =~ s/ +$//mg;
   return $wrapped;
}

# Prints the full usage text and exits when the 'help' value is true;
# otherwise prints the recorded errors and exits when any were recorded.
# Returns without printing anything when neither applies.  The exit status
# is 0 in both cases.
sub usage_or_errors {
   my ( $self, %opts ) = @_;
   my $output = $opts{help}        ? $self->usage(%opts)
              : $self->{__error__} ? $self->errors()
              :                      undef;
   return unless defined $output;
   print $output;
   exit(0);
}

# Returns the text shown when the command line was invalid: the usage
# banner, the list of recorded problems (if any) and the description.
sub errors {
   my ( $self ) = @_;
   my $text  = $self->prompt() . "\n";
   my @notes = @{$self->{notes}};
   if ( @notes ) {
      $text .= join("\n  * ", 'Errors in command-line arguments:', @notes);
      $text .= "\n";
   }
   $text .= "\n";
   return $text . $self->descr();
}

# Returns the full help text: the description, the usage banner, every option
# grouped and sorted by long name with its wrapped description, the
# instruction strings, the DSN help when a DSN parser is attached as
# $self->{dsn}, and finally the value of every option.
# %vals holds option key => value, e.g. as returned by parse().
sub usage {
   my ( $self, %vals ) = @_;
   my @specs = @{$self->{specs}};

   # Widest long name; 4 is added for negatable options, which are printed
   # with a '[no]' prefix below.
   my $maxl = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @specs);

   # Same measure, restricted to options that also have a short form.
   my $maxs = max(0,
      map { length($_->{l}) + ($_->{n} ? 4 : 0)}
      grep { $_->{t} } @specs);

   # $lcol is the width of the option column, $rcol the width left for the
   # description on an 80-character line, and $rpad the indentation put in
   # front of a description's continuation lines.
   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->prompt();
   foreach my $g ( @{$self->{groups}} ) {
      $usage .= "\n$g->{d}:\n";
      foreach my $spec ( sort { $a->{l} cmp $b->{l} } grep { $_->{g} eq $g->{k} } @specs ) {
         my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
         my $short = $spec->{t};
         my $desc  = $spec->{d};
         # Wrap the description to $rcol characters, dropping empty pieces.
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
         $desc =~ s/ +$//mg;
         if ( $short ) {
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
         }
         else {
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
         }
      }
   }

   if ( (my @instr = @{$self->{instr}}) ) {
      $usage .= join("\n", map { "  $_" } @instr) . "\n";
   }
   if ( $self->{dsn} ) {
      $usage .= "\n" . $self->{dsn}->usage();
   }
   $usage .= "\nOptions and values after processing arguments:\n";
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @specs ) {
      my $val   = $vals{$spec->{k}};
      my $type  = $spec->{y} || '';
      # A specification with no '=' part (just names and an optional '!') is
      # shown as TRUE or FALSE.
      my $bool  = $spec->{s} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      $val      = $bool                     ? ( $val ? 'TRUE' : 'FALSE' )
                : !defined $val             ? '(No value)'
                : $type eq 'd'              ? $self->{dsn}->as_string($val)
                : $type =~ m/H|h/           ? join(',', sort keys %$val)
                : $type =~ m/A|a/           ? join(',', @$val)
                :                             $val;
      $usage .= sprintf("  --%-${lcol}s  %s\n", $spec->{l}, $val);
   }
   return $usage;
}

# Prints $prompt and reads one line from STDIN with terminal echo turned
# off (for passwords).  May be called as a plain function or as a method.
# Returns the line without its trailing newline.  Dies if Term::ReadKey
# cannot be loaded or the read fails.
sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt;
   my $response;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      # Guard the read separately so the terminal is always switched back to
      # normal mode, even if reading dies; otherwise the user's shell would
      # be left with echo off.
      my $read_ok    = eval { chomp($response = <STDIN>); 1 };
      my $read_error = $EVAL_ERROR;
      Term::ReadKey::ReadMode('normal');
      die $read_error unless $read_ok;
      print "\n";
   };
   if ( $EVAL_ERROR ) {
      die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
   }
   return $response;
}

# Appends the given option groups to the parser's list of groups.
sub groups {
   my $self = shift;
   push @{$self->{groups}}, @_;
}

1;

# ###########################################################################
# End OptionParser package
# ###########################################################################

# ###########################################################################
# DSNParser package 1216
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package DSNParser;

# Creates a DSN parser that knows the standard connection keys (D, F, h, p,
# P, S, u), each with a description, the name of its DBI connection-string
# part, and copy => 1.
# Every element of @opts is a hashref with 'key', 'desc' and 'copy'; it adds
# a key, or replaces a standard one, with just that desc and copy.
sub new {
   my ( $class, @opts ) = @_;

   # key, description, DBI connection-string part
   my @standard = (
      [ 'D', 'Database to use',                               'database'                ],
      [ 'F', 'Only read default options from the given file', 'mysql_read_default_file' ],
      [ 'h', 'Connect to host',                               'host'                    ],
      [ 'p', 'Password to use when connecting',               'password'                ],
      [ 'P', 'Port number to use for connection',             'port'                    ],
      [ 'S', 'Socket file to use for connection',             'mysql_socket'            ],
      [ 'u', 'User for login if not current user',            'user'                    ],
   );

   my %opt_for;
   foreach my $row ( @standard ) {
      my ( $key, $desc, $dsn ) = @$row;
      $opt_for{$key} = { desc => $desc, dsn => $dsn, copy => 1 };
   }
   foreach my $extra ( @opts ) {
      $opt_for{ $extra->{key} } = {
         desc => $extra->{desc},
         copy => $extra->{copy},
      };
   }

   return bless { opts => \%opt_for }, $class;
}

# Combined getter/setter for a property stored directly on the object.
# With a value argument (even undef) the property is set first; the current
# value is returned either way.
sub prop {
   my $self = shift;
   my $name = shift;
   $self->{$name} = shift if @_;
   return $self->{$name};
}

# Parses a DSN string of the form key=value[,key=value...] into a hashref
# that has an entry for every known key.
# A value comes from, in order: the DSN itself; $prev, but only for keys
# whose 'copy' flag is set; then $defaults.
# A DSN without any '=' is treated as the value of the 'autokey' property's
# key, when that property is set.
# Returns nothing for a false $dsn.  Dies on a key that is not known, or
# when a key listed in the 'required' property ends up without a true value.
sub parse {
   my ( $self, $dsn, $prev, $defaults ) = @_;
   return unless $dsn;
   $prev     = {} unless $prev;
   $defaults = {} unless $defaults;
   my $known = $self->{opts};

   if ( $dsn !~ m/=/ ) {
      my $autokey = $self->prop('autokey');
      $dsn = "$autokey=$dsn" if $autokey;
   }

   # Collect the single-letter key=value parts; parts of any other shape
   # are ignored.
   my %given;
   foreach my $part ( split(/,/, $dsn) ) {
      $given{$1} = $2 if $part =~ m/^(.)=(.*)$/;
   }

   my %vals;
   foreach my $key ( keys %$known ) {
      my $val = $given{$key};
      if ( !defined $val && defined $prev->{$key} && $known->{$key}->{copy} ) {
         $val = $prev->{$key};
      }
      $val = $defaults->{$key} unless defined $val;
      $vals{$key} = $val;
   }

   foreach my $key ( keys %given ) {
      next if exists $known->{$key};
      die "Unrecognized DSN part '$key' in '$dsn'\n";
   }

   my $required = $self->prop('required');
   if ( $required ) {
      foreach my $key ( keys %$required ) {
         die "Missing DSN part '$key' in '$dsn'\n" unless $vals{$key};
      }
   }

   return \%vals;
}

# Renders a parsed DSN as key=value pairs joined by commas, in sorted key
# order.  Keys without a defined value, or unknown to this parser, are
# left out, and the password is shown as '...'.
# Anything that is not a reference is returned unchanged.
sub as_string {
   my ( $self, $dsn ) = @_;
   return $dsn unless ref $dsn;

   my @pairs;
   foreach my $key ( sort keys %$dsn ) {
      next unless defined $dsn->{$key} && $self->{opts}->{$key};
      my $shown = $key eq 'p' ? '...' : $dsn->{$key};
      push @pairs, "$key=" . $shown;
   }
   return join(',', @pairs);
}

# Returns a help table describing the DSN syntax: one row per known key with
# its copy flag and description, plus a note about bareword DSNs when the
# 'autokey' property is set.
sub usage {
   my ( $self ) = @_;
   my @lines = (
      "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n",
      "  KEY  COPY  MEANING\n",
      "  ===  ====  =============================================\n",
   );

   my $opt_for = $self->{opts};
   foreach my $key ( sort keys %$opt_for ) {
      my $copy = $opt_for->{$key}->{copy} ? 'yes   ' : 'no    ';
      my $desc = $opt_for->{$key}->{desc} || '[No description]';
      push @lines, "  $key    " . $copy . $desc . "\n";
   }

   my $autokey = $self->prop('autokey');
   if ( $autokey ) {
      push @lines,
         "  If the DSN is a bareword, the word is treated as the '$autokey' key.\n";
   }

   return join('', @lines);
}

# Turns a parsed DSN into the list (connection string, user, password).
# With the 'dbidriver' property set to 'Pg' a PostgreSQL string is built
# from D, h and P; otherwise a MySQL string is built from D, F, h, P and S,
# and always ends with mysql_read_default_group=mysql.
sub get_cxn_params {
   my ( $self, $info ) = @_;
   my $opt_for = $self->{opts};
   my $driver  = $self->prop('dbidriver') || '';
   my $is_pg   = $driver eq 'Pg';

   # Only keys that have a defined value contribute a name=value part.
   my @parts;
   foreach my $key ( $is_pg ? qw(h P) : qw(F h P S) ) {
      next unless defined $info->{$key};
      push @parts, "$opt_for->{$key}->{dsn}=$info->{$key}";
   }

   my $db = $info->{D} || '';
   my $dsn;
   if ( $is_pg ) {
      $dsn = 'DBI:Pg:dbname=' . $db . ';' . join(';', @parts);
   }
   else {
      $dsn = 'DBI:mysql:' . $db . ';' . join(';', @parts)
           . ';mysql_read_default_group=mysql';
   }

   return ($dsn, $info->{u}, $info->{p});
}

1;

# ###########################################################################
# End DSNParser package
# ###########################################################################

# ###########################################################################
# VersionParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package VersionParser;

# Constructor.  The object starts out as an empty hash; version_ge() later
# stores parsed server versions in it.
sub new {
   my $class = shift;
   return bless {}, $class;
}

# Converts a version string such as '5.0.38-Ubuntu_0ubuntu1.1-log' into a
# nine-digit, zero-padded string ('005000038') that compares correctly with
# the string operators lt/ge.
# Only the first three numbers in the string are used, and missing ones
# count as 0.  Handing sprintf more or fewer arguments than its three
# placeholders raises warnings, which this file makes fatal.
sub parse {
   my ( $self, $str ) = @_;
   my @parts = $str =~ m/(\d+)/g;
   push @parts, 0 while @parts < 3;
   return sprintf('%03d%03d%03d', @parts[0 .. 2]);
}

# Returns true when the server behind $dbh runs version $target or later.
# The server's version is fetched with SELECT VERSION() on first use and
# cached in the object, keyed by the handle.
sub version_ge {
   my ( $self, $dbh, $target ) = @_;
   unless ( $self->{$dbh} ) {
      my @row = $dbh->selectrow_array('SELECT VERSION()');
      $self->{$dbh} = $self->parse(@row);
   }
   my $wanted = $self->parse($target);
   return $self->{$dbh} ge $wanted;
}

1;

# ###########################################################################
# End VersionParser package
# ###########################################################################

# ###########################################################################
# MySQLDump package 1238
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package MySQLDump;

use English qw(-no_match_vars);

# SQL preamble used by dump() in front of table and trigger definitions.  Each
# statement sits inside a MySQL version-conditional comment.  The statements
# save the session's character set, collation, time zone, unique/foreign key
# check, SQL mode and notes settings into @OLD_* user variables and then
# override them.  The substitution strips the three-space source indentation
# from every line of the heredoc.
( our $before = <<'EOF') =~ s/^   //gm;
   /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
   /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
   /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
   /*!40101 SET NAMES utf8 */;
   /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
   /*!40103 SET TIME_ZONE='+00:00' */;
   /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
   /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
   /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
   /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
EOF

# Counterpart of $before: restores the session settings from the @OLD_* user
# variables.  It is not referenced by any code in this part of the file.
( our $after = <<'EOF') =~ s/^   //gm;
   /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
   /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
   /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
   /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
   /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
   /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
   /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
   /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
EOF

# Constructor.  The object starts as an empty hash; get_create_table(),
# get_columns() and get_triggers() store their cached results in it.
sub new {
   my $class = shift;
   return bless {}, $class;
}

# Produces the SQL text that recreates one aspect of a table, chosen by $what:
#
#   'table'    - $before, DROP TABLE and the CREATE TABLE statement.  If the
#                object turns out to be a view, DROP TABLE / DROP VIEW and a
#                placeholder table from get_tmp_table() are returned instead.
#   'triggers' - CREATE TRIGGER statements for every trigger on the table,
#                wrapped in DELIMITER changes; undef if there are none.
#   'view'     - version-commented DROP TABLE / DROP VIEW and the view DDL.
#
# Dies if $what is anything else.
sub dump {
   my ( $self, $dbh, $quoter, $db, $tbl, $what ) = @_;

   if ( $what eq 'table' ) {
      my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
      my $sql;
      if ( $ddl->[0] eq 'table' ) {
         $sql  = $before;
         $sql .= 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n";
         $sql .= $ddl->[1] . ";\n";
      }
      else {
         # A view: emit a stand-in table with the same columns.
         $sql  = 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n";
         $sql .= '/*!50001 DROP VIEW IF EXISTS '
               . $quoter->quote($tbl) . "*/;\n";
         $sql .= '/*!50001 '
               . $self->get_tmp_table($dbh, $quoter, $db, $tbl) . "*/;\n";
      }
      return $sql;
   }

   if ( $what eq 'triggers' ) {
      my $trgs = $self->get_triggers($dbh, $quoter, $db, $tbl);
      return undef unless $trgs && @$trgs;

      my $sql = $before . "\nDELIMITER ;;\n";
      foreach my $trg ( @$trgs ) {
         $sql .= "/*!50003 SET SESSION SQL_MODE=\"$trg->{sql_mode}\" */;;\n"
            if $trg->{sql_mode};
         $sql .= "/*!50003 CREATE */ ";
         if ( $trg->{definer} ) {
            # Quote user and host separately, doubling embedded quotes.
            my @who = split('@', $trg->{definer}, 2);
            foreach my $part ( @who ) {
               $part =~ s/'/''/g;
               $part = "'$part'";
            }
            my ( $user, $host ) = @who;
            $sql .= "/*!50017 DEFINER=$user\@$host */ ";
         }
         $sql .= sprintf(
            "/*!50003 TRIGGER %s %s %s ON %s\nFOR EACH ROW %s */;;\n\n",
            $quoter->quote($trg->{trigger}),
            $trg->{timing},
            $trg->{event},
            $quoter->quote($trg->{table}),
            $trg->{statement});
      }
      $sql .= "DELIMITER ;\n\n/*!50003 SET SESSION SQL_MODE=\@OLD_SQL_MODE */;\n\n";
      return $sql;
   }

   if ( $what eq 'view' ) {
      my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
      my $sql = '/*!50001 DROP TABLE IF EXISTS '
              . $quoter->quote($tbl) . "*/;\n";
      $sql .= '/*!50001 DROP VIEW IF EXISTS '
            . $quoter->quote($tbl) . "*/;\n";
      $sql .= '/*!50001 ' . $ddl->[1] . "*/;\n";
      return $sql;
   }

   die "You didn't say what to dump.";
}

# Returns [ 'table', $ddl ] or [ 'view', $ddl ] for `$db`.`$tbl`, where $ddl
# is the statement reported by SHOW CREATE TABLE.  Results are cached in
# $self->{tables}, so each table is queried at most once per object.
#
# ANSI_QUOTES is removed from SQL_MODE and SQL_QUOTE_SHOW_CREATE is switched
# on while the statement runs so that identifiers come back backtick-quoted;
# both settings are restored afterwards.
sub get_create_table {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   my $cached = $self->{tables}->{$db}->{$tbl};
   return $cached if $cached;

   $dbh->do(join('',
      '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, ',
      '@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, "ANSI_QUOTES", ""), ",,", ","), ',
      '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, ',
      '@@SQL_QUOTE_SHOW_CREATE := 1 */'));
   my $sql  = 'SHOW CREATE TABLE '
            . $quoter->quote($db) . '.' . $quoter->quote($tbl);
   my $href = $dbh->selectrow_hashref($sql);
   $dbh->do(join('',
      '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, ',
      '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */'));

   # The name of the result column tells tables and views apart.
   my $type  = 'table';
   my ($key) = grep { m/create table/i } keys %$href;
   if ( !$key ) {
      $type  = 'view';
      ($key) = grep { m/create view/i } keys %$href;
   }
   $self->{tables}->{$db}->{$tbl} = [ $type, $href->{$key} ];
   return $self->{tables}->{$db}->{$tbl};
}

# Returns an arrayref with one hashref per row of SHOW COLUMNS for
# `$db`.`$tbl`.  The keys of every row are lowercased.  Results are cached in
# $self->{columns}.
sub get_columns {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   my $cached = $self->{columns}->{$db}->{$tbl};
   return $cached if $cached;

   my $sql  = "SHOW COLUMNS FROM "
            . $quoter->quote($db) . '.' . $quoter->quote($tbl);
   my $cols = $dbh->selectall_arrayref($sql, { Slice => {} });

   my @lowercased = map {
      my $src = $_;
      my %row;
      foreach my $name ( keys %$src ) {
         $row{lc $name} = $src->{$name};
      }
      \%row;
   } @$cols;

   $self->{columns}->{$db}->{$tbl} = \@lowercased;
   return $self->{columns}->{$db}->{$tbl};
}

# Returns a CREATE TABLE statement (without a trailing semicolon) that lists
# every column reported by get_columns() with its type and nothing else.
# dump() uses it as the stand-in definition for a view.
sub get_tmp_table {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   my $head = 'CREATE TABLE ' . $quoter->quote($tbl) . " (\n";
   my @defs
      = map { '  ' . $quoter->quote($_->{field}) . ' ' . $_->{type} }
        @{$self->get_columns($dbh, $quoter, $db, $tbl)};
   return $head . join(",\n", @defs) . "\n)";
}

# Returns an arrayref of the triggers defined on table $tbl in database $db,
# or undef if the table has none.  Each trigger is a hashref holding the
# columns of SHOW TRIGGERS with lowercased keys.
#
# All triggers of a database are fetched by the first call for that database
# and cached in $self->{triggers}, grouped by table name.  The session SQL
# mode and quoting settings are adjusted for the query and restored afterwards.
sub get_triggers {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   unless ( $self->{triggers}->{$db} ) {
      my %by_table;
      $self->{triggers}->{$db} = \%by_table;

      $dbh->do(join('',
         '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, ',
         '@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, "ANSI_QUOTES", ""), ",,", ","), ',
         '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, ',
         '@@SQL_QUOTE_SHOW_CREATE := 1 */'));
      my $rows = $dbh->selectall_arrayref(
         "SHOW TRIGGERS FROM " . $quoter->quote($db),
         { Slice => {} }
      );
      foreach my $row ( @$rows ) {
         my %trg;
         foreach my $name ( keys %$row ) {
            $trg{lc $name} = $row->{$name};
         }
         push @{$by_table{$trg{table}}}, \%trg;
      }
      $dbh->do(join('',
         '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, ',
         '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */'));
   }
   return $self->{triggers}->{$db}->{$tbl};
}

1;

# ###########################################################################
# End MySQLDump package
# ###########################################################################

# ###########################################################################
# TableChunker package 1267
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package TableChunker;

use POSIX qw(ceil);
use List::Util qw(min max);

# Constructor.  Returns an empty hash blessed into the given class.
sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}

# Reference date for the datetime arithmetic in range_datetime() and
# timestampdiff().
my $EPOCH = '1970-01-01';

# Column types whose chunk interval is rounded up to a whole number.
my %int_types = (
   bigint    => 1,
   date      => 1,
   datetime  => 1,
   int       => 1,
   mediumint => 1,
   smallint  => 1,
   time      => 1,
   timestamp => 1,
   tinyint   => 1,
   year      => 1,
);

# Non-integer numeric column types that can still be used for chunking.
my %real_types = (
   decimal => 1,
   double  => 1,
   float   => 1,
);

# Finds the columns of a parsed table that can be used to divide it into
# chunks.
#
# $table is a parsed table structure; its keys, type_for and cols entries are
# read.  A column qualifies if it is the first column of a BTREE index and has
# one of the types in %int_types or %real_types.  With $opts->{exact} set,
# only single-column unique indexes are considered first; if none qualifies
# the search falls back to all BTREE indexes.
#
# Returns ($can_chunk_exact, \@columns).  The first column of the primary key
# (if it qualifies) comes first, followed by the other candidates in table
# column order.
sub find_chunk_columns {
   my ( $self, $table, $opts ) = @_;
   $opts ||= {};

   my $is_numeric = sub {
      my ( $col ) = @_;
      return $int_types{$table->{type_for}->{$col}}
          || $real_types{$table->{type_for}->{$col}};
   };

   my @btree_keys = grep { $_->{type} eq 'BTREE' } values %{$table->{keys}};

   my @candidates;
   my $exact = 0;
   if ( $opts->{exact} ) {
      foreach my $key ( @btree_keys ) {
         next unless $key->{unique} && @{$key->{cols}} == 1;
         my $col = $key->{cols}->[0];
         push @candidates, $col if $is_numeric->($col);
      }
      $exact = 1 if @candidates;
   }

   if ( !@candidates ) {
      @candidates
         = grep { $is_numeric->($_) }
           map  { $_->{cols}->[0] }
           @btree_keys;
   }

   # Split off the primary key's leading column so it can go first.
   my ( @preferred, @others );
   if ( $table->{keys}->{PRIMARY} ) {
      my $pk_col = $table->{keys}->{PRIMARY}->{cols}->[0];
      foreach my $col ( @candidates ) {
         if ( $col eq $pk_col ) {
            push @preferred, $col;
         }
         else {
            push @others, $col;
         }
      }
   }
   else {
      @others = @candidates;
   }

   my $pos    = 0;
   my %pos_of = map { $_ => $pos++ } @{$table->{cols}};
   my @result = ( @preferred, sort { $pos_of{$a} <=> $pos_of{$b} } @others );

   return ($exact, \@result);
}

# Divides the value range of a column into WHERE-clause fragments.
#
# Required named arguments:
#   table         - parsed table structure (type_for and is_nullable are read)
#   col           - name of the column to chunk on
#   min, max      - smallest and largest value of the column
#   rows_in_range - number of rows between min and max (may be an estimate)
#   size          - desired number of rows per chunk
#   dbh           - database handle, used to convert temporal values
# Optional:
#   exact         - if true, the interval between chunk boundaries is size
#
# Returns a list of SQL conditions.  If the range is empty or cannot be
# determined the single condition '1=1' is returned.  Dies if a required
# argument is missing or the column type is not supported.
sub calculate_chunks {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(table col min max rows_in_range size dbh) ) {
      die "Required argument $arg not given or undefined"
         unless defined $args{$arg};
   }

   my @chunks;
   my ($range_func, $start_point, $end_point);
   my $col_type = $args{table}->{type_for}->{$args{col}};

   # Map the end points onto a number line and pick the range_* method that
   # converts positions on that line back into column values.
   if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
      $start_point = $args{min};
      $end_point   = $args{max};
      $range_func  = 'range_num';
   }
   elsif ( $col_type eq 'timestamp' ) {
      ($start_point, $end_point) = $args{dbh}->selectrow_array(
         "SELECT UNIX_TIMESTAMP('$args{min}'), UNIX_TIMESTAMP('$args{max}')");
      $range_func  = 'range_timestamp';
   }
   elsif ( $col_type eq 'date' ) {
      ($start_point, $end_point) = $args{dbh}->selectrow_array(
         "SELECT TO_DAYS('$args{min}'), TO_DAYS('$args{max}')");
      $range_func  = 'range_date';
   }
   elsif ( $col_type eq 'time' ) {
      ($start_point, $end_point) = $args{dbh}->selectrow_array(
         "SELECT TIME_TO_SEC('$args{min}'), TIME_TO_SEC('$args{max}')");
      $range_func  = 'range_time';
   }
   elsif ( $col_type eq 'datetime' ) {
      $start_point = $self->timestampdiff($args{dbh}, $args{min});
      $end_point   = $self->timestampdiff($args{dbh}, $args{max});
      $range_func  = 'range_datetime';
   }
   else {
      die "I don't know how to chunk $col_type\n";
   }

   # The conversion functions return NULL for values they cannot handle.
   if ( !defined $start_point ) {
      $start_point = 0;
   }
   if ( !defined $end_point || $end_point < $start_point ) {
      $end_point = 0;
   }

   # rows_in_range only has to be defined, so it can be 0.  Guard the
   # division; the fallback below then sets the interval to size.
   my $interval = $args{rows_in_range}
      ? $args{size} * ($end_point - $start_point) / $args{rows_in_range}
      : 0;
   if ( $int_types{$col_type} ) {
      $interval = ceil($interval);
   }
   $interval ||= $args{size};
   if ( $args{exact} ) {
      $interval = $args{size};
   }

   my $col = "`$args{col}`";
   if ( $start_point < $end_point ) {
      my ( $beg, $end );
      my $iter = 0;
      for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
         ( $beg, $end ) = $self->$range_func($args{dbh}, $i, $interval, $end_point);

         # The first chunk has no lower bound.
         if ( $iter++ == 0 ) {
            push @chunks, "$col < " . $self->quote($end);
         }
         else {
            push @chunks, "$col >= " . $self->quote($beg) . " AND $col < " . $self->quote($end);
         }
      }

      # Replace the last chunk with one that has no upper bound, and add a
      # separate chunk for NULLs if the column allows them.
      my $nullable = $args{table}->{is_nullable}->{$args{col}};
      pop @chunks;
      if ( @chunks ) {
         push @chunks, "$col >= " . $self->quote($beg);
      }
      else {
         push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
      }
      if ( $nullable ) {
         push @chunks, "$col IS NULL";
      }

   }
   else {
      push @chunks, '1=1';
   }

   return @chunks;
}

# Returns the first column reported by find_chunk_columns() for $table, or
# undef if no column can be used for chunking.
sub get_first_chunkable_column {
   my ( $self, $table, $opts ) = @_;
   my @found = $self->find_chunk_columns($table, $opts);
   my $cols  = $found[1];
   return $cols->[0];
}

# Converts a data size in bytes into a number of rows for `$db`.`$tbl`, using
# the Avg_row_length reported by SHOW TABLE STATUS.
#
# $cache, if given, is a hashref in which the status row is stored under
# $cache->{$db}->{$tbl} and reused by later calls.
#
# Returns ceil($size / avg_row_length), or undef if the table has no status
# row or no usable average row length.
sub size_to_rows {
   my ( $self, $dbh, $db, $tbl, $size, $cache ) = @_;
   my $avg_row_length;
   my $status;
   if ( !$cache || !($status = $cache->{$db}->{$tbl}) ) {
      # Escape the LIKE wildcards in a copy: the unmodified name must remain
      # the cache key, otherwise names containing '_' are never found again.
      ( my $like = $tbl ) =~ s/([_%])/\\$1/g;
      my $sth = $dbh->prepare(
         "SHOW TABLE STATUS FROM `$db` LIKE '$like'");
      $sth->execute;
      $status = $sth->fetchrow_hashref();
      if ( $cache ) {
         $cache->{$db}->{$tbl} = $status;
      }
   }
   # No row comes back when the table does not exist.
   if ( $status ) {
      my ($key) = grep { /avg_row_length/i } keys %$status;
      $avg_row_length = $status->{$key} if defined $key;
   }
   return $avg_row_length ? ceil($size / $avg_row_length) : undef;
}

# Collects the figures calculate_chunks() needs for column $col of
# `$db`.`$tbl`.
#
# Returns the list (min => ..., max => ..., rows_in_range => ...), where min
# and max are the column's smallest and largest value and rows_in_range is
# the 'rows' column of EXPLAIN for a SELECT over the whole table.
sub get_range_statistics {
   my ( $self, $dbh, $db, $tbl, $col, $opts ) = @_;
   my ( $min, $max ) = $dbh->selectrow_array(
      "SELECT MIN(`$col`), MAX(`$col`) FROM `$db`.`$tbl`");
   # The table name needs its closing backtick or the statement is invalid.
   my $expl = $dbh->selectrow_hashref(
      "EXPLAIN SELECT * FROM `$db`.`$tbl`");
   return (
      min           => $min,
      max           => $max,
      rows_in_range => $expl->{rows},
   );
}

# Wraps $val in double quotes if it contains a digit followed by ':' or '-',
# as date and time values do; any other value is returned unchanged.
sub quote {
   my ( $self, $val ) = @_;
   if ( $val =~ m/\d[:-]/ ) {
      return qq{"$val"};
   }
   return $val;
}

# Fills the placeholders of a query template for one chunk.
#
# Required named arguments: database, table, chunks (arrayref of conditions)
# and chunk_num (zero-based index into chunks).  query is the template; where
# is an optional extra condition.
#
# The first /*progress_comment*/ in the query becomes /*db.tbl:N/TOTAL*/ and
# the first /*WHERE*/ becomes a WHERE clause made of the chosen chunk and, if
# given, the extra condition.  Returns the resulting query.
sub inject_chunks {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(database table chunks chunk_num) ) {
      defined $args{$arg} or die "$arg is required";
   }

   my $chunks   = $args{chunks};
   my $progress = sprintf("/*%s.%s:%d/%d*/",
      $args{database}, $args{table},
      $args{chunk_num} + 1, scalar @$chunks);

   my $where = "WHERE (" . $chunks->[$args{chunk_num}] . ')';
   $where   .= " AND ($args{where})" if $args{where};

   my $query = $args{query};
   $query =~ s!/\*progress_comment\*/!$progress!;
   $query =~ s!/\*WHERE\*/! $where!;
   return $query;
}

# Returns the boundaries ($start, $end) of a numeric chunk that begins at
# $start and is $interval wide, capped at $max.  Both values are cut off
# after five decimal places.  Dies if the resulting end is not greater than
# the start.
sub range_num {
   my ( $self, $dbh, $start, $interval, $max ) = @_;
   my $end = min($max, $start + $interval);
   foreach my $bound ( $start, $end ) {
      $bound =~ s/\.(\d{5}).*$/.$1/;
   }
   die "Chunk size is too small: $end !> $start\n" unless $end > $start;
   return ( $start, $end );
}

# Asks the server to convert a chunk given in seconds back into TIME values.
# Returns whatever the query returns: the start and the end, the end being
# capped at $max.
sub range_time {
   my ( $self, $dbh, $start, $interval, $max ) = @_;
   my $upper = "LEAST($max, $start + $interval)";
   my $sql   = "SELECT SEC_TO_TIME($start), SEC_TO_TIME($upper)";
   return $dbh->selectrow_array($sql);
}

# Asks the server to convert a chunk given in day numbers back into DATE
# values.  Returns whatever the query returns: the start and the end, the end
# being capped at $max.
sub range_date {
   my ( $self, $dbh, $start, $interval, $max ) = @_;
   my $upper = "LEAST($max, $start + $interval)";
   my $sql   = "SELECT FROM_DAYS($start), FROM_DAYS($upper)";
   return $dbh->selectrow_array($sql);
}

# Asks the server to convert a chunk given in seconds since $EPOCH back into
# DATETIME values.  Returns whatever the query returns: the start and the
# end, the end being capped at $max.
sub range_datetime {
   my ( $self, $dbh, $start, $interval, $max ) = @_;
   my $sql =
      "SELECT DATE_ADD('$EPOCH', INTERVAL $start SECOND),
       DATE_ADD('$EPOCH', INTERVAL LEAST($max, $start + $interval) SECOND)";
   return $dbh->selectrow_array($sql);
}

# Asks the server to convert a chunk given in Unix timestamps back into
# TIMESTAMP values.  Returns whatever the query returns: the start and the
# end, the end being capped at $max.
sub range_timestamp {
   my ( $self, $dbh, $start, $interval, $max ) = @_;
   my $upper = "LEAST($max, $start + $interval)";
   my $sql   = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME($upper)";
   return $dbh->selectrow_array($sql);
}

# Returns the number of seconds between $EPOCH and the datetime $time, as
# computed by the server.
#
# The result is verified by adding it back onto $EPOCH with DATE_ADD(); the
# sub dies if that does not reproduce $time exactly.
sub timestampdiff {
   my ( $self, $dbh, $time ) = @_;
   my $diff_sql
      = "SELECT (TO_DAYS('$time') * 86400 + TIME_TO_SEC('$time')) "
      . "- TO_DAYS('$EPOCH 00:00:00') * 86400";
   my ( $diff )  = $dbh->selectrow_array($diff_sql);
   my ( $check ) = $dbh->selectrow_array(
      "SELECT DATE_ADD('$EPOCH', INTERVAL $diff SECOND)");
   if ( $check ne $time ) {
      die <<"   EOF";
   Incorrect datetime math: given $time, calculated $diff but checked to $check.
   This is probably because you are using a version of MySQL that overflows on
   large interval values to DATE_ADD().  If not, please report this as a bug.
   EOF
   }
   return $diff;
}

1;

# ###########################################################################
# End TableChunker package
# ###########################################################################

# ###########################################################################
# Quoter package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package Quoter;

# Constructor.  Returns an empty hash blessed into the given class.
sub new {
   my $class = shift;
   return bless {}, $class;
}

# Quotes each of @vals as a MySQL identifier (wrapped in backticks, embedded
# backticks doubled) and joins the results with '.'.
sub quote {
   my ( $self, @vals ) = @_;
   my @quoted;
   foreach my $val ( @vals ) {
      ( my $escaped = $val ) =~ s/`/``/g;
      push @quoted, '`' . $escaped . '`';
   }
   return join('.', @quoted);
}

1;

# ###########################################################################
# End Quoter package
# ###########################################################################

# ###########################################################################
# MySQLFind package 1230
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package MySQLFind;


use English qw(-no_match_vars);

# Constructor.  The named options become the object itself.
#
#   dbh            - required database handle
#   useddl         - if true, parser and dumper must be given as well
#   engines.views  - defaults to 1 when not defined
#   tables.status  - if true, the server's CURRENT_TIMESTAMP is fetched once
#                    and stored in $self->{timestamp}->{now}
#
# Dies if dbh is missing or useddl is requested without parser and dumper.
sub new {
   my ( $class, %opts ) = @_;
   my $self = bless \%opts, $class;
   if ( !defined $self->{engines}->{views} ) {
      $self->{engines}->{views} = 1;
   }
   $opts{dbh} or die "Specify dbh";
   if ( $opts{useddl} && !( $opts{parser} && $opts{dumper} ) ) {
      die "Specifying useddl requires parser and dumper";
   }
   if ( $opts{tables}->{status} ) {
      my ( $now ) = $opts{dbh}->selectrow_array('SELECT CURRENT_TIMESTAMP');
      $self->{timestamp}->{now} = $now;
   }
   return $self;
}

# Returns the names of the databases that pass the 'databases' filters.
# information_schema and lost+found are always left out.
sub find_databases {
   my ( $self ) = @_;
   my @listed = $self->_fetch_db_list();
   my @wanted = $self->_filter('databases', sub { $_[0] }, @listed);
   return grep { !m/^(information_schema|lost\+found)$/i } @wanted;
}

# Returns the database names reported by SHOW DATABASES, restricted by the
# LIKE pattern in $self->{databases}->{like} if one is set.
sub _fetch_db_list {
   my ( $self ) = @_;
   my $like = $self->{databases}->{like};
   my ( $sql, @params )
      = $like ? ( 'SHOW DATABASES LIKE ?', $like )
      :         ( 'SHOW DATABASES' );
   my $sth = $self->{dbh}->prepare($sql);
   $sth->execute( @params );
   return map { $_->[0] } @{$sth->fetchall_arrayref()};
}

# Returns the names of the tables in $opts{database} that pass the 'tables'
# and 'engines' filters.  Views are dropped unless $self->{engines}->{views}
# is true.  Every criterion in $self->{tables}->{status} (a list of one-pair
# hashrefs, column => test) must also pass _test_date().
sub find_tables {
   my ( $self, %opts ) = @_;
   my $views = $self->{engines}->{views};

   my @tables = $self->_fetch_tbl_list(%opts);
   @tables = $self->_filter('tables',  sub { $_[0]->{name} },   @tables);
   @tables = $self->_filter('engines', sub { $_[0]->{engine} }, @tables);

   if ( !$views ) {
      @tables = grep { $_->{engine} ne 'VIEW' } @tables;
   }

   foreach my $crit ( @{$self->{tables}->{status}} ) {
      my ($key, $test) = %$crit;
      my @passed;
      foreach my $tbl ( @tables ) {
         push @passed, $tbl if $self->_test_date($tbl, $key, $test);
      }
      @tables = @passed;
   }

   return map { $_->{name} } @tables;
}

# Returns one hashref per table in $opts{database}; each has at least the
# keys name and engine.
#
# If status criteria are configured, or engine filters are configured and
# useddl is off, the list comes from SHOW TABLE STATUS.  All status columns
# are then kept with lowercased keys, and engine falls back to the type or
# comment column when empty.  Otherwise SHOW FULL TABLES is used: views get
# the engine 'VIEW', and the engine of other tables is looked up through the
# parser and dumper only when an engine filter is configured (it is ''
# otherwise).  $self->{tables}->{like} restricts either query.
#
# Dies if no database is given.
sub _fetch_tbl_list {
   my ( $self, %opts ) = @_;
   die "database is required" unless $opts{database};
   my $need_engine = $self->{engines}->{permit}
        || $self->{engines}->{reject}
        || $self->{engines}->{regexp};
   my $need_status = $self->{tables}->{status};

   # Appends the optional LIKE clause, runs the statement and returns the
   # executed statement handle.
   my $run = sub {
      my ( $sql ) = @_;
      my @params;
      if ( $self->{tables}->{like} ) {
         $sql .= ' LIKE ?';
         push @params, $self->{tables}->{like};
      }
      my $sth = $self->{dbh}->prepare($sql);
      $sth->execute(@params);
      return $sth;
   };

   if ( $need_status || ($need_engine && !$self->{useddl}) ) {
      my $sth = $run->("SHOW TABLE STATUS FROM "
                     . $self->{quoter}->quote($opts{database}));
      my @rows = @{$sth->fetchall_arrayref({})};
      my @tables;
      foreach my $row ( @rows ) {
         my %tbl;
         foreach my $name ( keys %$row ) {
            $tbl{lc $name} = $row->{$name};
         }
         $tbl{engine} ||= $tbl{type} || $tbl{comment};
         delete $tbl{type};
         push @tables, \%tbl;
      }
      return @tables;
   }

   my $sth = $run->("SHOW /*!50002 FULL*/ TABLES FROM "
                  . $self->{quoter}->quote($opts{database}));
   my @rows = @{$sth->fetchall_arrayref()};
   my @tables;
   foreach my $row ( @rows ) {
      my $engine = '';
      if ( ($row->[1] || '') eq 'VIEW' ) {
         $engine = 'VIEW';
      }
      elsif ( $need_engine ) {
         my $struct = $self->{parser}->parse(
            $self->{dumper}->get_create_table(
               $self->{dbh}, $self->{quoter}, $opts{database}, $row->[0]));
         $engine = $struct->{engine};
      }
      push @tables, { name => $row->[0], engine => $engine };
   }
   return @tables;
}

# Returns the elements of @vals that pass the filters configured under
# $self->{$thing}.  $sub extracts the value to test from an element; an
# undefined value is treated as the empty string.  An element is kept if its
# value is not in the reject hash, is in the permit hash (when there is one)
# and matches the regexp (when there is one).
sub _filter {
   my ( $self, $thing, $sub, @vals ) = @_;
   my $permit = $self->{$thing}->{permit};
   my $reject = $self->{$thing}->{reject};
   my $regexp = $self->{$thing}->{regexp};
   my @kept;
   for ( @vals ) {
      my $val = $sub->($_);
      $val = '' if !defined $val;
      next if $reject && $reject->{$val};
      next if $permit && !$permit->{$val};
      next if $regexp && $val !~ m/$regexp/;
      push @kept, $_;
   }
   return @kept;
}

# Tests a date/time property of a table status row against an age in seconds.
#
# $table is a hashref with lowercased keys, $prop names the column and $test
# is a number of seconds with an optional '+' or '-' prefix.  The cutoff is
# the time stored in $self->{timestamp}->{now} minus that many seconds; it is
# computed by the server and cached per number of seconds.
#
#   '-N' passes if the value is later than or equal to the cutoff
#   '+N' passes if the value is earlier than or equal to the cutoff
#   'N'  passes only if the value equals the cutoff
#
# Returns $self->{nullpass} if the table has no value for the property.
# Dies if $test is malformed.
sub _test_date {
   my ( $self, $table, $prop, $test ) = @_;
   $prop = lc $prop;
   if ( !defined $table->{$prop} ) {
      return $self->{nullpass};
   }
   my ( $equality, $num ) = $test =~ m/^([+-])?(\d+)$/;
   die "Invalid date test $test for $prop" unless defined $num;
   # The sign is optional.  Without this default an unsigned test would
   # compare undef with eq, which is a fatal warning in this file.
   $equality = '' unless defined $equality;
   ($self->{timestamp}->{$num})
      ||= $self->{dbh}->selectrow_array(
         "SELECT DATE_SUB('$self->{timestamp}->{now}', INTERVAL $num SECOND)");
   my $time = $self->{timestamp}->{$num};
   return
         ( $equality eq '-' && $table->{$prop} gt $time )
      || ( $equality eq '+' && $table->{$prop} lt $time )
      || (                     $table->{$prop} eq $time );
}

1;

# ###########################################################################
# End MySQLFind package
# ###########################################################################
package main;

use DBI;
use English qw(-no_match_vars);
use List::Util qw(max);
use POSIX qw(ceil);

our $VERSION = '1.1.20';
our $DISTRIB = '1316';
# Extract the number from the Subversion keyword.  The match has to be taken
# in list context and sliced: as the left operand of || it would be evaluated
# in scalar context and yield 1 (matched) instead of the captured number.
# Falls back to 0 when the keyword contains no digits.
our $SVN_REV = sprintf("%d", (q$Revision: 1311 $ =~ m/(\d+)/)[0] || 0);

# Do not buffer STDOUT.
$OUTPUT_AUTOFLUSH = 1;

# ############################################################################
# Get configuration information.
# ############################################################################

# Command-line option table handed to OptionParser below.  Each entry has two
# keys:
#   s - the option specification: long name, optional '|short' alias and a
#       type or negation suffix ('=s', '=i', '!', ...).
#   d - the description of the option.
# NOTE(review): the suffixes '=a', '=h', '=H' and '=m' are not standard
# Getopt::Long types; presumably OptionParser interprets them itself (lists,
# hashes, time values) -- confirm against that package.
my @opt_spec = (
   { s => 'algorithm|a=s',
     d => 'Checksum algorithm (ACCUM|CHECKSUM|BIT_XOR)' },
   { s => 'askpass',
     d => 'Prompt for username and password for connections' },
   { s => 'checksum',
     d => 'Print checksums and table names in the style of '
        . 'md5sum (disables --count)' },
   { s => 'chunksize|C=s',
     d => 'Approximate number of rows or size of '
        . 'data to checksum at a time; allowable '
        . 'suffixes are k, M, G (disallows -a CHECKSUM)' },
   { s => 'columns=a',
     d => 'Do only this comma-separated list of columns' },
   { s => 'databases|d=h',
     d => 'Do only this comma-separated list of databases' },
   { s => 'defaults-file|F=s',
     d => 'Only read mysql options from the given file' },
   { s => 'emptyrepltbl',
     d => 'Empty table given by --replicate before starting' },
   { s => 'engine|e=h',
     d => 'Do only this comma-separated list of storage engines' },
   { s => 'explain',
     d => 'Show, but do not execute, checksum queries' },
   { s => 'explainhosts',
     d => 'Print connection information and exit' },
   { s => 'float-precision=i',
     d => 'Precision for FLOAT and DOUBLE column comparisons' },
   { s => 'function|f=s',
     d => 'Hash function for checksums (SHA1, MD5...)' },
   { s => 'ignoredb|g=H',
     d => 'Ignore this comma-separated list of databases' },
   { s => 'ignoreengine|E=H',
     d => 'Ignore this comma-separated list of storage engines '
        . '(default FEDERATED,MRG_MyISAM)' },
   { s => 'ignoretbl|n=H',
     d => 'Ignore this comma-separated list of tables' },
   { s => 'lock|k',
     d => 'Lock table on master until done on slaves (implies -l)' },
   { s => 'count|r!',
     d => 'Count rows in tables.  This is built into '
        . 'ACCUM and BIT_XOR, but requires an extra query for CHECKSUM' },
   { s => 'crc|c!',
     d => 'Do a CRC (checksum) of tables (default)' },
   { s => 'optxor|o!',
     d => 'Optimize BIT_XOR with user variables (default)'},
   { s => 'password|p=s',
     d => 'Password to use when connecting' },
   { s => 'port|P=i',
     d => 'Port number to use for connection' },
   { s => 'quiet|q',
     d => 'Do not print checksum results' },
   { s => 'recursecheck',
     d => 'Do --replcheck recursively (implies --replcheck)' },
   { s => 'replcheck',
     d => 'Connect to slaves and check results (requires --replicate)' },
   { s => 'replicate|R=s',
     d => 'Replicate checksums to slaves (disallows -a CHECKSUM)' },
   { s => 'slavelag|l',
     d => 'Report how far slaves lag master' },
   { s => 'separator|s=s',
     d => 'Separator for CONCAT_WS() (default #)' },
   { s => 'sleep=i',
     d => 'Sleep time between checksums' },
   { s => 'sleep-coef=f',
     d => 'Sleep time as a multiple of last checksum time' },
   { s => 'socket|S=s',
     d => 'Socket file to use for connection' },
   { s => 'tab|b',
     d => 'Print tab-separated output' },
   { s => 'tables|t=h',
     d => 'Do only this comma-separated list of tables' },
   { s => 'user|u=s',
     d => 'User for login if not current user' },
   { s => 'verify|v!',
     d => 'Verify checksum compatibility across servers (default)' },
   { s => 'wait|w=m',
     d => 'Wait this long for slaves to catch up to their master '
        . '(implies -kl).  Possible suffixes are s/m/h/d.' },
   { s => 'where|W=s',
     d => 'Do only rows matching this WHERE clause (disallows -a CHECKSUM)' },
);

# Helper objects for version checks and DSN handling.  A bareword DSN is
# treated as the host ('h') part, and every DSN must have a host.
my $vp         = new VersionParser();
my $dsn_parser = new DSNParser();
$dsn_parser->prop('autokey', 'h');
$dsn_parser->prop('required', { h => 1 } );

# Configure the option parser (prompt and description are the texts set for
# the usage message) and parse the command line into %opts.
# NOTE(review): the meaning of 'strict' is defined by OptionParser, which is
# not part of this section -- confirm there.
my $opt_parser = new OptionParser(@opt_spec);
$opt_parser->{dsn}    = $dsn_parser;
$opt_parser->{strict} = 0;
$opt_parser->{prompt} = '[OPTION]... HOST [HOST...]';
$opt_parser->{descr}
   = 'checksums MySQL tables efficiently on one or more HOSTs.  Each HOST is '
   . 'specified as a DSN and missing values are inherited from the first '
   . 'HOST.  If you specify multiple HOSTs, the first is assumed to be the '
   . 'master.';
my %opts = $opt_parser->parse();

# Post-process command-line options and arguments.
# Apply the implications stated in the option descriptions: --wait implies
# --lock, --lock implies --slavelag, --recursecheck implies --replcheck.
$opts{k} ||= defined $opts{w};
$opts{l} ||= $opts{k};
$opts{replcheck} ||= $opts{recursecheck};

if ( !@ARGV ) {
   $opt_parser->error("No hosts specified.");
}

# Every remaining argument is a host DSN.  $hosts[0] is passed along with each
# one; the program description says missing values are inherited from the
# first HOST.  It is still undefined while the first DSN itself is parsed.
my @hosts;
foreach my $arg ( unique(@ARGV) ) {
   push @hosts, $dsn_parser->parse($arg, $hosts[0], \%opts);
}

# --explainhosts: print how each host would be connected to, then stop.
if ( $opts{explainhosts} ) {
   foreach my $host ( @hosts ) {
      print "Server $host->{h}:\n   ", $dsn_parser->as_string($host), "\n";
   }
   exit(0);
}

# --checksum disables --count.
if ( $opts{checksum} ) {
   $opts{r} = 0;
}

# --explain looks at the first host only.
if ( $opts{explain} ) {
   @hosts = $hosts[0];
}

# Check option combinations, unless only the help text was requested.
if ( !$opts{help} ) {
   if ( $opts{replcheck} && !$opts{R} ) {
      $opt_parser->error("--replcheck requires --replicate.");
   }
   elsif ( $opts{R} && @hosts > 1 ) {
      $opt_parser->error("You can only specify one host with --replicate.");
   }
   # A chunk size is a positive integer with an optional k, M or G suffix.
   if ( $opts{C} && $opts{C} !~ m/^[1-9]\d*[MGk]?$/ ) {
      $opt_parser->error("Invalid --chunksize argument");
   }
}

$opt_parser->usage_or_errors(%opts);

# ############################################################################
# Ready to work now.
# ############################################################################
# $main_dbh is the connection to the first host.  The database names given
# with --databases (or an empty string) are passed on to get_dbh().
# $exit_status becomes the exit code of the --replcheck run below.
my $tc          = new TableChecksum();
my $exit_status = 0;
my $main_dbh    = get_dbh($hosts[0], ($opts{d} ? keys %{$opts{d}} : '' ));

# ############################################################################
# Check replication slaves if desired.
# ############################################################################
if ( $opts{replcheck} ) {
   # Columns of the difference report; also the keys read from each
   # difference record.
   my @headers = qw(db tbl chunk cnt_diff crc_diff boundaries);
   $tc->check_server(
      {  dbh        => $main_dbh,
         dsn        => $hosts[0],
         dsn_parser => $dsn_parser,
         # NOTE(review): undef with --recursecheck, otherwise 1; presumably a
         # recursion depth limit -- confirm in TableChecksum::check_server.
         recurse    => $opts{recursecheck} ? undef : 1,
         table      => $opts{R},
         # Receives a host and the differences found on it.  Sets the exit
         # status to 1 and, unless --quiet, prints them as a table whose db
         # and tbl columns are sized to fit the widest value.
         callback   => sub {
            my ( $host, @differences ) = @_;
            $exit_status = 1;
            if ( !$opts{q} ) {
               print "Differences on " . $dsn_parser->as_string($host) . "\n";
               my $max_db   = max(5, map { length($_->{db})  } @differences);
               my $max_tbl  = max(5, map { length($_->{tbl}) } @differences);
               my $fmt      = "%-${max_db}s %-${max_tbl}s %5s %8s %8s %s\n";
               printf($fmt, map { uc } @headers);
               foreach my $diff ( @differences ) {
                  printf($fmt, @{$diff}{@headers});
               }
               print "\n";
            }
         }
      }
   );
   # With --replcheck no checksumming is done; the program ends here.
   exit($exit_status);
}

# ############################################################################
# Otherwise do the checksums.
# ############################################################################
# Let TableChecksum choose the checksum algorithm from the requested one and
# the options passed here.
my $strat = $tc->best_algorithm(
   algorithm   => $opts{a},
   vp          => $vp,
   dbh         => $main_dbh,
   where       => $opts{W},
   chunk       => $opts{C},
   replicate   => $opts{R},
   count       => $opts{r},
);

# Tell the user if the algorithm asked for was not the one chosen.
if ( $opts{a} && $opts{a} ne $strat ) {
   warn "--algorithm=$opts{a} can't be used; falling back to $strat\n";
}

# ############################################################################
# If using a cryptographic hash strategy, decide what hash function to use, and
# if using BIT_XOR, whether and which slice to place the user variable in.
# ############################################################################
# $func is the hash function in use (set only for hash algorithms),
# $opt_slice is the result of TableChecksum::optimize_xor() for the --optxor
# optimization, and $crc_wid is the width of the checksum column in the
# output (at least 16 characters).
my $func;
my $opt_slice;
my $crc_wid = 16;
if ( $tc->is_hash_algorithm($strat) ) {
   $func = $tc->choose_hash_func(
      func => $opts{f},
      dbh  => $main_dbh,
   );
   if ( $opts{f} && $opts{f} ne $func ) {
      warn "Checksum function $opts{f} cannot be used; using $func\n";
   }
   # Widen the column to the length of the value the hash function returns
   # for a sample input.
   $crc_wid = max(16, length(
      $main_dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0]));

   if ( $opts{o} && $strat eq 'BIT_XOR' ) {
      $opt_slice = $tc->optimize_xor(dbh => $main_dbh, func => $func);
      # An undefined slice means the optimization is not possible.
      if ( !defined $opt_slice ) {
         warn "Cannot use --optxor, disabling.\n";
         $opts{o} = 0;
      }
   }
}

# ############################################################################
# Verify that CONCAT_WS is compatible across all servers.  On older versions
# of MySQL it skips both empty strings and NULL; on newer just NULL.
# ############################################################################
# Run the same CONCAT_WS() checksum on every host and compare the results.
# If they are not all equal, die with a table of host, checksum and server
# version for each host.
if ( $opts{v} && @hosts > 1 ) {
   my @verify_sums;
   foreach my $host ( @hosts ) {
      my $dbh = get_dbh($host, ($opts{d} ? keys %{$opts{d}} : '' ));
      my $cks = $dbh->selectall_arrayref(
         "SELECT MD5(CONCAT_WS(',', '1', ''))")->[0]->[0];
      push @verify_sums, {
         host => $host->{h},
         ver  => $dbh->{mysql_serverinfo},
         sum  => $cks
      };
   }
   # More than one distinct checksum means the servers disagree.
   if ( unique(map { $_->{sum} } @verify_sums ) > 1 ) {
      # Width of the host column: the longest host name.
      my $max = max(map { length($_->{h}) } @hosts);
      die "Not all servers have compatible versions.  Some return different\n"
         . "checksum values for the same query, and cannot be compared.  This\n"
         . "behavior changed in MySQL 4.0.14.  Here is info on each host:\n\n"
         . join("\n",
            map {
               sprintf("%-${max}s %-32s %s", @{$_}{qw(host sum ver)})
            }
               { host => 'HOST', sum => 'CHECKSUM', ver => 'VERSION'},
               @verify_sums
         )
         . "\n\nYou can disable this check with --noverify.\n";
   }
}

# ############################################################################
# Check for existence and privileges on the replication table before starting,
# and prepare the statements that will be used to update it.
# ############################################################################
# Statement handles for reading back and updating rows of the --replicate
# table; they stay undefined without --replicate.
my ($fetch_sth, $update_sth);
if ( $opts{R} ) {
   # A statement that touches the table without changing any rows (LIMIT 0);
   # per the section header it serves as the existence and privilege check.
   $main_dbh->do("REPLACE INTO $opts{R} SELECT * FROM $opts{R} LIMIT 0");
   if ( $opts{emptyrepltbl} ) {
      # Clean out the replication table.
      # Only the rows for the databases and tables selected on the command
      # line are deleted, or all rows if nothing was selected.
      my $del_sql = "DELETE FROM $opts{R}";
      if ( $opts{d} ) {
         $del_sql .= " WHERE db IN("
                  . join(',', map { $main_dbh->quote($_) } keys %{$opts{d}})
                  . ')';
      }
      if ( $opts{t} ) {
         $del_sql .= $opts{d} ? ' AND ' : ' WHERE ';
         $del_sql .= "tbl IN("
                  . join(',', map { $main_dbh->quote($_) } keys %{$opts{t}})
                  . ')';
      }
      $main_dbh->do($del_sql);
   }
   # Reads the checksum and row count stored for one chunk.
   $fetch_sth = $main_dbh->prepare(
      "SELECT this_crc, this_cnt FROM $opts{R} "
      . "WHERE db = ? AND tbl = ? AND chunk = ?");
   # Sets the master_crc and master_cnt columns for one chunk.
   $update_sth = $main_dbh->prepare(
      "UPDATE $opts{R} SET master_crc = ?, master_cnt = ? "
      . "WHERE db = ? AND tbl = ? AND chunk = ?");
}

# ############################################################################
# Iterate through databases and tables and do the checksums.
# ############################################################################
# Output format with the checksum left-justified to $crc_wid characters,
# followed by three dot-separated strings and a number.
my $md5sum_fmt = "%-${crc_wid}s  %s.%s.%s.%d\n";
# Helper objects: identifier quoting, DDL parsing, DDL retrieval and chunking.
my $q      = new Quoter();
my $tp     = new TableParser();
my $du     = new MySQLDump();
my $ch     = new TableChunker();
# Finds the databases and tables to work on.  The permit/reject lists come
# from the command-line options; views are excluded; with useddl on, storage
# engines are determined from the table DDL through $tp and $du.
my $finder = new MySQLFind(
   dbh       => $main_dbh,
   quoter    => $q,
   useddl    => 1,
   parser    => $tp,
   dumper    => $du,
   databases => {
      permit => $opts{d},
      reject => $opts{g},
   },
   tables => {
      permit => $opts{t},
      reject => $opts{n},
   },
   engines => {
      views  => 0,
      permit => $opts{e},
      reject => $opts{E},
   },
);

# Main driver.  For every database the finder reports:
#   1. collect its tables, parsing each table definition and pre-computing the
#      chunk WHERE clauses (a single '1=1' chunk unless --chunksize applies);
#   2. design and print the output header;
#   3. checksum every chunk of every table, either through the replication
#      table ($opts{R}) or directly on each host, forking one child per host
#      when more than one host was given.
DATABASE:
foreach my $database ( $finder->find_databases() ) {
   my @tables;
   foreach my $table ( $finder->find_tables(database => $database) ) {

      # Skip the table in which checksums are stored.
      if ( !$opts{R} || $opts{R} ne "$database.$table" ) {

         eval { # Catch errors caused by tables being dropped during work.

            my $struct = $tp->parse(
               $du->get_create_table($main_dbh, $q, $database, $table));

            my @chunks = '1=1'; # Default.
            if ( $opts{C} ) {
               # Figure out whether the chunksize is a number of rows or a
               # data size. TODO: this is also in parallel-dump, put it into
               # optionparser? or put it into the chunker?
               my $rows_per_chunk;
               # The suffix is optional: a bare number is a row count, while
               # a k/M/G suffix makes it a data size.
               my ( $num, $suffix ) = $opts{C} =~ m/^(\d+)([MGk])?$/;
               if ( $suffix ) {
                  # Figure out how many rows fit into this many bytes
                  my $size = $suffix eq 'k' ? 1_024
                           : $suffix eq 'M' ? 1_024 * 1_024
                           :                  1_024 * 1_024 * 1_024;
                  $rows_per_chunk = $ch->size_to_rows(
                     $main_dbh, $database, $table, $size * $num);
               }
               else {
                  $rows_per_chunk = $num;
               }

               if ( $rows_per_chunk ) {
                  # Get the chunk column candidates and calculate chunks for
                  # this table.
                  my $col   = $ch->get_first_chunkable_column($struct);
                  if ( $col ) {
                     my %params = $ch->get_range_statistics(
                        $main_dbh, $database, $table, $col);

                     # Only chunk when every range statistic is available.
                     if ( !grep { !defined $params{$_} }
                           qw(min max rows_in_range) )
                     {
                        @chunks = $ch->calculate_chunks(
                           dbh      => $main_dbh,
                           table    => $struct,
                           col      => $col,
                           size     => $rows_per_chunk,
                           %params,
                        );
                     }
                  }
               }
            }

            push @tables, {
               struct   => $struct,
               database => $database,
               table    => $table,
               chunks   => \@chunks,
            };

         };
         if ( $EVAL_ERROR ) {
            print_err($EVAL_ERROR, $database, $table);
         }

      }
   }

   next DATABASE unless @tables;

   # Design and print header
   my ($hdr, $explain);
   if ( $opts{b} ) {
      $hdr = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n";
      $explain = "%s\t%s\t%s\n";
   }
   else {
      my $max_tbl  = max(5, map { length($_->{table}) } @tables);
      my $max_db   = max(8, length($database));
      my $max_host = max(4, map { length($_->{h}) } @hosts);
      $hdr         = "%-${max_db}s %-${max_tbl}s %5s "
                   . "%-${max_host}s %-6s %10s %${crc_wid}s %4s %4s %4s %4s\n";
      $explain     = "%-${max_db}s %-${max_tbl}s %s\n";
   }

   my @hdr_args = qw(DATABASE TABLE CHUNK HOST ENGINE
                     COUNT CHECKSUM TIME WAIT STAT LAG);
   printf($hdr, @hdr_args)
      unless ($opts{q} || $opts{explain} || $opts{checksum});

   TABLE:
   foreach my $table ( @tables ) {
      eval {

         # ##################################################################
         # The query is independent of the chunk, so I make it once for every
         # one.
         # ##################################################################
         my $query = $tc->make_checksum_query(
            dbname    => $database,
            tblname   => $table->{table},
            table     => $table->{struct},
            quoter    => $q,
            algorithm => $strat,
            func      => $func,
            crc_wid   => $crc_wid,
            opt_slice => $opt_slice,
            cols      => $opts{columns},
            sep       => $opts{s},
            replicate => $opts{R},
         );

         # ##################################################################
         # This loop may seem suboptimal, because it causes a new child to be
         # forked for each table, for each host, for each chunk.  It also
         # causes the program to parallelize only within the chunk; that is,
         # no two child processes are running on different chunks at a time.
         # This is by design.  It lets me unlock the table on the master
         # between chunks.
         # ##################################################################
         foreach my $chunk_num ( 0 .. scalar(@{$table->{chunks}}) - 1 ) {

            my $chunk_start_time = time;

            if ( $opts{R} ) { # We're in --replicate mode.
               do_tbl_replicate($hosts[0], $query, $table,
                  $chunk_num, $hdr, $explain);
            }

            else {

               # Lock table and get position on the master.
               if ( !$opts{explain} ) {
                  if ( $opts{k} ) {
                     # $table is the per-table info hash here, so the name
                     # to quote is $table->{table}.
                     $main_dbh->do( "LOCK TABLES "
                        . $q->quote($database, $table->{table}) . " READ");
                  }
                  if ( defined $opts{w} ) {
                     # Merge the SHOW MASTER STATUS columns into the table
                     # info; do_tbl() reads File and Position from it.
                     my $master_status
                        = $main_dbh->selectrow_hashref('SHOW MASTER STATUS');
                     @{$table}{keys %$master_status} = values %$master_status;
                  }
               }

               # PIDs of the forked children.  Tracked by PID rather than by
               # host name so that two host definitions sharing a host name
               # (e.g. same host, different ports) are both waited for.
               my @child_pids;
               foreach my $i ( 0 .. $#hosts ) {
                  my $is_master = $i == 0; # First host is assumed to be master.
                  my $host      = $hosts[$i];

                  # Open a single connection for each host.  Re-use the
                  # connection for the master/single host.
                  if ( $is_master ) {
                     $host->{dbh} ||= $main_dbh;
                  }
                  else {
                     $host->{dbh} ||= get_dbh($host);
                  }

                  # Fork, but only if there's more than one host.
                  my $pid = @hosts > 1 ? fork() : undef;
                  if ( @hosts == 1 || (defined($pid) && $pid == 0) ) {
                     # Do the work (I'm a child, or there's only one host)
                     eval {
                        do_tbl($host, $query, $table, $chunk_num,
                               $is_master, $hdr, $explain);
                     };
                     if ( $EVAL_ERROR ) {
                        print_err($EVAL_ERROR, $database, $table->{table},
                           $dsn_parser->as_string($host));
                        exit(1) if @hosts > 1; # exit only if I'm a child
                     }
                     exit(0) if @hosts > 1; # exit only if I'm a child
                  }
                  elsif ( @hosts > 1 && !defined($pid) ) {
                     die("Unable to fork!");
                  }
                  # I already exited if I'm a child, so I'm the parent.
                  push @child_pids, $pid if @hosts > 1;
               }

               # Wait for the children to exit.  The first nonzero child exit
               # code becomes the program's exit status.
               foreach my $child_pid ( @child_pids ) {
                  waitpid($child_pid, 0);
                  $exit_status ||= $CHILD_ERROR >> 8;
               }
               $main_dbh->do("UNLOCK TABLES") if ($opts{k} && !$opts{explain});
            }

            # Throttle between chunks: a fixed sleep takes precedence over a
            # sleep proportional to the time the chunk just took.
            if ( $opts{sleep} && !$opts{explain} ) {
               sleep($opts{sleep});
            }
            elsif ( $opts{'sleep-coef'} && !$opts{explain} ) {
               my $sleep_time
                  = ceil( (time - $chunk_start_time) * $opts{'sleep-coef'} );
               sleep($sleep_time);
            }

         }
      };
      if ( $EVAL_ERROR ) {
         print_err($EVAL_ERROR, $database, $table->{table});
      }
   }
}

# Checksums one chunk of one table in --replicate mode, using the main
# connection only.
#
# Arguments:
#   $host      - host info hashref; only its {h} entry is used, for output
#   $query     - checksum query template, completed by inject_chunks()
#   $info      - table info hashref (database, table, chunks, struct)
#   $chunk_num - index into $info->{chunks}
#   $hdr       - printf format for a normal result line
#   $explain   - printf format for --explain output
#
# In --explain mode the query (first chunk only) and the chunk's WHERE clause
# are printed and nothing is executed.  Otherwise the checksum statement is
# run, the stored this_crc/this_cnt values are read back and copied into
# master_crc/master_cnt, and one result line is printed unless --quiet.
sub do_tbl_replicate {
   my ( $host, $query, $info, $chunk_num, $hdr, $explain ) = @_;
   my $dbh = $main_dbh;
   $dbh->do("USE " . $q->quote($info->{database}) );
   my $cnt = 'NULL';
   my $crc = 'NULL';
   my $beg = time();
   my $sql = $ch->inject_chunks(
      query     => $query,
      database  => $info->{database},
      table     => $info->{table},
      chunks    => $info->{chunks},
      chunk_num => $chunk_num,
      where     => $opts{W},
   );

   my $where = $info->{chunks}->[$chunk_num];
   if ( $opts{explain} ) {
      if ( $chunk_num == 0 ) {
         printf($explain, @{$info}{qw(database table)}, $sql);
      }
      printf($explain, @{$info}{qw(database table)}, $where);
      return;
   }

   # Reset the accumulator variables; on servers that understand the
   # versioned comment, also force statement-based logging for this session.
   $dbh->do('SET @crc := "", @cnt := 0 /*!50108 , '
          . '@@binlog_format := "STATEMENT"*/');
   $dbh->do($sql, {}, @{$info}{qw(database table)}, $chunk_num, $where);
   $fetch_sth->execute(@{$info}{qw(database table)}, $chunk_num);
   ( $crc, $cnt ) = $fetch_sth->fetchrow_array();
   $update_sth->execute($crc, $cnt, @{$info}{qw(database table)}, $chunk_num);

   my $end = time();
   $crc  ||= 'NULL';
   # A missing row or a NULL count must print as 'NULL'; an undefined value
   # would be a fatal warning in printf.  A count of 0 is kept as is.
   $cnt = 'NULL' unless defined $cnt;

   return if $opts{q};

   if ( $opts{checksum} ) {
      printf($md5sum_fmt, $crc, $host->{h},
         @{$info}{qw(database table)}, $chunk_num);
   }
   else {
      # TIME is the elapsed time; WAIT, STAT and LAG do not apply here.
      printf($hdr,
         @{$info}{qw(database table)}, $chunk_num,
         $host->{h}, $info->{struct}->{engine}, $cnt, $crc,
         $end - $beg, 'NULL', 'NULL', 'NULL');
   }
   return;
}

# Checksums (and/or counts) one chunk of one table on one host and prints the
# result line.
#
# Arguments:
#   $host      - host info hashref; {dbh} is the connection, {h} the name shown
#   $query     - checksum query built by the caller
#   $info      - table info hashref (database, table, chunks, struct, plus the
#                File/Position entries merged in by the caller for --wait)
#   $chunk_num - index into $info->{chunks}
#   $is_master - true for the first host; slaves may wait and report lag
#   $hdr       - printf format for a normal result line
#   $explain   - printf format for --explain output
#
# Nothing is printed here when --quiet or --explain is in effect (the helper
# subs do their own --explain printing).
sub do_tbl {
   my ( $host, $query, $info, $chunk_num, $is_master, $hdr, $explain ) = @_;
   my $dbh = $host->{dbh};
   $dbh->do("USE " . $q->quote($info->{database}) );
   my $cnt = 'NULL';
   my $crc = 'NULL';
   my $sta = 'NULL';
   my $lag = 'NULL';
   my $beg = time();

   # I'm a slave.  Wait to catch up to the master.  Calculate slave lag.
   if ( !$is_master && defined $opts{w} && !$opts{explain} ) {
      my $sql = $ch->inject_chunks(
         query     => "SELECT /*progress_comment*/ MASTER_POS_WAIT("
               . "'$info->{File}', $info->{Position}, $opts{w})",
         database  => $info->{database},
         table     => $info->{table},
         chunks    => $info->{chunks},
         chunk_num => $chunk_num,
      );
      $sta = $dbh->selectall_arrayref($sql)->[0]->[0];
      $sta = 'NULL' unless defined $sta;
   }
   if ( !$is_master && $opts{l} && !$opts{explain} ) {
      my $res = $dbh->selectrow_hashref("SHOW SLAVE STATUS");
      $lag = $res && defined $res->{Seconds_Behind_Master}
           ? $res->{Seconds_Behind_Master}
           : 'NULL';
   }

   # Time the checksum operation and the wait-for-master operation separately.
   my $mid = time();

   # Do the checksum operation.
   if ( $strat eq 'CHECKSUM' ) {
      if ( $opts{c} ) {
         $crc = do_checksum($dbh, $info, $query, $explain);
      }
      if ( $opts{r} ) {
         $cnt = do_count($dbh, $info, $chunk_num, $explain);
      }
   }
   elsif ( $opts{c} ) {
      ( $cnt, $crc ) = do_var_crc($dbh, $query, $info, $chunk_num, $explain);
      $crc ||= 'NULL';
   }
   else {
      $cnt = do_count($dbh, $info, $chunk_num, $explain);
   }

   # The helpers can hand back undef (for example a NULL checksum).  Print
   # 'NULL' in that case; an undefined value would be a fatal warning in
   # printf.  Only undef is replaced, so a checksum or count of 0 survives.
   $crc = 'NULL' unless defined $crc;
   $cnt = 'NULL' unless defined $cnt;

   my $end = time();
   if ( !$opts{q} && !$opts{explain} ) {
      if ( $opts{checksum} ) {
         printf($md5sum_fmt, $crc, $host->{h},
            @{$info}{qw(database table)}, $chunk_num);
      }
      else {
         # TIME is the checksum time, WAIT the time spent before it.
         printf($hdr,
            @{$info}{qw(database table)}, $chunk_num,
            $host->{h}, $info->{struct}->{engine}, $cnt, $crc,
            $end - $mid, $mid - $beg, $sta, $lag);
      }
   }
   return;
}

# End of the main program.  Subroutine definitions are compiled before the
# program runs, so this statement executes once the code above has finished.
# In this part of the file $exit_status is set from the exit codes of forked
# children; it may also be set elsewhere.
exit $exit_status;

# ############################################################################
# Subroutines
# ############################################################################

# Opens and returns a new DBI connection for one host definition.
#
# $info is the host's option hashref.  When --askpass is in effect and the
# host has no password yet, the user is prompted and the answer is stored
# back into $info->{p}.  The handle raises errors instead of printing them,
# and AutoCommit is switched off only when --lock ($opts{k}) is in effect.
sub get_dbh {
   my ( $info ) = @_;

   $info->{p} = OptionParser::prompt_noecho("Enter password for $info->{h}: ")
      if $opts{askpass} && !defined $info->{p};

   my %attr = (
      RaiseError => 1,
      PrintError => 0,
      AutoCommit => ( $opts{k} ? 0 : 1 ),
   );

   my $dbh = DBI->connect($dsn_parser->get_cxn_params($info), \%attr);

   # Prevent destroying on fork.
   $dbh->{InactiveDestroy} = 1;
   return $dbh;
}

# Runs the user-variable based checksum query for one chunk and returns the
# list (count, checksum) taken from the first result row.
#
# In --explain mode nothing is executed: the query is printed for the first
# chunk only, then the chunk's WHERE clause, and an empty list is returned.
sub do_var_crc {
   my ( $dbh, $query, $info, $chunk_num, $explain ) = @_;
   my @db_tbl = @{$info}{qw(database table)};

   my $sql = $ch->inject_chunks(
      query     => $query,
      database  => $info->{database},
      table     => $info->{table},
      chunks    => $info->{chunks},
      chunk_num => $chunk_num,
      where     => $opts{W},
   );

   if ( !$opts{explain} ) {
      # Reset the accumulator variables before running the query.
      $dbh->do('set @crc := "", @cnt := 0');
      my $rows  = $dbh->selectall_arrayref($sql, { Slice => {} });
      my $first = $rows->[0];
      return ($first->{cnt}, $first->{crc});
   }

   printf($explain, @db_tbl, $sql) if $chunk_num == 0;
   printf($explain, @db_tbl, $info->{chunks}->[$chunk_num]);
   return;
}

# Runs the ready-made checksum query for one table and returns the checksum.
#
# Arguments:
#   $dbh     - connection to run the query on
#   $info    - table info hashref; database and table are used for --explain
#   $query   - the complete checksum statement
#   $explain - printf format for --explain output
#
# Returns the checksum value, or the string 'NULL' when the server returns no
# row, no checksum column, or a NULL checksum.  In --explain mode the query
# is printed instead of executed and nothing is returned.
sub do_checksum {
   my ( $dbh, $info, $query, $explain ) = @_;

   if ( $opts{explain} ) {
      printf($explain, @{$info}{qw(database table)}, $query);
      return;
   }

   my $res = $dbh->selectrow_hashref($query);
   return 'NULL' unless $res;

   # The checksum column is located by name, ignoring case.
   my ($key) = grep { m/checksum/i } keys %$res;
   return 'NULL' unless defined $key && defined $res->{$key};
   return $res->{$key};
}

# Counts the rows of one table with SELECT COUNT(*), restricted by --where
# ($opts{W}) when given, and returns the count.
#
# Arguments:
#   $dbh       - connection to run the query on
#   $info      - table info hashref; database and table name the table
#   $chunk_num - accepted but not used: the count is not limited to a chunk
#   $explain   - printf format for --explain output
#
# In --explain mode the query is printed instead of executed and nothing is
# returned.
sub do_count {
   my ( $dbh, $info, $chunk_num, $explain ) = @_;
   my $sql = "SELECT COUNT(*) FROM "
      . $q->quote(@{$info}{qw(database table)})
      . ($opts{W} ? " WHERE $opts{W}" : '');

   if ( $opts{explain} ) {
      printf($explain, @{$info}{qw(database table)}, $sql);
      return;
   }

   return $dbh->selectall_arrayref($sql)->[0]->[0];
}

# Returns the arguments with duplicates removed, keeping the first occurrence
# of each value in its original position.  In scalar context the result is
# the number of distinct values.
sub unique {
   my ( %seen, @distinct );
   foreach my $item ( @_ ) {
      next if $seen{$item}++;
      push @distinct, $item;
   }
   return @distinct;
}

# Tries to extract the MySQL error message and print it
# Tries to extract the MySQL error message and print it to STDERR.
#
# Arguments:
#   $msg      - the error text (callers pass $EVAL_ERROR); nothing happens
#               when it is undefined
#   $database - optional; with $table, appended as "while doing db.tbl"
#   $table    - optional table name
#   $host     - optional host description, appended as "on host"
#
# The message is reduced to "<error> at line <n>" when it has the usual
# "... failed: <error> at <file> line <n>" shape, and whitespace runs are
# collapsed to single spaces.
sub print_err {
   my ( $msg, $database, $table, $host ) = @_;
   return if !defined $msg;

   # Honor --quiet in the (common?) event of dropped tables or deadlocks.
   # The test uses the message that was passed in, not the global
   # $EVAL_ERROR, which may have changed since the caller captured it.
   return if $opts{q}
      && $msg =~ m/: Table .*? doesn't exist|Deadlock found/;

   $msg =~ s/^.*?failed: (.*?) at \S+ line (\d+).*$/$1 at line $2/s;
   $msg =~ s/\s+/ /g;
   if ( $database && $table ) {
      $msg .= " while doing $database.$table";
   }
   if ( $host ) {
      $msg .= " on $host";
   }
   print STDERR $msg, "\n";
   return;
}

# ############################################################################
# Documentation
# ############################################################################
=pod

=head1 NAME

mk-table-checksum - Perform an online replication consistency check, or
checksum MySQL tables efficiently on one or many servers.

=head1 SYNOPSIS

   mk-table-checksum --replicate=mydb.checksum master-host
   ... time passes, replication catches up ...
   mk-table-checksum --replicate=mydb.checksum --replcheck master-host

Or,

   mk-table-checksum h=host1,u=user,p=password h=host2 ...

Or,

   mk-table-checksum host1 host2 ... hostN | mk-checksum-filter

See L<"SPECIFYING HOSTS"> for more on the syntax of the host arguments.

=head1 OVERVIEW

mk-table-checksum generates table checksums for MySQL tables, typically
useful for verifying your slaves are in sync with the master.  The checksums
are generated by a query on the server, and there is very little network
traffic as a result.

Checksums typically take about twice as long as COUNT(*) on very large InnoDB
tables in my tests.  For smaller tables, COUNT(*) is a good bit faster than
the checksums.  See L<"--algorithm"> for more details on performance.

If you specify more than one server, mk-table-checksum assumes the first
server is the master and others are slaves.  Checksums are parallelized for
speed, forking off a child process for each table.  Duplicate server names are
ignored, but if you want to checksum a server against itself you can use two
different forms of the hostname (for example, "localhost 127.0.0.1", or
"h=localhost,P=3306 h=localhost,P=3307").

mk-table-checksum only examines table structure on the first host specified,
so if anything differs on the others, it won't notice.  It ignores views.

The checksums work on MySQL version 3.23.58 through 6.0-alpha.

=head1 SPECIFYING HOSTS

mk-table-checksum connects to a theoretically unlimited number of MySQL
servers.  You specify a list of one or more host definitions on the command
line, such as "host1 host2".  Each host definition can be just a hostname, or it
can be a complex string that specifies connection options as well.  You can
specify connection options two ways:

=over

=item *

Format a host definition in a key=value,key=value form.  If an argument on the
command line contains the letter '=', mk-table-checksum will parse it into
its component parts.  Examine the L<"--help"> output for details on the allowed
keys.

Specifying a list of simple host definitions "host1 host2" is equivalent to the
more complicated "h=host1 h=host2" format.

=item *

With the command-line options such as L<"--user"> and L<"--password">.  These
options, if given, apply globally to all host definitions.

=back

In addition to specifying connection options this way, mk-table-checksum
allows shortcuts.  Any options specified for the first host definition on the
command line fill in missing values in subsequent ones.  Any options that are
still missing after this are filled in from the command-line options if
possible.

In other words, the places you specify connection options have precedence:
highest precedence is the option specified directly in the host definition, next
is the option specified in the first host definition, and lowest is the
command-line option.

You can mix simple and complex host definitions and/or command-line arguments.
For example, if all your servers except one of your slaves use a non-standard
port number:

   mk-table-checksum --port 4500 master h=slave1,P=3306 slave2 slave3

If you are confused about how mk-table-checksum will connect to your servers,
give the L<"--explainhosts"> option and it will tell you.

=head1 OPTIONS

Many options are enabled by default and can be disabled by prefixing with --no.

=over

=item --algorithm

Specifies which checksum algorithm to use.  Valid arguments are CHECKSUM,
BIT_XOR and ACCUM.  The latter two do cryptographic hash checksums.

CHECKSUM is built into MySQL, but has some disadvantages.  BIT_XOR and ACCUM are
implemented by SQL queries.  They use a cryptographic hash of all columns
concatenated together with a separator, followed by a bitmap of each nullable
column that is NULL (necessary because CONCAT_WS() skips NULL columns).

CHECKSUM is the default.  This method uses MySQL's built-in CHECKSUM TABLE
command.  It cannot be used before MySQL 4.1.1, and various options disable it
as well.  It does not simultaneously count rows; that requires an extra COUNT(*)
query.  This is a good option when you are using MyISAM tables with live
checksums enabled; in this case both the COUNT(*) and CHECKSUM queries will run
very quickly.  CHECKSUM TABLE is about 20% faster than the other two
algorithms, even on InnoDB tables, if you are not doing COUNT(*).

The BIT_XOR algorithm is available for MySQL 4.1.1 and newer.  It uses
BIT_XOR(), which is order-independent, to reduce all the rows to a single
checksum.  It runs within an order of magnitude of COUNT(*) on InnoDB tables;
on large tables it's typically about half as fast as COUNT(*).

ACCUM uses a user variable as an accumulator.  It reduces each row to a single
checksum, which is concatenated with the accumulator and re-checksummed.  This
technique is order-dependent.  If the table has a primary key, it will be used
to order the results for consistency; otherwise it's up to chance.  It tends to
use a little less CPU and run a little faster than the BIT_XOR algorithm.

The ACCUM algorithm has two possible advantages over BIT_XOR: speed (there may
be fewer cryptographic hash operations and it may read less data) and possibly
fewer collisions.  The pathological worst case is where identical rows will
cancel each other out in the BIT_XOR.  In this case you will not be able to
distinguish a table full of one value from a table full of another value.  The
ACCUM algorithm will distinguish them.

However, the ACCUM algorithm is order-dependent, so if you have two tables
with identical data but the rows are out of order, you'll get different
checksums with ACCUM.

Choose your (mild) poison.  Each algorithm is very good in reality.  If a given
algorithm won't work for some reason, mk-table-checksum falls back to
another.  The least common denominator is ACCUM, which works on MySQL 3.23.2 and
newer.

=item --askpass

Prompt for a password for each host for which no password is given.

=item --checksum

Makes the output behave more like the output of C<md5sum>.  The checksum is
first on the line, followed by the host, database, table, and chunk number,
concatenated with dots.  Disables L<"--count">.

=item --chunksize

If you specify a chunk size, mk-table-checksum will try to find an index that
will let it split the table into ranges of approximately L<"--chunksize">
rows, based on the table's index statistics.  It will checksum each range
separately with parameters in the checksum query's WHERE clause.

If mk-table-checksum cannot find a suitable index, it will do the entire
table in one chunk as though you had not specified L<"--chunksize"> at all.
Each table is handled individually, so some tables may be chunked and others
not.

The chunks will be approximately sized, and depending on the distribution of
values in the indexed column, some chunks may be larger than the value you
specify.

If you specify a suffix (one of k, M or G), the parameter is treated as a data
size rather than a number of rows.  The output of SHOW TABLE STATUS is then used
to estimate the amount of data the table contains, and convert that to a number
of rows.

=item --columns

A comma-separated list of columns to checksum.  Other columns will be
ignored.TODO: test

=item --count

Count the rows as well as taking their checksum. This is disabled by default to
avoid an extra COUNT(*) query when L<"--algorithm"> is CHECKSUM.  For other
algorithms, you get a count for free.  If you have only MyISAM tables and live
checksums are enabled, both CHECKSUM and COUNT will be very fast, but otherwise
you may want to use one of the other algorithms.

=item --crc

Take the checksum of the rows as well as their count.  This is enabled by
default.  If you disable it, you'll just get COUNT(*) queries.

=item --databases

Only checksum this comma-separated list of databases.

=item --defaults-file

If you specify this option, only this file is read for MySQL default options;
otherwise all the default files will be read.

=item --emptyrepltbl

Issues a DELETE against the table given by L<"--replicate"> before beginning
work.  Ignored if L<"--replicate"> is not specified.  This can be useful to
remove entries related to tables that no longer exist, or just to clean out the
results of a previous run.

If you specify L<"--databases"> or L<"--tables">, mk-table-checksum will
construct a WHERE clause for the DELETE statement, so only matching rows will
be deleted.

=item --engine

Only checksum tables whose storage engine is in this comma-separated list.
You can use this to restrict the checksum to InnoDB, for example.

=item --explain

Print checksum queries and WHERE clauses (if chunking is enabled) for each
table, but do not execute the queries.

=item --explainhosts

Print out a list of hosts to which mk-table-checksum will connect, with all
the various connection options, and exit.  See L<"SPECIFYING HOSTS">.

=item --float-precision

If you specify this option, FLOAT and DOUBLE columns will be rounded to the
specified number of digits after the decimal point for the checksum.  This can
avoid checksum mismatches due to different floating-point representations of
the same values on different MySQL versions and hardware.

=item --function

You can use this option to choose the cryptographic hash function used for
L<"--algorithm">=ACCUM or L<"--algorithm">=BIT_XOR.  The default is to use SHA1,
but MD5 is also a good choice, and you can use your own function, such as a
compiled UDF, if you wish.  Whatever function you specify is run in SQL, not
in Perl, so it must be available to MySQL.

=item --help

Displays a help message.

=item --ignoredb

Use this option to skip a comma-separated list of databases.

=item --ignoreengine

Use this option to skip a comma-separated list of storage engines (table types).

=item --ignoretbl

Use this option to skip a comma-separated list of tables.

=item --lock

This option can help you to get a consistent read on a master and many slaves.
If you specify this option, mk-table-checksum will lock the table on the
first server on the command line, which it assumes to be the master.  It will
keep this lock until the checksums complete on the other servers.

This option isn't very useful by itself, so you probably want to use L<"--wait">
instead.

=item --optxor

This option, which is enabled by default, specifies to use user variables to
reduce the number of times each row must be passed through the cryptographic
hash function when you are using the BIT_XOR algorithm.

With the optimization, the queries look like this in pseudo-code:

  SELECT CONCAT(
     BIT_XOR(SLICE_OF(@user_variable)),
     BIT_XOR(SLICE_OF(@user_variable)),
     ...
     BIT_XOR(SLICE_OF(@user_variable := HASH(col1, col2... colN))));

The exact positioning of user variables and calls to the hash function is
determined dynamically, and will vary between MySQL versions.  Without the
optimization, it looks like this:

  SELECT CONCAT(
     BIT_XOR(SLICE_OF(MD5(col1, col2... colN))),
     BIT_XOR(SLICE_OF(MD5(col1, col2... colN))),
     ...
     BIT_XOR(SLICE_OF(MD5(col1, col2... colN))));

The difference is the number of times all the columns must be mashed together
and fed through the hash function.  If you are checksumming really large
columns, such as BLOB or TEXT columns, this might make a big difference.

=item --password

The password to use when connecting.

=item --port

The port number to use for the connection.

=item --recursecheck

Recursively runs L<"--replcheck"> to check the entire replication subtree
rooted at the given master.

=item --replcheck

Connects to the master and runs SHOW SLAVE HOSTS, then connects to each slave
of the master, runs the query shown in L<"CONSISTENT CHECKSUMS">, and prints
results.  Exits after printing.  This is just a convenient way of running the
query so you don't have to do it manually.

The output is one informational line per slave host, followed by the results
of the query, if any.  If L<"--quiet"> is specified, there is no output.

Requires L<"--replicate"> to be specified so it knows which table to query.
Connection information for each slave is derived from the same
default-and-override method described in L<"SPECIFYING HOSTS">.  The host and
port from SHOW SLAVE HOSTS are combined into "h=host,P=port" and used as the
argument.

This requires the @@SERVER_ID system variable, so it works only on MySQL
3.23.26 or newer.

If any slave has chunks that differ from the master, mk-table-checksum's
exit status is 1; otherwise it is 0.

=item --replicate

This option enables a completely different checksum strategy for a consistent,
lock-free checksum across a master and its slaves.  This works only with
statement-based replication (mk-table-checksum will switch the binlog format
to STATEMENT for the duration of the session if your server uses row-based
replication).  Instead of running the checksum queries on each server, you run
it only on the master.  You specify a table to insert the results into.  The
query will insert directly into the table, so it will be replicated through the
binlog to the slaves.

The argument to the option is the table in which the checksums should be stored.
The table must have at least these columns: db, tbl, chunk, boundaries,
this_crc, master_crc, this_cnt, master_cnt.  Here is a suggested table
structure:

  CREATE TABLE checksum (
     db         char(64)     NOT NULL,
     tbl        char(64)     NOT NULL,
     chunk      int          NOT NULL,
     boundaries char(64)     NOT NULL,
     this_crc   char(40)     NOT NULL,
     this_cnt   int          NOT NULL,
     master_crc char(40)         NULL,
     master_cnt int              NULL,
     ts         timestamp    NOT NULL,
     PRIMARY KEY (db, tbl, chunk)
  );

Be sure to choose an appropriate storage engine for the checksum table.  If you
are checksumming InnoDB tables, for instance, a deadlock will break replication
if the checksum table is non-transactional, because the transaction will still
be written to the binlog.  It will then replay without a deadlock on the
slave and break replication with "different error on master and slave."  This
is not a problem with mk-table-checksum, it's a problem with MySQL
replication, and you can read more about it in the MySQL manual.

When the queries are finished replicating, you can run a simple query on each
slave to see which tables have differences from the master.  See L<"CONSISTENT
CHECKSUMS"> for details.  If you find tables that have differences, you can use
the chunk boundaries in a WHERE clause to mk-table-sync to help repair
them more efficiently.  See L<mk-table-sync> for details.

This option eliminates the need to do complicated locking and unlocking, waiting
for master binlog positions, and so on.  It disables L<"--lock">, L<"--wait">,
and L<"--slavelag">.

The checksum queries actually do a REPLACE into this table, so existing rows
need not be removed before running.  However, you may wish to do this anyway to
remove rows related to tables that don't exist anymore.  The L<"--emptyrepltbl">
option does this for you.

Since mk-table-checksum uses USE to select the table's database as its
default database before executing the checksum query, the checksum queries
should replicate to slaves even if --binlog-do-db settings on the master filter
out the checksum table's database.  For more information on how --binlog-do-db
works, see L<http://dev.mysql.com/doc/en/binary-log.html>.

If the slaves have any --replicate-do-X or --replicate-ignore-X options, you
should be careful not to checksum any databases or tables that exist on the
master and not the slaves.  Changes to such tables may not normally be executed
on the slaves because of the --replicate-X options, but the checksum queries
change the checksum table, not the tables they checksum.  Therefore these
queries will be executed on the slave, and if the table or database does not
exist, they will cause replication to fail.  For more information on replication
rules, see L<http://dev.mysql.com/doc/en/replication-rules.html>.

The table specified by L<"--replicate"> will never be checksummed itself.

=item --separator

This option controls the separator character used for CONCAT_WS() when taking
row checksums with user-variables.

=item --slavelag

If this option is enabled, the output will show how many seconds behind the
master each slave is.  This can be useful when you want a fast, parallel,
non-blocking checksum, and you know your slaves might lag the master.  You can
inspect the results and make an educated guess whether any discrepancies on the
slave are due to slave lag instead of corrupt data.

=item --sleep

If this option is specified, mk-table-checksum will sleep the specified
number of seconds between checksums.  That is, it will sleep between every
table, and if you specify L<"--chunksize">, it will also sleep between chunks.

=item --sleep-coef

If this option is specified, mk-table-checksum will sleep the amount of
time elapsed during the previous checksum, multiplied by the specified
coefficient.  This option is ignored if L<"--sleep"> is specified.

=item --socket

The socket file to use for the connection.

=item --tab

Instead of column-aligned output, print tab-separated output.

=item --tables

Restrict checksums to this comma-separated list of tables.

=item --user

MySQL user account to use for the connection.

=item --verify

This option is enabled by default.  It runs a trivial checksum on all servers to
ensure they have compatible CONCAT_WS() and cryptographic hash functions.

Versions of MySQL before 4.0.14 will skip empty strings and NULLs in
CONCAT_WS, and others will only skip NULLs.  The two kinds of behavior will
produce different results if you have any columns containing the empty string
in your table.  If you know you don't (for instance, all columns are
integers), you can safely disable this check and you will get a reliable
checksum even on servers with different behavior.

=item --version

Output version information and exit.

=item --wait

This option helps you get a consistent checksum across a master server and its
slaves.  It combines locking and waiting to accomplish this.  First it locks the
table on the master (the first server on the command line).  Then it finds the
master's binlog position.  Checksums on slaves will be deferred until they reach
the same binlog position.

The argument to the option is the number of seconds to wait for the slaves to
catch up to the master.  It is actually the argument to MASTER_POS_WAIT().  If
the slaves don't catch up to the master within this time, they will unblock
and go ahead with the checksum.  You can tell whether this happened by
examining the STAT column in the output, which is the return value of
MASTER_POS_WAIT().

=item --where

You can use this option to limit the checksum to only part of the table.  This
is particularly useful if you have append-only tables and don't want to
constantly re-check all rows; you could run a daily job to just check
yesterday's rows, for instance.

This option is much like the -w option to mysqldump.  Do not specify the WHERE
keyword.  You may need to quote the value.  Here is an example:

  mk-table-checksum --where "foo=bar"

=back

=head1 CONSISTENT CHECKSUMS

If you are using this tool to verify your slaves still have the same data as the
master, which is why I wrote it, you should read this section.

The best way to do this with replication is to use the L<"--replicate"> option.
When the queries are finished running on the master and its slaves, you can go
to the slaves and issue SQL queries to see if any tables are different from the
master.  Try the following:

  SELECT db, tbl, chunk, this_cnt-master_cnt AS cnt_diff,
     this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc)
        AS crc_diff
  FROM checksum
  WHERE master_cnt <> this_cnt OR master_crc <> this_crc
     OR ISNULL(master_crc) <> ISNULL(this_crc);

The L<"--replcheck"> option can do this query for you.  If you can't use this
method, try the following:

=over

=item *

If your servers are not being written to, you can just run the tool with no
further ado:

  mk-table-checksum server1 server2 ... serverN

=item *

If the servers are being written to, you need some way to make sure they are
consistent at the moment you run the checksums.  For situations other than
master-slave replication, you will have to figure this out yourself.  You may be
able to use the L<"--where"> option with a date or time column to only checksum
data that's not recent.

=item *

If you are checksumming a master and slaves, you can do a fast parallel
checksum and assume the slaves are caught up to the master.  In practice, this
tends to work well except for tables which are constantly updated.  You can
use the L<"--slavelag"> option to see how far behind each slave was when it
checksummed a given table.  This can help you decide whether to investigate
further.

=item *

The next most disruptive technique is to lock the table on the master, then take
checksums.  This should prevent changes from propagating to the slaves.  You can
just lock on the master (with L<"--lock">), or you can both lock on the master
and wait on the slaves till they reach that point in the master's binlog
(L<"--wait">).  Which is better depends on your workload; only you know that.

=item *

If you decide to make the checksums on the slaves wait until they're guaranteed
to be caught up to the master, the algorithm looks like this:

 For each table,
   Master: lock table
   Master: get pos
   In parallel,
     Master: checksum
     Slave(s): wait for pos, then checksum
   End
   Master: unlock table
 End

=back

What I typically do when I'm not using the L<"--replicate"> option is simply run
the tool on all servers with no further options.  This runs fast, parallel,
non-blocking checksums simultaneously.  If there are tables that look different,
I re-run with L<"--wait">=600 on the tables in question.  This makes the tool
lock on the master as explained above.

=head1 OUTPUT

Output is to STDOUT, one line per server and table, with header lines for each
database.  I tried to make the output easy to process with awk.  For this reason
columns are always present.  If there's no value, mk-table-checksum prints
'NULL'.

The default is column-aligned output for human readability, but you can change
it to tab-separated if you want.  Use the L<"--tab"> option for this.

Output is unsorted, though all lines for one table should be output together.
For speed, all checksums are done in parallel (as much as possible) and may
complete out of the order in which they were started.  You might want to run
them through another script or command-line utility to make sure they are in the
order you want.  If you pipe the output through L<mk-checksum-filter>, you
can sort the output and/or avoid seeing output about tables that have no
differences.

The columns in the output are as follows.  The database, table, and chunk come
first so you can sort by them easily (they are the "primary key").

Output from L<"--replcheck"> and L<"--checksum"> is different.

=over

=item DATABASE

The database the table is in.

=item TABLE

The table name.

=item CHUNK

The chunk (see L<"--chunksize">).  Zero if you are not doing chunked checksums.

=item HOST

The server's hostname.

=item ENGINE

The table's storage engine.

=item COUNT

The table's row count, unless you specified to skip it.

=item CHECKSUM

The table's checksum, unless you specified to skip it or the table has no
rows.  Some types of checksums will be 0 if there are no rows; others will
print NULL.

=item TIME

The time the actual checksum and/or counting took.

=item WAIT

How long the checksum blocked before beginning.

=item STAT

The return value of MASTER_POS_WAIT().

=item LAG

How far the slave lags the master, as reported by SHOW SLAVE STATUS.

=back

=head1 EXIT STATUS

A successful exit status is 0.  If there is an error checksumming any table,
the exit status is 1.

When running L<"--replcheck">, if any slave has chunks that differ from the
master, the exit status is 1.

=head1 QUERIES

If you are using innotop (see L<http://innotop.sourceforge.net/>),
mytop, or another tool to watch currently running MySQL queries, you may see
the checksum queries.  They look similar to this:

  REPLACE /*test.test_tbl:'2'/'5'*/ INTO test.checksum(db, ...

Since mk-table-checksum's queries run for a long time and tend to be
textually very long, and thus won't fit on one screen of these monitoring
tools, I've been careful to place a comment at the beginning of the query so
you can see what it is and what it's doing.  The comment contains the name of
the table that's being checksummed, the chunk it is currently checksumming,
and how many chunks will be checksummed.  In the case above, it is
checksumming chunk 2 of 5 in table test.test_tbl.

=head1 SEE ALSO

See also L<mk-checksum-filter> and L<mk-table-sync>.

=head1 BUGS

Please use the Sourceforge bug tracker, forums, and mailing lists to request
support or report bugs: L<http://sourceforge.net/projects/maatkit/>.

=head1 SYSTEM REQUIREMENTS

You need Perl, DBI, DBD::mysql, and some core packages that ought to be
installed in any reasonably new version of Perl.

=head1 AUTHOR

Baron "Xaprb" Schwartz.

=head1 ACKNOWLEDGEMENTS

This is an incomplete list.  My apologies for omissions or misspellings.

Claus Jeppesen,
Francois Saint-Jacques,
Giuseppe Maxia,
Heikki Tuuri,
James Briggs,
Martin Friebe,
Sergey Zhuravlev.

=head1 COPYRIGHT, LICENSE AND WARRANTY

This program is copyright (c) 2007 Baron Schwartz.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 VERSION

This manual page documents Ver 1.1.20 Distrib 1316 $Revision: 1311 $.

=cut
