package Debian::AptContents;

use strict;
use warnings;

=head1 NAME

Debian::AptContents - parse/search through apt-file's Contents files

=head1 SYNOPSIS

    my $c = Debian::AptContents->new( { homedir => '~/.dh-make-perl' } );
    my @pkgs = $c->find_file_packages('/usr/bin/foo');
    my $dep = $c->find_perl_module_package('Foo::Bar');

=head1 TODO

This needs to really work not only for Perl modules.

A module specific to Perl modules is needed by dh-make-perl, but it can
subclass Debian::AptContents, which needs to become more generic.

=cut

use base qw(Class::Accessor);
__PACKAGE__->mk_accessors(
    qw(
        cache homedir cache_file contents_dir contents_files verbose
        source sources_file dist
    )
);

use Debian::Dependency;
use Debian::Version qw(deb_ver_cmp);
use File::Spec::Functions qw( catfile catdir splitpath );
use IO::Uncompress::Gunzip;
use Module::CoreList ();
use Storable;

=head1 CONSTRUCTOR

=over

=item new

Constructs new instance of the class. Expects at least C<homedir> option.

=back

=head1 FIELDS

=over

=item homedir

(B<mandatory>) Directory where the object stores its cache.

=item contents_dir

Directory where L<apt-file> stores Contents files are stored. Default is
F</var/cache/apt/apt-file>

=item sources_file

Path to the F<sources.list> file. Default is F</etc/apt/sources.list>.

=item dist

Used for filtering on the C<distributon> part of the repository paths listed in
L<sources.list>. Default is empty, meaning no filtering.

=item contents_files

Arrayref of F<Contents> file names. Default is to parse C<sources_file> and to
look in C<contents_dir> for matching files.

=item cache_file

Path to the file with cached parsed information from all F<Contents> files.
Default is F<Contents.cache> under C<homedir>.

=item cache

Filled by C<read_cache>. Used by C<find_file_packages> and (obviously)
C<store_cache>

=item verbose

Verbosity level. 0 means silent, the bigger the more the jabber. Default is 1.

=back

=cut

sub new
{
    my $class = shift;
    $class = ref($class) if ref($class);
    my $self = $class->SUPER::new(@_);

    # required options
    $self->homedir
        or die "No homedir given";

    # some defaults
    $self->contents_dir( '/var/cache/apt/apt-file' )
        unless $self->contents_dir;
    $self->sources_file('/etc/apt/sources.list')
        unless defined( $self->sources_file );
    $self->contents_files( $self->get_contents_files )
        unless $self->contents_files;
    $self->cache_file( catfile( $self->homedir, 'Contents.cache' ) )
        unless $self->cache_file;
    $self->verbose(1) unless defined( $self->verbose );

    $self->read_cache();

    return $self;
}

=head1 OBJECT METHODS

=over

=item warning

Used internally. Given a verbosity level and a message, prints the message to
STDERR if the verbosity level is greater than or equal of the value of
C<verbose>.

=cut

sub warning
{
    my( $self, $level, $msg ) = @_;

    warn "$msg\n" if $self->verbose >= $level;
}

=item repo_source_to_contents_path

Given a line with Deban package repository path (typically taken from
F<sources.list>), converts it to the correspondinf F<Contents> file name.

=cut

sub repo_source_to_contents_path {
    my ( $self, $source ) = @_;

    my ( $schema, $uri, $dist, @extra ) = split /\s+/, $source;
    my ( $proto, $host, $port, $dir ) = $uri =~ m{
	^
        (?:([^:/?\#]+):)?                      # proto
        (?://
                ([^:/?\#]*)                    # host
                (?::(\d+))?                    # port
        )?
        ([^?\#]*)                              # path
    }x;

    unless ( defined $schema ) {
        $self->warning( 1, "'$_' has unknown format" );
        next;
    }

    return unless $schema eq 'deb';

    if ( $self->dist ) {
        if ( $self->dist =~ /^\s*{\s*(.+)\s*}\s*$/ ) {
            return unless grep { /^$dist$/ } split(/\s*,\s*/, $1);
        } else {
            return if $dist ne $self->dist;
        }
    }

    $host ||= '';   # set empty string if $host is undef
    $dir ||= '';    # deb http://there sid main

        s{/$}{} for( $host, $dir, $dist );  # remove trailing /
        s{^/}{} for( $host, $dir, $dist );  # remove initial /
        s{/}{_}g for( $host, $dir, $dist ); # replace remaining /

    return ( $host . "_" . join( "_", $dir||(), "dists", $dist ) );
}

=item get_contents_files

Reads F<sources.list>, gives the repository paths to
C<repo_source_to_contents_path> and returns an arrayref of file names of
Contents files.

=cut

sub get_contents_files
{
    my $self = shift;

    my $sources = IO::File->new( $self->sources_file, 'r' )
        or die "Unable to open '" . $self->sources_file . "': $!\n";

    my $archspec = `dpkg --print-architecture`;
    chomp($archspec);

    my @res;

    while( <$sources> ) {
        chomp;
        s/#.*//;
        s/^\s+//;
        s/\s+$//;
        next unless $_;

        my $path = $self->repo_source_to_contents_path($_);

        next unless $path;

        # try all of with/out architecture and
        # un/compressed
        for my $a ( '', "-$archspec" ) {
            for my $c ( '', '.gz' ) {
                my $f = catfile(
                    $self->contents_dir,
                    "${path}_Contents$a$c",
                );
                push @res, $f if -e $f;
            }
        }
    }

    return [ sort @res ];
}

=item read_cache

Reads the cached parsed F<Contents> files. If there are F<Contents> files with
more recent mtime than that of the cache (or if there is no cache at all),
parses all F<Contents> and stores the cache via C<store_cache> for later
invocation.

=cut

sub read_cache {
    my $self = shift;

    my $cache;

    if ( -r $self->cache_file ) {
        $cache = eval { Storable::retrieve(  $self->cache_file ) };
        undef($cache) unless ref($cache) and ref($cache) eq 'HASH';
    }

    # see if the cache is stale
    if ( $cache and $cache->{stamp} and $cache->{contents_files} ) {
        undef($cache)
            unless join( '><', @{ $self->contents_files } ) eq
                join( '><', @{ $cache->{contents_files} } );

        # file lists are the same?
        # see if any of the files has changed since we
        # last read it
        if ( $cache ) {
            for ( @{ $self->contents_files } ) {
                if ( ( stat($_) )[9] > $cache->{stamp} ) {
                    undef($cache);
                    last;
                }
            }
        }
    }
    else {
        undef($cache);
    }

    unless ($cache) {
        $self->source('parsed files');
        $cache->{stamp}          = time;
        $cache->{contents_files} = [];
        $cache->{apt_contents}   = {};
        for ( @{ $self->contents_files } ) {
            push @{ $cache->{contents_files} }, $_;
            my $f = /\.gz$/
                ? IO::Uncompress::Gunzip->new($_)
                : IO::File->new( $_, 'r' );

            unless ($f) {
                warn "Error reading '$_': $!\n";
                next;
            }

            $self->warning( 1, "Parsing $_ ..." );
            my $capturing = 0;
            my $line;
            while ( defined( $line = $f->getline ) ) {
                if ($capturing) {
                    my ( $file, $packages ) = split( /\s+/, $line );
                    next unless $file =~ s{
                        ^usr/
                        (?:share|lib)/
                        (?:perl\d+/             # perl5/
                        | perl/(?:\d[\d.]+)/   # or perl.5.10/
                        )
                    }{}x;
                    $cache->{apt_contents}{$file} = $packages;

                    # $packages is a comma-separated list of
                    # section/package items. We'll parse it when a file
                    # matches. Otherwise we'd parse thousands of entries,
                    # while checking only a couple
                }
                else {
                    $capturing = 1 if $line =~ /^FILE\s+LOCATION/;
                }
            }
        }

        if ( %{ $cache->{apt_contents} } ) {
            $self->cache($cache);
            $self->store_cache;
        }
    }
    else {
        $self->source('cache');
        $self->warning( 1,
            "Using cached Contents from "
            . localtime( $cache->{stamp} )
        );

        $self->cache($cache);
    }
}

=item store_cache

Writes the contents of the parsed C<cache> to the C<cache_file>.

Storable is used to stream the data. Along woth the information from
F<Contents> files, a timestamp is stored.

=cut

sub store_cache {
    my $self = shift;

    my ( $vol, $dir, $file ) = splitpath( $self->cache_file );

    $dir = catdir( $vol, $dir );
    unless ( -d $dir ) {
        mkdir $dir
            or die "Error creating directory '$dir': $!\n"
    }

    Storable::store( $self->cache, $self->cache_file . '-new' );
    rename( $self->cache_file . '-new', $self->cache_file );
}

=item find_file_packages

Returns a list of packages where the given file was found.

F<Contents> files store the package section together with package name. That is
stripped.

Returns an empty list of the file is not found in any package.

=cut

sub find_file_packages {
    my( $self, $file ) = @_;

    my $packages = $self->cache->{apt_contents}{$file};

    return () unless $packages;

    my @packages = split( /,/, $packages );     # Contents contains a
                                                # comma-delimitted list
                                                # of packages

    s{[^/]+/}{} for @packages;  # remove section

    return @packages;
}

=item find_core_perl_dependency( $module[, $version] )

return a dependency on perl containing the required module version. If the
module is not available in any perl as released by Debian, return undef.

Currently Debian has only two releases of Perl: 5.8.8 (5.008008) and 5.10
(5.010000).

=cut

our @debian_perls = qw( 5.008008 5.010000 );

sub find_core_perl_dependency {
    my ( $self, $module, $version ) = @_;

    # see if the module is included in perl core
    my $core_ver;
    for (@debian_perls) {
        my $core = Module::CoreList->find_version($_);
        next unless exists $core->{$module};    # not in that perl version

        # reaching here, the module is in the core version in $_
        # if we don't need a particular version, we are done
        unless( defined($version) ) {
            $core_ver = $_;
            last;
        }

        # OTOH, if we do need a particular version, but
        # the core module has none, try next core release
        my $ver = $core->{$module};
        next unless defined($ver);

        # if the core module version is sufficiently new, we're done
        if( deb_ver_cmp( $ver, $version ) >= 0 ) {
            $core_ver = $_;
            last;
        }
    }

    if($core_ver) {
        $core_ver = version->new($core_ver);            # v5.9.2
        ( $core_ver = $core_ver->normal ) =~ s/^v//;    # "5.9.2"

        return Debian::Dependency->new( 'perl', $core_ver );
    }

    # not a core module
    return undef;
}

=item find_perl_module_package( $module, $version )

Given Perl module name (e.g. Foo::Bar), returns a L<Debian::Dependency> object
representing the required Debian package and version. If the module is a core
one, suitable dependency on perl is returned.

=cut

sub find_perl_module_package {
    my ( $self, $module, $version ) = @_;

    # see if the module is included in perl core
    my $core_dep = $self->find_core_perl_dependency( $module, $version );

    return $core_dep if defined($core_dep);

    # not a core module (or at least not in any perl release available in
    # Debian)
    # try module packages
    my $module_file = $module;
    $module_file =~ s|::|/|g;

    my @matches = $self->find_file_packages("$module_file.pm");

    # rank non -perl packages lower
    @matches = sort {
        if    ( $a !~ /-perl: / ) { return 1; }
        elsif ( $b !~ /-perl: / ) { return -1; }
        else                      { return $a cmp $b; }    # or 0?
    } @matches;

    return Debian::Dependency->new( $matches[0], $version )
        if @matches;

    return;
}

1;

=back

=head1 AUTHOR

=over 4

=item Damyan Ivanov <dmn@debian.org>

=back

=head1 COPYRIGHT & LICENSE

=over 4

=item Copyright (C) 2008, 2009 Damyan Ivanov <dmn@debian.org>

=back

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
Street, Fifth Floor, Boston, MA 02110-1301 USA.

=cut
