#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_za - Grab TV listings for South Africa.

=head1 SYNOPSIS

tv_grab_za --help

tv_grab_za [--config-file FILE] --configure [--gui OPTION]

tv_grab_za [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet] [--opentime]

=head1 DESCRIPTION

Output TV listings for DSTV channels available in South Africa.
The data comes from www.dstv.com. The grabber relies on
parsing HTML so it might stop working at any time.

First run B<tv_grab_za --configure> to choose, which channels you want
to download. Then running B<tv_grab_za> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_za.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  Can be 1, 7, 14 or 30.  Default is 14

B<--quiet> suppress the progress messages normally written to standard
error.

B<--opentime> Discard all M-Net programs outside of opentime

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Chris Picton, cpicton@users.sf.net.  Based on tv_grab_fi by Matti Airas.

=head1 BUGS

Probably many

=cut

######################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_za,v 1.1 2005/05/28 23:12:08 rmeden Exp $ ';
use Getopt::Long;
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
#use Date::Parse;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get South African television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet] [--opentime]
END
  ;

# Attributes of the root element in output.
my $HEAD = { 'source-info-url'     => 'http://www.dstv.com/',
             'source-data-url'     => "http://www.dstv.com/dstv-guide/default.asp",
             'generator-info-name' => 'XMLTV',
             'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
           };

# Whether zero-length programmes should be included in the output.
my $WRITE_ZERO_LENGTH = 0;

# The timezone in South Africa.
my $TZ="+0200";

# default language
my $LANG="en";

# Global channel data.
our @ch_all;


# Initialize cookie_jar
use LWP::Simple qw($ua);
use HTTP::Cookies;
my $cookies = HTTP::Cookies->new;
$ua->cookie_jar($cookies);

######################################################################
# get options

# Get options, .
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_gui,
    $opt_quiet, $opt_list_channels, $opt_opentime, $opt_cache);
$opt_days  = 14; # default
$opt_offset = 0; # default
$opt_quiet  = 0; # default
GetOptions('days=i'        => \$opt_days,
           'help'          => \$opt_help,
           'configure'     => \$opt_configure,
           'opentime'      => \$opt_opentime,
           'config-file=s' => \$opt_config_file,
           'gui:s'         => \$opt_gui,
           'output=s'      => \$opt_output,
           'quiet'         => \$opt_quiet,
           'cache'         => \$opt_cache,
          )
  or usage(0);
die 'number of days must not be negative'
  if (defined $opt_days && $opt_days < 0);
die 'number of days can only be 1, 7, 14 or 30'
  if (defined $opt_days && $opt_days !~ /^(1|7|14|30)$/);
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
                             $opt_configure => 'configure'
                            );

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_za', $opt_quiet);

init_cookies();

if ($mode eq 'configure') {
    mode_configure();
}
my @config_lines; # used only in grab mode
@config_lines = XMLTV::Config_file::read_lines($config_file);

# Whatever we are doing, we need the channels data.
my @channels;
my %channels;
#######################################

# Not configuration, we must be writing something, either full
# listings or just channels.
#


# Ignore the mess for now - will be sorted out soon

die if $mode ne 'grab' and $mode ne 'list-channels';

# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
    # Write channels mode.
    $writer->write_channel($_) foreach @ch_all;
    $writer->end();
    exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';

# Read configuration.
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;
    s/#.*//g;
    next if /^\s+$/;
    s/\s+$//g;
    my (undef, $chanid, $optid, $name) = split(/\s+/, $_, 4);
    $channels{$chanid} = {'name' => $name, 'id' => $chanid, 'optid', => $optid };
}

######################################################################
# begin main program


# Print out the channels
die "No channels specified, run me with --configure first\n"
  if not keys %channels;

foreach my $chanid (keys %channels) {
    my $n=$channels{$chanid}->{'name'};
    my $ch_xid="$chanid.dstv.com";
    $writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n ] ] });
}

my $bar = new XMLTV::ProgressBar('getting listings', (scalar keys %channels))
  if not $opt_quiet;
foreach (keys %channels) {
    process_html($channels{$_});
    update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
        *t = sub {};
        *d = sub { '' };
    }
    else {
        *t = \&Log::TraceMessages::t;
        *d = \&Log::TraceMessages::d;
        Log::TraceMessages::check_argv();
    }
}

sub tidy( $ ) {
    for (my $tmp = shift) {
        tr/\t\205/ /d;
        s/([^\012\015\040-\176\240-\377]+)//g;
        return $_;
    }
}

####
# process_table: fetch a URL and process it
#
# arguments:
#    Date::Manip object giving the day to grab
#    xmltv id of channel
#    katso id of channel
#
# returns: list of programme hashes to write
#
sub process_html {
    my $inhash = shift;
    my $chanid = $inhash->{'id'};
    my $optid = $inhash->{'optid'};
    my $name = $inhash->{'name'};

    my $stype=2;
    $stype = 0 if $opt_days == 1;
    $stype = 1 if $opt_days == 7;
    $stype = 2 if $opt_days == 14;
    $stype = 3 if $opt_days == 30;

    my $now = time();
    my $url = "http://www.dstv.com/dstv-guide/default.asp?Submit=true&Page=Schedules&sType=$stype&ChannelID=$optid&Bouquet=3";
    t "getting URL: $url";
    
    my $data;
    if ($opt_cache) {
        my $cachefile = "/tmp/tv_grab_za-cache-$optid";
        if (! -f $cachefile) {
            $data=tidy(get_nice($url));
            open CACHE, ">$cachefile";
            print CACHE $data;
            close CACHE;
        } else {
            open CACHE, $cachefile;
            my @data = <CACHE>;
            close CACHE;
            $data = join("", @data);
        }
    } else {
        $data=tidy(get_nice($url));
    }

    if (not defined $data) {
        die "could not fetch $url, aborting\n";
    }
    local $SIG{__WARN__} = sub {
        warn "$url: $_[0]";
    };

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($data);

    # Find the date headers on the page
    my @headers=();
    my @table4 = $tree->look_down("_tag"=>"table", "ID" => "Table4");
    foreach my $elem (@table4) {
        my $header = $elem->parent()->parent();
        if (lc($header->tag()) eq 'td') {
            $header = $header->parent();
        }
        push @headers, $header;
    }
    t 'found ' . (d scalar @headers) . ' days ';

    # Find all the siblings of these headers
    foreach my $elem (@headers) {
        my $tmp = $elem->look_down('_tag' => 'td', 'class' => 'srch-head-mid')->look_down('_tag' => 'b');
        my $date = $tmp->as_text();
        t 'Date: '.(d $date);
        my ($time, $title, $rating);
        $tmp = $elem->right();
        my ($prev_r, $r, $prev_time);

        while (defined $tmp) {
            my $row;
            
            if (!defined $tmp || $tmp eq ' ') {
                last;
            }
            $row = $tmp->look_down('_tag' => 'table', 'id' => 'Table3');
            if (defined $row) {
                my @vals = $row->look_down('_tag' => 'b');
                ($time, $title) = split(/\s+/, $vals[0]->as_text(), 2);
                $rating = $vals[1]->as_text();
                $title =~ s/\s+$//g;
                $rating =~ s/\s+$//g;

                t "Time: $time, Title: $title, Rating: $rating";
            }

            $row = $tmp->look_down('_tag' => 'td', 'style' => 'padding:4px;');
            if (defined $row) {
                my $desc = $row->as_text();
                $desc =~ s/\s+$//g;
                $desc =~ s/^\s+//g;

                my $start = gen_start_time($date, $time, $now);

                # Try to get full title from description if title seems cut off
                if ($title =~ /\.\.\.$/ ) {
	       			$title =~ s/\.\.\.$//g;
		      		# Try get full title from description;
			     	if ($desc =~ /^'(${title}[^\.]+[^\'])'?\.\s+(.+)/) {
			       		t "REMAPPING TITLE from $title to $1";
    					$title = $1;
    					$desc = $2;
	      				t "New desc = $desc";
				    }
                }

    			my $subtitle = undef;
    			my $year = undef;
    			my $actors = undef;
    			my $director = undef;
    			my $writers = undef;       # Unused right now
    			my $commentators = undef;  # Unused right now
    			
    			if ($desc =~ /^'([^\.\']+)'?\.\s+(.+)/) {
    				$subtitle = $1;
    				$desc = $2;
    				t "FOUND EPISODE TITLE: $subtitle";
    				t "Title: $title";
    				t "New desc = $desc";
    			}
    			
    			if ($desc =~ /(.*\.)\s+([^\.]* [A-Z][^\.]*)\.\s+\(([0-9]{4})\)\s*([^\.]*)\.*$/) {
    #				print "desc = $desc\n";
    #				print "Actors = $2\n";
    #				print "Year = $3\n";
    #				print "Director = $4\n";
    				$desc = $1;
    				$actors = $2;
    				$year = $3;
    				$director = $4;
    				if (defined $actors) {
    					$actors =~ s/^\s+//g;
    					$actors =~ s/\s+$//g;
                        my @a = split(/,\s+/, $actors);
                        $actors = [];
    					foreach my $a (@a) {
                            push @$actors, $a;
                        }
    				}
    				$director = undef if $director =~ /^\s*$/;
				
    				if (defined $director && $director eq "B&W") {
    					$desc = "$desc (B&W)";
    					$director = undef;
    				}
    				
    			}

                $r = undef;
                $r->{title} = [[$title]];
                $r->{'sub-title'} = [[$subtitle]] if $subtitle;
                $r->{rating} = [[$rating, "DSTV"]];
                $r->{start} = $start;
                $r->{channel} = "$chanid.dstv.com";
                $r->{desc} = [[$desc]];
                
                # credits
                my %c;
                $c{director} = [ $director ] if $director;
                $c{actor} = $actors if $actors;
            	$c{writer} = $writers if $writers;
                $c{commentator} = $commentators if $commentators;
                $c{director} = [ $director ] if $director;
                $r->{date} = $year if $year;

                $r->{credits} = \%c if %c;
                
                if (defined $prev_r) {
                    $prev_r->{stop} = $start;

                    my $write = 1;

                    my ($hr, $min) = split(/:/, $time);
                    my ($prev_hr, $prev_min) = split(/:/, $prev_time);
                    if ($name eq 'M-Net' && $opt_opentime) {
                        if ($hr <= 17) { # $prev ends before 17:00
                            $write = 0;
                        }
                        if ($prev_hr >= 19) { # $prev starts after 19:00
                            $write = 0;
                        }
                    }
                    if ($write == 1) {
                        $writer->write_programme($prev_r);
                    }
                }
                $prev_time = $time;
                $prev_r = $r;

            }

            $row = $tmp->look_down('_tag' => 'table', 'id' => 'Table4');
            if (defined $row) {
                last;
            } 

            $tmp = $tmp->right();
        }
    }
}

# get channel listing
sub get_channels {
    my $bar;


    $bar = new XMLTV::ProgressBar('getting list of channels', 1)
      if not $opt_quiet;
    my %channels;
    my $url="http://www.dstv.com/dstv-guide/default.asp?Page=Schedules";
    my $local_data=get_nice($url);
    die "could not get channel listing $url, aborting\n"
      if not defined $local_data;

    # FIXME commonize this
    local $SIG{__WARN__} = sub {
        warn "$url: $_[0]";
    };
    local $SIG{__DIE__} = sub {
        die "$url: $_[0]";
    };
    
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($local_data);

    # all channel elements are options inside the ChannelID select

    my @option_list = $tree->look_down(_tag => 'select',
                                        id => 'ChannelID',
                                        name => 'ChannelID');
    my @options = map { $_->look_down(_tag => 'option') } @option_list;

    foreach my $opt (@options) {
        my $optid = $opt->attr('value');
        my $text = $opt->as_text();
        my ($chanid, $name) = split(/\s+\-\s+/, $text, 2);
        $channels{$chanid} = {'name' => $name, 'id' => $chanid, 'optid', => $optid };
    }

    die "no channels could be found" if not keys %channels;
    update $bar if not $opt_quiet;
    $bar->finish() if not $opt_quiet;
    return %channels;
}

# Bump a YYYYMMDD date by one.
sub nextday {
    my $d = shift;
    my $p = parse_date($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}

sub mode_configure {

    XMLTV::Config_file::check_no_overwrite($config_file);
    %channels = get_channels();

    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    # Ask about each channel.
    my @chs = sort {uc($channels{$a}->{'name'}) cmp uc($channels{$b}->{'name'})} keys %channels;
    my @qs = map { "add channel '$channels{$_}->{name}'?" } @chs;
    my @want = ask_many_boolean(1, @qs);
    foreach (@chs) {
        my $w = shift @want;
            warn("cannot read input, stopping channel questions"), last
              if not defined $w;

            # Print a config line, but comment it out if channel not wanted.
            print CONF '#' if not $w;
            my $name = $channels{$_}->{'name'};
            my $optid = $channels{$_}->{'optid'};
            print CONF "channel $_ $optid $name\n";
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}

sub init_cookies {


    my $bar = new XMLTV::ProgressBar('Initialising cookies', 1)
      if not $opt_quiet;
    get_nice('http://www.dstv.com');
    update $bar if not $opt_quiet;
    $bar->finish() if not $opt_quiet;

}

sub gen_start_time {
    my ($date, $time, $now) = @_;
    
    # Date = 'Friday 23 May'
    # Time = '14:00';
    # str2time sometimes gets the wrong year
    # Append the current year to the date
    # If we are in Nov or Dec, reading for Jan or Feb, year++
    my $year = (localtime($now))[5] + 1900;
    my $mon = (localtime($now))[4] + 1;
    if (($mon == 11 || $mon == 12) && ($date =~ /(January|February)/)) {
        $year++;
    }
    my $timestamp = UnixDate("$date $year $time", "%s");
#	my $timestamp = str2time("$date $year $time");
	return POSIX::strftime("%Y%m%d%H%M", localtime($timestamp));
}
