#! /usr/bin/perl
#* ============================================================
# * File        : operacast.pl
# * Version     : 0.1
# * Author      : Eric
# * Date        : 2006-09-11
# * Description: parser for http://www.operacast.com/opstations.htm
# *
# *
# *  
# * ============================================================ */

use English;
use XML::DOM;
use HTML::Entities;

#------------------------------------------------------------------------------
# Init
#------------------------------------------------------------------------------

read_parse();      # appends the command-line arguments to the global @in
($source) = @in;   # first argument: name of the file to parse

# Empty DOM document plus the two nodes that get printed at the end of the
# script: the XML declaration and the <items> root element.
my $doc  = XML::DOM::Document->new();
my $head = $doc->createXMLDecl('1.0');
my $root = $doc->createElement('items');

# newNode( $name, $value )
#
# Creates an element called $name in the file-level $doc, gives it a single
# text child holding $value, and returns the element.  The element is not
# attached anywhere; the caller appends it where it belongs.
sub newNode
{
  # Lexicals, not 'local': the caller keeps its own global $name in use while
  # this sub runs, so nothing here may override package variables.
  my ( $name, $value ) = @_;

  my $node = $doc->createElement($name);
  $node->appendChild( $doc->createTextNode($value) );

  return $node;
}

#------------------------------------------------------------------------------
# read file into $data
#------------------------------------------------------------------------------

$datafile = $source;

# Without an input file there is nothing to parse; stop with a clear message
# instead of silently printing an empty item list.
die "usage: $0 <html-file>\n"
  unless defined $datafile && length $datafile;

# Three-argument open with a lexical handle: the file name is taken literally
# (no mode characters or surrounding whitespace are interpreted).
open( my $info, '<', $datafile )
  or die "cannot open '$datafile' for reading: $!\n";

# Slurp the whole file.  $/ is undefined only inside the do-block, so the
# global input record separator is left untouched for the rest of the script.
$data = do { local $/; <$info> };
close($info);

#------------------------------------------------------------------------------
# Parse playlist
#------------------------------------------------------------------------------


# <a name="auditorium"></a><a href="http://abc.net.au/classic/" target="home">ABC
#          CLASSIC FM</a></font></h2>

#        <p><a href="http://abc.net.au/classic/" target="home"><font face="Arial, Helvetica, sans-serif" size="-1"><b>Sydney, 
#          AUSTRALIA</b></font></a></p>
#      </TD>
#      <TD VALIGN=top WIDTH="279" HEIGHT=78><font color="#FFCC99">.</font></TD>
#      <TD VALIGN=top WIDTH=360 HEIGHT=78><font face="Arial, Helvetica, sans-serif" size="-1"><b><FONT COLOR="#333333" SIZE="-2"><A HREF="http://www.abc.net.au/classic/audio/streaming_wmp.htm"><FONT SIZE="-1">96K 
#        stereo Windoiws Media Player 9</FONT></A><BR>
#        <font face="Verdana, Arial, Helvetica, sans-serif">suitable for 112K Dual 
#        ISDN connections and higher and scaled for 56K and 28K connections</font><BR>
#        </FONT><FONT FACE="Arial, Helvetica, sans-serif" SIZE="-1"><B><FONT COLOR="#333333" SIZE="-2"><A HREF="112K%20Dual%20ISDN%20connections%20and%20higher%20and%20scaled%20for%2056K%20and%2028K%20connections%5D"><FONT SIZE="-1">96K 
#        G2 Real Player 8</FONT></A><BR>

#        <font face="Verdana, Arial, Helvetica, sans-serif">suitable for </font></FONT><font face="Verdana, Arial, Helvetica, sans-serif" size="-1"><FONT COLOR="#333333" SIZE="-2">112K 
#        Dual ISDN connections and higher and scaled for 56K and 28K connections</FONT></font></B></FONT><BR>
#        <A HREF="http://www.abc.net.au/streaming/classic/classicfm.m3u">128K stereo 
#        MP3</A><BR>
#        <font color="#333333" size="-2" face="Verdana, Arial, Helvetica, sans-serif">suitable 
#        for 256K broadband connecntions and higher</font></b></font></TD

# dump_lines()
#
# Converts the page held in the global $data into <item> elements that are
# appended to the file-level $root element.  Takes no arguments and returns
# nothing.
#
# The HTML is not really parsed; it is flattened and cut up with regexes:
#   1. newlines become spaces and whitespace runs are collapsed,
#   2. everything up to the last "STATION NAME" heading is dropped,
#   3. every <TR becomes a line break, so one line holds one table row,
#   4. <FONT> tags and every tag whose name starts with "B" are removed,
#   5. each row is split into cells on <TD.
# The first cell supplies the station name (text of its first two href
# anchors), the third cell supplies one stream per href anchor.  Every stream
# becomes <item> with the children name, url, descr and handler.
#
# Side effect: $data itself is rewritten by steps 1-4.
sub dump_lines
{
  return unless defined $data;

  # get rid of newlines
  $data =~ s/\n/ /g;
  # multiple spaces
  $data =~ s/\s\s+/ /g;
  # remove all up to STATION NAME column name
  $data =~ s/^.*STATION\sNAME//g;
  # newline for <tr  (stations are listed in table rows)
  $data =~ s/<TR/\n/gi;
  # remove some formatting (the second pattern also eats <BR> and any other
  # tag starting with "B"; the anchor patterns below rely on that)
  $data =~ s/<\/?FONT[^>]*>//gi;
  $data =~ s/<\/?B[^>]*>//gi;

  foreach my $row ( split( "\n", $data ) )
  {
    # Tag names are case-insensitive in HTML, and <TR is already matched that
    # way above, so accept <td as well as <TD.
    my @cells = split( /<TD/i, $row );
    next unless defined $cells[1] && defined $cells[3];

    # Station name: link texts of the first cell (title, then location).
    my @titles = ( $cells[1] =~ m/<a\shref[^>]+>([^<]*)<\/a>/gi );
    next unless @titles;

    # A row with a single anchor keeps the historical "Title - " form.
    my $name = $titles[0] . " - " . ( defined $titles[1] ? $titles[1] : "" );

    # Capture href value and link text as separate groups.  The URL then ends
    # at the closing quote of href, even when the tag carries further quoted
    # attributes such as target="...".  In list context with /g the match
    # returns a flat (url, text, url, text, ...) list.
    # NOTE(review): the URL is passed on as written in the page, i.e. without
    # entity decoding -- confirm that consumers expect that.
    my @links = ( $cells[3] =~ m/<a\shref="([^"]+)"[^>]*>([^<]+)<\/a>/gi );

    while ( my ( $url, $url_name ) = splice( @links, 0, 2 ) )
    {
      my $idname = decode_entities( $name . " ($url_name)" );

      my $item = $doc->createElement('item');
      $root->appendChild($item);

      $item->appendChild( newNode( 'name',    $idname ) );
      $item->appendChild( newNode( 'url',     $url ) );
      $item->appendChild( newNode( 'descr',   $idname ) );
      $item->appendChild( newNode( 'handler', "default" ) );
    }
  }

  return;
}

#------------------------------------------------------------------------------
# extract the stream URLs from $data and print them as an XML <items> list
#------------------------------------------------------------------------------

dump_lines();

# XML declaration first, then the serialized <items> tree, then a newline.
print $head->toString, $root->toString, "\n";

#--------------------------------------------------------------------------------
# get command line parameters
#--------------------------------------------------------------------------------

# read_parse( [*array] )
#
# Appends the command-line arguments (@ARGV) to @in, replaces every '+' in
# every element of @in with a space, and returns the number of elements.
# When a typeglob is passed, e.g. read_parse(*args), its array stands in for
# the global @in for the duration of the call.
# NOTE(review): the '+' rewrite also applies to file names given on the
# command line -- confirm this is wanted.
sub read_parse
{
  local (*in) = @_ if @_;

  push @in, @ARGV;
  s/\+/ /g foreach @in;

  return scalar @in;
}



