package Net::Dict::Leo;


use 5.6.0;
use strict;
use warnings;


use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS );

$VERSION = '0.09';


use base qw( Exporter );
use integer;

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
%EXPORT_TAGS = ( DEBUG => [qw( leo_translate leo_debug )] );
@EXPORT = qw( leo_translate );
@EXPORT_OK = qw( leo_debug :DEBUG );


use Carp;
use Encode;
use HTML::Entities;
use HTML::Parser 3.00;
use LWP::UserAgent;


sub leo_translate(%);
sub leo_debug(%);
sub get_request(%);
sub parse($);
sub urlencode($);


sub leo_translate( % )
{
    parse &get_request;
}


sub leo_debug( % )
{
    my $response = &get_request;
    $response->as_string;
}


sub get_request( % )
{
    # check parameters
    my %tmp = ();
    if ( $#_ ) {
	%tmp = @_;
	$tmp{relink} = $tmp{sectHdr} = "off";
	$tmp{tableBorder} = 0;
	# mono-/bidirectional search
	if ( exists $tmp{searchLoc} ) {
	    $tmp{searchLoc} = ($tmp{searchLoc}<=>0);
	}
	# spelling tolerance
	if ( exists $tmp{spellToler} and $tmp{spellToler} !~ /^(on|off|standard)$/ ) {
	    carp "Unrecognized spelling tolerance setting.  Using default.\n";
	    delete $tmp{spellToler};
	}
	# morphology search, no longer supported
	if ( exists $tmp{deStem} and $tmp{deStem} ) {
	    carp "Morphology search currently not supported by Leo.\n";
	    delete $tmp{deStem};
	}
	##if ( $tmp{deStem} !~ /none|standard|forcedAll/ ) {
	##    carp "Unrecognized morph setting.  Using default.\n";
	##    delete $tmp{deStem};
	##}
	# special characters tolerance
	if ( exists $tmp{cmpType} and $tmp{cmpType} !~ /^(fuzzy|exact|relaxed)$/ ) {
	    carp "Unrecognized special characters tolerance setting.  Using default.\n";
	    delete $tmp{cmpType};
	}
    }
    else {
	$tmp{search} = shift;
    }

    # build query string
    my $querystring = "";
    my %form = map { urlencode $_ } %tmp;
    $form{lang} = "en";
    $querystring .= "$_=$form{$_}&"
	foreach keys %form;
    chop $querystring;

    # create user agent
    my $useragent = new LWP::UserAgent;
    if ( defined $form{useragent} and $form{useragent} ) {
	agent $useragent $form{useragent};
    }
    $useragent->env_proxy();

    # request
    my $request = new HTTP::Request GET => "http://dict.leo.org/?$querystring";
    $request->content_type( "application/x-www-form-urlencoded" );
    request $useragent $request;
}


sub parse( $ )
{
    my $response = shift;
    my ($content) = $response->content =~ m#(^.*search result.*$)#m;
    $content = decode "iso-8859-15", $content;
    $content ||= "";
    my @words = ();
    my @structured_words = ();
    my $p = new HTML::Parser
	ignore_elements => [qw(head th)],
	report_tags => [qw(tr th td)],
	unbroken_text => 1,
	marked_sections => 1,
	handlers => [
	    default => [""],
	    start => [ sub {
		    my ($self, $tag, $attr) = @_;
		    $attr->{colspan} ||= 1;
		    lc $tag eq "td" and $attr->{colspan} < 2
			and $self->handler( default => sub {
			    $_ = decode_entities shift;
			    s#[\240\032]+# #go;
			    s#(?>\s+)# #go;
			    s#^\s##mo;
			    s#\s$##mo;
			    push @words, $_ if /\S/;
			}, "text" );
		}, "self, tagname, attr" ],
	    end => [ sub {
		    my $self = shift;
		    lc shift eq "td" and $self->handler( default => [""] );
		}, "self, tagname" ],
	    comment => [""],
	    declaration => [""]
	];
    $p->parse($content);

    push @structured_words, [ shift(@words), shift @words ]
	while @words;

    @structured_words;
}


sub urlencode( $ )
{
    $_ = shift;
    s#([^\w_-])#sprintf "%%%02x", ord $1#ge;
    $_
}


1;


__END__


=head1 NAME

Net::Dict::Leo - Interface to Leo's Online Dictionary

=head1 SYNOPSIS

  use Net::Dict::Leo;
  leo_translate(I<phrase>);
  leo_translate(I<complex_search>);

=head1 DESCRIPTION

Leo (L<http://dict.leo.org/>) offers an online English-German
bidirectional dictionary, which this module offers an interface for.

=over 8

=item B<leo_translate>(I<phrase>);

In its simplest form leo_translate accepts a word or phrase and
queries Leo for all english and german translations.  The return
value is a list of pairs (two-element arrays) of corresponding
english and german terms.

=item B<leo_translate>(I<complex_search>);

This function accepts a hash reference to specify parameters along
with the search.  As above, the return value is a list of pairs of
corresponding english and german terms.

Recognized hash keys include:

=over 8

=item search

The word or phrase you are searching for.  This is a must.

=item searchLoc

A signed integer denoting the direction of translation.  Positive
values are for "German to English", negative values for "English to
German".  The default value of 0 (zero) denotes bidirectional search.

=cut

#=item spellToler
#
#Indicates whether to extend the search to (orthografically)
#similar words.  The accepted values are:
#
#I<off>: Do not search for similar words.
#
#I<standard>: (Default) Only search for similar words if no or few
#results are returned from the original search.  The exact definition
#of "few" is hidden within Leo's internals.
#
#I<on>: Always search for similar words.
#
#=item deStem
#
#Indicates whether to extend the search to morphologic base forms
#of the word.  The accepted values are:
#
#I<none>: No attempt to find base forms.
#
#I<standard>: (Default) Only find base forms if no or few results
#are returned from the original search.  The exact meaning of "few"
#is hidden within Leo's internals.
#
#I<forcedAll>: Find base forms in any way.

=item cmpType

Determines how strictly the input of umlauts is handled.
The accepted values are:

I<exact>: Only E<Auml>, E<Ouml>, E<Uuml>, E<auml>, E<ouml>, E<uuml> are accepted.

I<relaxed>: (Default) In addition, the character sequences Ae, Oe,
Ue, ae, oe, ue, ss are accepted as well.

I<fuzzy>: Even the characters A, O, U, a, o, u are accepted for
the respective umlauts.

=item useragent

You can supply an optional user agent string if you don't like to
be identified as libwww/perl.

=back

=back

=head1 BUGS

The search is limited to the first result page of dict.leo.org equivalent to
100 results.  Even if there are more results you will not get any warning
message at this time.

If called with combinations of words, leo is only performing a logical OR
search, returning results which contain any one of the words.  However, results
with all of the words are returned first and thus, listed at the top.

No support for the German-French dictionary at dict.leo.org.

=head1 AUTHOR

Carsten Luckmann <I<leo@luckmann.name>>

=head1 SEE ALSO

perl(1), leo(1).

=cut
