# Googlematic::Search
# (c) 2002 Matt Webb <matt@interconnected.org> All rights reserved
#
# Searches Google using the SOAP interface and Google's WSDL service
# description.
#
# Googlematic::Search runs as a single session under POE. It could just be a
# state for every Responder, but this way there's a central place to monitor
# how many searches are made.
#
# To ensure this session remains running, there's a main loop which resets
# the search counter every hour.
#
# Search requests are always posted into the search state. There are three
# possibilities from there:
# - successful search
# - unsuccessful search because the search limit has been reached
# - unsuccessful search for an unknown reason
# Each of these three corresponds to a Responder state -- information is sent
# back to the relevant state, proxied via the IM session.
#
# Search requests are throttled by the counter. The counter is checked and
# incremented by the search state; it's reset hourly by the reset state.


package Googlematic::Search;

use strict;
use Scalar::Util qw(reftype);
use POE::Session;
use XML::Parser;
use SOAP::Lite;

# Workaround: the 1999 schema implementation incorrectly rejects "true" and
# "false" as boolean values, so borrow the 2001 schema's as_boolean.
#
# Step 1: install the 2001 implementation into the SOAP 1.1 deserializer's
# code slot.
*SOAP::XMLSchemaSOAP1_1::Deserializer::as_boolean =
    \&SOAP::XMLSchema2001::Deserializer::as_boolean;

# Step 2: alias the 1999 deserializer's as_boolean glob to the SOAP 1.1 one
# (a glob-to-glob assignment, so the 1999 name now shares that same sub).
*SOAP::XMLSchema1999::Deserializer::as_boolean =
    *SOAP::XMLSchemaSOAP1_1::Deserializer::as_boolean;


# _start
# - Registers the 'google' alias for this session
# - Builds the SOAP::Lite object from the Google WSDL and keeps it on the heap
# - Queues the first "reset" event, which starts the hourly counter reset
sub _start {
    my $kernel = $_[KERNEL];
    my $heap   = $_[HEAP];

    # Other sessions address this one by the name 'google'.
    $kernel->alias_set("google");

    # The WSDL is loaded from a path relative to the working directory.
    my $wsdl = 'file:./GoogleSearch.wsdl';
    $heap->{soap} = SOAP::Lite->service($wsdl);

    # The reset handler zeroes the counter and re-arms itself with an
    # hourly delay; that pending timer also keeps the session open.
    $kernel->yield("reset");

}


# reset
# - Zeroes the search counter
# - Schedules itself to run again in an hour, which also keeps the
#   session open and running
sub reset {
    my $kernel = $_[KERNEL];
    my $heap   = $_[HEAP];

    # Delay, in seconds, until the next reset (one hour).
    my $interval = "3600";

    $heap->{counter} = 0;
    $kernel->delay("reset", $interval);
}


# search($interface, $buddy, $query)
# - All search requests from Responders are incoming here
# - Increments the search counter; once it exceeds the configured
#   searches_per_hour, posts "limit" to the Responder via the proxy and stops
# - Performs the Google search through the SOAP object held on the heap
# - If the call dies or yields no usable (hash-based) result, bumps the
#   counter by 10, posts "error" to the Responder via the proxy and logs
#   the reason to STDERR
# - Otherwise posts "remember" plus the result elements back to the
#   Responder via the proxy
#
# Event arguments:
#   ARG0  interface - handed back to the proxy unchanged
#   ARG1  buddy     - handed back to the proxy unchanged
#   ARG2  query     - the search string
sub search {
    my ($kernel, $heap) = @_[KERNEL, HEAP];

    # Google API key goes here
    my $key = $Googlematic::CONFIG->{google_key};

    # interface and buddy are needed to send information back to the
    # Responder. Posting information back has to happen via the proxy
    # state of the 'im' session
    my ($interface, $buddy) = @_[ARG0, ARG1];

    # NOTE(review): the query is XML-escaped here before being handed to
    # SOAP::Lite; confirm the serializer does not escape it a second time.
    my $query = XML::Parser::Expat->xml_escape( $_[ARG2] );

    # Increment the search count. The default of 35 per hour allows for
    # 840 searches per day, if this script is continuously run.
    # NOTE(review): if searches_per_hour is unset the comparison is against
    # undef (numerically 0) and every search is refused -- presumably the
    # configuration always supplies it; confirm.
    ++$heap->{counter};
    if ($heap->{counter} > $Googlematic::CONFIG->{searches_per_hour}) {
        $kernel->post("im", "proxy", $interface, $buddy, "limit");
        return;
    }

    # Log this search
    print "Doing search for <$query>\n";

    # eval in case SOAP::Lite dies. The trailing 1 makes the eval's own
    # return value the success flag, so detection does not rely on $@.
    my $result;
    my $completed = eval {
        $result = $heap->{soap}->doGoogleSearch(
            $key,       # key
            $query,     # search query
            0,          # start results
            10,         # max results
            "false",    # filter: boolean
            "",         # restrict (string)
            "false",    # safeSearch: boolean
            "",         # lr
            "latin1",   # ie
            "latin1"    # oe
        );
        1;
    };

    # Capture $@ immediately: it is a global and the calls made below
    # could overwrite it before it is logged.
    my $error = $completed ? '' : $@;

    # The result is dereferenced as a hash below, so require a hash-based
    # reference. reftype (rather than ref) is used so that a blessed hash
    # is accepted too.
    my $usable = $result && (reftype($result) || '') eq 'HASH';

    if (!$completed || !$usable) {
        # There's been an error! If there are many errors it might be a
        # problem with the SOAP connection to Google and we want to avoid
        # spamming them, so bump the counter so we reach the search limit
        # sooner.
        $heap->{counter} += 10;
        $kernel->post("im", "proxy", $interface, $buddy, "error");

        # Log the search error, saying so explicitly when nothing died
        # but no usable result came back.
        my $reason = (defined $error && length $error)
            ? $error
            : "no usable result returned";
        print STDERR "Search error: $reason\n";
        return;
    }

    # Successful search! Return the entire results set
    $kernel->post(
        "im", "proxy", $interface, $buddy, "remember",
        $result->{resultElements}
    );

}


# A file loaded via require/use must finish by returning a true value.
return 1;
