use strict;
use LWP::UserAgent;
my ($local,$s,@l,%tags,%temp,@temp,$sub, $ua, @status,$file,$outfile);
$file='c:/perl/xml/config.xml' unless $file=shift @ARGV;
my $lurl; #local url useful for testing
my $debug=1; # 2 gets hash names
my $headers=0; # debug option to print out the headers
my @dontwant; # items in here are not processed
my $useragent="BangkokPerl/1.0";
my $debuglog="debug.log";
open LOG,">$debuglog" if $debug;
{
my @page=parseconfig($file,"page");
my $page=$page[0];
$_=$page->{'options'}; #set options
if (/debug=(\d)/) { $debug=$1; }
print "debug $debug\n";
if (/lurl/) { $lurl=1; }
if (/headers/) { $headers=1; }
$_=$page->{'dontwant'};
if ($_) {@dontwant=split;}
@temp=parseconfig($file,'item'); #get the items
my $outfile = $page->{'outfile'};
if ($outfile) {
open OUT, ">$outfile" or die "died opening $file";
select OUT;
}
$page->{'head'}=~ s/^$/$1/s; #clean off any comments
print eval($page->{'head'}),"\n"; #print the header
$page->{'body'}=~ s/^$/$1/s; #start off the body
print eval($page->{'body'}),"\n";
foreach (@temp) { getdata($_); } #process each
$page->{'footer'}=~ s/^$/$1/s;
print eval($page->{'footer'}),"\n"; #print footer
exit;
}
sub getdata {
my ($k,$v,@l,%temp);
my $temp=shift or die "no data";
foreach (@dontwant) {
return if $_ eq $temp->{'name'};
}
if ($lurl) {$temp->{'fetch'}=$temp->{'lurl'}; }
else {$temp->{'fetch'}=$temp->{'url'}; }
$temp->{'sub'}=~ s/^$/$1/s; #clean off any comments
if (!$temp->{'method'} or$temp->{'method'} eq 'chunk') {
print retrievechunk($temp),"\n";
}
elsif ($temp->{'method'} eq 'all') {
print retrieveall($temp),"\n";
}
elsif($temp->{'method'}=~ m/file/) {
$temp->{'file'}=$temp->{'method'};
$temp->{'file'}=~ s/^file=(.+)/$1/;
print retrievetofile($temp),"\n";
}
0;
}
sub retrievetofile {
my $temp=shift or die "no data";
my $url=$temp->{'fetch'};
my $sub=$temp->{'sub'};
my $file=$temp->{'file'};
print STDERR "$temp->{'name'} $temp->{'method'}\n" if $debug;
$ua = new LWP::UserAgent if not $ua;
$ua->agent($useragent);
$ua->env_proxy; #let environment set proxy
$s="";
my $req = new HTTP::Request 'GET' => $url;
my $res = $ua->request($req,$file);
print LOG "$temp->{'name'} Status: " . $res->status_line . "\n";
print STDERR "$temp->{'name'} $res\n";
if ($res->is_success) {
$_=$res->content;
return eval($sub);
}
else {
print STDERR "Error: $temp->{'name'}" . $res->status_line . " $url\n";
return "Error: $temp->{'name'}" . $res->status_line . " $url\n";
}
}
sub retrieveall { # good for short ones
my $temp=shift or die "no data";
my $url=$temp->{'fetch'};
my $sub=$temp->{'sub'};
print STDERR "$temp->{'name'} $temp->{'method'} $url\n" if $debug;
$ua = new LWP::UserAgent if not $ua;
$ua->agent($useragent);
# $ua->env_proxy; #let environment set proxy
$s="";
my $res=$ua->request(HTTP::Request->new('GET', $url));
if ($headers) { showheaders($temp,$res) }
else { print LOG "$temp->{'name'} Status: " . $res->status_line . "\n";
}
if ($res->is_success) {
$_=$res->content;
return eval($sub);
}
else {
print STDERR "Error: $temp->{'name'}" . $res->status_line . " $url\n";
return "Error: $temp->{'name'}" . $res->status_line . " $url\n";
}
}
sub retrievechunk { #default, good if don't need whole thing
my $temp=shift or die "no data";
my $url=$temp->{'fetch'};
my $sub=$temp->{'sub'};
my $head=$headers;
my $method="CHUNK";
print STDERR "$temp->{'name'} $method $url\n" if $debug;
$ua = new LWP::UserAgent if not $ua;
$ua->agent($useragent);
# $ua->env_proxy; #let environment set proxy
my ($expected_length, $bytes_received);
my $lastchunk="";
$s="no data";
my $res=$ua->request(HTTP::Request->new('GET', $url),
sub {
my($chunk, $res) = @_;
if ($head) { showheaders($temp,$res); $head=0;}
else { print LOG "$temp->{'name'} Status: " . $res->status_line . "\n";
}
$bytes_received += length($chunk);
unless (defined $expected_length) {
$expected_length = $res->content_length || 0;
}
if ($expected_length) {
printf STDERR "%d%% - ",100 * $bytes_received / $expected_length if $debug==2;
}
print STDERR "$bytes_received bytes received\n" if $debug;
$_=$lastchunk=$lastchunk . $chunk; #our data can get chopped!
die if ($s=eval($sub));
# $s;
},1024);
if (!$res->is_success) {
print STDERR "$temp->{'name'} Error: " . $res->status_line . " $url\n";
$s= "$temp->{'name'} Error: " . $res->status_line . " $url\n";
}
return $s;
}
sub showheaders { #show the headers
my ($temp,$res) =@_;
print LOG "########$temp->{'name'} $temp->{'method'}########\n";
my $hd=$res->headers;
while (my ($k,$v) = each %$hd) {
print LOG "$k\t$v\n";
if ( $k eq 'content_type' and $v=~/ARRAY/) {
foreach ($v) { print $_,"\n"}
}
}
}
sub parseconfig {
my ($s,$k,$v,@l,%tags,%temp,@temp,$element);
my $file = shift or die "no filename for parseconfig"; # we need a filename
# parse an XML config file - assumes a DTD like this
#
$element="item" if not $element=shift; # change name if you want
# we return a list of anonymous hashes, each with the above keys (without the ?s)
# can add additional fields to the DTD, parser doesn't care
# note this won't handle nested items
{ local $/= undef;
open CONFIG,"$file";
$s= ;
close CONFIG; }
if ($s=~ m/\\s*(.*?)\\s*" . $_ . ">(.*)";
}
} else { die "bad DTD $file"; }
$_=$s;
@l=m#<$element>(.+?)$element>#sg; #get a list of elements
foreach $s (@l) {
my $temp={};
while (($k,$v) = each %tags) {
$s=~ m/$v/s;
$temp->{$k}=$2 if length $2;
}
push @temp,$temp; #anonymous hash to list
}
return @temp if ($debug < 3);
print "-----keys------\n";
my $temp={};
for $temp (@temp) {
while (($k,$v) = each %$temp) {
print "$k\t$v\n";
}
}
exit;
}