# Copyright (c) 1997 Robert White. All rights reserved. # Perl Artistic license applies. robert@bangkokwizard.com # this is snatch ver. 1.2 previous name websnatch.pl use strict; use LWP::UserAgent; use CGI qw(:standard :html3); my ($local,$s,@l,%tags,$item, %temp,@temp,$sub, $ua, @status,$file,$outfile, %cookie); my %sys=qw(logfile debug.log); my %lwp; my %sysIE; my %IE; my @dontwant; # items in here are not processed my $headers=0; # debug option to print out the headers my $debug=3; # 4 prints hash names only $sys{'file'}=$ENV{snatchfile} if $ENV{snatchfile}; $sys{'file'}=shift @ARGV if @ARGV[0]; $sys{'file'}='c:/perl/xml/config.xml' unless $sys{'file'}; open LOG,">$sys{'logfile'}" if $debug; Link: &check_page; @temp=parseconfig($sys{'file'},'item'); #get the items Items: foreach (@temp) { my $item=$_; options($item); foreach $s (@dontwant) { next Items if $s eq $item->{'name'}; } if ($debug>2){print STDERR 'SYS>',$item->{'name'}," $item->{'method'}\n";} if ($item->{'method'}=~ m/timer/i) { timer($item); } elsif($item->{'method'}=~ m/link\s+(.+)/i) { my @links; @links=@{$sys{'link'}} if (defined $sys{'link'}); push @links, split / /,$1; $sys{'link'}=[@links]; next; } elsif($item->{'method'}=~ m/code/i) { run_code($item),"\n"; } elsif($item->{'method'}=~ m/LWP/i) { doLWP($item); } else { my $t= do_sub($item),"\n"; print $t; } print STDERR "EVAL ERR: $@\n" if $@; } &sysvars if $debug>3; &checkfooter; if (defined $sys{'link'}) { #this is probably excessive my @links=@{$sys{'link'}}; $sys{'file'}=pop @links; undef @{$sys{'link'}}; undef $sys{'link'}; $sys{'link'}=[@links] if @links; undef $sys{'page'}; undef $lwp{'lurl'}; goto Link; } sub timer { my $h=shift; my ($p, %h, $n, $t ); return if not $h or not $h->{'method'}; my $t=Win32::GetTickCount; $_=$h->{'method'}; /timer\s+(\w+)(.*)/oi; my $n=$1?$1:return 0;; $p=$2; my $st="timer " . $n . ' start'; $p=' start' if not defined $sys{$st}; $n='timer ' . $n . $p; print STDERR $n,"\n"; if ($n=~/start/) { $sys{$n}=$t; } else { $sys{$n}=$t-$sys{$st}; } $p= do_sub($h); print $p; $t; } sub options { my $h=shift; return if not $h or not $h->{'options'}; $_=$h->{'options'}; if (/debug=(\d)/) { $sys{'debug'}=$1; } if (/lurl/) { $lwp{'lurl'}=1; } if (/headers/) { $lwp{'headers'}=1; } } sub sysvars { push @l,"%sys variables>\n"; foreach (sort keys %sys) { push @l,$_," = ",$sys{$_},"\n"; } $_=join' ',@l; push @l,"%sysIE variables\n"; foreach (sort keys %sysIE) { push @l,$_," = ",$sysIE{$_},"\n"; } $_=join' ',@l; print STDERR $_; } sub options { my $h=shift; return if not $h or not $h->{'options'}; $_=$h->{'options'}; if (/debug=(\d)/) { $sys{'debug'}=$1; } if (/lurl/) { $lwp{'lurl'}=1; } if (/headers/) { $lwp{'headers'}=1; } } sub check_page { my @page=parseconfig($sys{'file'},"page"); #page definition my $page=pop @page; return if not $page; options($page); $sys{'page'}=$page; $_=$page->{'dontwant'}; @dontwant=split if $_; $outfile = $page->{'outfile'}; #Open the output if ($outfile) { $sys{'outfile'}=$outfile; open OUT, ">$outfile" or die "died opening $sys{'outfile'}"; select OUT; } if($page->{'options'}!=~ m/nopage/i) { print STDERR "Page print\n"; $page->{'head'}=~ s/^$/$1/s; #clean off any comments print eval($page->{'head'}),"\n"; #print the header print STDERR "EVAL ERR: $@\n" if $@; if (defined $page->{'body'}) { $page->{'body'}=~ s/^$/$1/s; #start off the body print eval($page->{'body'}),"\n"; print STDERR "EVAL ERR: $@\n" if $@; } } } sub checkfooter { if (defined $sys{'page'}) { my $page= $sys{'page'}; my $pf=$page->{'footer'}; if ($pf) { $pf=~ s/^$/$1/s; print eval($pf),"\n"; #print footer } else { print ''; } close OUT; } } sub do_sub { # used for anything else my $temp=shift or die "no data"; my $sub=$temp->{'sub'}; $sub=~ s/^$/$1/s; #clean off any comments print STDERR "$temp->{'name'} using sub\n" if $debug>2; $s= eval($sub); } sub run_code { # and htmlify the output a bit my $temp=shift or die "no data"; $temp->{'sub'}=~ s/^$/$1/s; #clean off any comments my $code=$temp->{'sub'}; print STDERR "$temp->{'name'} using code\n" if $debug>1; $s= eval($temp->{'header'}) if $temp->{'header'}; print STDERR "EVAL ERR: $@\n$s" if $@; { local $\='
'; print "OUTPUT"; $s= eval($code); print STDERR "EVAL ERR: $@\n$s" if $@; print "
COMMENTS"; $s= eval($temp->{'comments'}) if $temp->{'comments'}; print STDERR "EVAL ERR: $@\n$s" if $@; } } ############### LWP section # file headers go to the LOG file # sub doLWP { my ($method); my $temp=shift or die "no data"; if (!$debug){print STDERR $temp->{'name'},"\n";}# show doing something $ua = new LWP::UserAgent if not $ua; $ua->env_proxy; #let environment set proxy my $useragent="BangkokPerl/1.0"; $ua->agent($useragent); $temp->{'sub'}=~ s/^$/$1/s; #clean off any comments if ($lwp{'lurl'}) {$lwp{'fetch'}=$temp->{'lurl'}; } else {$lwp{'fetch'}=$temp->{'url'}; } return if ($lwp{'lurl'} and not $lwp{'fetch'}); #some thing can't be local $method=$temp->{'method'}; if (!$method) { print do_sub($temp),"\n"; } else { # if ($debug){print STDERR $temp->{'name'}," $method\n";} $method=~ s/LWP\s+(.)/$1/; if (!$method or $method eq 'chunk') { print LWPchunk($temp),"\n"; } elsif ($method eq 'all') { print LWPall($temp),"\n"; } elsif($method=~ m/file/) { $temp->{'file'}=$method; $temp->{'file'}=~ s/^file=(.+)/$1/; print LWPtofile($temp),"\n"; } } print STDERR "EVAL ERR: $@\n" if $@; } sub LWPtofile { #note the sub has to open the file to do anything! my $temp=shift or die "no data"; my $sub=$temp->{'sub'}; print STDERR "$temp->{'name'} $temp->{'method'}\n" if $debug>1; my $req = new HTTP::Request 'GET' => $lwp{'fetch'}; my $res = $ua->request($req,$temp->{'file'}); print LOG "$temp->{'name'} Status: " . $res->status_line . "\n"; print STDERR "$temp->{'name'} $res\n"; if ($res->is_success) { $_=$res->content; return eval($sub); } else { print STDERR "Error: $temp->{'name'}" . $res->status_line . " $lwp{'fetch'}\n"; return "Error: $temp->{'name'}" . $res->status_line . " $lwp{'fetch'}\n"; } } sub LWPall { # good for short ones my $temp=shift or die "no data"; my $sub=$temp->{'sub'}; print STDERR "$temp->{'name'} $temp->{'method'} $lwp{'fetch'}\n" if $debug>1; my $method='GET'; $method='POST' if uc($temp->{'method'})=~ m/POST/; my $req=HTTP::Request->new($method, $lwp{'fetch'}); $req->content($temp->{'content'}) if $temp->{'content'}; my $res=$ua->request($req); if ($headers) { showheaders($temp,$res) } else { print LOG "$temp->{'name'} Status: " . $res->status_line . "\n"; } if ($res->is_success) { $_=$res->content; return eval($sub); } else { print STDERR "Error: $temp->{'name'}" . $res->status_line . " $lwp{'fetch'}\n"; return "Error: $temp->{'name'}" . $res->status_line . " $lwp{'fetch'}\n"; } } sub LWPchunk { #default, usually don't need whole thing my $temp=shift or die "no data"; my $sub=$temp->{'sub'}; my $head=!$headers; my $method="CHUNK"; print STDERR "$temp->{'name'} $method $lwp{'fetch'}\n" if $debug>1; my ($expected_length, $bytes_received); my $lastchunk=""; my $res=$ua->request(HTTP::Request->new('GET', $lwp{'fetch'}), sub { my($chunk, $res) = @_; if (!$head) { showheaders($temp,$res); $head++;} else { print LOG "$temp->{'name'} Status: " . $res->status_line . "\n" if $head==1; #just the first one } $bytes_received += length($chunk); unless (defined $expected_length) { $expected_length = $res->content_length || 0; } if ($expected_length) { printf STDERR "%d%% - ",100 * $bytes_received / $expected_length if $debug==2; } print STDERR "$bytes_received bytes received\n" if $debug; $_=$lastchunk=$lastchunk . $chunk; #our data can get chopped! die if ($s=eval($sub)); $s; }); if (!$res->is_success) { print STDERR "$temp->{'name'} Error: " . $res->status_line . " $lwp{'fetch'}\n"; $s= "$temp->{'name'} Error: " . $res->status_line . " $lwp{'fetch'}\n"; } return $s; } sub showheaders { #show the headers my ($temp,$res) =@_; print LOG "######## $temp->{'name'} $temp->{'method'} ########\n"; my $hd=$res->headers; $lwp{'headers'}=$hd; while (my ($k,$v) = each %$hd) { print LOG "$k\t$v\n"; } } #put commas in numbers sub commify { my $input = shift; $input = reverse $input; $input =~ s<(\d\d\d)(?=\d)(?!\d*\.)><$1,>g; $_ = reverse $input; } sub parseconfig { my ($s,$file,$element); $file = shift or die "no filename for parseconfig"; # we need a filename $element="item" if not $element=shift; # change name if you want { local $/; open CONFIG,"$file"; $s= ; close CONFIG; } return parseXML($s,$element); } sub parseXML { my ($s,$element) = @_; my ($k,$v,@l,%tags,%temp,@temp); # parse an XML config file - assumes a DTD like this # # we return a list of anonymous hashes, each with the above keys (without the ?s) # can add additional fields to the DTD, parser doesn't care # NOTE this won't handle nested items if ($s=~ m/\\s*(.*?)\\s*(.*)"; } } else { print "bad DTD? No $element $file"; return; } $_=$s; @l=m#<$element>(.+?)#sg; #get a list of elements foreach $s (@l) { my $temp={}; while (($k,$v) = each %tags) { $s=~ m/$v/s; $temp->{$k}=$2 if length $2; #print STDERR $temp->{$k},"\n"; } push @temp,$temp; #anonymous hash to list } return @temp if $debug<4; my $temp={}; for $temp (@temp) { while (my($k,$v) = each %$temp) { print "KEY $k\tVAL $v\n"; } } exit if $debug==4; } sub debug { my $temp=shift; print "-----keys------\n"; my $temp={}; while (my($k,$v) = each %$temp) { print "KEY $k\tVAL $v\n"; } exit if $debug==4; }