#!/usr/bin/perl # Copyright 2000-5 Vlado Keselj www.cs.dal.ca/~vlado sub help { print STDERR <<"#EOT" } # Report new material on a web page, version $VERSION # # Uses diff, lynx or wget, sendmail (if option -e is used) # # Usage: report-new.pl [switches] URL # -h Print help and exit. # -v Print version of the program and exit. # -e email Sends output, if not empty, to email. # -d method Dumping method. The default is lynx_txt; other option is # wget. #EOT use strict; use POSIX qw(strftime); use vars qw( $VERSION ); $VERSION = sprintf "%d.%d", q$Revision: 1.7 $ =~ /(\d+)/g; use Getopt::Std; use vars qw($opt_v $opt_h $opt_e $opt_d); getopts("hve:d:"); if ($opt_v) { print "$VERSION\n"; exit; } elsif ($opt_h || !@ARGV) { &help(); exit; } my $dump = \&dump_lynx_txt; if ($opt_d ne '') { if ($opt_d eq 'lynx_txt') { $dump = \&dump_lynx_txt } elsif($opt_d eq 'wget' ) { $dump = \&dump_wget } else { &help(); print STDERR "error: '-d $opt_d'"; exit -1; } } ($#ARGV==0 && $ARGV[0]=~/^http:\/\//) || die "Format: report-new.pl http://...\n"; my ($urlbase, $url); $urlbase = $url = shift; # E.g.: http://www.cs.dal.ca/~vlado/srcperl if ( $url =~ m.//[^/]*/. ) { $urlbase = $`.$& } # E.g.: http://www.cs.dal.ca/ my $urlId = &encode_w1($url); my $timestamp = strftime("%Y-%m-%d-%T", localtime(time)); if (! -d 'tmp') { mkdir 'tmp', 0700 or die "can't mkdir tmp: $!" } if (! -d 'report-new.pl.d') { mkdir 'report-new.pl.d', 0700 or die "can't mkdir report-new.pl.d: $!" } my $TmpBase = "tmp/$urlId-$timestamp"; my $TmpFile1 = "$TmpBase-1"; my $lastFile = "report-new.pl.d/$urlId.last"; -e $lastFile or putfile($lastFile,''); # First step: grab the page &$dump($url, $TmpFile1); my $material = getfile($TmpFile1); $material = `diff $TmpFile1 $lastFile 2>&1`; $material =~ s/^[^<].*\n//mg; $material =~ s/^< //mg; if ($material) { if ($opt_e) { my $out; open($out, "|sendmail -t") or die; print $out "To: $opt_e\n". "Subject: [report-new.pl] $url\n\n$material"; close($out); } else { print $material } } unlink($lastFile); rename($TmpFile1, $lastFile); sub putfile($@) { my $f = shift; local *F; open(F, ">$f") or die "putfile:cannot open $f:$!"; print F '' unless @_; while (@_) { print F shift(@_) } close(F) } sub encode_w1( $ ) { local $_ = shift; s/[\W_]/'_'.uc unpack("H2",$&)/ge; return $_; } sub dump_lynx_txt { my $url = shift; my $file = shift; local *F; open(F,"|lynx -dump -nolist -") or die "lynx error:$!"; print F $url; close(F); } sub dump_wget { my $url = shift; my $file = shift; system('wget', '--quiet', '-O', $file, $url); } sub getfile($) { my $f = shift; local *F; open(F, "<$f") or die "getfile:cannot open $f:$!"; my @r = ; close(F); return wantarray ? @r : join ('', @r); } __END__ =head1 NAME report-new.pl - Report new material on a web page =head1 SYNOPIS report-new.pl [switches] URL =head1 DESCRIPTION Reports new material on a web page. Typically used as a cron job with the -e option. -h Print help and exit. -v Print version of the program and exit. -e email Sends output, if not empty, to email. justify [input files] -d method Dumping method. The default is lynx_txt; other option is wget. =head1 PREREQUISITES POSIX qw(strftime); uses diff, lynx or wget, sendmail (if option -e is used). =head1 SCRIPT CATEGORIES Web =head1 README Reports new material on a web page. =head1 SEE ALSO Scripts: wget =head1 THANKS I would like to thank Peet Moris for bug reports and comments. =head1 COPYRIGHT Copyright 2000-5 Vlado Keselj F This script is provided "as is" without expressed or implied warranty. This is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The latest version can be found at F. =cut