#!/usr/bin/perl
###############################################################################
#
# WHAT?
#   Processes "spamd" logs and prints a summary report
#
# HOW?
#   spamd-stats [-t <num>] [logfile ...]
#
#   Options:
#     -t N   = show only the N most frequently hit tests (default: 20)
#     -t 0   = disable showing top tests
#     -t -1  = disable limitation, show ALL tests (any negative value)
#
#   When no log file is given, /var/log/maillog is scanned.
#   Files named *.gz / *.bz2 are read through /bin/zcat / /usr/bin/bzcat.
#
# WHO?
#   Marius Feraru
#
###############################################################################
use strict;
use warnings;

our $VERSION = sprintf '0.%d.%d', '\$Revision: 1.0 $' =~ /(\d+)\.(\d+)/xm;

use English qw( -no_match_vars );
use Getopt::Long qw(:config no_ignore_case bundling auto_version auto_help);
use List::Util qw(max min);
use Data::Dumper;

# default options...
my %opt = ( top_tests => 20 );

# decompressors: looks lazy, but it's FASTER than using any Compress::
#   <decompressor command> => <RegExp applied to the file name>
# NOTE: this table must be initialised before any log file is processed,
# otherwise get_fh() sees an empty hash and reads compressed files raw.
my %DECOMPRESSORS = (
    '/bin/zcat'      => qr/\.(?i:gz)$/,
    '/usr/bin/bzcat' => qr/\.(?i:bz2)$/,
);

# STATS structure
my $STATS = {
    total  => 0,     # number of processed messages
    spam   => 0,     # number of spam messages
    scores => {      # scores collection
        spam => [],
        ham  => [],
    },
    tests => {},     # stats by test
    learn => {},     # autolearn
};

# Entry point: parses the command line (@ARGV), scans every requested log
# file and prints the report. Dies on invalid options or unreadable files.
sub main {

    # wanna override the defaults?
    Getopt::Long::GetOptions( \%opt, qw[ top_tests|t=i ], )
        or die "Usage: $PROGRAM_NAME [-t <num>] [logfile ...]\n";

    # log files to scan...
    my @log_files = @ARGV;

    # default.... use default "maillog"
    if ( !@log_files ) {
        push @log_files, '/var/log/maillog';
    }

    foreach my $logfile (@log_files) {
        process_log($logfile);
    }

    print_report();
    return;
}

# Prints the summary collected in $STATS to the currently selected
# filehandle: totals, autolearn counters, score ranges and (unless disabled
# with "-t 0") the list of tests ordered by number of hits.
sub print_report {
    print 'Processed ', $STATS->{total}, ' messages, ',
        valp( $STATS->{spam}, $STATS->{total} ), ' spam.', "\n",
        'Learned from: ',
        valp( $STATS->{learn}{ham}, $STATS->{total} ), ' ham, ',
        valp( $STATS->{learn}{spam}, $STATS->{total} ), ' spam.', "\n",
        'Spam Scores: max: ',
        _score_or_na( max @{ $STATS->{scores}{spam} } ),
        ', min: ',
        _score_or_na( min @{ $STATS->{scores}{spam} } ), "\n",
        'Ham Scores: max: ',
        _score_or_na( max @{ $STATS->{scores}{ham} } ),
        ', min: ',
        _score_or_na( min @{ $STATS->{scores}{ham} } ), "\n";

    # "-t 0" switches the test listing off completely
    return if !$opt{top_tests};

    # a positive value limits the listing, a negative one shows everything
    my $limit = $opt{top_tests} > 0 ? $opt{top_tests} : 0;

    if ($limit) {
        print 'Top ', $limit, q{ };
    }
    print "Tests hit:\n";

    # most hits first; ties are ordered by name so the output is stable
    my @ranked = sort {
        $STATS->{tests}{$b} <=> $STATS->{tests}{$a} || $a cmp $b
    } keys %{ $STATS->{tests} };

    if ( $limit && @ranked > $limit ) {
        splice @ranked, $limit;
    }

    for my $test (@ranked) {
        printf "%8d %s\n", $STATS->{tests}{$test}, $test;
    }
    return;
}

# Returns the given score, or 'n/a' when it is undefined (max/min of an
# empty score list).
sub _score_or_na {
    my ($score) = @_;
    return defined $score ? $score : 'n/a';
}

# Value with percent attached
#   $value - counter; undef (counter never incremented) is treated as 0
#   $total - reference total; a false total yields 0.00%
# Returns a string like "3 (25.00%)".
sub valp {
    my ( $value, $total ) = @_;
    if ( !defined $value ) {
        $value = 0;
    }
    return sprintf '%d (%.2f%%)', $value,
        $total ? 100 * $value / $total : 0;
}

# get a log filehandle...
#   $logfile - path of the log file
# Returns a read handle: a pipe from the matching decompressor when the file
# name matches one of %DECOMPRESSORS, a plain file handle otherwise.
# Dies when the file (or the decompressor) cannot be opened.
sub get_fh {
    my $logfile = shift;

    # Iterate over keys() instead of each(): returning from inside an each()
    # loop leaves the hash iterator half-way, so the next call would skip
    # entries and could miss the right decompressor.
    for my $dec ( sort keys %DECOMPRESSORS ) {
        next if $logfile !~ $DECOMPRESSORS{$dec};
        open my $fh, q{-|}, $dec, $logfile
            or die "Cannot decompress $logfile with $dec: $OS_ERROR\n";
        return $fh;
    }

    open my $fh, q{<}, $logfile
        or die "Cannot open $logfile: $OS_ERROR\n";
    return $fh;
}

# Log file processor...
#   $logfile - path of the log file to scan
# Every "spamd: result:" line updates $STATS: message totals, spam/ham
# scores, per-test hit counters and autolearn counters. Other lines are
# ignored. Returns nothing.
sub process_log {
    my ($logfile) = @_;
    my $fh = get_fh($logfile);

LINE:
    while ( my $line = <$fh> ) {
        my ( $verdict, $score, $flags, $stats ) = $line =~ m{
            ^ .+? spamd: \s+ result: \s+
            (\S) \s+ (-?\d+) \s+ - \s+ (\S+) \s+ (\S+)
        }xms
            or next LINE;

        $STATS->{total}++;

        # 'Y' marks a message classified as spam
        if ( $verdict eq 'Y' ) {
            $STATS->{spam}++;
            push @{ $STATS->{scores}{spam} }, $score;
        }
        else {
            push @{ $STATS->{scores}{ham} }, $score;
        }

        for my $flag ( split /,/xm, $flags ) {
            $STATS->{tests}{$flag}++;
        }

        if ( $stats =~ /autolearn=([^,]+)/xm ) {
            $STATS->{learn}{$1}++;
        }
    }

    # For a decompressor pipe, close() fails when the child exited non-zero;
    # $OS_ERROR is then 0 and the status is in $CHILD_ERROR.
    close $fh
        or warn "Problem closing $logfile: "
        . ( $OS_ERROR || "exit status $CHILD_ERROR" ) . "\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without scanning any log file.
main() if !caller;

1;