#!/usr/bin/perl -w
#
# Summarise SpamAssassin verdicts found in one or more mbox files.
#
# Usage: pass mbox paths as arguments; with no arguments the current month's
# spam box (see current_spambox) is examined.
#
# For every message the X-Spam-Status header is parsed and the following are
# accumulated in $STATS: spam/ham verdicts, autolearn results, the highest and
# lowest score seen (with Message-ID), how often each test matched, and which
# host / SpamAssassin version performed the check.  A plain-text report is
# printed at the end.

use strict;
use Data::Dumper;
use Mail::Util ();
use Mail::Header ();

use constant DEBUG     => 0;     # true: dump $STATS after every mailbox
use constant TOP_TESTS => 30;    # how many tests to list; 0 lists all of them

# Running totals over all mailboxes processed.
my $STATS = {
    autolearn  => { spam => 0, ham => 0 },
    checked_by => {},                             # {host}{version}{messages|reqhits}
    count      => 0,                              # messages read
    hits       => { min => 1000, max => -1000 },  # sentinels until a score is seen
    msgid      => { min => '', max => '' },       # Message-IDs of the record scores
    is_spam    => { Yes => 0, No => 0 },
    miss       => 0,                              # messages without X-Spam-Status
    tests      => {},                             # {test name} => match count
};

# Return the path of this month's spam mailbox, ~/.PTTR/spam-YYYYMM.
sub current_spambox {
    my ($mon, $year) = (localtime)[4, 5];

    # HOME is passed as an argument so a '%' in it cannot be taken as a
    # format directive.
    return sprintf '%s/.PTTR/spam-%04d%02d', $ENV{HOME}, $year + 1900, $mon + 1;
}

# Overwrite the six-character progress counter on the current output line
# with the number of messages read so far (right-aligned, space padded).
sub show_count {
    print "\b" x 6, sprintf('%6d', $STATS->{count});
    return;
}

# Column of a multi-value `score` line to prefer; single-value lines fall
# back to the first column.
my $SCORE_SET   = 3;
my @SCORE_FILES = (
    '/usr/share/spamassassin/50_scores.cf',
    '/etc/mail/spamassassin/local.cf',
    "$ENV{HOME}/.spamassassin/user_prefs",
);
my $SCORES = {};    # {rule name} => score; later files override earlier ones

# Fill $SCORES from every readable file in @SCORE_FILES.
# Files that cannot be opened are skipped on purpose: not every installation
# has all three.
sub get_scores {
    for my $file (@SCORE_FILES) {
        open(my $fh, '<', $file) or next;
        while (my $line = <$fh>) {
            next unless $line =~ /^score\s+(\S+)\s+(.+?)\s*$/;
            my ($rule, $scores) = ($1, $2);
            my @sc = split /\s+/, $scores;
            next unless @sc;

            # Test definedness, not truth: a score of 0 is a valid value and
            # must not fall through to the first column.
            $SCORES->{$rule}
                = defined $sc[$SCORE_SET] ? $sc[$SCORE_SET] : $sc[0];
        }
        close $fh;
    }
    return;
}

# Read the mbox file named by the first argument and add its messages to
# $STATS, printing a progress counter while doing so.
# Returns without touching $STATS when no path is given or the path is not a
# regular file (the latter is reported on STDERR).
sub spam_stats {
    my ($inbox) = @_;
    return unless defined $inbox && length $inbox;
    unless (-f $inbox) {
        warn "Skipping $inbox: not a regular file\n";
        return;
    }

    my $hd = Mail::Header->new;
    print "Using $inbox (" . (-s $inbox) . ' bytes) ... ' . (' ' x 6);

    # Each element returned by read_mbox is handed to Mail::Header->extract
    # as one message.
    for my $msg (Mail::Util::read_mbox($inbox)) {
        $STATS->{count}++;
        show_count();

        $hd->empty;
        $hd->extract($msg);
        $hd->unfold($_)
            for qw(X-Spam-Status X-Spam-Check-By X-Spam-Checker-Version);

        my $msgid = $hd->get('Message-ID');
        $msgid = '' unless defined $msgid;
        my $ckdby = $hd->get('X-Spam-Check-By')
            || $hd->get('X-Spam-Checker-Version')
            || 'none';
        for ($ckdby, $msgid) {
            s/\s+$//;
            s/^\s+//;
            s/^.+\s+//;    # keep only the last whitespace-separated word
        }

        my $status = $hd->get('X-Spam-Status');
        unless ($status) {
            # message was NOT checked by SpamAssassin!
            $STATS->{miss}++;
            next;
        }

        # Remove the whitespace that unfolding leaves after commas so the
        # test list is a single \S+ token.
        $status =~ s/,\s+/,/g;

        if ($status =~ /^(yes|no),/i) {
            # Normalise case so "yes"/"YES" land in the reported Yes/No keys.
            my $verdict = ucfirst lc $1;
            $STATS->{is_spam}{$verdict}++;
        }

        if ($status =~ /\bversion=(\S+)/) {
            my $sa_version = $1;
            my $info = $STATS->{checked_by}{$ckdby}{$sa_version} ||= {};
            $info->{messages}++;
            $info->{reqhits} = $1 if $status =~ /\brequired=(\S+)/;
        }

        $STATS->{autolearn}{$1}++ if $status =~ /\bautolearn=(\S+)/;

        # Older SpamAssassin releases write "hits=", 3.x writes "score=".
        if ($status =~ /\b(?:hits|score)=(\S+)/) {
            my $hits = $1;
            if ($hits < $STATS->{hits}{min}) {
                $STATS->{hits}{min}  = $hits;
                $STATS->{msgid}{min} = $msgid;
            }
            if ($hits > $STATS->{hits}{max}) {
                $STATS->{hits}{max}  = $hits;
                $STATS->{msgid}{max} = $msgid;
            }
        }

        if ($status =~ /tests=(\S+)/) {
            $STATS->{tests}{$_}++ for split /,/, $1;
        }
    }

    print "\n";
    print Dumper($STATS) if DEBUG;
    return;
}

$| = 1;    # unbuffered STDOUT so the progress counter updates live

get_scores();
my @INBOX = @ARGV ? @ARGV : (current_spambox());
spam_stats($_) for @INBOX;

### REPORT!
my $checked = $STATS->{count} - $STATS->{miss};
print <<REPORT;

SpamAssassin checked $checked of $STATS->{count} messages.
It claimed that $STATS->{is_spam}{Yes} are spam and $STATS->{is_spam}{No} are ham.
In the process it auto-learnt from $STATS->{autolearn}{spam} spam and
$STATS->{autolearn}{ham} ham messages.

Record hits were:
  maximum: $STATS->{hits}{max} ($STATS->{msgid}{max})
  minimum: $STATS->{hits}{min} ($STATS->{msgid}{min})

REPORT

print((TOP_TESTS ? 'Top ' . TOP_TESTS : 'All'), ' matching tests:', "\n");
my $tests = $STATS->{tests};
my $left  = TOP_TESTS || scalar keys %$tests;

# Most frequent first; ties are broken by name so the output is stable.
for my $test (sort { $tests->{$b} <=> $tests->{$a} or $a cmp $b } keys %$tests) {
    last if $left-- <= 0;
    my $score = defined $SCORES->{$test} ? $SCORES->{$test} : '?';
    printf "%6d\t%s (%s)\n", $tests->{$test}, $test, $score;
}

print 'Checks were done by:', "\n";
for my $host (sort keys %{ $STATS->{checked_by} }) {
    my $by_version = $STATS->{checked_by}{$host};
    my @versions   = sort keys %$by_version;

    # A host with a single version is printed on one line; otherwise each
    # version gets its own indented line below the host name.
    my $single = @versions == 1;
    print "\t$host:\n" unless $single;

    for my $ver (@versions) {
        my $nfo     = $by_version->{$ver};
        my $reqhits = defined $nfo->{reqhits} ? $nfo->{reqhits} : '?';
        print(
            ($single ? "\t$host: " : "\t\t"),
            "$nfo->{messages} messages,",
            " (version $ver, required_hits: $reqhits)\n",
        );
    }
}