#!/usr/bin/perl -w
#
# Written by Sylvain MAURIN <maurin@isc.cnrs.fr>
# CeCILL licence
# http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
#
# Debian 3.1 : depends on
# libmail-box-perl (CPAN Mail::Box)
# libemail-foldertype-perl (CPAN Email::FolderType)
# libconfig-simple-perl (CPAN Config::Simple)

##### Script conf #####
# Verbosity of the progress output: 0 is silent, any true value prints the
# main steps, >1 adds per-message counters, >2 adds message subjects.
my $DEBUG     = 3;
# Host name searched for in the X-Spam-Checker-Version header of spam.
my $sahost    = 'aragorn.isc.cnrs.fr';
# Name of the mail directory inside each home directory.
my $maildir   = 'Maildir';
# Sub-folder learnt as spam. Beware: Maildir sub-folder names start with a dot!
my $spamdir   = '.spam-poubelle';
# Sub-folder that is skipped when learning ham.
my $ignoredir = '.spam-probable';
#######################

use strict;
use User::pwent;
use Email::FolderType qw[folder_type];
use Mail::Box::Manager;
use Config::Simple;

sub scanspam(@);

# Home directories already handled, so that several passwd entries sharing
# one home directory are processed only once.
my %uniq_path;
my $mgr = Mail::Box::Manager->new;
my $cfg = Config::Simple->new;
my %Config;

# Take the UID range of regular accounts from /etc/adduser.conf when that
# file exists.  A failed read is reported instead of being silently ignored.
if ( -f '/etc/adduser.conf')
{
 if ($cfg->read('/etc/adduser.conf'))
 {
  %Config = $cfg->vars();
 }
 else
 {
  print STDERR "can not read /etc/adduser.conf: " . ($cfg->error() || "unknown error") . "\n";
 }
 if ($DEBUG)
 {
  foreach my $ConfigKey (keys(%Config))
  {
   # A key without a value is shown as empty rather than triggering a warning
   my $ConfigValue = defined($Config{$ConfigKey}) ? $Config{$ConfigKey} : "";
   print "Config:" . $ConfigKey . ":" . $ConfigValue . "\n";
  }
 }
}

# Fall back to the built-in range for every bound that is still undefined:
# file absent, file unreadable, or key missing from the file.  Without this
# the UID test in the main loop would compare against undef.
my %uid_fallback=("default.FIRST_UID",10000,"default.LAST_UID",20000);
foreach my $FallbackKey (keys(%uid_fallback))
{
 $Config{$FallbackKey}=$uid_fallback{$FallbackKey} if (not(defined($Config{$FallbackKey})));
}

#MAIN
#
# Walk the password database.  For every account that passes the filter
# below, reset its SpamAssassin Bayes database ("init"), learn every Maildir
# sub-folder except the spam and ignored ones as ham, then learn the spam
# sub-folder as spam.
while (my $pwent = getpwent())
{
 my $folderpath=$pwent->dir . "/$maildir/";
 # Marker file that must exist in the home directory for the account to be processed
 my $uservalidation=$pwent->dir . "/ISC-antispam" ;
 if
 (
  (not(defined($uniq_path{$pwent->dir}))) &&
  ($Config{"default.FIRST_UID"} <= $pwent->uid) &&
  ($pwent->uid <= $Config{"default.LAST_UID"}) &&
  # NOTE(review): hard-coded single account; looks like a leftover test
  # filter - confirm before removing, every other account is skipped.
  ($pwent->name eq "maurin" ) &&
  (-r $folderpath ) &&
  (-f $uservalidation ) &&
  (folder_type($folderpath) eq "Maildir")
 )
 {
  $uniq_path{$pwent->dir}=$pwent->dir;
  printf("*" x 70 . "\n" . "MAIN:folderpath:%s\n",$folderpath) if $DEBUG ; 
  scanspam($pwent,"init");
  my @names;
  if (opendir(my $dirhandle,"$folderpath"))
  {
   @names = readdir($dirhandle);
   closedir($dirhandle);
  }
  else
  {
   print STDERR "can not read directory $folderpath: $!\n";
  }
  foreach my $name (@names) 
  {
   next if ($name eq "..");  # skip the parent directory entry
   next if ($name !~ /^\..+/) ; # all sub maildirs begin with a dot (this also skips ".")
   # \Q..\E: the configured names start with a dot, which must match literally
   next if ($name =~ /\Q$spamdir\E/);  # skip the spam dir
   next if ($name =~ /\Q$ignoredir\E/);  # skip the ignored directory
   if (-d ($folderpath . "/" . $name ))
   {
    scanspam($pwent,"ham",$folderpath . "/" . $name);    
   }
  }
  scanspam($pwent,"spam", $folderpath . "/$spamdir/");
  # Remove the marker only once the account has really been processed, so
  # that a request from a skipped account is not silently discarded.
  unlink($uservalidation);
 }
}
endpwent();

# Spam Assassin learning
#
# scanspam($pwent, $learn_type [, $folderpath])
#
#   $pwent      : password entry object of the mailbox owner; its name, dir,
#                 uid and gid accessors are used
#   $learn_type : "init", "spam" or "ham"
#   $folderpath : Maildir folder to scan (not used for "init")
#
# "init" saves the owner's Bayes database with "sa-learn --backup" into
# ~/.SpamAssassinDB-backup and then empties it with "sa-learn --clear".
# "spam" and "ham" open the folder read-only and pass each active message to
# "sa-learn --spam" / "sa-learn --ham", run as the owner through su.
# Any other learn type only walks the folder without learning anything.
#
# Always returns 1; problems are reported on STDERR.
#
use File::Temp (); # core module: creates temporary files exclusively, under unpredictable names

sub scanspam(@)
{   
 my $pwent = shift;
 my $learn_type = shift;
 my $folderpath = shift;
 printf("SCANSPAM : learn_type : %s\n",$learn_type) if $DEBUG;
 if ( $learn_type eq "init" )
 {
  print "SA INIT BAYESIAN FILTER\n" if $DEBUG;
  # The redirection is part of the command given to su, so the backup file
  # is created with the owner's privileges and not by the calling shell
  # inside a directory the owner controls.
  my @steps =
  (
   "sa-learn --backup >" . _sa_shell_quote($pwent->dir . "/.SpamAssassinDB-backup"),
   "sa-learn --clear",
  );
  foreach my $step (@steps)
  {
   my $cmd = _sa_user_command($pwent,$step);
   print $cmd . "\n" if $DEBUG;
   my $log = `$cmd`;
   $log = "" if (not(defined($log))); # backticks yield undef when the command can not be started
   print "sa-learn:$log" if ($log=~/ERROR/);
  }
  return 1;
 }
 if (( -d $folderpath ) && (folder_type($folderpath) eq "Maildir" ))
 {
  my $folder = $mgr->open($folderpath , access=>'r');
  if (not(defined($folder)))
  {
   print STDERR "can not open mail folder $folderpath\n";
   return 1;
  }
  my @msg = $folder->messages('ACTIVE');
  print "Mail folder $folderpath contains ", scalar @msg, " messages:\n" if $DEBUG;
  my $counter  = 1;
  foreach my $message (@msg)
  {
   printf("%d.", $counter++) if $DEBUG>1;
   if (($learn_type eq "spam") || ($learn_type eq "ham"))
   {
    my ($text,$subject) = _sa_learn_source($message,$learn_type);
    if (defined($text))
    {
     $subject = '<no subject>' if (not(defined($subject) && length($subject)));
     if (not(_sa_learn_text($pwent,$learn_type,$text,$subject)))
     {
      # Give up on this folder, but do not leave it open
      $folder->close(write => 'NEVER');
      return 1;
     }
    }
   }
   print "SCAN DONE\n" if $DEBUG>1;
  }
  $folder->close(write => 'NEVER');
  return 1;
 }
 print STDERR "$folderpath not a maildir\n";
 return 1;
}

# Wrap a string in single quotes so that /bin/sh takes it as one literal word.
sub _sa_shell_quote
{
 my $word = shift;
 $word =~ s/'/'\\''/g;
 return "'" . $word . "'";
}

# Build the su command line that runs $sa_cmd with /bin/sh as the mailbox owner.
sub _sa_user_command
{
 my ($pwent,$sa_cmd) = @_;
 return "su - " . _sa_shell_quote($pwent->name) . " -s /bin/sh -c " . _sa_shell_quote($sa_cmd);
}

# Choose what has to be learnt from one message.
# Returns (text, subject), or an empty list when the message must be skipped.
sub _sa_learn_source
{
 my ($message,$learn_type) = @_;
 if ($learn_type eq "spam")
 {
  my $flag = $message->get('X-Spam-Flag');
  my $checker = $message->get('X-Spam-Checker-Version');
  if
  (
   (defined($flag)) &&
   (defined($checker)) &&
   ($flag eq 'YES') &&
   ($checker =~ /\Q$sahost\E/)
  )
  {
   # Message already flagged by our own SpamAssassin host: only its second
   # part is learnt, and only when that part is a message/rfc822 attachment.
   print "PREVIOUSLY DETECTED:" if $DEBUG>2;
   my $part=($message->parts)[1];
   return () if (not(defined($part))); # fewer than two parts
   my $content_type = $part->get('Content-Type');
   return () if (not(defined($content_type) && ($content_type eq 'message/rfc822')));
   # NOTE(review): string() on the part presumably includes the part's own
   # MIME header in front of the attached message - confirm that this is
   # what sa-learn should be given.
   my $text = $part->string;
   # Subject is taken from the first "Subject:" line found in that text
   my ($subject) = ($text =~ /^Subject: (.+)/m);
   return ($text,$subject);
  }
 }
 return ($message->string,scalar($message->get('Subject')));
}

# Store $text in a private temporary file owned by the mailbox owner, run
# "sa-learn --$learn_type" on it as that owner and remove the file.
# Returns 1 when sa-learn was run, 0 when the file could not be prepared.
sub _sa_learn_text
{
 my ($pwent,$learn_type,$text,$subject) = @_;
 # tempfile() dies on failure, hence the eval
 my ($fh,$tmpmsg) = eval { File::Temp::tempfile('SA-learn-msg.XXXXXXXX', DIR => '/tmp') };
 if (not(defined($fh)))
 {
  print STDERR "can not open temporary file in /tmp: $@\n";
  return 0;
 }
 # The owner must be able to read the file, nobody else
 chmod 0600, $tmpmsg;
 chown($pwent->uid,$pwent->gid,$tmpmsg);
 my $written = print {$fh} $text;
 $written = 0 if (not(close($fh))); # buffered write errors show up at close
 if (not($written))
 {
  print STDERR "can not write $tmpmsg\n";
  unlink($tmpmsg);
  return 0;
 }
 print "SA LEARN " . uc($learn_type) . ":" if $DEBUG;
 printf("**%s**:", $subject) if $DEBUG>2;
 my $cmd = _sa_user_command($pwent,"sa-learn --" . $learn_type . " " . _sa_shell_quote($tmpmsg));
 print $cmd . ":" if $DEBUG;
 my $log = `$cmd`;
 $log = "" if (not(defined($log)));
 chomp($log);
 print "$log:" if $DEBUG;
 unlink($tmpmsg);
 return 1;
}

