#!/bin/bash

#
# Train spamassassin global bayes filter
#
# Intended to be run daily, just put this script
# into /etc/cron.daily with rwxr-xr-x permissions.
#
# Will only learn from files touched in the last three days.
#
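# Suggested usage: have users move messages to "spambox" and "hambox" mail folders
# (e.g. via IMAP) in their $HOME/mail (or $HOME/Mail) directory,
# then rotate those files monthly after they have been learned from
# (e.g. to SpamAssassin-Spam-YYYY-MM and SpamAssassin-Ham-YYYY-MM) so that your
# corpus is retained in case you need to review it or retrain Bayes from scratch
# for some reason.
# Note: the find patterns used below are case-sensitive (SpamAssassin-SPAM*,
# SpamAssassin-HAM*), so archives named as in the example above are not
# picked up again by this script.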
#
# (C) 2006 John Hardin <jhardin@impsec.org>
# Master copy at http://www.impsec.org/~jhardin/antispam/spamassassin.procmail
# Licensed under the GPL v2 or later.
# Contact the author for commercial licensing.
#

# Optional extra options appended to every sa-learn learning run.
# Normal operation: DEBUG stays unset, so nothing extra is passed.
# For troubleshooting, swap the unset for the assignment below.
#DEBUG=" --debug-level all,rules,info,plugin "
unset -v DEBUG

# learn from folders in user home dirs
#: echo "Learning from user local mailboxes"

#######################################
# Feed every recently modified, non-empty mailbox of one kind to sa-learn.
#
# Searches /home/*/[Mm]ail for regular files modified within the last three
# days whose name matches either pattern, and runs sa-learn on each of them.
# find's results are read NUL-delimited, so a path containing spaces,
# newlines or glob characters is still handled as exactly one mailbox.
#
# Globals:
#   DEBUG (read) - optional extra sa-learn options; may be unset
# Arguments:
#   $1 - label printed in the progress line (SPAM or HAM)
#   $2 - sa-learn mode option (--spam or --ham)
#   $3 - find -name pattern for the rotated corpus files
#   $4 - find -name pattern for the live mailbox
# Outputs:
#   One "<label> from <path>" line per mailbox, plus sa-learn's own output
#######################################
learn_mailboxes() {
	local label=$1 mode=$2 corpus_pattern=$3 live_pattern=$4
	local mailbox mbtype

	# The list arrives on fd 3 so that commands inside the loop cannot
	# swallow part of it by reading their standard input.
	while IFS= read -r -d '' -u 3 mailbox
	do
		# Nothing to learn from an empty mailbox.
		[ -s "$mailbox" ] || continue

		printf '%s from %s\n' "$label" "$mailbox"

		# Treat the file as mbox unless file(1) reports it as MBX.
		mbtype="--mbox"
		if file "$mailbox" | grep -q ' MBX mail '
		then
			mbtype="--mbx"
		fi

		# DEBUG is deliberately left unquoted: it holds zero or more
		# separate options, so it must be word-split and must vanish
		# completely when it is unset.
		# shellcheck disable=SC2086
		/usr/bin/sa-learn "$mode" -C /etc/mail/spamassassin "$mbtype" ${DEBUG-} "$mailbox"
	done 3< <(find /home/*/[Mm]ail -type f \
		\( -name "$corpus_pattern" -or -name "$live_pattern" \) \
		-mtime -3 -print0)
}

# The name patterns are quoted so that find receives them verbatim instead
# of the shell expanding them against the current working directory.
learn_mailboxes SPAM --spam 'SpamAssassin-SPAM*' spambox
echo
learn_mailboxes HAM --ham 'SpamAssassin-HAM*' hambox

# Print a blank separator line, a heading, and the Bayes database summary
printf '\n%s\n' 'Bayes Statistics:'
/usr/bin/sa-learn --dump magic

# When autolearning is enabled you may want to force an expiry run here
#/usr/bin/sa-learn --force-expire -C /etc/mail/spamassassin

# Paranoia - make sure the database files stay readable by everyone
bayes_dir=/etc/mail/spamassassin
chmod a+r "$bayes_dir/bayes_seen" "$bayes_dir/bayes_toks"