#!/usr/bin/perl
#
#   MailScanner - SMTP E-Mail Virus Scanner
#   Copyright (C) 2001  Julian Field
#
#   $Id: mailscanner,v 1.41 2002/03/25 13:52:52 jkf Exp $
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#   The author, Julian Field, can be contacted by email at
#      Jules@JulianField.net
#   or by paper mail at
#      Julian Field
#      Dept of Electronics & Computer Science
#      University of Southampton
#      Southampton
#      SO17 1BJ
#      United Kingdom
#

# Main program....

use strict;
use POSIX;

# Needed for Sys::Syslog, as Debian Potato (at least) doesn't
# appear to have "gethostname" syscall as used (indirectly) by Sys::Syslog
# So it uses `hostname` instead, which it can't do if PATH is tainted.
# It's good to have this anyway, although we may need to modify it for
# other OS when we find that something we need isn't here -- nwp 14/01/02
$ENV{PATH}="/sbin:/bin:/usr/sbin:/usr/bin";
# Needed for -T: delete $ENV{'BASH_ENV'}; # Don't run things on bash startup

# Remember to update this before producing new version of MailScanner
$Config::MailScannerVersion = '3.13';

# Add my directory onto the front of the include path
my($dir,$basename);
$dir = $0;
# can't use s/// as it doesn't untaint $dir -- nwp 14/01/02 (just)
$dir =~ m#^(.*)/([^/]+)$#;
$dir = $1;
$basename = $2;
unshift @INC, $dir;

require 'logger.pl';
#require 'sendmail.pl';
require 'workarea.pl';
require 'explode.pl';
require 'sweep.pl';
require 'info.pl';
require 'config.pl';
#require 'mta-specific.pl';
require 'disinfect.pl';
require 'lock.pl';

# Specify config file location on command line if you like
$Config::ConfigFile = $ARGV[0] if defined $ARGV[0];

# Start Syslog-ing
Log::Start($basename);
Log::InfoLog("MailScanner E-Mail Virus Scanner version $Config::MailScannerVersion starting.");
Config::ReadConfig($Config::ConfigFile);
require 'mta-specific.pl';
require 'sendmail.pl';
umask 0077; # Set nice and safe to no-one else can access anything!

# Verify versions of MIME-tools, as they often cause problems.
#
# These version numbers are what come in the MIME-tools v5.410 package,
# which I (nwp) use.
my %mime_required = (
		     Parser	=> "5.406",
		     Entity	=> "5.404",
		     Tools	=> "5.410",
		     Words	=> "5.404",
		     Head	=> "5.403",
		     Decoder	=> "5.403",
		     Body	=> "5.403",
);

foreach (keys %mime_required) {
    no strict 'refs';
    my $varname = "MIME::". ucfirst lc($_) ."::VERSION";
    defined $$varname or next;
    my $module_version = $$varname;
    $module_version >= $mime_required{$_} or
      Log::DieLog("FATAL: Newer MIME-tools module needed: MIME::$_ is only $module_version-- $mime_required{$_} required");
}
{
    no strict 'refs';
    my $varname = "Mail::SpamAssassin::VERSION";
    Log::DieLog("FATAL: Newer Mail::SpamAssassin module needed: Mail::SpamAssassin is only $Mail::SpamAssassin::VERSION-- 2.1 required")
      if defined $Mail::SpamAssassin::VERSION && 
         $Mail::SpamAssassin::VERSION<"2.1";
}

# Become a daemon and save our PID
if ($Config::Debugging) {
  print STDERR "In Debugging mode, not forking...\n";
  # Get current debugging flag, and invert it:
  my $current = config MIME::ToolUtils 'DEBUGGING';
#  config MIME::ToolUtils DEBUGGING => !$current;
} else {
  fork && exit;
  setsid();
}

# Tried to set [u,g]id after writing pid, but then it fails when it re-execs itself.
# Using the posix calls because I don't want to have to bother to find out what
# happens when "$< = $uid" fails (i.e. not running as root). Yet.
if ($Config::RunAsUser ne "") {
  my $uid = getpwnam($Config::RunAsUser);
  Log::InfoLog("ECS MailScanner setting UID to $Config::RunAsUser ($uid)");
  POSIX::setuid($uid) or Log::DieLog("Can't set UID $uid");
}

if ($Config::RunAsUser ne "") {
  my $gid = getgrnam($Config::RunAsGroup);
  Log::InfoLog("ECS MailScanner setting GID to $Config::RunAsGroup ($gid)");
  POSIX::setgid($gid) or Log::DieLog("Can't set GID $gid");
}


$> = $<;
$) = $(;

local(*PID);
open(PID, ">$Config::PidFile")
  or Log::DieLog("Cannot write pid file $Config::PidFile, %s", $!);
print PID "$$\n";
close PID;
#undef *PID;

# Must do this after all the "require-ing" so it cannot be done in config.pl
# Check the incoming and outgoing queues are on the same device
my($indevice, $outdevice);
chdir($Config::InQueueDir);
$indevice = (stat('.'))[0];
chdir($Config::OutQueueDir);
$outdevice = (stat('.'))[0];
Log::DieLog("$Config::InQueueDir and $Config::OutQueueDir must be on the same filesystem/partition!")
  unless $indevice == $outdevice;

# Clean up the entire outgoing sendmail queue in case I was
# killed off half way through processing some messages.
my $CleanUpList = Sendmail::ListWholeQueue($Config::InQueueDir);
Sendmail::ClearOutQueue($CleanUpList, $Config::OutQueueDir);

# Restart periodically, and handle time_t rollover in the year 2038
my($StartTime, $RestartTime);
$StartTime = time;
$RestartTime = $StartTime + $Config::RestartEvery;
while (time>=$StartTime && time<$RestartTime) {
  # Clear up the work area
  ClearWorkArea($Config::SrcDir);

  # Find all the messages to work on and write the header files
  # for all the messages I will need to scan.
  my(@DefinitelyClean, @Unscanned, @MessagesIn, %MessagesInfo,
     %Headers, $Msgs, $Bytes);
  Sendmail::FindMessagesToProcess($Config::InQueueDir, $Config::SrcDir,
                                  \@DefinitelyClean, \@Unscanned, \@MessagesIn,
                                  \%MessagesInfo, \%Headers, \$Msgs, \$Bytes);

  # Lookup the hosts that sent us all these messages in all the RBL's
  # to try to detect spam.
  my(%SpamInfo); # Map Message ID --> Spam Text
  %SpamInfo = ();
  Sendmail::FindSpammers(\%MessagesInfo, \%Headers, \%SpamInfo)
    if $Config::CheckSpam && (@MessagesIn || @DefinitelyClean);

  ## Deliver spam unless we are told not to
  #if ($Config::DeleteSpam) {
  #  my(@spamIds);
  #  @spamIds = keys %SpamInfo;
  #  Sendmail::DeleteIds(\@spamIds, $Config::InQueueDir);
  #  # Also need to remove the Ids from every array and hash we use later :-(
  #}

  #my($spamkey);
  #print "Spam Information:\n";
  #foreach $spamkey (sort keys %SpamInfo) {
  #  print "ID $spamkey\t$SpamInfo{$spamkey}\n";
  #}
  #print "\n";

  # Immediately deliver messages I have chosen not to scan because
  # the user doesn't want their domain scanned
  my(%EmptyHash);
  %EmptyHash = ();
  Sendmail::MoveToOutgoingQueue(\@Unscanned, 'unscanned', \%EmptyHash,
                                $Config::InQueueDir, $Config::OutQueueDir)
    if @Unscanned;

  # Immediately deliver messages I don't need to scan
  Sendmail::MoveToOutgoingQueue(\@DefinitelyClean, 'unused', \%SpamInfo,
                                $Config::InQueueDir, $Config::OutQueueDir)
    if @DefinitelyClean;

  # Wait and go round again if nothing to scan
  sleep(30), next unless @MessagesIn;

  # Construct directory hierarchy for the message attachments
  BuildInDirs($Config::SrcDir, \@MessagesIn);

  # Save all the attachments into files. Fill a list with any unparsable
  # messages, and return a hash of MIME entities comprising all the parsable
  # messages.
  # 24/5/01 Must explode all the TNEF attachments too, returning a ref to a
  # list of TNEF messages. Messages are either all TNEF or not at all.
  # Each value of %IsTNEF should be a ref to the %MimeEntities entry for the
  # appropriate winmail.dat file.
  my(%MimeEntities, $Start, $Duration, @Unparsable, @BadTNEF, %IsTNEF);
  $Start = time;
  %MimeEntities = ExplodeMessages($Config::InQueueDir, $Config::SrcDir,
                                  \@Unparsable, \%IsTNEF, \@BadTNEF,
                                  @MessagesIn);
  #DumpSkeletons(\%MimeEntities);

  # Check all the attachments for viruses and work out
  # the cleanliness of each message.
  my($InfectionReports, %InfectionTypes, @CleanIds, @DirtyIds);
  $InfectionReports = Sweep::VirusScan($Config::SrcDir, \%InfectionTypes);
  # Write infection reports for all the unparsable messages
  Sweep::LogUnparsable($InfectionReports, \%InfectionTypes, \@Unparsable);
  # Write infection reports for all the unparsable TNEF attachments
  Sweep::LogBadTNEF($InfectionReports, \%InfectionTypes, \@BadTNEF, $Config::SrcDir);
  $Duration = time - $Start;
  Log::InfoLog("Scanned $Msgs messages, $Bytes bytes in $Duration seconds");

  # Use the list of messages in the queue and the infection reports,
  # to produce 2 lists of messages: those definitely clean and those dirty.
  Sweep::CleanAndDirty($InfectionReports, \@MessagesIn,
                       \@CleanIds, \@DirtyIds);
  #Sweep::PrintSummary($InfectionReports, \@CleanIds, \@DirtyIds);

  # 24/5/01 Make these structures use the %IsTNEF information, so they are
  # still totally correct. Rollocks. This doesn't of course walk the dir
  # structure, it walks the MIME structure which contains no information
  # about the files comprising the TNEF attachment. So I still need to
  # handle TNEF messages specially in the code below this.
  my(%NumParts, $File2Entity, %Entity2Parent, %Entity2File);
  $File2Entity = EntitiesInfo(\%MimeEntities, \%NumParts,
                              \%Entity2Parent, \%Entity2File);
  #PrintParts(\%NumParts);
  #PrintFilenames($File2Entity);
  #PrintInfectedSections($InfectionReports, $File2Entity);
  #PrintParents(\%Entity2Parent);

  # Replace all the infected attachments with a text file.
  # For infections applying to the whole message (e.g. we could not parse it),
  # replace the entire message with a new body addressed to the same recipients.
  # 24/5/01 Must also pick up all the TNEF infected messages and remove the
  # entire winmail.dat attachment from each one.
  # 28/9/01 Must not attempt to disinfect bad TNEF messages, they will report
  # as not having a virus in them!
  Disinfect($InfectionReports, \%InfectionTypes, \%MimeEntities,
            $File2Entity, \%Entity2Parent, \%Entity2File, \%IsTNEF);

  # Quarantine the infected attachments
  QuarantineInfections($InfectionReports, $Config::QuarantineDir,
                       $Config::SrcDir, $Config::InQueueDir)
    if $Config::QuarantineAction =~ /store/i;

  # Deliver all the cleaned up messages into the outgoing queue and read
  # the original recipient list for each one in the process
  if ($Config::SignCleanMessages) {
    # Sign all the clean messages with the inline signature
    SignCleanMessages(\@CleanIds, \%MimeEntities);
    # And deliver them, from their MIME entity structures
    Sendmail::DeliverIds(\@CleanIds, \%MimeEntities, \%MessagesInfo,
                         \%SpamInfo, $Config::InQueueDir,
                         $Config::OutQueueDir, 'clean');
  } else {
    Sendmail::MoveToOutgoingQueue(\@CleanIds, \%MimeEntities, \%SpamInfo,
                                  $Config::InQueueDir, $Config::OutQueueDir);
  }
  if ($Config::DeliverToRecipients) {
    Sendmail::DeliverIds(\@DirtyIds, \%MimeEntities, \%MessagesInfo,
                         \%SpamInfo, $Config::InQueueDir,
                         $Config::OutQueueDir, 'dirty');
  } else {
    Sendmail::DeleteIds(\@DirtyIds, $Config::InQueueDir);
  }

  # Tell the message senders about the infections.
  # Need to send different messages depending on the %InfectionTypes.
  Sendmail::WarnSenders(\@DirtyIds, \%MessagesInfo, $InfectionReports, \%InfectionTypes)
    if $Config::TellSenders;
  # And notify postmaster about them too
  Sendmail::WarnLocalPostmaster(\@DirtyIds, \%MessagesInfo,
                                $InfectionReports, \%Headers)
    if $Config::TellLocalPostie;

  # Now attempt to disinfect the infected attachments and deliver them.
  # Only do this if they want disinfected messages delivered *and*
  # they want virus scanning to happen at all. Cannot disinfect anything
  # without virus scanning enabled.
  DisinfectAndDeliver($InfectionReports, \@CleanIds, \@DirtyIds, \@BadTNEF, \%MessagesInfo)
    if $Config::DeliverDisinfected && $Config::VirusScanning;
  last if $Config::Debugging;
}

# I have done a large number of virus scanning runs now,
# so kill and re-run myself.
unless ($Config::Debugging) {
  Log::DebugLog("About to re-exec myself: exec args ($0), (" . @ARGV . "), cwd is (" . `pwd` . ")...");
  # Don't want to leave connections to 514/udp open...
  Log::Stop();
  # But we may need to log here...
  exec $0, @ARGV or do {
    my $ExecError = $!;
    Log::Start($basename);
    Log::DieLog("Could not re-exec myself: $ExecError");
  };

}
