{{{ =head1 NAME URIRegistrar - add message metadata indicating the registrar of URI domains =head1 SYNOPSIS loadplugin Mail::SpamAssassin::Plugin::URIRegistrar =head1 REQUIREMENT This plugin requires a working whois command. This plugin requires working GDBM. =cut package Mail::SpamAssassin::Plugin::URIRegistrar; use Mail::SpamAssassin::Plugin; use strict; use bytes; use GDBM_File; use vars qw(@ISA); @ISA = qw(Mail::SpamAssassin::Plugin); my $whois_cmd = "/usr/bin/whois -in"; my $regfile = "/etc/mail/spamassassin/URIRegistrar_Registrars"; my $dbfile = "/var/tmp/URIRegistrar_Domains.db"; my $TTL_sec = 60*60*24*7; # default TTL one week my $TTL_unk_sec = 60*15; # default "unknown" ttl 15 minutes (retry) undef my %domains; undef my %blacklist; # constructor: register the eval rule sub new { my $class = shift; my $mailsaobject = shift; # some boilerplate... $class = ref($class) || $class; my $self = $class->SUPER::new($mailsaobject); bless ($self, $class); $self->register_eval_rule("check_uriregistrar"); $self->set_config($mailsaobject->{conf}); return $self; } sub set_config { my($self, $conf) = @_; my @cmds = (); push (@cmds, { setting => 'registrar_whois_cmd', is_priv => 1, type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, default => "/usr/bin/whois -in", code => sub { my ($self, $key, $value, $line) = @_; if ($value =~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } if ($value =~ /([^-_\w\s\/.:])/) { dbg("info: URIRegistrar registrar_whois_cmd contains illegal character \"$1\""); return $Mail::SpamAssassin::Conf::INVALID_VALUE; } (my $cmd) = $value =~ /^(\S+)/; if (! -f $cmd || ! -x $cmd) { dbg("info: URIRegistrar registrar_whois_cmd \"$cmd\" is not an executable file"); return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $whois_cmd = $value; } }); push (@cmds, { setting => 'registrar_registrars_file', is_priv => 1, type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, default => $regfile, code => sub { my ($self, $key, $value, $line) = @_; if ($value =~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } if (! -f $value || ! -r $value) { dbg("info: URIRegistrar registrar_registrars_file \"$value\" not found or not readable"); return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $regfile = $value; } }); push (@cmds, { setting => 'registrar_cache_file', is_priv => 1, type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, default => $dbfile, code => sub { my ($self, $key, $value, $line) = @_; if ($value =~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } if (-f $value && (! -r $value || ! -w $value)) { dbg("info: URIRegistrar registrar_cache_file \"$value\" insufficient permission"); return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $dbfile = $value; } }); push (@cmds, { setting => 'registrar_ttl', is_priv => 1, type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, default => 60*60*24*7, code => sub { my ($self, $key, $value, $line) = @_; if ($value =~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } if ($value < 60 || $value > 60*60*24*365) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $TTL_sec = $value; } }); push (@cmds, { setting => 'registrar_unknown_ttl', is_priv => 1, type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, default => 60*15, code => sub { my ($self, $key, $value, $line) = @_; if ($value =~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } if ($value < 60 || $value > 60*60*6) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $TTL_unk_sec = $value; } }); $conf->{parser}->register_commands(\@cmds); } sub check_uriregistrar { my ($self, $pms, $rulename) = @_; if ($pms->{spamfriendly}) { $pms->test_log($pms->{SF_Domain}); return 1; } else { return 0; } } sub parsed_metadata { my ($self, $opts) = @_; my $pms = $opts->{permsgstatus}; undef my %domlist; $pms->{spamfriendly} = 0; foreach my $uri ($pms->get_uri_list()) { my $dom = my_uri_to_domain($uri); if ($dom) { dbg("debug: URIRegistrar URI domain $dom"); $domlist{$dom} = 1; } } foreach my $addr (($pms->get("EnvelopeFrom"), $pms->get("MESSAGEID"))) { next unless $addr =~ /\@/; $addr =~ s/^.*\@\.*//; $addr =~ s/[>.]*\s*$//; next unless $addr =~ /\./; my $dom = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($addr); if ($dom) { dbg("debug: URIRegistrar email domain $dom"); $domlist{$dom} = 1; } } return unless %domlist; my $sf_domain = ""; load_registrars() unless %blacklist; my $expired = 0; foreach my $domain (keys %domlist) { next if $domain =~ /\.(?:gov|edu)$/; # trust these dbg("debug: URIRegistrar: checking domain $domain"); my $timeout = $domains{$domain}; if (defined($timeout)) { dbg("debug: URIRegistrar: cache hit: $timeout"); if (abs($timeout) < time()) { # a domain timed out, time to clean the cache $expired = 1; } if ($timeout > 0) { $sf_domain = $domain; last; } } else { dbg("debug: URIRegistrar: performing whois lookup"); if (open(WHOIS, "${whois_cmd} '$domain' |")) { $timeout = time() + $TTL_sec; my $registrar = ""; while () { next unless /Registrar:/; chomp; s/^.*Registrar:\s*//; s/\s+[(]R[0-9]+-[A-Z]+[)]\s*$//; s/\s+$//; $registrar = "\U$_"; if ($registrar){ dbg("debug: URIRegistrar: registrar is \"$registrar\""); if (defined($blacklist{$registrar})) { dbg("debug: URIRegistrar: registrar is in blacklist"); $sf_domain = $domain; last; } } } close(WHOIS); if ($registrar){ unless ($sf_domain) { dbg("debug: URIRegistrar: registrar is not in blacklist"); $timeout = $timeout * -1; } } else { dbg("debug: URIRegistrar: registrar not determined via whois"); $timeout = time() + $TTL_unk_sec * -1; } $domains{$domain} = $timeout; } else { dbg("debug: URIRegistrar: whois failed: $!"); return; } } last if $sf_domain; # minimize whois calls if possible } if ($sf_domain) { dbg("debug: URIRegistrar: spam-friendly registrar was present"); $pms->{SF_Domain} = $sf_domain; $pms->{spamfriendly} = 1; } purge_cache() if $expired; return 1; } sub my_uri_to_domain { my ($uri) = @_; # Javascript is not going to help us, so return. return if ($uri =~ /^javascript:/i); $uri =~ s,#.*$,,gs; # drop fragment $uri =~ s#^[a-z]+:/{0,2}##gsi; # drop the protocol $uri =~ s,^[^/]*\@,,gs; # username/passwd $uri =~ s,[/\?\&].*$,,gs; # path/cgi params $uri =~ s,:\d+$,,gs; # port $uri =~ s/[.,]+$//; # trailing punctuation $uri =~ s/^[.,]+//; # leading punctuation return if $uri =~ /\%/; # skip undecoded URIs. # we'll see the decoded version as well return if $uri =~ /^\d+\.\d+\.\d+\.\d+$/; # Ignore raw-DQ URIs return if $uri !~ /\./; # Ignore URIs with no second-level domain return lc Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri); } sub load_registrars { undef %blacklist if %blacklist; open (BL, "< $regfile") || die "URIRegistrar plugin can't read registrar file ${regfile}: $!"; while () { chomp; s/^\s+//; s/#.*$//; s/\|\s*\d+\s*$//; # scoring not supported in plugin s/\s+$//; if ($_) { my $registrar = "\U$_"; dbg("info: URIRegistrar adding \"$registrar\" to blacklist."); $blacklist{$registrar} = 1; } } close(BL); purge_cache(); } sub purge_cache { unless (%domains) { tie(%domains, 'GDBM_File', $dbfile, &GDBM_WRCREAT, 0644) || die "URIRegistrar plugin can't tie to domains cache ${dbfile}: $!"; } # purge hashes of expired entries my $purged = 0; my $cached = 0; while ((my $key,my $val) = each %domains) { if (abs($val) < time()) { # expired, forget it # next query will verify still with spammy registrar delete $domains{$key}; $purged++; } else { $cached++; } } dbg("info: URIRegistrar has $cached cached domains, $purged expired domains purged."); } sub dbg { Mail::SpamAssassin::dbg(@_); } 1; }}}