mailcow-dockerized/data/Dockerfiles/rspamd/sa_trivial_convert.lua
2019-12-23 10:21:44 +01:00

444 lines
12 KiB
Lua

local fun = require "fun"
local rspamd_logger = require "rspamd_logger"
local util = require "rspamd_util"
local lua_util = require "lua_util"
local rspamd_regexp = require "rspamd_regexp"
local ucl = require "ucl"
-- SA configuration lines that were not converted to a map; they are written
-- verbatim to auto_sa.conf at the end of the script
local complicated = {}
-- Converted rules grouped by rule type (header rules are additionally grouped
-- by header name), each group keyed by symbol name
local rules = {}
-- Scores collected from `score` lines, keyed by symbol name
local scores = {}
--- Re-join the tail of a tokenised line into a single space-separated string.
-- @param words array of tokens
-- @param start number of leading tokens to skip
-- @return the remaining tokens joined with a single space ("" if none remain)
local function words_to_re(words, start)
  -- Concatenating from index start + 1 is the same as dropping `start` items
  local first_kept = start + 1
  return table.concat(words, " ", first_kept)
end
--- Collect every match of a Lua pattern in a string.
-- @param str string to scan
-- @param delim optional Lua pattern describing one token; defaults to
--   runs of non-whitespace characters
-- @return array of matched tokens, in order of appearance
local function split(str, delim)
  local pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
--- Parse the header part of an SA `header` rule (e.g. `From:addr|To`).
-- Fills two fields of `cur_rule`:
--   * `header`   - array with one parameter table per header
--                  (`header`, `raw`, `strong` and optionally `function`)
--   * `ordinary` - true only when the definition names exactly one plain
--                  header without modifiers or special pseudo-headers
-- @param hline header definition, headers separated by `|`, modifiers by `:`
-- @param cur_rule rule table being built; modified in place
local function handle_header_def(hline, cur_rule)
  -- Builds the extractor used for the `addr` and `name` modifiers: it returns
  -- the given field of every address that util.parse_addr finds in a value
  local function make_addr_extractor(field)
    return function(str)
      local addr_parsed = util.parse_addr(str)
      local ret = {}
      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end
      return ret
    end
  end

  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  -- Now check for modifiers inside header's name
  for _, h in ipairs(split(hline, '[^|]+')) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: sharing one table would make every entry
      -- of hdr_params alias the last header parsed
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      for i = 2, #args do
        local func = args[i]
        if func == 'addr' or func == 'name' then
          cur_param['function'] = make_addr_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check multiple headers
      if cur_param['header'] == 'MESSAGEID' or cur_param['header'] == 'ToCc' then
        -- Special pseudo-headers of spamassassin
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Set once after the loop so the fields exist even for an empty header list;
  -- an ordinary rule always refers to exactly one header
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end
--- Parse one SpamAssassin configuration file.
-- Rules that can be expressed as a plain regexp map are stored in `rules`,
-- numeric scores go to `scores`, and every other line (conditionals, meta
-- rules, negated or multi-header rules, unparsable regexps, ...) is appended
-- to `complicated` so that it can be written out unchanged.
-- NOTE(review): `score` lines are always consumed into `scores`, including
-- those belonging to rules that end up in `complicated`.
-- @param f open file handle; only `f:lines()` is used
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false
  local skip_to_endif = false
  local if_nested = 0

  -- Keep a line for the residual SA configuration
  local function keep_line(line)
    table.insert(complicated, line)
  end

  -- Forget the rule currently being built
  local function reset_cur_rule()
    cur_rule = {}
    valid_rule = false
  end

  -- Store the rule being built in `rules`; the parser state is always reset,
  -- so a rejected rule cannot leak fields into the next one
  local function insert_cur_rule()
    local rule = cur_rule
    reset_cur_rule()

    if not rule['type'] or not rule['symbol'] then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
      return
    end

    local header_name
    if rule['type'] == 'header' then
      local first = rule['header'] and rule['header'][1]
      header_name = first and first['header']
      if not header_name then
        rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
        return
      end
    end

    if not rules[rule['type']] then
      rules[rule['type']] = {}
    end
    local target = rules[rule['type']]

    if header_name then
      -- Header rules are grouped by the header they match
      if not target[header_name] then
        target[header_name] = {}
      end
      target = target[header_name]
    end

    target[rule['symbol']] = rule
  end

  -- Called at the start of every rule definition: flush the previous rule
  -- and drop leftovers of a rule that went to `complicated`
  local function begin_rule()
    if valid_rule then
      insert_cur_rule()
    else
      reset_cur_rule()
    end
  end

  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end
    return 0
  end

  -- Split a line into words, dropping everything from the first word that
  -- starts with `#` (trailing comment)
  local function tokenize(line)
    local words = {}
    for _, w in ipairs(split(line)) do
      if string.sub(w, 1, 1) == '#' then
        break
      end
      words[#words + 1] = w
    end
    return words
  end

  -- Track if/else/endif nesting. Returns true when the line was consumed
  -- (stored in `complicated`) and must not be parsed as a rule.
  local function handle_conditional(line)
    -- Unbalanced if/endif
    if if_nested < 0 then
      if_nested = 0
    end

    if skip_to_endif then
      if string.match(line, '^endif') then
        if_nested = if_nested - 1
        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(line, '^if') then
        if_nested = if_nested + 1
      elseif string.match(line, '^else') then
        -- Else counterpart for if
        skip_to_endif = false
      end
      keep_line(line)
      return true
    end

    if string.match(line, '^if') then
      -- ifplugin, if !plugin(...) and any other condition are all skipped
      skip_to_endif = true
      if_nested = if_nested + 1
    elseif string.match(line, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
    elseif string.match(line, '^endif') then
      if_nested = if_nested - 1
    else
      return false
    end

    -- Stored exactly once: falling through to the rule parser would store the
    -- same line a second time and unbalance the residual configuration
    keep_line(line)
    return true
  end

  -- header SYMBOL Header =~ /regexp/
  local function parse_header_rule(words, line)
    begin_rule()

    if not (words[4] and (words[4] == '=~' or words[4] == '!~')) then
      keep_line(line)
      return
    end

    cur_rule['type'] = 'header'
    cur_rule['symbol'] = words[2]

    if words[4] == '!~' then
      -- Negated matches cannot be expressed as a map
      keep_line(line)
      return
    end

    cur_rule['re_expr'] = words_to_re(words, 4)
    if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
      keep_line(line)
      return
    end

    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if not cur_rule['re'] then
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep_line(line)
      return
    end

    handle_header_def(words[3], cur_rule)
    if not cur_rule['ordinary'] then
      keep_line(line)
      return
    end

    valid_rule = true
  end

  -- body|rawbody|full SYMBOL /regexp/
  local function parse_content_rule(words, line, rule_type, raw)
    begin_rule()

    local first_char = words[3] and string.sub(words[3], 1, 1)
    if first_char ~= '/' and first_char ~= 'm' then
      -- might be function
      keep_line(line)
      return
    end

    cur_rule['symbol'] = words[2]
    cur_rule['type'] = rule_type
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if raw then
      cur_rule['raw'] = true
    end

    if cur_rule['re'] then
      valid_rule = true
    else
      -- Keep the rule in the residual configuration instead of losing it
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep_line(line)
    end
  end

  -- uri SYMBOL /regexp/
  local function parse_uri_rule(words, line)
    begin_rule()

    cur_rule['type'] = 'uri'
    cur_rule['symbol'] = words[2]
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

    if cur_rule['re'] and cur_rule['symbol'] then
      valid_rule = true
    else
      keep_line(line)
    end
  end

  local function process_line(raw_line)
    local l = lua_util.rspamd_str_trim(raw_line)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    -- Skip empty lines and comments
    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    if handle_conditional(l) then
      return
    end

    local words = tokenize(l)
    local keyword = words[1]

    if keyword == "header" then
      parse_header_rule(words, l)
    elseif keyword == "body" then
      parse_content_rule(words, l, 'sabody', false)
    elseif keyword == "rawbody" then
      parse_content_rule(words, l, 'sarawbody', false)
    elseif keyword == "full" then
      parse_content_rule(words, l, 'message', true)
    elseif keyword == "uri" then
      parse_uri_rule(words, l)
    elseif keyword == "meta" then
      -- meta SYMBOL expression
      begin_rule()
      keep_line(l)
    elseif keyword == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif keyword == "score" then
      -- A missing symbol or a non-numeric score cannot be stored; keep the
      -- line rather than failing on a nil table index or dropping it
      local value = words[2] and parse_score(words)
      if value then
        scores[words[2]] = value
      else
        keep_line(l)
      end
    else
      keep_line(l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  if valid_rule then
    insert_cur_rule()
  end
end
-- Every command line argument is the path of an SA configuration file
for _, matched in ipairs(arg) do
  local f = io.open(matched, "r")
  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle instead of leaving it to the garbage collector
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end
-- Generated multimap configuration, keyed by lower-cased map name
local multimap_conf = {}

--- Write the regexp map for one group of rules and register it in
-- `multimap_conf`.
-- @param what rule type: 'sabody', 'sarawbody', 'full' (or its alias
--   'message', which is what the parser stores for `full` rules), 'uri'
--   or 'header'
-- @param syms table of rules keyed by symbol name; each rule has an `re` field
-- @param hdr header name, required when `what` is 'header'
local function handle_rule(what, syms, hdr)
  local mtype
  local filter
  local fname
  local header

  if what == 'message' then
    -- `full` rules are stored under the type 'message'; without this alias
    -- they were rejected as an unknown type and silently lost
    what = 'full'
  end

  local sym = what:upper()

  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' then
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  -- pairs() order is unspecified; sort the symbols so that repeated runs
  -- produce identical files
  local names = {}
  for k in pairs(syms) do
    names[#names + 1] = k
  end
  table.sort(names)

  -- Fail with the system error message if the map cannot be created
  local re_file = assert(io.open(fname, 'w'))
  for _, k in ipairs(names) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
    table.insert(conf.symbols, k)
  end
  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end
-- Emit one map per rule type; header rules get one map per header name
for rule_type, bucket in pairs(rules) do
  if rule_type ~= 'header' then
    handle_rule(rule_type, bucket)
  else
    for hdr_name, hdr_rules in pairs(bucket) do
      handle_rule(rule_type, hdr_rules, hdr_name)
    end
  end
end
-- Serialise the generated multimap configuration
local out = ucl.to_format(multimap_conf, 'ucl')
-- assert() turns a failed open into an error carrying the system message
-- instead of a nil-index failure on the next line
local mmap_conf = assert(io.open('auto_multimap.conf', 'w'))
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')

-- Everything that could not be converted stays a regular SA configuration
local sa_remain = assert(io.open('auto_sa.conf', 'w'))
for _, l in ipairs(complicated) do
  -- Whitespace-only lines are not written
  if not string.match(l, '^%s+$') then
    sa_remain:write(l)
    sa_remain:write('\n')
  end
end
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')