add basic command line parsing for expire + some streamlining

This commit is contained in:
holger krekel
2025-09-14 15:13:25 +02:00
parent ed7a70ba31
commit 6d3e690653
3 changed files with 153 additions and 92 deletions

View File

@@ -4,8 +4,8 @@ Expire old messages and addresses.
""" """
import os import os
import shutil
import sys import sys
from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from stat import S_ISREG from stat import S_ISREG
@@ -13,13 +13,20 @@ from chatmaild.config import read_config
class FileEntry: class FileEntry:
def __init__(self, relpath, mtime, size): def __init__(self, basedir, relpath, mtime, size):
self.basedir = basedir
self.relpath = relpath self.relpath = relpath
self.mtime = mtime self.mtime = mtime
self.size = size self.size = size
def __repr__(self): def __repr__(self):
return f"<FileEntry size={self.size} '{self.relpath}'>" return f"<FileEntry size={self.size} '{self.relpath}' >"
def __str__(self):
return self.get_path()
def get_path(self):
return joinpath(self.basedir, self.relpath)
def fmt_size(self): def fmt_size(self):
return f"{int(self.size/1000):5.0f}K" return f"{int(self.size/1000):5.0f}K"
@@ -49,8 +56,8 @@ class Stats:
def iter_mailboxes(self, callback=None): def iter_mailboxes(self, callback=None):
for name in os.listdir(self.basedir)[: self.maxnum]: for name in os.listdir(self.basedir)[: self.maxnum]:
if "@" in name: if "@" in name:
mailboxdir = joinpath(self.basedir, name) basedir = joinpath(self.basedir, name)
mailbox = MailboxStat(mailboxdir) mailbox = MailboxStat(basedir)
self.mailboxes.append(mailbox) self.mailboxes.append(mailbox)
if callback is not None: if callback is not None:
callback(mailbox) callback(mailbox)
@@ -59,8 +66,8 @@ class Stats:
class MailboxStat: class MailboxStat:
last_login = None last_login = None
def __init__(self, mailboxdir): def __init__(self, basedir):
self.mailboxdir = mailboxdir = str(mailboxdir) self.basedir = basedir = str(basedir)
# all detected messages in cur/new/tmp folders # all detected messages in cur/new/tmp folders
self.messages = [] self.messages = []
@@ -71,55 +78,58 @@ class MailboxStat:
self.totalsize = 0 self.totalsize = 0
# scan all relevant files (without recursion) # scan all relevant files (without recursion)
for name in os.listdir(mailboxdir): for name in os.listdir(basedir):
fpath = joinpath(mailboxdir, name) fpath = joinpath(basedir, name)
if name in ("cur", "new", "tmp"): if name in ("cur", "new", "tmp"):
for msg_name in os.listdir(fpath): for msg_name in os.listdir(fpath):
msg_path = joinpath(fpath, msg_name) msg_path = joinpath(fpath, msg_name)
st = os.stat(msg_path) st = os.stat(msg_path)
relpath = joinpath(name, msg_name) relpath = joinpath(name, msg_name)
self.messages.append( self.messages.append(
FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) FileEntry(
self.basedir, relpath, mtime=st.st_mtime, size=st.st_size
)
) )
self.totalsize += st.st_size self.totalsize += st.st_size
else: else:
st = os.stat(fpath) st = os.stat(fpath)
if S_ISREG(st.st_mode): if S_ISREG(st.st_mode):
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) self.extrafiles.append(
FileEntry(self.basedir, name, st.st_mtime, st.st_size)
)
if name == "password": if name == "password":
self.last_login = st.st_mtime self.last_login = st.st_mtime
self.totalsize += st.st_size self.totalsize += st.st_size
self.extrafiles.sort(key=lambda x: -x.size) self.extrafiles.sort(key=lambda x: -x.size)
def print_info(msg):
print(msg, file=sys.stderr)
class Expiry: class Expiry:
def __init__(self, config, stat, dry, now): def __init__(self, config, stat, dry, now):
self.config = config self.config = config
self.dry = dry self.dry = dry
self.now = now self.now = now
self.del_files = []
self.del_mailboxes = []
def rmtree(self, path): def perform_removes(self):
if not self.dry: for mboxdir in self.del_mailboxes:
print("would remove mailbox", path) print_info(f"removing {mboxdir}")
else: if not self.dry:
shutil.rmtree(path, ignore_errors=True) self.rmtree(mboxdir)
for path in self.del_files:
def unlink(self, mailboxdir, relpath): print_info(f"removing {path}")
path = joinpath(mailboxdir, relpath) if not self.dry:
if not self.dry: try:
for message in self.messages: os.unlink(path)
if relpath == message.relpath: except FileNotFoundError:
print( pass # it's gone already, fine
f"would remove {message.fmt_since(self.now)} {message.fmt_size()} {path}"
)
break
else:
try:
os.unlink(path)
except FileNotFoundError:
pass # it's gone already, fine
def process_mailbox_stat(self, mbox): def process_mailbox_stat(self, mbox):
print_info(f"processing expiry for {mbox.basedir}")
cutoff_without_login = ( cutoff_without_login = (
self.now - int(self.config.delete_inactive_users_after) * 86400 self.now - int(self.config.delete_inactive_users_after) * 86400
) )
@@ -128,35 +138,55 @@ class Expiry:
changed = False changed = False
if mbox.last_login and mbox.last_login < cutoff_without_login: if mbox.last_login and mbox.last_login < cutoff_without_login:
self.rmtree(mbox.mailboxdir) self.del_mailboxes.append(mbox.basedir)
return return
for message in mbox.messages: for message in mbox.messages:
if message.mtime < cutoff_mails: if message.mtime < cutoff_mails:
self.unlink(mbox.mailboxdir, message.relpath) self.del_files.append(message.get_path())
elif message.size > 200000 and message.mtime < cutoff_large_mails: elif message.size > 200000 and message.mtime < cutoff_large_mails:
self.unlink(mbox.mailboxdir, message.relpath) self.del_files.append(message.get_path())
else: else:
continue continue
changed = True changed = True
if changed: if changed:
self.unlink(mbox.mailboxdir, "maildirsize") self.del_files.append(joinpath(mbox.basedir, "maildirsize"))
def main(args=None): def main(args):
if args is None: """Expire mailboxes and messages according to chatmail config"""
args = sys.argv[1:] parser = ArgumentParser(description=main.__doc__)
else: parser.add_argument(
args = list(map(str, args)) "chatmail_ini", action="store", help="path pointing to chatmail.ini file"
cfgpath, basedir, maxnum = args )
config = read_config(cfgpath) parser.add_argument(
"mailboxes_dir",
action="store",
help="path pointing to directory containing all mailbox directories",
)
parser.add_argument(
"--maxnum",
default=None,
action="store",
help="maximum number of mailbxoes to iterate on",
)
parser.add_argument(
"--remove",
dest="remove",
action="store_true",
help="actually remove all expired files and dirs",
)
args = parser.parse_args([str(x) for x in args])
config = read_config(args.chatmail_ini)
now = datetime.utcnow().timestamp() now = datetime.utcnow().timestamp()
now = datetime(2025, 9, 9).timestamp() maxnum = int(args.maxnum) if args.maxnum else None
stat = Stats(args.mailboxes_dir, maxnum=maxnum)
stat = Stats(basedir, maxnum=int(maxnum)) exp = Expiry(config, stat, dry=not args.remove, now=now)
exp = Expiry(config, stat, dry=True, now=now)
stat.iter_mailboxes(exp.process_mailbox_stat) stat.iter_mailboxes(exp.process_mailbox_stat)
exp.perform_removes()
if __name__ == "__main__": if __name__ == "__main__":
main() main(sys.argv[1:])

View File

@@ -2,7 +2,7 @@ import os
import sys import sys
from datetime import datetime from datetime import datetime
from chatmaild.expire import FileEntry, Stats, joinpath from chatmaild.expire import Stats
DAYSECONDS = 24 * 60 * 60 DAYSECONDS = 24 * 60 * 60
MONTHSECONDS = DAYSECONDS * 30 MONTHSECONDS = DAYSECONDS * 30
@@ -47,21 +47,13 @@ class Report:
def process_mailbox_stat(self, mailbox): def process_mailbox_stat(self, mailbox):
last_login = mailbox.last_login last_login = mailbox.last_login
if last_login: if last_login:
if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": if os.path.basename(mailbox.basedir)[:3] == "ci-":
self.ci_logins.append(last_login) self.ci_logins.append(last_login)
else: else:
self.user_logins.append(last_login) self.user_logins.append(last_login)
for entry in mailbox.messages: self.messages.extend(mailbox.messages)
new = FileEntry( self.sum_all_messages += sum(msg.size for msg in mailbox.messages)
relpath=joinpath(os.path.basename(mailbox.mailboxdir), entry.relpath), self.sum_extra += sum(entry.size for entry in mailbox.extrafiles)
mtime=entry.mtime,
size=entry.size,
)
self.messages.append(new)
self.sum_all_messages += entry.size
for entry in mailbox.extrafiles:
self.sum_extra += entry.size
def dump_summary(self): def dump_summary(self):
reports = [] reports = []

View File

@@ -1,5 +1,7 @@
import os
import random import random
from datetime import datetime from datetime import datetime
from fnmatch import fnmatch
from pathlib import Path from pathlib import Path
import pytest import pytest
@@ -8,37 +10,46 @@ from chatmaild.expire import FileEntry, MailboxStat
from chatmaild.expire import main as expiry_main from chatmaild.expire import main as expiry_main
from chatmaild.fsreport import Report, Stats from chatmaild.fsreport import Report, Stats
# XXX maildirsize (used by dovecot quota) needs to be removed after removing files # XXX basedirsize (used by dovecot quota) needs to be removed after removing files
@pytest.fixture @pytest.fixture
def mailboxdir1(tmp_path): def basedir1(tmp_path):
mailboxdir1 = tmp_path.joinpath("mailbox1@example.org") basedir1 = tmp_path.joinpath("mailbox1@example.org")
mailboxdir1.mkdir() basedir1.mkdir()
password = mailboxdir1.joinpath("password") password = basedir1.joinpath("password")
password.write_text("xxx") password.write_text("xxx")
basedir1.joinpath("maildirsize").write_text("xxx")
garbagedir = mailboxdir1.joinpath("garbagedir") garbagedir = basedir1.joinpath("garbagedir")
garbagedir.mkdir() garbagedir.mkdir()
cur = mailboxdir1.joinpath("cur") create_new_messages(basedir1, ["cur/msg1"], size=500)
new = mailboxdir1.joinpath("new") create_new_messages(basedir1, ["new/msg2"], size=600)
cur.mkdir() return basedir1
msg_cur = cur.joinpath("msg1")
msg_cur.write_text("xxx")
new.mkdir() def create_new_messages(basedir, relpaths, size=1000, days=0):
msg_new = new.joinpath("msg2") now = datetime.utcnow().timestamp()
msg_new.write_text("xxx123")
return mailboxdir1 for relpath in relpaths:
msg_path = Path(basedir).joinpath(relpath)
msg_path.parent.mkdir(parents=True, exist_ok=True)
msg_path.write_text("x" * size)
# accessed now, modified N days ago
os.utime(msg_path, (now, now - days * 86400))
@pytest.fixture @pytest.fixture
def mbox1(mailboxdir1): def mbox1(basedir1):
return MailboxStat(mailboxdir1) return MailboxStat(basedir1)
def test_filentry_ordering(): def test_filentry_ordering(tmp_path):
l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] l = [
FileEntry(str(tmp_path), f"x{i}", size=i + 10, mtime=1000 - i)
for i in range(10)
]
sorted = list(l) sorted = list(l)
random.shuffle(l) random.shuffle(l)
l.sort(key=lambda x: x.size) l.sort(key=lambda x: x.size)
@@ -46,37 +57,65 @@ def test_filentry_ordering():
def test_stats_mailbox(mbox1): def test_stats_mailbox(mbox1):
password = Path(mbox1.mailboxdir).joinpath("password") password = Path(mbox1.basedir).joinpath("password")
assert mbox1.last_login == password.stat().st_mtime assert mbox1.last_login == password.stat().st_mtime
assert len(mbox1.messages) == 2 assert len(mbox1.messages) == 2
msgs = list(mbox1.messages) msgs = list(mbox1.messages)
assert len(msgs) == 2 assert len(msgs) == 2
assert msgs[0].size == 3 # cur assert msgs[0].size == 500 # cur
assert msgs[1].size == 6 # new assert msgs[1].size == 600 # new
extra = Path(mbox1.mailboxdir).joinpath("large-extra") create_new_messages(mbox1.basedir, ["large-extra"], size=1000)
extra.write_text("x" * 1000) create_new_messages(mbox1.basedir, ["index-something"], size=3)
Path(mbox1.mailboxdir).joinpath("index-something").write_text("123") mbox2 = MailboxStat(mbox1.basedir)
mbox2 = MailboxStat(mbox1.mailboxdir) assert len(mbox2.extrafiles) == 4
assert len(mbox2.extrafiles) == 3
assert mbox2.extrafiles[0].size == 1000 assert mbox2.extrafiles[0].size == 1000
# cope well with mailbox dirs that have no password (for whatever reason) # cope well with mailbox dirs that have no password (for whatever reason)
Path(mbox1.mailboxdir).joinpath("password").unlink() Path(mbox1.basedir).joinpath("password").unlink()
mbox3 = MailboxStat(mbox1.mailboxdir) mbox3 = MailboxStat(mbox1.basedir)
assert mbox3.last_login is None assert mbox3.last_login is None
def test_report(mbox1): def test_report(mbox1):
now = datetime.utcnow().timestamp() now = datetime.utcnow().timestamp()
mailboxes_dir = Path(mbox1.mailboxdir).parent mailboxes_dir = Path(mbox1.basedir).parent
stats = Stats(str(mailboxes_dir), maxnum=None) stats = Stats(str(mailboxes_dir), maxnum=None)
rep = Report(stats, now=now) rep = Report(stats, now=now)
stats.iter_mailboxes(rep.process_mailbox_stat) stats.iter_mailboxes(rep.process_mailbox_stat)
rep.dump_summary() rep.dump_summary()
def test_expiry(example_config, mbox1): def test_expiry_cli_basic(example_config, mbox1):
args = example_config._inipath, mbox1.mailboxdir, 10000 args = example_config._inipath, Path(mbox1.basedir).parent
expiry_main(args) expiry_main(args)
def test_expiry_cli_old_files(capsys, example_config, mbox1):
args = example_config._inipath, Path(mbox1.basedir).parent
relpaths_old = ["cur/msg_old1", "cur/msg_old1"]
cutoff_days = int(example_config.delete_mails_after) + 1
create_new_messages(mbox1.basedir, relpaths_old, size=1000, days=cutoff_days)
relpaths_large = ["cur/msg_old_large1", "new/msg_old_large2"]
cutoff_days = int(example_config.delete_large_after) + 1
create_new_messages(
mbox1.basedir, relpaths_large, size=1000 * 300, days=cutoff_days
)
create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1)
expiry_main(args)
out, err = capsys.readouterr()
allpaths = relpaths_old + relpaths_large + ["maildirsize"]
for path in allpaths:
for line in err.split("\n"):
if fnmatch(line, f"removing*{path}"):
break
else:
pytest.fail(f"failed to remove {path}\n{err}")
assert "shouldstay" not in err