From 6d3e690653d2b94d20bf469c7b05ba6c8c3783f1 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Sun, 14 Sep 2025 15:13:25 +0200 Subject: [PATCH] add basic command line parsing for expire + some streamlining --- chatmaild/src/chatmaild/expire.py | 124 ++++++++++++------- chatmaild/src/chatmaild/fsreport.py | 18 +-- chatmaild/src/chatmaild/tests/test_expire.py | 103 ++++++++++----- 3 files changed, 153 insertions(+), 92 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 93b46bb0..8cee2ab3 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -4,8 +4,8 @@ Expire old messages and addresses. """ import os -import shutil import sys +from argparse import ArgumentParser from datetime import datetime from stat import S_ISREG @@ -13,13 +13,20 @@ from chatmaild.config import read_config class FileEntry: - def __init__(self, relpath, mtime, size): + def __init__(self, basedir, relpath, mtime, size): + self.basedir = basedir self.relpath = relpath self.mtime = mtime self.size = size def __repr__(self): - return f"" + return f"" + + def __str__(self): + return self.get_path() + + def get_path(self): + return joinpath(self.basedir, self.relpath) def fmt_size(self): return f"{int(self.size/1000):5.0f}K" @@ -49,8 +56,8 @@ class Stats: def iter_mailboxes(self, callback=None): for name in os.listdir(self.basedir)[: self.maxnum]: if "@" in name: - mailboxdir = joinpath(self.basedir, name) - mailbox = MailboxStat(mailboxdir) + basedir = joinpath(self.basedir, name) + mailbox = MailboxStat(basedir) self.mailboxes.append(mailbox) if callback is not None: callback(mailbox) @@ -59,8 +66,8 @@ class Stats: class MailboxStat: last_login = None - def __init__(self, mailboxdir): - self.mailboxdir = mailboxdir = str(mailboxdir) + def __init__(self, basedir): + self.basedir = basedir = str(basedir) # all detected messages in cur/new/tmp folders self.messages = [] @@ -71,55 +78,58 @@ class MailboxStat: 
self.totalsize = 0 # scan all relevant files (without recursion) - for name in os.listdir(mailboxdir): - fpath = joinpath(mailboxdir, name) + for name in os.listdir(basedir): + fpath = joinpath(basedir, name) if name in ("cur", "new", "tmp"): for msg_name in os.listdir(fpath): msg_path = joinpath(fpath, msg_name) st = os.stat(msg_path) relpath = joinpath(name, msg_name) self.messages.append( - FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) + FileEntry( + self.basedir, relpath, mtime=st.st_mtime, size=st.st_size + ) ) self.totalsize += st.st_size else: st = os.stat(fpath) if S_ISREG(st.st_mode): - self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) + self.extrafiles.append( + FileEntry(self.basedir, name, st.st_mtime, st.st_size) + ) if name == "password": self.last_login = st.st_mtime self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) +def print_info(msg): + print(msg, file=sys.stderr) + + class Expiry: def __init__(self, config, stat, dry, now): self.config = config self.dry = dry self.now = now + self.del_files = [] + self.del_mailboxes = [] - def rmtree(self, path): - if not self.dry: - print("would remove mailbox", path) - else: - shutil.rmtree(path, ignore_errors=True) - - def unlink(self, mailboxdir, relpath): - path = joinpath(mailboxdir, relpath) - if not self.dry: - for message in self.messages: - if relpath == message.relpath: - print( - f"would remove {message.fmt_since(self.now)} {message.fmt_size()} {path}" - ) - break - else: - try: - os.unlink(path) - except FileNotFoundError: - pass # it's gone already, fine + def perform_removes(self): + for mboxdir in self.del_mailboxes: + print_info(f"removing {mboxdir}") + if not self.dry: + self.rmtree(mboxdir) + for path in self.del_files: + print_info(f"removing {path}") + if not self.dry: + try: + os.unlink(path) + except FileNotFoundError: + pass # it's gone already, fine def process_mailbox_stat(self, mbox): + print_info(f"processing expiry for {mbox.basedir}") 
cutoff_without_login = ( self.now - int(self.config.delete_inactive_users_after) * 86400 ) @@ -128,35 +138,55 @@ class Expiry: changed = False if mbox.last_login and mbox.last_login < cutoff_without_login: - self.rmtree(mbox.mailboxdir) + self.del_mailboxes.append(mbox.basedir) return for message in mbox.messages: if message.mtime < cutoff_mails: - self.unlink(mbox.mailboxdir, message.relpath) + self.del_files.append(message.get_path()) elif message.size > 200000 and message.mtime < cutoff_large_mails: - self.unlink(mbox.mailboxdir, message.relpath) + self.del_files.append(message.get_path()) else: continue changed = True if changed: - self.unlink(mbox.mailboxdir, "maildirsize") + self.del_files.append(joinpath(mbox.basedir, "maildirsize")) -def main(args=None): - if args is None: - args = sys.argv[1:] - else: - args = list(map(str, args)) - cfgpath, basedir, maxnum = args - config = read_config(cfgpath) +def main(args): + """Expire mailboxes and messages according to chatmail config""" + parser = ArgumentParser(description=main.__doc__) + parser.add_argument( + "chatmail_ini", action="store", help="path pointing to chatmail.ini file" + ) + parser.add_argument( + "mailboxes_dir", + action="store", + help="path pointing to directory containing all mailbox directories", + ) + parser.add_argument( + "--maxnum", + default=None, + action="store", + help="maximum number of mailbxoes to iterate on", + ) + + parser.add_argument( + "--remove", + dest="remove", + action="store_true", + help="actually remove all expired files and dirs", + ) + args = parser.parse_args([str(x) for x in args]) + + config = read_config(args.chatmail_ini) now = datetime.utcnow().timestamp() - now = datetime(2025, 9, 9).timestamp() - - stat = Stats(basedir, maxnum=int(maxnum)) - exp = Expiry(config, stat, dry=True, now=now) + maxnum = int(args.maxnum) if args.maxnum else None + stat = Stats(args.mailboxes_dir, maxnum=maxnum) + exp = Expiry(config, stat, dry=not args.remove, now=now) 
stat.iter_mailboxes(exp.process_mailbox_stat) + exp.perform_removes() if __name__ == "__main__": - main() + main(sys.argv[1:]) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 009a2af2..3d846420 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -2,7 +2,7 @@ import os import sys from datetime import datetime -from chatmaild.expire import FileEntry, Stats, joinpath +from chatmaild.expire import Stats DAYSECONDS = 24 * 60 * 60 MONTHSECONDS = DAYSECONDS * 30 @@ -47,21 +47,13 @@ class Report: def process_mailbox_stat(self, mailbox): last_login = mailbox.last_login if last_login: - if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": + if os.path.basename(mailbox.basedir)[:3] == "ci-": self.ci_logins.append(last_login) else: self.user_logins.append(last_login) - for entry in mailbox.messages: - new = FileEntry( - relpath=joinpath(os.path.basename(mailbox.mailboxdir), entry.relpath), - mtime=entry.mtime, - size=entry.size, - ) - self.messages.append(new) - self.sum_all_messages += entry.size - - for entry in mailbox.extrafiles: - self.sum_extra += entry.size + self.messages.extend(mailbox.messages) + self.sum_all_messages += sum(msg.size for msg in mailbox.messages) + self.sum_extra += sum(entry.size for entry in mailbox.extrafiles) def dump_summary(self): reports = [] diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 2649fce5..ee6fb78b 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -1,5 +1,7 @@ +import os import random from datetime import datetime +from fnmatch import fnmatch from pathlib import Path import pytest @@ -8,37 +10,46 @@ from chatmaild.expire import FileEntry, MailboxStat from chatmaild.expire import main as expiry_main from chatmaild.fsreport import Report, Stats -# XXX maildirsize (used by dovecot quota) needs to be removed after removing files 
+# XXX maildirsize (used by dovecot quota) needs to be removed after removing files @pytest.fixture -def mailboxdir1(tmp_path): - mailboxdir1 = tmp_path.joinpath("mailbox1@example.org") - mailboxdir1.mkdir() - password = mailboxdir1.joinpath("password") +def basedir1(tmp_path): + basedir1 = tmp_path.joinpath("mailbox1@example.org") + basedir1.mkdir() + password = basedir1.joinpath("password") password.write_text("xxx") + basedir1.joinpath("maildirsize").write_text("xxx") - garbagedir = mailboxdir1.joinpath("garbagedir") + garbagedir = basedir1.joinpath("garbagedir") garbagedir.mkdir() - cur = mailboxdir1.joinpath("cur") - new = mailboxdir1.joinpath("new") - cur.mkdir() - msg_cur = cur.joinpath("msg1") - msg_cur.write_text("xxx") - new.mkdir() - msg_new = new.joinpath("msg2") - msg_new.write_text("xxx123") - return mailboxdir1 + create_new_messages(basedir1, ["cur/msg1"], size=500) + create_new_messages(basedir1, ["new/msg2"], size=600) + return basedir1 + + +def create_new_messages(basedir, relpaths, size=1000, days=0): + now = datetime.utcnow().timestamp() + + for relpath in relpaths: + msg_path = Path(basedir).joinpath(relpath) + msg_path.parent.mkdir(parents=True, exist_ok=True) + msg_path.write_text("x" * size) + # accessed now, modified N days ago + os.utime(msg_path, (now, now - days * 86400)) @pytest.fixture -def mbox1(mailboxdir1): - return MailboxStat(mailboxdir1) +def mbox1(basedir1): + return MailboxStat(basedir1) -def test_filentry_ordering(): - l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] +def test_filentry_ordering(tmp_path): + l = [ + FileEntry(str(tmp_path), f"x{i}", size=i + 10, mtime=1000 - i) + for i in range(10) + ] sorted = list(l) random.shuffle(l) l.sort(key=lambda x: x.size) @@ -46,37 +57,65 @@ def test_filentry_ordering(): def test_stats_mailbox(mbox1): - password = Path(mbox1.mailboxdir).joinpath("password") + password = Path(mbox1.basedir).joinpath("password") assert mbox1.last_login == password.stat().st_mtime 
assert len(mbox1.messages) == 2 msgs = list(mbox1.messages) assert len(msgs) == 2 - assert msgs[0].size == 3 # cur - assert msgs[1].size == 6 # new + assert msgs[0].size == 500 # cur + assert msgs[1].size == 600 # new - extra = Path(mbox1.mailboxdir).joinpath("large-extra") - extra.write_text("x" * 1000) - Path(mbox1.mailboxdir).joinpath("index-something").write_text("123") - mbox2 = MailboxStat(mbox1.mailboxdir) - assert len(mbox2.extrafiles) == 3 + create_new_messages(mbox1.basedir, ["large-extra"], size=1000) + create_new_messages(mbox1.basedir, ["index-something"], size=3) + mbox2 = MailboxStat(mbox1.basedir) + assert len(mbox2.extrafiles) == 4 assert mbox2.extrafiles[0].size == 1000 # cope well with mailbox dirs that have no password (for whatever reason) - Path(mbox1.mailboxdir).joinpath("password").unlink() - mbox3 = MailboxStat(mbox1.mailboxdir) + Path(mbox1.basedir).joinpath("password").unlink() + mbox3 = MailboxStat(mbox1.basedir) assert mbox3.last_login is None def test_report(mbox1): now = datetime.utcnow().timestamp() - mailboxes_dir = Path(mbox1.mailboxdir).parent + mailboxes_dir = Path(mbox1.basedir).parent stats = Stats(str(mailboxes_dir), maxnum=None) rep = Report(stats, now=now) stats.iter_mailboxes(rep.process_mailbox_stat) rep.dump_summary() -def test_expiry(example_config, mbox1): - args = example_config._inipath, mbox1.mailboxdir, 10000 +def test_expiry_cli_basic(example_config, mbox1): + args = example_config._inipath, Path(mbox1.basedir).parent expiry_main(args) + + +def test_expiry_cli_old_files(capsys, example_config, mbox1): + args = example_config._inipath, Path(mbox1.basedir).parent + + relpaths_old = ["cur/msg_old1", "cur/msg_old1"] + cutoff_days = int(example_config.delete_mails_after) + 1 + create_new_messages(mbox1.basedir, relpaths_old, size=1000, days=cutoff_days) + + relpaths_large = ["cur/msg_old_large1", "new/msg_old_large2"] + cutoff_days = int(example_config.delete_large_after) + 1 + create_new_messages( + mbox1.basedir, 
relpaths_large, size=1000 * 300, days=cutoff_days + ) + + create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1) + + expiry_main(args) + out, err = capsys.readouterr() + + allpaths = relpaths_old + relpaths_large + ["maildirsize"] + for path in allpaths: + for line in err.split("\n"): + if fnmatch(line, f"removing*{path}"): + break + else: + pytest.fail(f"failed to remove {path}\n{err}") + + assert "shouldstay" not in err