diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 341cfbae..93b46bb0 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -11,8 +11,6 @@ from stat import S_ISREG from chatmaild.config import read_config -# XXX maildirsize (used by dovecot quota) needs to be removed after removing files - class FileEntry: def __init__(self, relpath, mtime, size): @@ -48,14 +46,19 @@ class Stats: self.mailboxes = [] self.maxnum = maxnum - def iter_mailboxes(self): - for mailbox in os.listdir(self.basedir)[: self.maxnum]: - if "@" in mailbox: - mailboxdir = joinpath(self.basedir, mailbox) - self.mailboxes.append(MailboxStat(mailboxdir)) + def iter_mailboxes(self, callback=None): + for name in os.listdir(self.basedir)[: self.maxnum]: + if "@" in name: + mailboxdir = joinpath(self.basedir, name) + mailbox = MailboxStat(mailboxdir) + self.mailboxes.append(mailbox) + if callback is not None: + callback(mailbox) class MailboxStat: + last_login = None + def __init__(self, mailboxdir): self.mailboxdir = mailboxdir = str(mailboxdir) # all detected messages in cur/new/tmp folders @@ -83,66 +86,76 @@ class MailboxStat: st = os.stat(fpath) if S_ISREG(st.st_mode): self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) + if name == "password": + self.last_login = st.st_mtime self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) - @property - def last_login(self): - for entry in self.extrafiles: - if entry.relpath == "password": - return entry.mtime +class Expiry: + def __init__(self, config, stat, dry, now): + self.config = config + self.dry = dry + self.now = now -def run_expire(config, basedir, now, dry=True, maxnum=None): - stat = Stats(basedir, maxnum=maxnum) - stat.iter_mailboxes() - cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400 - cutoff_date_mails = now - int(config.delete_mails_after) * 86400 - cutoff_date_large_mails = now - 
int(config.delete_large_after) * 86400 - - def rmtree(path): - if dry: + def rmtree(self, path): + if self.dry: print("would remove mailbox", path) else: shutil.rmtree(path, ignore_errors=True) - def unlink(mailboxdir, message): - if dry: - relpath = os.path.basename(mailboxdir) + message.relpath - print( - f"would remove {message.fmt_since(now)} {message.fmt_size()} {relpath}" - ) + def unlink(self, mailboxdir, relpath): + path = joinpath(mailboxdir, relpath) + if self.dry: + try: + st = os.stat(path) + except FileNotFoundError: + return + item = FileEntry(relpath, st.st_mtime, st.st_size) + print(f"would remove {item.fmt_since(self.now)} {item.fmt_size()} {path}") else: - os.unlink(path) + try: + os.unlink(path) + except FileNotFoundError: + pass # it's gone already, fine + + def process_mailbox_stat(self, mbox): + cutoff_without_login = ( + self.now - int(self.config.delete_inactive_users_after) * 86400 + ) + cutoff_mails = self.now - int(self.config.delete_mails_after) * 86400 + cutoff_large_mails = self.now - int(self.config.delete_large_after) * 86400 - for mbox in stat.mailboxes: changed = False - if mbox.last_login and mbox.last_login < cutoff_date_without_login: - rmtree(mbox.mailboxdir) - continue + if mbox.last_login and mbox.last_login < cutoff_without_login: + self.rmtree(mbox.mailboxdir) + return + for message in mbox.messages: - path = joinpath(mbox.mailboxdir, message.relpath) - if message.mtime < cutoff_date_mails: - unlink(mbox.mailboxdir, message) - elif message.size > 200000 and message.mtime < cutoff_date_large_mails: - unlink(mbox.mailboxdir, message) + if message.mtime < cutoff_mails: + self.unlink(mbox.mailboxdir, message.relpath) + elif message.size > 200000 and message.mtime < cutoff_large_mails: + self.unlink(mbox.mailboxdir, message.relpath) else: continue changed = True - if changed and not dry: - p = joinpath(mbox.mailboxdir, "maildirsize") - try: - os.unlink(p) - except FileNotFoundError: - pass + if changed: + self.unlink(mbox.mailboxdir, "maildirsize") -def
main(): - cfgpath, basedir, maxnum = sys.argv[1:] +def main(args=None): + if args is None: + args = sys.argv[1:] + else: + args = list(map(str, args)) + cfgpath, basedir, maxnum = args config = read_config(cfgpath) now = datetime.utcnow().timestamp() now = datetime(2025, 9, 9).timestamp() - run_expire(config, basedir, maxnum=int(maxnum), now=now) + + stat = Stats(basedir, maxnum=int(maxnum)) + exp = Expiry(config, stat, dry=True, now=now) + stat.iter_mailboxes(exp.process_mailbox_stat) if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 91a59302..009a2af2 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -2,7 +2,6 @@ import os import sys from datetime import datetime -from chatmaild.config import read_config from chatmaild.expire import FileEntry, Stats, joinpath DAYSECONDS = 24 * 60 * 60 @@ -18,6 +17,8 @@ def D(timestamp, now=datetime.utcnow().timestamp()): def K(size): if size < 1000: return f"{size:5.0f}" + elif size < 10000: + return f"{size/1000:3.2f}K" return f"{int(size/1000):5.0f}K" @@ -43,26 +44,24 @@ class Report: self.stats = stats self.now = now - for mailbox in stats.mailboxes: - last_login = mailbox.last_login - if last_login: - if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": - self.ci_logins.append(last_login) - else: - self.user_logins.append(last_login) - for entry in mailbox.messages: - new = FileEntry( - relpath=joinpath( - os.path.basename(mailbox.mailboxdir), entry.relpath - ), - mtime=entry.mtime, - size=entry.size, - ) - self.messages.append(new) - self.sum_all_messages += entry.size + def process_mailbox_stat(self, mailbox): + last_login = mailbox.last_login + if last_login: + if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": + self.ci_logins.append(last_login) + else: + self.user_logins.append(last_login) + for entry in mailbox.messages: + new = FileEntry( + relpath=joinpath(os.path.basename(mailbox.mailboxdir), 
entry.relpath), + mtime=entry.mtime, + size=entry.size, + ) + self.messages.append(new) + self.sum_all_messages += entry.size - for entry in mailbox.extrafiles: - self.sum_extra += entry.size + for entry in mailbox.extrafiles: + self.sum_extra += entry.size def dump_summary(self): reports = [] @@ -131,19 +130,14 @@ class Report: print(f"last {days:3} days: {K(active)} {p(active)}") -def run_report(config, basedir, maxnum=None, now=None): - stats = Stats(basedir, maxnum=maxnum) - stats.iter_mailboxes() - rep = Report(stats, now=now) - rep.dump_summary() - - def main(): - cfgpath, basedir, maxnum = sys.argv[1:] - config = read_config(cfgpath) + basedir, maxnum = sys.argv[1:] now = datetime.utcnow().timestamp() now = datetime(2025, 9, 9).timestamp() - run_report(config, basedir, maxnum=int(maxnum), now=now) + stats = Stats(basedir, maxnum=int(maxnum)) + rep = Report(stats, now=now) + stats.iter_mailboxes(rep.process_mailbox_stat) + rep.dump_summary() if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 3b290a79..2649fce5 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -1,6 +1,40 @@ import random +from datetime import datetime +from pathlib import Path + +import pytest from chatmaild.expire import FileEntry, MailboxStat +from chatmaild.expire import main as expiry_main +from chatmaild.fsreport import Report, Stats + +# XXX maildirsize (used by dovecot quota) needs to be removed after removing files + + +@pytest.fixture +def mailboxdir1(tmp_path): + mailboxdir1 = tmp_path.joinpath("mailbox1@example.org") + mailboxdir1.mkdir() + password = mailboxdir1.joinpath("password") + password.write_text("xxx") + + garbagedir = mailboxdir1.joinpath("garbagedir") + garbagedir.mkdir() + + cur = mailboxdir1.joinpath("cur") + new = mailboxdir1.joinpath("new") + cur.mkdir() + msg_cur = cur.joinpath("msg1") + msg_cur.write_text("xxx") + 
new.mkdir() + msg_new = new.joinpath("msg2") + msg_new.write_text("xxx123") + return mailboxdir1 + + +@pytest.fixture +def mbox1(mailboxdir1): + return MailboxStat(mailboxdir1) def test_filentry_ordering(): @@ -11,36 +45,38 @@ def test_filentry_ordering(): assert l == sorted -def test_stats_mailbox(tmp_path): - mailboxdir = tmp_path - password = mailboxdir.joinpath("password") - password.write_text("xxx") +def test_stats_mailbox(mbox1): + password = Path(mbox1.mailboxdir).joinpath("password") + assert mbox1.last_login == password.stat().st_mtime + assert len(mbox1.messages) == 2 - garbagedir = mailboxdir.joinpath("garbagedir") - garbagedir.mkdir() - - cur = mailboxdir.joinpath("cur") - new = mailboxdir.joinpath("new") - cur.mkdir() - msg_cur = cur.joinpath("msg1") - msg_cur.write_text("xxx") - new.mkdir() - msg_new = new.joinpath("msg2") - msg_new.write_text("xxx123") - - mbox = MailboxStat(tmp_path) - assert mbox.last_login == password.stat().st_mtime - assert len(mbox.messages) == 2 - - msgs = list(mbox.messages) + msgs = list(mbox1.messages) assert len(msgs) == 2 assert msgs[0].size == 3 # cur - assert msgs[1].size == 6 # new - extra = mailboxdir.joinpath("large") + extra = Path(mbox1.mailboxdir).joinpath("large-extra") extra.write_text("x" * 1000) - mailboxdir.joinpath("index-something").write_text("123") - mbox = MailboxStat(tmp_path) - assert len(mbox.extrafiles) == 3 - assert mbox.extrafiles[0].size == 1000 + Path(mbox1.mailboxdir).joinpath("index-something").write_text("123") + mbox2 = MailboxStat(mbox1.mailboxdir) + assert len(mbox2.extrafiles) == 3 + assert mbox2.extrafiles[0].size == 1000 + + # cope well with mailbox dirs that have no password (for whatever reason) + Path(mbox1.mailboxdir).joinpath("password").unlink() + mbox3 = MailboxStat(mbox1.mailboxdir) + assert mbox3.last_login is None + + +def test_report(mbox1): + now = datetime.utcnow().timestamp() + mailboxes_dir = Path(mbox1.mailboxdir).parent + stats = Stats(str(mailboxes_dir), maxnum=None) + 
rep = Report(stats, now=now) + stats.iter_mailboxes(rep.process_mailbox_stat) + rep.dump_summary() + + +def test_expiry(example_config, mbox1): + args = example_config._inipath, Path(mbox1.mailboxdir).parent, 10000 + expiry_main(args)