From a4152140ca48c1619fdc4290ec9d5b802f406fd9 Mon Sep 17 00:00:00 2001
From: holger krekel
Date: Thu, 11 Sep 2025 14:28:06 +0200
Subject: [PATCH] move delete_inactive_users to new implementation

---
Review notes (this section is not part of the commit message):
* expire.py imports read_config and defines main(), which the "expire"
  script entry in pyproject.toml refers to.
* run_expire() keeps mailboxes that have no "password" file instead of
  comparing None with the cutoff timestamp.
* D() and the module-level "now" use time.time() so that they are
  comparable with st_mtime values.
* expunge.cron.j2 passes {{ config.mailboxes_dir }} as a jinja expression.

 chatmaild/pyproject.toml                      |   2 +-
 .../src/chatmaild/delete_inactive_users.py    |  31 ---
 chatmaild/src/chatmaild/expire.py             | 222 ++++++++++++++++++
 .../tests/test_delete_inactive_users.py       |   4 +-
 chatmaild/src/chatmaild/tests/test_expire.py  |  62 +++++
 cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 |   4 +-
 6 files changed, 290 insertions(+), 35 deletions(-)
 delete mode 100644 chatmaild/src/chatmaild/delete_inactive_users.py
 create mode 100644 chatmaild/src/chatmaild/expire.py
 create mode 100644 chatmaild/src/chatmaild/tests/test_expire.py

diff --git a/chatmaild/pyproject.toml b/chatmaild/pyproject.toml
index 3c312e74..8097c8df 100644
--- a/chatmaild/pyproject.toml
+++ b/chatmaild/pyproject.toml
@@ -27,7 +27,7 @@ chatmail-metadata = "chatmaild.metadata:main"
 filtermail = "chatmaild.filtermail:main"
 echobot = "chatmaild.echo:main"
 chatmail-metrics = "chatmaild.metrics:main"
-delete_inactive_users = "chatmaild.delete_inactive_users:main"
+expire = "chatmaild.expire:main"
 lastlogin = "chatmaild.lastlogin:main"
 turnserver = "chatmaild.turnserver:main"
 
diff --git a/chatmaild/src/chatmaild/delete_inactive_users.py b/chatmaild/src/chatmaild/delete_inactive_users.py
deleted file mode 100644
index 81467852..00000000
--- a/chatmaild/src/chatmaild/delete_inactive_users.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Remove inactive users
-"""
-
-import os
-import shutil
-import sys
-import time
-
-from .config import read_config
-
-
-def delete_inactive_users(config):
-    cutoff_date = time.time() - config.delete_inactive_users_after * 86400
-    for addr in os.listdir(config.mailboxes_dir):
-        try:
-            user = config.get_user(addr)
-        except ValueError:
-            continue
-
-        read_timestamp = user.get_last_login_timestamp()
-        if read_timestamp and read_timestamp < cutoff_date:
-            path = config.mailboxes_dir.joinpath(addr)
-            assert path == user.maildir
-            shutil.rmtree(path, ignore_errors=True)
-
-
-def main():
-    (cfgpath,) = sys.argv[1:]
-    config = read_config(cfgpath)
-    delete_inactive_users(config)
diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py
new file mode 100644
index 00000000..c0e85085
--- /dev/null
+++ b/chatmaild/src/chatmaild/expire.py
@@ -0,0 +1,222 @@
+"""
+Remove mailboxes of users who did not log in for a configurable time.
+"""
+
+import sys
+import os
+import shutil
+import logging
+import time
+from stat import S_ISREG
+from pathlib import Path
+from datetime import datetime
+from collections import namedtuple
+
+from .config import read_config
+
+# delete already seen big mails after 7 days, in the INBOX
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete
+# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+## or in any IMAP subfolder
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+## even if they are unseen
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+## or only temporary (but then they shouldn't be around after {{ config.delete_mails_after }} days anyway).
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
+# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
+
+
+FileEntry = namedtuple("FileEntry", ["relpath", "mtime", "size"])
+dayseconds = 24 * 60 * 60
+monthseconds = dayseconds * 30
+
+
+def joinpath(name, extra):
+    return name + "/" + extra
+
+
+def D(timestamp, now=None):
+    """Return the age of `timestamp` relative to `now` in whole days."""
+    if now is None:
+        # resolve at call time; a default computed at import time goes stale
+        now = time.time()
+    diff_seconds = int(now) - int(timestamp)
+    # assert diff_seconds >= 0, (int(timestamp), int(now))
+    return f"{int(diff_seconds / dayseconds):2.0f}d"
+
+
+def K(size):
+    return f"{int(size/1000):6.0f}K"
+
+
+def M(size):
+    return f"{int(size/1000000):6.0f}M"
+
+
+# epoch seconds, directly comparable with st_mtime values
+now = time.time()
+
+
+class Stats:
+    """Collect a MailboxStat for each mailbox directory below `basedir`."""
+
+    def __init__(self, basedir):
+        self.basedir = str(basedir)
+        self.mailboxes = []
+
+    def iter_mailboxes(self, maxnum=None):
+        for mailbox in os.listdir(self.basedir)[:maxnum]:
+            if "@" in mailbox:
+                mailboxdir = joinpath(self.basedir, mailbox)
+                self.mailboxes.append(MailboxStat(mailboxdir))
+
+
+class MailboxStat:
+    """Sizes and mtimes of the messages and extra files of one mailbox."""
+
+    def __init__(self, mailboxdir):
+        self.mailboxdir = mailboxdir = str(mailboxdir)
+        self.messages = []
+        self.extrafiles = []
+
+        for name in os.listdir(mailboxdir):
+            fpath = joinpath(mailboxdir, name)
+            if name in ("cur", "new", "tmp"):
+                for msg_name in os.listdir(fpath):
+                    msg_path = joinpath(fpath, msg_name)
+                    st = os.stat(msg_path)
+                    relpath = joinpath(name, msg_name)
+                    self.messages.append(
+                        FileEntry(relpath, mtime=st.st_mtime, size=st.st_size)
+                    )
+            else:
+                st = os.stat(fpath)
+                if S_ISREG(st.st_mode):
+                    self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
+        self.extrafiles.sort(key=lambda x: x.size, reverse=True)
+
+    @property
+    def last_login(self):
+        """mtime of the "password" file, or None if there is no such file."""
+        for entry in self.extrafiles:
+            if entry.relpath == "password":
+                return entry.mtime
+        return None
+
+    def get_messages(self, prefix=""):
+        return [x for x in self.messages if x.relpath.startswith(prefix)]
+
+    def get_extra_files(self):
+        return list(self.extrafiles)
+
+
+class XXXStats:
+    def __init__(self):
+        self.sum_extra = 0
+        self.sum_all_messages = 0
+        self.logins = []
+        self.messages = []
+
+    def analyze(self, statscache):
+        print("start")
+        for mailbox in statscache.cache:
+            mbox_cache = statscache.cache[mailbox]
+            if "password" not in mbox_cache:
+                continue
+            self.logins.append(mbox_cache["password"][0])
+            for relpath, (mtime, size) in mbox_cache.items():
+                if relpath[:4] in ("cur/", "new/", "tmp/"):
+                    self.sum_all_messages += size
+                    entry = FileEntry(relpath=relpath, mtime=mtime, size=size)
+                    self.messages.append(entry)
+                else:
+                    self.sum_extra += size
+
+    def dump_summary(self):
+        print(f"size of everything: {M(self.sum_extra + self.sum_all_messages)}")
+        print(f"size all messages: {M(self.sum_all_messages)}")
+        percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100
+        print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)")
+        for size in (100000, 200000, 500000, 1000000, 5000000):
+            all_of_size = sum(
+                x.size
+                for x in self.messages
+                if x.size > size and x.relpath.startswith("cur")
+            )
+            percent = all_of_size / self.sum_all_messages * 100
+            print(f"size seen {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)")
+        for size in (100000, 200000, 500000, 1000000, 5000000):
+            all_of_size = sum(
+                x.size
+                for x in self.messages
+                if x.size > size and x.mtime < now - 2 * dayseconds
+            )
+            percent = all_of_size / self.sum_all_messages * 100
+            print(
+                f"size 2day-old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)"
+            )
+        for size in (100000, 200000, 500000, 1000000, 5000000):
+            all_of_size = sum(
+                x.size
+                for x in self.messages
+                if x.size > size
+                and x.relpath.startswith("cur")
+                and x.mtime < now - 7 * dayseconds
+            )
+            percent = all_of_size / self.sum_all_messages * 100
+            print(
+                f"size seen 7-day old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)"
+            )
+
+        print()
+
+        num_logins = len(self.logins)
+        monthly_active = len([x for x in self.logins if x >= now - monthseconds])
+        daily_active = len([x for x in self.logins if x >= now - dayseconds])
+        stale = num_logins - monthly_active
+
+        def p(num):
+            return f"({num/num_logins * 100:.2f}%)"
+
+        print(f"all logins: {K(num_logins)}")
+        print(f"monthly active: {K(monthly_active)} {p(monthly_active)}")
+        print(f">1m old logins: {K(stale)} {p(stale)}")
+        print(f"daily active: {K(daily_active)} {p(daily_active)}")
+
+
+def run_expire(config, basedir):
+    """Remove mailboxes below `basedir` whose last login is too old.
+
+    A mailbox is removed if its last login is more than
+    `config.delete_inactive_users_after` days ago.
+    """
+    stat = Stats(basedir)
+    stat.iter_mailboxes()
+    cutoff_date = time.time() - config.delete_inactive_users_after * 86400
+
+    num = 0
+    for mbox in stat.mailboxes:
+        last_login = mbox.last_login
+        # without a "password" file the last login is unknown: keep the mailbox
+        if last_login is None or last_login >= cutoff_date:
+            continue
+        logging.info("removing outdated mailbox %s", mbox.mailboxdir)
+        shutil.rmtree(mbox.mailboxdir, ignore_errors=True)
+        num += 1
+    print(f"expired {num} mailboxes")
+
+
+def main(args=None):
+    """Command line entry point: expire <chatmail.ini> <mailboxes_dir>."""
+    if args is None:
+        args = sys.argv[1:]
+    cfgpath, basedir = args
+    config = read_config(cfgpath)
+    run_expire(config, basedir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py
index 937237b4..3eb21d12 100644
--- a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py
+++ b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py
@@ -1,6 +1,6 @@
 import time
 
-from chatmaild.delete_inactive_users import delete_inactive_users
+from chatmaild.expire import run_expire
 from chatmaild.doveauth import AuthDictProxy
 
 
@@ -45,7 +45,7 @@ def test_delete_inactive_users(example_config):
     for addr in to_remove:
         assert example_config.get_user(addr).maildir.exists()
 
-    delete_inactive_users(example_config)
+    run_expire(example_config, example_config.mailboxes_dir)
 
     for p in example_config.mailboxes_dir.iterdir():
         assert not p.name.startswith("old")
diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py
new file mode 100644
index 00000000..a2a40a57
--- /dev/null
+++ b/chatmaild/src/chatmaild/tests/test_expire.py
@@ -0,0 +1,62 @@
+import os
+import time
+from types import SimpleNamespace
+
+from chatmaild.expire import MailboxStat, run_expire
+
+
+def test_stats_mailbox(tmp_path):
+    mailboxdir = tmp_path
+    password = mailboxdir.joinpath("password")
+    password.write_text("xxx")
+
+    garbagedir = mailboxdir.joinpath("garbagedir")
+    garbagedir.mkdir()
+
+    cur = mailboxdir.joinpath("cur")
+    new = mailboxdir.joinpath("new")
+    cur.mkdir()
+    msg_cur = cur.joinpath("msg1")
+    msg_cur.write_text("xxx")
+    new.mkdir()
+    msg_new = new.joinpath("msg2")
+    msg_new.write_text("xxx123")
+
+    mbox = MailboxStat(tmp_path)
+    assert mbox.last_login == password.stat().st_mtime
+    assert len(mbox.messages) == 2
+
+    seen = mbox.get_messages("cur")
+    assert len(seen) == 1
+    assert seen[0].size == 3
+
+    new = mbox.get_messages("new")
+    assert len(new) == 1
+    assert new[0].size == 6
+
+    extra = mailboxdir.joinpath("large")
+    extra.write_text("x" * 1000)
+    mailboxdir.joinpath("index-something").write_text("123")
+    mbox = MailboxStat(tmp_path)
+    extrafiles = mbox.get_extra_files()
+    assert len(extrafiles) == 3
+    assert extrafiles[0].size == 1000
+
+
+def test_run_expire_keeps_mailbox_without_password(tmp_path):
+    config = SimpleNamespace(delete_inactive_users_after=1)
+
+    stale = tmp_path.joinpath("stale@example.org")
+    stale.mkdir()
+    password = stale.joinpath("password")
+    password.write_text("xxx")
+    old = time.time() - 3 * 86400
+    os.utime(password, (old, old))
+
+    unknown = tmp_path.joinpath("unknown@example.org")
+    unknown.mkdir()
+
+    run_expire(config, tmp_path)
+
+    assert not stale.exists()
+    assert unknown.exists()
diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2
index 9eb27182..a9f10e8e 100644
--- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2
+++ b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2
@@ -11,4 +11,6 @@
 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
-4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/delete_inactive_users /usr/local/lib/chatmaild/chatmail.ini
+
+# ported
+4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {{ config.mailboxes_dir }}