refactor and write tests for overall expiry/report runs

This commit is contained in:
holger krekel
2025-09-14 15:02:45 +02:00
parent 023116bc91
commit ed7a70ba31
3 changed files with 146 additions and 103 deletions

View File

@@ -11,8 +11,6 @@ from stat import S_ISREG
from chatmaild.config import read_config from chatmaild.config import read_config
# XXX maildirsize (used by dovecot quota) needs to be removed after removing files
class FileEntry: class FileEntry:
def __init__(self, relpath, mtime, size): def __init__(self, relpath, mtime, size):
@@ -48,14 +46,19 @@ class Stats:
self.mailboxes = [] self.mailboxes = []
self.maxnum = maxnum self.maxnum = maxnum
def iter_mailboxes(self, callback=None):
    """Scan the base directory and record a MailboxStat per mailbox.

    Only the first ``self.maxnum`` directory entries are considered, and
    of those only names containing "@" are treated as mailboxes.  Each
    new MailboxStat is appended to ``self.mailboxes`` and, if ``callback``
    is given, passed to it right away.
    """
    candidates = os.listdir(self.basedir)[: self.maxnum]
    for name in candidates:
        if "@" not in name:
            continue
        mailbox = MailboxStat(joinpath(self.basedir, name))
        self.mailboxes.append(mailbox)
        if callback is not None:
            callback(mailbox)
class MailboxStat: class MailboxStat:
last_login = None
def __init__(self, mailboxdir): def __init__(self, mailboxdir):
self.mailboxdir = mailboxdir = str(mailboxdir) self.mailboxdir = mailboxdir = str(mailboxdir)
# all detected messages in cur/new/tmp folders # all detected messages in cur/new/tmp folders
@@ -83,66 +86,76 @@ class MailboxStat:
st = os.stat(fpath) st = os.stat(fpath)
if S_ISREG(st.st_mode): if S_ISREG(st.st_mode):
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
if name == "password":
self.last_login = st.st_mtime
self.totalsize += st.st_size self.totalsize += st.st_size
self.extrafiles.sort(key=lambda x: -x.size) self.extrafiles.sort(key=lambda x: -x.size)
@property
def last_login(self):
for entry in self.extrafiles:
if entry.relpath == "password":
return entry.mtime
class Expiry:
    """Apply the configured retention rules to one mailbox at a time.

    ``config`` must provide ``delete_inactive_users_after``,
    ``delete_mails_after`` and ``delete_large_after``; each is converted
    with ``int()`` and multiplied by 86400, i.e. treated as a number of days.
    ``now`` is the reference timestamp in seconds.  With ``dry`` true
    nothing is deleted; planned removals are only printed.
    """

    def __init__(self, config, stat, dry, now):
        self.config = config
        self.stat = stat
        self.dry = dry
        self.now = now

    def rmtree(self, path):
        """Remove a whole mailbox directory, or only announce it in dry mode."""
        if self.dry:
            print("would remove mailbox", path)
        else:
            shutil.rmtree(path, ignore_errors=True)

    def unlink(self, mailboxdir, relpath, message=None):
        """Remove ``relpath`` below ``mailboxdir``; a missing file is not an error.

        In dry mode nothing is removed.  If the file entry is passed as
        ``message``, a line with its age and size is printed; files without
        an entry (such as "maildirsize") are skipped silently.
        """
        path = joinpath(mailboxdir, relpath)
        if self.dry:
            if message is not None:
                print(
                    f"would remove {message.fmt_since(self.now)} {message.fmt_size()} {path}"
                )
            return
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass  # it's gone already, fine

    def process_mailbox_stat(self, mbox):
        """Expire one scanned mailbox according to the configured cutoffs.

        A mailbox whose last login is older than the inactivity cutoff is
        removed entirely.  Otherwise messages older than the mail cutoff,
        and messages above 200000 bytes older than the large-mail cutoff,
        are removed individually.
        """
        cutoff_without_login = (
            self.now - int(self.config.delete_inactive_users_after) * 86400
        )
        cutoff_mails = self.now - int(self.config.delete_mails_after) * 86400
        cutoff_large_mails = self.now - int(self.config.delete_large_after) * 86400

        if mbox.last_login and mbox.last_login < cutoff_without_login:
            self.rmtree(mbox.mailboxdir)
            return

        changed = False
        for message in mbox.messages:
            if message.mtime < cutoff_mails:
                self.unlink(mbox.mailboxdir, message.relpath, message)
            elif message.size > 200000 and message.mtime < cutoff_large_mails:
                self.unlink(mbox.mailboxdir, message.relpath, message)
            else:
                continue
            changed = True
        if changed:
            # maildirsize (used by dovecot quota) needs to be removed
            # after removing files
            self.unlink(mbox.mailboxdir, "maildirsize")
def main(args=None):
    """Run a dry-run expiry pass over all mailboxes below a base directory.

    ``args`` is a ``(cfgpath, basedir, maxnum)`` triple; when omitted it is
    taken from ``sys.argv[1:]``.  Explicitly passed values are converted
    to strings first so callers may hand in path objects and integers.
    """
    if args is None:
        args = sys.argv[1:]
    else:
        args = list(map(str, args))
    cfgpath, basedir, maxnum = args
    config = read_config(cfgpath)
    # Use the real current time.  Calling .timestamp() on the naive result
    # of utcnow() would interpret it as local time and skew the cutoffs.
    now = datetime.now().timestamp()

    stat = Stats(basedir, maxnum=int(maxnum))
    exp = Expiry(config, stat, dry=True, now=now)
    stat.iter_mailboxes(exp.process_mailbox_stat)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -2,7 +2,6 @@ import os
import sys import sys
from datetime import datetime from datetime import datetime
from chatmaild.config import read_config
from chatmaild.expire import FileEntry, Stats, joinpath from chatmaild.expire import FileEntry, Stats, joinpath
DAYSECONDS = 24 * 60 * 60 DAYSECONDS = 24 * 60 * 60
@@ -18,6 +17,8 @@ def D(timestamp, now=datetime.utcnow().timestamp()):
def K(size):
    """Format a byte count as a short string for report output.

    Values below 1000 are shown as a plain number, values below 10000 in
    thousands with two decimals, and anything larger in whole thousands.
    """
    if size < 1000:
        text = f"{size:5.0f}"
    elif size < 10000:
        text = f"{size/1000:3.2f}K"
    else:
        text = f"{int(size/1000):5.0f}K"
    return text
@@ -43,26 +44,24 @@ class Report:
self.stats = stats self.stats = stats
self.now = now self.now = now
def process_mailbox_stat(self, mailbox):
    """Fold one scanned mailbox into the report's running totals.

    The last login (if any) is recorded under ``ci_logins`` when the
    mailbox directory name starts with "ci-", otherwise under
    ``user_logins``.  Every message is re-recorded with its path prefixed
    by the mailbox directory name, and message and extra-file sizes are
    added to their respective sums.
    """
    dirname = os.path.basename(mailbox.mailboxdir)

    last_login = mailbox.last_login
    if last_login:
        logins = self.ci_logins if dirname[:3] == "ci-" else self.user_logins
        logins.append(last_login)

    for entry in mailbox.messages:
        self.messages.append(
            FileEntry(
                relpath=joinpath(dirname, entry.relpath),
                mtime=entry.mtime,
                size=entry.size,
            )
        )
        self.sum_all_messages += entry.size

    for entry in mailbox.extrafiles:
        self.sum_extra += entry.size
def dump_summary(self): def dump_summary(self):
reports = [] reports = []
@@ -131,19 +130,14 @@ class Report:
print(f"last {days:3} days: {K(active)} {p(active)}") print(f"last {days:3} days: {K(active)} {p(active)}")
def run_report(config, basedir, maxnum=None, now=None):
stats = Stats(basedir, maxnum=maxnum)
stats.iter_mailboxes()
rep = Report(stats, now=now)
rep.dump_summary()
def main():
    """Print a usage summary for the mailboxes below a base directory.

    Expects ``basedir`` and ``maxnum`` as the two command line arguments.
    """
    basedir, maxnum = sys.argv[1:]
    # Use the real current time.  Calling .timestamp() on the naive result
    # of utcnow() would interpret it as local time and skew the ages shown.
    now = datetime.now().timestamp()
    stats = Stats(basedir, maxnum=int(maxnum))
    rep = Report(stats, now=now)
    stats.iter_mailboxes(rep.process_mailbox_stat)
    rep.dump_summary()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,6 +1,40 @@
import random import random
from datetime import datetime
from pathlib import Path
import pytest
from chatmaild.expire import FileEntry, MailboxStat from chatmaild.expire import FileEntry, MailboxStat
from chatmaild.expire import main as expiry_main
from chatmaild.fsreport import Report, Stats
# XXX maildirsize (used by dovecot quota) needs to be removed after removing files
@pytest.fixture
def mailboxdir1(tmp_path):
    """Create a sample mailbox directory and return its path.

    The mailbox holds a password file, an unrelated sub-directory, and one
    message each in ``cur`` (3 bytes) and ``new`` (6 bytes).
    """
    root = tmp_path / "mailbox1@example.org"
    root.mkdir()
    (root / "password").write_text("xxx")
    (root / "garbagedir").mkdir()
    for folder, msgname, body in (("cur", "msg1", "xxx"), ("new", "msg2", "xxx123")):
        folderpath = root / folder
        folderpath.mkdir()
        (folderpath / msgname).write_text(body)
    return root
@pytest.fixture
def mbox1(mailboxdir1):
    """Return a MailboxStat scanned from the sample mailbox directory."""
    scanned = MailboxStat(mailboxdir1)
    return scanned
def test_filentry_ordering(): def test_filentry_ordering():
@@ -11,36 +45,38 @@ def test_filentry_ordering():
assert l == sorted assert l == sorted
def test_stats_mailbox(mbox1):
    """MailboxStat reports last login, messages and extra files."""
    boxdir = Path(mbox1.mailboxdir)
    password = boxdir / "password"
    assert mbox1.last_login == password.stat().st_mtime
    assert len(mbox1.messages) == 2
    msgs = list(mbox1.messages)
    assert len(msgs) == 2
    first, second = msgs
    assert first.size == 3  # cur
    assert second.size == 6  # new

    (boxdir / "large-extra").write_text("x" * 1000)
    (boxdir / "index-something").write_text("123")
    mbox2 = MailboxStat(mbox1.mailboxdir)
    assert len(mbox2.extrafiles) == 3
    assert mbox2.extrafiles[0].size == 1000

    # cope well with mailbox dirs that have no password (for whatever reason)
    password.unlink()
    mbox3 = MailboxStat(mbox1.mailboxdir)
    assert mbox3.last_login is None
def test_report(mbox1):
    """A report run over the directory holding the sample mailbox works."""
    # datetime.now().timestamp() is a correct epoch value; the naive
    # utcnow() result would be interpreted as local time.
    now = datetime.now().timestamp()
    mailboxes_dir = Path(mbox1.mailboxdir).parent
    stats = Stats(str(mailboxes_dir), maxnum=None)
    rep = Report(stats, now=now)
    stats.iter_mailboxes(rep.process_mailbox_stat)
    # the sample mailbox must actually have been visited
    assert len(stats.mailboxes) == 1
    rep.dump_summary()
def test_expiry(example_config, mbox1):
    """An expiry run over the sample mailbox keeps freshly written mail."""
    # The base directory is the one *containing* the mailbox; passing the
    # mailbox directory itself would yield no "@" entries and process nothing.
    mailboxes_dir = Path(mbox1.mailboxdir).parent
    args = example_config._inipath, mailboxes_dir, 10000
    expiry_main(args)
    # just-created messages are younger than any retention cutoff
    assert Path(mbox1.mailboxdir).joinpath("cur", "msg1").exists()