Replace expiry "find" commands with a new chatmaild.expire python module + a reporting one

This commit is contained in:
holger krekel
2025-09-11 14:28:06 +02:00
committed by link2xt
parent 4cfe228a1f
commit 9ddd5d8b2b
14 changed files with 558 additions and 67 deletions

View File

@@ -1,31 +0,0 @@
"""
Remove inactive users
"""
import os
import shutil
import sys
import time
from .config import read_config
def delete_inactive_users(config):
cutoff_date = time.time() - config.delete_inactive_users_after * 86400
for addr in os.listdir(config.mailboxes_dir):
try:
user = config.get_user(addr)
except ValueError:
continue
read_timestamp = user.get_last_login_timestamp()
if read_timestamp and read_timestamp < cutoff_date:
path = config.mailboxes_dir.joinpath(addr)
assert path == user.maildir
shutil.rmtree(path, ignore_errors=True)
def main():
(cfgpath,) = sys.argv[1:]
config = read_config(cfgpath)
delete_inactive_users(config)

View File

@@ -0,0 +1,182 @@
"""
Expire old messages and addresses.
"""
import os
import shutil
import sys
import time
from argparse import ArgumentParser
from collections import namedtuple
from datetime import datetime
from stat import S_ISREG
from chatmaild.config import read_config
FileEntry = namedtuple("FileEntry", ("relpath", "mtime", "size"))
def iter_mailboxes(basedir, maxnum):
if not os.path.exists(basedir):
print_info(f"no mailboxes found at: {basedir}")
return
for name in os.listdir(basedir)[:maxnum]:
if "@" in name:
yield MailboxStat(basedir + "/" + name)
class MailboxStat:
last_login = None
def __init__(self, basedir):
self.basedir = str(basedir)
# all detected messages in cur/new/tmp folders
self.messages = []
# all detected files in mailbox top dir
self.extrafiles = []
# scan all relevant files (without recursion)
old_cwd = os.getcwd()
os.chdir(self.basedir)
for name in os.listdir("."):
if name in ("cur", "new", "tmp"):
for msg_name in os.listdir(name):
relpath = name + "/" + msg_name
st = os.stat(relpath)
self.messages.append(FileEntry(relpath, st.st_mtime, st.st_size))
else:
st = os.stat(name)
if S_ISREG(st.st_mode):
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
if name == "password":
self.last_login = st.st_mtime
self.extrafiles.sort(key=lambda x: -x.size)
os.chdir(old_cwd)
def print_info(msg):
print(msg, file=sys.stderr)
class Expiry:
def __init__(self, config, dry, now, verbose):
self.config = config
self.dry = dry
self.now = now
self.verbose = verbose
self.del_mboxes = 0
self.all_mboxes = 0
self.del_files = 0
self.all_files = 0
self.start = time.time()
def remove_mailbox(self, mboxdir):
if self.verbose:
print_info(f"removing {mboxdir}")
if not self.dry:
shutil.rmtree(mboxdir)
self.del_mboxes += 1
def remove_file(self, path):
if self.verbose:
print_info(f"removing {path}")
if not self.dry:
try:
os.unlink(path)
except FileNotFoundError:
print_info(f"file not found/vanished {path}")
self.del_files += 1
def process_mailbox_stat(self, mbox):
cutoff_without_login = (
self.now - int(self.config.delete_inactive_users_after) * 86400
)
cutoff_mails = self.now - int(self.config.delete_mails_after) * 86400
cutoff_large_mails = self.now - int(self.config.delete_large_after) * 86400
self.all_mboxes += 1
changed = False
if mbox.last_login and mbox.last_login < cutoff_without_login:
self.remove_mailbox(mbox.basedir)
return
# all to-be-removed files are relative to the mailbox basedir
os.chdir(mbox.basedir)
mboxname = os.path.basename(mbox.basedir)
if self.verbose:
print_info(f"checking for mailbox messages in: {mboxname}")
self.all_files += len(mbox.messages)
for message in mbox.messages:
if message.mtime < cutoff_mails:
self.remove_file(message.relpath)
elif message.size > 200000 and message.mtime < cutoff_large_mails:
# we only remove noticed large files (not unnoticed ones in new/)
if message.relpath.startswith("cur/"):
self.remove_file(message.relpath)
else:
continue
changed = True
if changed:
self.remove_file("maildirsize")
def get_summary(self):
return (
f"Removed {self.del_mboxes} out of {self.all_mboxes} mailboxes "
f"and {self.del_files} out of {self.all_files} files in existing mailboxes "
f"in {time.time() - self.start:2.2f} seconds"
)
def main(args=None):
"""Expire mailboxes and messages according to chatmail config"""
parser = ArgumentParser(description=main.__doc__)
ini = "/usr/local/lib/chatmaild/chatmail.ini"
parser.add_argument(
"chatmail_ini",
action="store",
nargs="?",
help=f"path pointing to chatmail.ini file, default: {ini}",
default=ini,
)
parser.add_argument(
"--days", action="store", help="assume date to be days older than now"
)
parser.add_argument(
"--maxnum",
default=None,
action="store",
help="maximum number of mailboxes to iterate on",
)
parser.add_argument(
"-v",
dest="verbose",
action="store_true",
help="print out removed files and mailboxes",
)
parser.add_argument(
"--remove",
dest="remove",
action="store_true",
help="actually remove all expired files and dirs",
)
args = parser.parse_args(args)
config = read_config(args.chatmail_ini)
now = datetime.utcnow().timestamp()
if args.days:
now = now - 86400 * int(args.days)
maxnum = int(args.maxnum) if args.maxnum else None
exp = Expiry(config, dry=not args.remove, now=now, verbose=args.verbose)
for mailbox in iter_mailboxes(str(config.mailboxes_dir), maxnum=maxnum):
exp.process_mailbox_stat(mailbox)
print(exp.get_summary())
if __name__ == "__main__":
main(sys.argv[1:])

View File

@@ -0,0 +1,168 @@
"""
command line tool to analyze mailbox message storage
example invocation:
python -m chatmaild.fsreport /path/to/chatmail.ini
to show storage summaries for all "cur" folders
python -m chatmaild.fsreport /path/to/chatmail.ini --mdir cur
to show storage summaries only for first 1000 mailboxes
python -m chatmaild.fsreport /path/to/chatmail.ini --maxnum 1000
"""
import os
from argparse import ArgumentParser
from datetime import datetime
from chatmaild.config import read_config
from chatmaild.expire import iter_mailboxes
DAYSECONDS = 24 * 60 * 60
MONTHSECONDS = DAYSECONDS * 30
def HSize(size: int):
"""Format a size integer as a Human-readable string Kilobyte, Megabyte or Gigabyte"""
if size < 10000:
return f"{size / 1000:5.2f}K"
if size < 1000 * 1000:
return f"{size / 1000:5.0f}K"
if size < 1000 * 1000 * 1000:
return f"{int(size / 1000000):5.0f}M"
return f"{size / 1000000000:5.2f}G"
class Report:
def __init__(self, now, min_login_age, mdir):
self.size_extra = 0
self.size_messages = 0
self.now = now
self.min_login_age = min_login_age
self.mdir = mdir
self.num_ci_logins = self.num_all_logins = 0
self.login_buckets = {x: 0 for x in (1, 10, 30, 40, 80, 100, 150)}
self.message_buckets = {x: 0 for x in (0, 160000, 500000, 2000000)}
def process_mailbox_stat(self, mailbox):
# categorize login times
last_login = mailbox.last_login
if last_login:
self.num_all_logins += 1
if os.path.basename(mailbox.basedir)[:3] == "ci-":
self.num_ci_logins += 1
else:
for days in self.login_buckets:
if last_login >= self.now - days * DAYSECONDS:
self.login_buckets[days] += 1
cutoff_login_date = self.now - self.min_login_age * DAYSECONDS
if last_login and last_login <= cutoff_login_date:
# categorize message sizes
for size in self.message_buckets:
for msg in mailbox.messages:
if msg.size >= size:
if self.mdir and not msg.relpath.startswith(self.mdir):
continue
self.message_buckets[size] += msg.size
self.size_messages += sum(entry.size for entry in mailbox.messages)
self.size_extra += sum(entry.size for entry in mailbox.extrafiles)
def dump_summary(self):
all_messages = self.size_messages
print()
print("## Mailbox storage use analysis")
print(f"Mailbox data total size: {HSize(self.size_extra + all_messages)}")
print(f"Messages total size : {HSize(all_messages)}")
try:
percent = self.size_extra / (self.size_extra + all_messages) * 100
except ZeroDivisionError:
percent = 100
print(f"Extra files : {HSize(self.size_extra)} ({percent:.2f}%)")
print()
if self.min_login_age:
print(f"### Message storage for {self.min_login_age} days old logins")
pref = f"[{self.mdir}] " if self.mdir else ""
for minsize, sumsize in self.message_buckets.items():
percent = (sumsize / all_messages * 100) if all_messages else 0
print(
f"{pref}larger than {HSize(minsize)}: {HSize(sumsize)} ({percent:.2f}%)"
)
user_logins = self.num_all_logins - self.num_ci_logins
def p(num):
return f"({num / user_logins * 100:2.2f}%)" if user_logins else "100%"
print()
print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}")
print(f"all: {HSize(self.num_all_logins)}")
print(f"non-ci: {HSize(user_logins)}")
print(f"ci: {HSize(self.num_ci_logins)}")
for days, active in self.login_buckets.items():
print(f"last {days:3} days: {HSize(active)} {p(active)}")
def main(args=None):
"""Report about filesystem storage usage of all mailboxes and messages"""
parser = ArgumentParser(description=main.__doc__)
ini = "/usr/local/lib/chatmaild/chatmail.ini"
parser.add_argument(
"chatmail_ini",
action="store",
nargs="?",
help=f"path pointing to chatmail.ini file, default: {ini}",
default=ini,
)
parser.add_argument(
"--days",
default=0,
action="store",
help="assume date to be days older than now",
)
parser.add_argument(
"--min-login-age",
default=0,
dest="min_login_age",
action="store",
help="only sum up message size if last login is at least min-login-age days old",
)
parser.add_argument(
"--mdir",
action="store",
help="only consider 'cur' or 'new' or 'tmp' messages for summary",
)
parser.add_argument(
"--maxnum",
default=None,
action="store",
help="maximum number of mailboxes to iterate on",
)
args = parser.parse_args(args)
config = read_config(args.chatmail_ini)
now = datetime.utcnow().timestamp()
if args.days:
now = now - 86400 * int(args.days)
maxnum = int(args.maxnum) if args.maxnum else None
rep = Report(now=now, min_login_age=int(args.min_login_age), mdir=args.mdir)
for mbox in iter_mailboxes(str(config.mailboxes_dir), maxnum=maxnum):
rep.process_mailbox_stat(mbox)
rep.dump_summary()
if __name__ == "__main__":
main()

View File

@@ -1,7 +1,7 @@
import time
from chatmaild.delete_inactive_users import delete_inactive_users
from chatmaild.doveauth import AuthDictProxy
from chatmaild.expire import main as main_expire
def test_login_timestamps(example_config):
@@ -45,7 +45,12 @@ def test_delete_inactive_users(example_config):
for addr in to_remove:
assert example_config.get_user(addr).maildir.exists()
delete_inactive_users(example_config)
main_expire(
args=[
"--remove",
str(example_config._inipath),
]
)
for p in example_config.mailboxes_dir.iterdir():
assert not p.name.startswith("old")

View File

@@ -0,0 +1,129 @@
import os
import random
from datetime import datetime
from fnmatch import fnmatch
from pathlib import Path
import pytest
from chatmaild.expire import FileEntry, MailboxStat, iter_mailboxes
from chatmaild.expire import main as expiry_main
from chatmaild.fsreport import main as report_main
def fill_mbox(basedir):
basedir1 = basedir.joinpath("mailbox1@example.org")
basedir1.mkdir()
password = basedir1.joinpath("password")
password.write_text("xxx")
basedir1.joinpath("maildirsize").write_text("xxx")
garbagedir = basedir1.joinpath("garbagedir")
garbagedir.mkdir()
create_new_messages(basedir1, ["cur/msg1"], size=500)
create_new_messages(basedir1, ["new/msg2"], size=600)
return basedir1
def create_new_messages(basedir, relpaths, size=1000, days=0):
now = datetime.utcnow().timestamp()
for relpath in relpaths:
msg_path = Path(basedir).joinpath(relpath)
msg_path.parent.mkdir(parents=True, exist_ok=True)
msg_path.write_text("x" * size)
# accessed now, modified N days ago
os.utime(msg_path, (now, now - days * 86400))
@pytest.fixture
def mbox1(example_config):
basedir1 = fill_mbox(example_config.mailboxes_dir)
return MailboxStat(basedir1)
def test_filentry_ordering(tmp_path):
l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)]
sorted = list(l)
random.shuffle(l)
l.sort(key=lambda x: x.size)
assert l == sorted
def test_no_mailbxoes(tmp_path, capsys):
assert [] == list(iter_mailboxes(str(tmp_path.joinpath("notexists")), maxnum=10))
out, err = capsys.readouterr()
assert "no mailboxes" in err
def test_stats_mailbox(mbox1):
password = Path(mbox1.basedir).joinpath("password")
assert mbox1.last_login == password.stat().st_mtime
assert len(mbox1.messages) == 2
msgs = list(sorted(mbox1.messages, key=lambda x: x.size))
assert len(msgs) == 2
assert msgs[0].size == 500 # cur
assert msgs[1].size == 600 # new
create_new_messages(mbox1.basedir, ["large-extra"], size=1000)
create_new_messages(mbox1.basedir, ["index-something"], size=3)
mbox2 = MailboxStat(mbox1.basedir)
assert len(mbox2.extrafiles) == 4
assert mbox2.extrafiles[0].size == 1000
# cope well with mailbox dirs that have no password (for whatever reason)
Path(mbox1.basedir).joinpath("password").unlink()
mbox3 = MailboxStat(mbox1.basedir)
assert mbox3.last_login is None
def test_report_no_mailboxes(example_config):
args = (str(example_config._inipath),)
report_main(args)
def test_report(mbox1, example_config):
args = (str(example_config._inipath),)
report_main(args)
args = list(args) + "--days 1".split()
report_main(args)
args = list(args) + "--min-login-age 1".split()
report_main(args)
args = list(args) + "--mdir cur".split()
report_main(args)
def test_expiry_cli_basic(example_config, mbox1):
args = (str(example_config._inipath),)
expiry_main(args)
def test_expiry_cli_old_files(capsys, example_config, mbox1):
relpaths_old = ["cur/msg_old1", "cur/msg_old1"]
cutoff_days = int(example_config.delete_mails_after) + 1
create_new_messages(mbox1.basedir, relpaths_old, size=1000, days=cutoff_days)
relpaths_large = ["cur/msg_old_large1", "new/msg_old_large2"]
cutoff_days = int(example_config.delete_large_after) + 1
create_new_messages(
mbox1.basedir, relpaths_large, size=1000 * 300, days=cutoff_days
)
create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1)
args = str(example_config._inipath), "--remove", "-v"
expiry_main(args)
out, err = capsys.readouterr()
allpaths = relpaths_old + relpaths_large + ["maildirsize"]
for path in allpaths:
for line in err.split("\n"):
if fnmatch(line, f"removing*{path}"):
break
else:
if path != "new/msg_old_large2":
pytest.fail(f"failed to remove {path}\n{err}")
assert "shouldstay" not in err