move delete_inactive_users to new implementation

This commit is contained in:
holger krekel
2025-09-11 14:28:06 +02:00
parent 4cfe228a1f
commit a4152140ca
6 changed files with 239 additions and 35 deletions

View File

@@ -27,7 +27,7 @@ chatmail-metadata = "chatmaild.metadata:main"
filtermail = "chatmaild.filtermail:main"
echobot = "chatmaild.echo:main"
chatmail-metrics = "chatmaild.metrics:main"
delete_inactive_users = "chatmaild.delete_inactive_users:main"
expire = "chatmaild.expire:main"
lastlogin = "chatmaild.lastlogin:main"
turnserver = "chatmaild.turnserver:main"

View File

@@ -1,31 +0,0 @@
"""
Remove inactive users
"""
import os
import shutil
import sys
import time
from .config import read_config
def delete_inactive_users(config):
    """Remove the mailbox directory of every user whose last login is too old.

    Every entry of ``config.mailboxes_dir`` is looked up via
    ``config.get_user``; entries for which that raises ``ValueError`` are
    skipped. A mailbox is removed when its last-login timestamp is truthy
    and lies more than ``config.delete_inactive_users_after`` days in the
    past. Removal errors are ignored.
    """
    max_age_seconds = config.delete_inactive_users_after * 86400
    cutoff_date = time.time() - max_age_seconds

    for addr in os.listdir(config.mailboxes_dir):
        try:
            user = config.get_user(addr)
        except ValueError:
            continue

        last_login = user.get_last_login_timestamp()
        # keep mailboxes without a recorded login and those still in range
        if not last_login or not (last_login < cutoff_date):
            continue

        path = config.mailboxes_dir.joinpath(addr)
        # sanity check: the directory we delete is the user's own maildir
        assert path == user.maildir
        shutil.rmtree(path, ignore_errors=True)
def main():
    """Command-line entry point: takes exactly one argument, the config path.

    Raises ``ValueError`` (from the unpacking) when the number of
    command-line arguments is not exactly one.
    """
    [cfgpath] = sys.argv[1:]
    delete_inactive_users(read_config(cfgpath))

View File

@@ -0,0 +1,194 @@
import sys
import os
import shutil
import logging
import time
from stat import S_ISREG
from pathlib import Path
from datetime import datetime
from collections import namedtuple
# delete already seen big mails after 7 days, in the INBOX
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete
# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete
## or in any IMAP subfolder
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete
## even if they are unseen
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete
## or only temporary (but then they shouldn't be around after {{ config.delete_mails_after }} days anyway).
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
# One scanned file: path relative to the mailbox directory, mtime, size in bytes.
FileEntry = namedtuple("FileEntry", ["relpath", "mtime", "size"])

dayseconds = 24 * 60 * 60
monthseconds = dayseconds * 30


def joinpath(name, extra):
    """Join two path components given as strings with a single ``/``."""
    return name + "/" + extra


def D(timestamp, now=None):
    """Format the age of ``timestamp`` relative to ``now`` in whole days.

    :param timestamp: seconds since the epoch.
    :param now: reference time in seconds since the epoch; defaults to the
        current time, evaluated on every call.
    :returns: a string such as ``" 3d"``; the day count is truncated towards
        zero, so a timestamp in the future yields a negative count.
    """
    if now is None:
        # Evaluate per call: a default expression in the signature would be
        # frozen at import time.
        now = time.time()
    diff_seconds = int(now) - int(timestamp)
    return f"{int(diff_seconds / dayseconds):2.0f}d"


def K(size):
    """Format ``size`` as whole thousands, right-aligned, with a ``K`` suffix."""
    return f"{int(size/1000):6.0f}K"


def M(size):
    """Format ``size`` as whole millions, right-aligned, with an ``M`` suffix."""
    return f"{int(size/1000000):6.0f}M"


# Reference "now" shared by the reporting code, as seconds since the epoch.
# time.time() is used instead of datetime.utcnow().timestamp(): the latter
# interprets the naive UTC value as local time and is therefore off by the
# local UTC offset, which would skew comparisons against file mtimes.
now = time.time()
class Stats:
    """Collects a ``MailboxStat`` for each mailbox found below a base directory."""

    def __init__(self, basedir):
        self.mailboxes = []
        self.basedir = str(basedir)

    def iter_mailboxes(self, maxnum=None):
        """Scan the base directory and record stats for each mailbox in it.

        Only the first ``maxnum`` entries returned by ``os.listdir`` are
        considered (all of them when ``maxnum`` is None). Of those, every
        entry whose name contains ``"@"`` is scanned and appended to
        ``self.mailboxes``. Nothing is returned.
        """
        candidates = os.listdir(self.basedir)[:maxnum]
        for entry in candidates:
            if "@" not in entry:
                continue
            scanned = MailboxStat(joinpath(self.basedir, entry))
            self.mailboxes.append(scanned)
class MailboxStat:
    """Snapshot of the files found in a single mailbox directory.

    ``messages`` holds one ``FileEntry`` per entry inside the top-level
    ``cur``, ``new`` and ``tmp`` directories, with ``relpath`` of the form
    ``"cur/<name>"``. ``extrafiles`` holds one ``FileEntry`` per other
    regular file directly inside the mailbox directory, sorted by size with
    the largest first.
    """

    def __init__(self, mailboxdir):
        mailboxdir = str(mailboxdir)
        self.mailboxdir = mailboxdir
        self.messages = []
        self.extrafiles = []

        for name in os.listdir(mailboxdir):
            fpath = joinpath(mailboxdir, name)

            if name not in ("cur", "new", "tmp"):
                # anything else only counts when it is a regular file
                st = os.stat(fpath)
                if S_ISREG(st.st_mode):
                    self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
                continue

            for msg_name in os.listdir(fpath):
                st = os.stat(joinpath(fpath, msg_name))
                entry = FileEntry(
                    joinpath(name, msg_name), mtime=st.st_mtime, size=st.st_size
                )
                self.messages.append(entry)

        self.extrafiles.sort(key=lambda entry: entry.size, reverse=True)

    @property
    def last_login(self):
        """Modification time of the top-level ``password`` file, or None.

        NOTE(review): presumably the password file is touched on login, which
        is why its mtime is reported as the last login time -- confirm.
        """
        return next(
            (entry.mtime for entry in self.extrafiles if entry.relpath == "password"),
            None,
        )

    def get_messages(self, prefix=""):
        """Return the message entries whose relative path starts with ``prefix``."""
        return [entry for entry in self.messages if entry.relpath.startswith(prefix)]

    def get_extra_files(self):
        """Return a copy of the list of non-message regular files."""
        return self.extrafiles[:]
class XXXStats:
    """Aggregate size and login statistics over a cache of mailbox scans."""

    # Message-size thresholds (in bytes) used by the bucketed size reports.
    SIZE_THRESHOLDS = (100000, 200000, 500000, 1000000, 5000000)

    def __init__(self):
        self.sum_extra = 0  # total size of all non-message files
        self.sum_all_messages = 0  # total size of all cur/new/tmp files
        self.logins = []  # mtime of each mailbox's "password" entry
        self.messages = []  # FileEntry for every cur/new/tmp file

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``, or 0.0 if ``whole`` is 0."""
        if not whole:
            return 0.0
        return part / whole * 100

    def analyze(self, statscache):
        """Accumulate totals from ``statscache.cache``.

        ``statscache.cache`` is read as a mapping from mailbox to a mapping
        of ``relpath -> (mtime, size)``. Mailboxes without a ``"password"``
        entry are skipped entirely.
        """
        print("start")
        for mailbox in statscache.cache:
            mbox_cache = statscache.cache[mailbox]
            if "password" not in mbox_cache:
                continue
            self.logins.append(mbox_cache["password"][0])
            for relpath, (mtime, size) in mbox_cache.items():
                if relpath.startswith(("cur/", "new/", "tmp/")):
                    self.sum_all_messages += size
                    entry = FileEntry(relpath=relpath, mtime=mtime, size=size)
                    self.messages.append(entry)
                else:
                    self.sum_extra += size

    def _dump_size_buckets(self, label, matches):
        """Print, per size threshold, the total size of the matching messages.

        A message is counted when it is larger than the threshold and
        ``matches(entry)`` is true; the percentage is relative to the size
        of all messages.
        """
        for size in self.SIZE_THRESHOLDS:
            all_of_size = sum(
                x.size for x in self.messages if x.size > size and matches(x)
            )
            percent = self._percent(all_of_size, self.sum_all_messages)
            print(f"{label} {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)")

    def dump_summary(self):
        """Print the size and login summary to stdout.

        All percentages are reported as 0.00% when their denominator is
        zero (for example when nothing was analyzed), instead of raising
        ``ZeroDivisionError``.
        """
        total = self.sum_extra + self.sum_all_messages
        print(f"size of everything: {M(total)}")
        print(f"size all messages: {M(self.sum_all_messages)}")
        percent = self._percent(self.sum_extra, total)
        print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)")

        self._dump_size_buckets(
            "size seen",
            lambda x: x.relpath.startswith("cur"),
        )
        self._dump_size_buckets(
            "size 2day-old",
            lambda x: x.mtime < now - 2 * dayseconds,
        )
        self._dump_size_buckets(
            "size seen 7-day old",
            lambda x: x.relpath.startswith("cur") and x.mtime < now - 7 * dayseconds,
        )
        print()

        num_logins = len(self.logins)
        monthly_active = sum(1 for x in self.logins if x >= now - monthseconds)
        daily_active = sum(1 for x in self.logins if x >= now - dayseconds)
        stale = num_logins - monthly_active

        def p(num):
            return f"({self._percent(num, num_logins):.2f}%)"

        print(f"all logins: {K(num_logins)}")
        print(f"monthly active: {K(monthly_active)} {p(monthly_active)}")
        print(f">1m old logins: {K(stale)} {p(stale)}")
        print(f"daily active: {K(daily_active)} {p(daily_active)}")
def run_expire(config, basedir):
    """Remove mailbox directories whose last login is older than the cutoff.

    :param config: object providing ``delete_inactive_users_after``, the
        maximum age in days of the last login.
    :param basedir: directory containing one sub-directory per mailbox.

    Mailboxes without a known last login are left untouched. Removal errors
    are ignored. The number of expired mailboxes is printed to stdout.
    """
    stat = Stats(basedir)
    stat.iter_mailboxes()
    cutoff_date = time.time() - config.delete_inactive_users_after * 86400
    num = 0
    for mbox in stat.mailboxes:
        last_login = mbox.last_login
        if last_login is None:
            # No password file means no login time is known. Comparing None
            # with the cutoff would raise TypeError, so keep such mailboxes
            # rather than deleting them on missing data.
            continue
        if last_login < cutoff_date:
            logging.info("removing outdated mailbox %s", mbox.mailboxdir)
            shutil.rmtree(mbox.mailboxdir, ignore_errors=True)
            num += 1
    print(f"expired {num} mailboxes")
def main(args=None):
    """Command-line entry point: ``expire <chatmail.ini> <mailboxes_dir>``.

    :param args: argument list without the program name; defaults to
        ``sys.argv[1:]``.
    :raises SystemExit: with a usage message when the argument count is not
        exactly two.

    Defined as a function because the ``expire`` console script is
    registered as ``chatmaild.expire:main``.
    """
    if args is None:
        args = sys.argv[1:]
    if len(args) != 2:
        raise SystemExit("usage: expire <chatmail.ini> <mailboxes_dir>")
    cfgpath, basedir = args

    # Absolute import so that it works both for the installed console script
    # and when this file is executed directly as a script.
    from chatmaild.config import read_config

    config = read_config(cfgpath)
    run_expire(config, basedir)


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,6 @@
import time
from chatmaild.delete_inactive_users import delete_inactive_users
from chatmaild.expire import run_expire
from chatmaild.doveauth import AuthDictProxy
@@ -45,7 +45,7 @@ def test_delete_inactive_users(example_config):
for addr in to_remove:
assert example_config.get_user(addr).maildir.exists()
delete_inactive_users(example_config)
run_expire(example_config, example_config.mailboxes_dir)
for p in example_config.mailboxes_dir.iterdir():
assert not p.name.startswith("old")

View File

@@ -0,0 +1,39 @@
from chatmaild.expire import MailboxStat
def test_stats_mailbox(tmp_path):
    """MailboxStat reports last login, per-folder messages and extra files."""
    mailboxdir = tmp_path
    password = mailboxdir / "password"
    password.write_text("xxx")
    # a stray directory must not show up among the extra files
    (mailboxdir / "garbagedir").mkdir()

    # one seen message in cur/ and one unseen message in new/
    for folder, msg_name, content in (
        ("cur", "msg1", "xxx"),
        ("new", "msg2", "xxx123"),
    ):
        folder_path = mailboxdir / folder
        folder_path.mkdir()
        (folder_path / msg_name).write_text(content)

    mbox = MailboxStat(tmp_path)
    assert mbox.last_login == password.stat().st_mtime
    assert len(mbox.messages) == 2

    seen_messages = mbox.get_messages("cur")
    assert len(seen_messages) == 1
    assert seen_messages[0].size == 3

    unseen_messages = mbox.get_messages("new")
    assert len(unseen_messages) == 1
    assert unseen_messages[0].size == 6

    # extra files are reported largest first
    (mailboxdir / "large").write_text("x" * 1000)
    (mailboxdir / "index-something").write_text("123")

    rescanned = MailboxStat(tmp_path)
    extrafiles = rescanned.get_extra_files()
    assert len(extrafiles) == 3
    assert extrafiles[0].size == 1000

View File

@@ -11,4 +11,6 @@
2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete
3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/delete_inactive_users /usr/local/lib/chatmaild/chatmail.ini
# ported
4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {{ config.mailboxes_dir }}