mirror of
https://github.com/chatmail/relay.git
synced 2026-05-12 17:14:36 +00:00
add summary reporting, rework expiry logic
This commit is contained in:
@@ -28,6 +28,7 @@ filtermail = "chatmaild.filtermail:main"
|
||||
echobot = "chatmaild.echo:main"
|
||||
chatmail-metrics = "chatmaild.metrics:main"
|
||||
expire = "chatmaild.expire:main"
|
||||
fsreport = "chatmaild.fsreport:main"
|
||||
lastlogin = "chatmaild.lastlogin:main"
|
||||
turnserver = "chatmaild.turnserver:main"
|
||||
|
||||
|
||||
@@ -1,42 +1,47 @@
|
||||
"""
|
||||
Expire old messages and addresses.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from collections import namedtuple
|
||||
from datetime import datetime
|
||||
from stat import S_ISREG
|
||||
|
||||
from chatmaild.config import read_config
|
||||
|
||||
# delete already seen big mails after 7 days, in the INBOX
|
||||
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete
|
||||
# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox
|
||||
# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
|
||||
# XXX maildirsize (used by dovecot quota) needs to be removed after removing files
|
||||
|
||||
|
||||
# Lightweight record for one file: its path, modification time and size.
FileEntry = namedtuple("FileEntry", "relpath mtime size")

# Time spans expressed in seconds; a "month" is taken as 30 days.
dayseconds = 60 * 60 * 24
monthseconds = 30 * dayseconds
|
||||
class FileEntry:
    """Describe one file by path, modification time and size.

    ``relpath`` is the path as supplied by the caller, ``mtime`` is a
    timestamp in seconds and ``size`` is a byte count.
    """

    def __init__(self, relpath, mtime, size):
        self.relpath = relpath
        self.mtime = mtime
        self.size = size

    def __repr__(self):
        return f"<FileEntry size={self.size} '{self.relpath}'>"

    def fmt_size(self):
        """Return the size in units of 1000 bytes, right-aligned, e.g. ``"   12K"``."""
        return f"{int(self.size/1000):5.0f}K"

    def fmt_since(self, now):
        """Return the number of whole days between ``mtime`` and ``now``, e.g. ``" 3d"``."""
        diff_seconds = int(now) - int(self.mtime)
        return f"{int(diff_seconds / 86400):2.0f}d"

    def __eq__(self, other):
        # Comparing with a foreign type must not raise AttributeError; let
        # Python fall back to its default handling instead.
        if not isinstance(other, FileEntry):
            return NotImplemented
        return (
            self.relpath == other.relpath
            and self.size == other.size
            and self.mtime == other.mtime
        )

    # Instances are mutable and compare by value, so they are not hashable.
    __hash__ = None
|
||||
|
||||
|
||||
def joinpath(name, extra):
    """Return ``name`` and ``extra`` joined by a single forward slash."""
    return "/".join((name, extra))
|
||||
|
||||
|
||||
def D(timestamp, now=None):
    """Format the age of ``timestamp`` relative to ``now`` in whole days.

    Both values are timestamps in seconds.  When ``now`` is omitted the
    current time is looked up on every call; a default computed in the
    signature would be frozen at import time.

    The difference is not checked for sign, so a ``timestamp`` later than
    ``now`` yields a zero or negative day count.
    """
    if now is None:
        now = time.time()
    diff_seconds = int(now) - int(timestamp)
    return f"{int(diff_seconds / dayseconds):2.0f}d"
|
||||
|
||||
|
||||
def K(size):
    """Format ``size`` in units of 1000, truncated, as a 6-wide number plus ``K``."""
    thousands = int(size / 1000)
    return f"{thousands:6.0f}K"
|
||||
|
||||
|
||||
def M(size):
    """Format ``size`` in units of 1,000,000, truncated, as a 6-wide number plus ``M``."""
    millions = int(size / 1000000)
    return f"{millions:6.0f}M"
|
||||
|
||||
|
||||
class Stats:
|
||||
def __init__(self, basedir, maxnum=None):
|
||||
self.basedir = str(basedir)
|
||||
@@ -53,9 +58,16 @@ class Stats:
|
||||
class MailboxStat:
|
||||
def __init__(self, mailboxdir):
|
||||
self.mailboxdir = mailboxdir = str(mailboxdir)
|
||||
# all detected messages in cur/new/tmp folders
|
||||
self.messages = []
|
||||
|
||||
# all detected files in mailbox top dir
|
||||
self.extrafiles = []
|
||||
|
||||
# total size of all detected files
|
||||
self.totalsize = 0
|
||||
|
||||
# scan all relevant files (without recursion)
|
||||
for name in os.listdir(mailboxdir):
|
||||
fpath = joinpath(mailboxdir, name)
|
||||
if name in ("cur", "new", "tmp"):
|
||||
@@ -66,11 +78,13 @@ class MailboxStat:
|
||||
self.messages.append(
|
||||
FileEntry(relpath, mtime=st.st_mtime, size=st.st_size)
|
||||
)
|
||||
self.totalsize += st.st_size
|
||||
else:
|
||||
st = os.stat(fpath)
|
||||
if S_ISREG(st.st_mode):
|
||||
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
|
||||
self.extrafiles.sort(key=lambda x: x.size, reverse=True)
|
||||
self.totalsize += st.st_size
|
||||
self.extrafiles.sort(key=lambda x: -x.size)
|
||||
|
||||
@property
|
||||
def last_login(self):
|
||||
@@ -78,101 +92,8 @@ class MailboxStat:
|
||||
if entry.relpath == "password":
|
||||
return entry.mtime
|
||||
|
||||
def get_messages(self, prefix=""):
    """Return a new list of the message entries whose ``relpath`` starts with ``prefix``.

    With the default empty prefix every entry in ``self.messages`` is returned.
    """
    return [entry for entry in self.messages if entry.relpath.startswith(prefix)]
|
||||
|
||||
def get_extra_files(self):
    """Return a shallow copy of ``self.extrafiles`` so callers cannot alter the original list."""
    return [*self.extrafiles]
|
||||
|
||||
def get_file_entry(self, name):
    """Return the entry in ``self.extrafiles`` whose ``relpath`` equals ``name``.

    Returns ``None`` when no entry matches.
    """
    for entry in self.extrafiles:
        # The attribute is ``relpath``; the former spelling ``relapth``
        # raised AttributeError on the first entry inspected.
        if entry.relpath == name:
            return entry
    return None
|
||||
|
||||
|
||||
class XXXStats:
    """Aggregate size and login figures from a stats cache and print them.

    Fill an instance with :meth:`analyze`, then call :meth:`dump_summary`.
    """

    def __init__(self):
        self.sum_extra = 0  # summed size of all non-message files
        self.sum_all_messages = 0  # summed size of files under cur/, new/, tmp/
        self.logins = []  # mtime of the "password" entry of each mailbox
        self.messages = []  # one FileEntry per message file

    def analyze(self, statscache):
        """Accumulate totals from ``statscache.cache``.

        ``statscache.cache`` is read as a mapping from mailbox to a mapping
        of ``relpath -> (mtime, size)``.  Mailboxes without a ``"password"``
        entry are skipped entirely.
        """
        print("start")
        for mailbox in statscache.cache:
            mbox_cache = statscache.cache[mailbox]
            if "password" not in mbox_cache:
                continue
            self.logins.append(mbox_cache["password"][0])
            for relpath, (mtime, size) in mbox_cache.items():
                if relpath[:4] in ("cur/", "new/", "tmp/"):
                    self.sum_all_messages += size
                    entry = FileEntry(relpath=relpath, mtime=mtime, size=size)
                    self.messages.append(entry)
                else:
                    self.sum_extra += size

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero."""
        return part / whole * 100 if whole else 0.0

    def dump_summary(self):
        """Print storage totals, per-threshold message sizes and login activity."""
        # time.time() is a true epoch timestamp; utcnow().timestamp() would be
        # shifted by the local UTC offset.
        now = time.time()
        total = self.sum_extra + self.sum_all_messages

        print(f"size of everything: {M(total)}")
        print(f"size all messages: {M(self.sum_all_messages)}")
        percent = self._percent(self.sum_extra, total)
        print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)")

        def is_seen(entry):
            return entry.relpath.startswith("cur")

        # (label used in the output line, extra condition besides the size)
        sections = (
            ("seen", is_seen),
            ("2day-old", lambda x: x.mtime < now - 2 * dayseconds),
            (
                "seen 7-day old",
                lambda x: is_seen(x) and x.mtime < now - 7 * dayseconds,
            ),
        )
        for label, matches in sections:
            for size in (100000, 200000, 500000, 1000000, 5000000):
                all_of_size = sum(
                    x.size for x in self.messages if x.size > size and matches(x)
                )
                percent = self._percent(all_of_size, self.sum_all_messages)
                print(
                    f"size {label} {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)"
                )

        print()

        num_logins = len(self.logins)
        monthly_active = len([x for x in self.logins if x >= now - monthseconds])
        daily_active = len([x for x in self.logins if x >= now - dayseconds])
        stale = num_logins - monthly_active

        def p(num):
            return f"({self._percent(num, num_logins):.2f}%)"

        print(f"all logins: {K(num_logins)}")
        print(f"monthly active: {K(monthly_active)} {p(monthly_active)}")
        print(f">1m old logins: {K(stale)} {p(stale)}")
        print(f"daily active: {K(daily_active)} {p(daily_active)}")
|
||||
|
||||
|
||||
def run_expire(config, basedir, dry=False, maxnum=None):
|
||||
now = time.time()
|
||||
|
||||
def run_expire(config, basedir, now, dry=True, maxnum=None):
|
||||
stat = Stats(basedir, maxnum=maxnum)
|
||||
stat.iter_mailboxes()
|
||||
cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400
|
||||
@@ -188,7 +109,9 @@ def run_expire(config, basedir, dry=False, maxnum=None):
|
||||
def unlink(mailboxdir, message):
|
||||
if dry:
|
||||
relpath = os.path.basename(mailboxdir) + message.relpath
|
||||
print(f"would remove {D(message.mtime)} {K(message.size)} {relpath}")
|
||||
print(
|
||||
f"would remove {message.fmt_since(now)} {message.fmt_size()} {relpath}"
|
||||
)
|
||||
else:
|
||||
os.unlink(path)
|
||||
|
||||
@@ -217,7 +140,9 @@ def run_expire(config, basedir, dry=False, maxnum=None):
|
||||
def main():
    """Command-line entry point; expects ``CFGPATH BASEDIR MAXNUM`` as arguments.

    Reads the configuration from ``CFGPATH`` and calls ``run_expire`` with
    the current time as the reference for age calculations.
    """
    cfgpath, basedir, maxnum = sys.argv[1:]
    config = read_config(cfgpath)
    # Use the real clock: a fixed calendar date here would make every age
    # computation wrong on any other day, and utcnow().timestamp() would be
    # shifted by the local UTC offset.
    now = time.time()
    run_expire(config, basedir, maxnum=int(maxnum), now=now)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
150
chatmaild/src/chatmaild/fsreport.py
Normal file
150
chatmaild/src/chatmaild/fsreport.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
from chatmaild.config import read_config
|
||||
from chatmaild.expire import FileEntry, Stats, joinpath
|
||||
|
||||
DAYSECONDS = 24 * 60 * 60
MONTHSECONDS = DAYSECONDS * 30


def D(timestamp, now=None):
    """Format the age of ``timestamp`` relative to ``now`` in whole days.

    Both values are timestamps in seconds.  When ``now`` is omitted the
    current time is looked up on every call; a default computed in the
    signature would be frozen at import time.

    The difference is not checked for sign, so a ``timestamp`` later than
    ``now`` yields a zero or negative day count.
    """
    if now is None:
        # datetime.now() is naive local time, which timestamp() interprets
        # correctly; utcnow().timestamp() would be off by the UTC offset.
        now = datetime.now().timestamp()
    diff_seconds = int(now) - int(timestamp)
    return f"{int(diff_seconds / DAYSECONDS):2.0f}d"
|
||||
|
||||
|
||||
def K(size):
    """Format ``size`` right-aligned in five columns.

    Values below 1000 are shown as-is; larger values are shown in units of
    1000 (truncated) followed by ``K``.
    """
    return f"{size:5.0f}" if size < 1000 else f"{int(size / 1000):5.0f}K"
|
||||
|
||||
|
||||
def M(size):
    """Format ``size`` in units of 1,000,000, truncated, as a 5-wide number plus ``M``."""
    millions = int(size / 1000000)
    return f"{millions:5.0f}M"
|
||||
|
||||
|
||||
def H(size):
    """Format ``size`` with the unit that fits its magnitude.

    Sizes below one million go through ``K``, sizes below one billion go
    through ``M``, and anything larger is shown in units of one billion
    with two decimals and a ``G`` suffix.
    """
    for upper_bound, formatter in ((1000 * 1000, K), (1000 * 1000 * 1000, M)):
        if size < upper_bound:
            return formatter(size)
    return f"{size/1000000000:2.2f}G"
|
||||
|
||||
|
||||
class Report:
    """Collect storage and login figures for a set of mailboxes and print them.

    ``stats.mailboxes`` is iterated; each mailbox must provide
    ``mailboxdir``, ``last_login``, ``messages`` and ``extrafiles``.
    ``now`` is the reference timestamp (in seconds) for all age calculations.
    """

    def __init__(self, stats, now):
        self.sum_extra = 0  # summed size of all extra (non-message) files
        self.sum_all_messages = 0  # summed size of all message files
        self.messages = []  # message entries, relpath prefixed with mailbox name
        self.user_logins = []  # last_login of mailboxes not starting with "ci-"
        self.ci_logins = []  # last_login of mailboxes starting with "ci-"
        self.stats = stats
        self.now = now

        for mailbox in stats.mailboxes:
            mailbox_name = os.path.basename(mailbox.mailboxdir)
            last_login = mailbox.last_login
            if last_login:
                if mailbox_name.startswith("ci-"):
                    self.ci_logins.append(last_login)
                else:
                    self.user_logins.append(last_login)

            for entry in mailbox.messages:
                new = FileEntry(
                    relpath=joinpath(mailbox_name, entry.relpath),
                    mtime=entry.mtime,
                    size=entry.size,
                )
                self.messages.append(new)
                self.sum_all_messages += entry.size

            for entry in mailbox.extrafiles:
                self.sum_extra += entry.size

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero."""
        return part / whole * 100 if whole else 0.0

    @staticmethod
    def _folder(entry):
        """Return the second path component of ``entry.relpath``, or ``""``.

        Entries in ``self.messages`` are ``<mailbox>/<per-mailbox relpath>``.
        NOTE(review): assumes the per-mailbox relpath begins with the folder
        name ("cur", "new", "tmp") -- confirm against MailboxStat.
        """
        parts = entry.relpath.split("/")
        return parts[1] if len(parts) > 1 else ""

    def dump_summary(self):
        """Print the message listings, the storage overview and the login statistics."""
        reports = []  # (title, total size) of each listing shown in the overview

        def print_messages(title, messages, num, rep=True):
            print()
            allsize = sum(x.size for x in messages)
            if rep:
                reports.append((title, allsize))

            print(f"## {title} [total: {H(allsize)}]")
            for entry in messages[:num]:
                # Ages are relative to the report's reference time, matching
                # the login statistics below.
                print(f"{K(entry.size)} {D(entry.mtime, self.now)} {entry.relpath}")

        for kind in ("cur", "new"):
            # Restrict each listing to its folder; otherwise both titles
            # would show the very same messages.
            biggest = [x for x in self.messages if self._folder(x) == kind]
            biggest.sort(key=lambda x: (-x.size, x.mtime))
            print_messages(f"Biggest {kind} messages", biggest, 10, rep=False)

        oldest = self.messages
        mode = "cur"
        for maxsize in (160000, 500000, 2000000, 10000000):
            # Thresholds only grow, so each pass can filter the previous result.
            # The folder is matched exactly: a substring test on the whole
            # path would also match mailbox names that contain the folder name.
            oldest = [
                x for x in oldest if x.size > maxsize and self._folder(x) == mode
            ]
            oldest.sort(key=lambda x: x.mtime)
            print_messages(f"{mode} folders oldest > {K(maxsize)} messages", oldest, 10)

        # list all 160K files of people who haven't logged in for a while
        # NOTE(review): the title says "new" but no folder filter is applied,
        # and the ascending sort lists the smallest matches -- confirm intent.
        messages = []
        cutoff_date_login = self.now - 30 * DAYSECONDS
        for mstat in self.stats.mailboxes:
            if mstat.last_login and mstat.last_login < cutoff_date_login:
                for msg in mstat.messages:
                    if msg.size > 160000:
                        messages.append(msg)

        messages.sort(key=lambda x: x.size)
        print_messages(">30-day last_login new >160K", messages, 10)

        total = self.sum_extra + self.sum_all_messages
        print()
        print("## Overall mailbox storage use analysis")
        print(f"Mailbox data: {M(total)}")
        print(f"Messages : {M(self.sum_all_messages)}")
        percent = self._percent(self.sum_extra, total)
        print(f"Extra files : {M(self.sum_extra)} ({percent:.2f}%)")

        for title, size in reports:
            percent = self._percent(size, self.sum_all_messages)
            print(f"{title:38} {M(size)} ({percent:.2f}%)")

        all_logins = len(self.user_logins) + len(self.ci_logins)
        num_logins = len(self.user_logins)
        ci_logins = len(self.ci_logins)

        def p(num):
            return f"({self._percent(num, num_logins):2.2f}%)"

        print()
        print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}")
        print(f"all: {K(all_logins)}")
        print(f"non-ci: {K(num_logins)}")
        print(f"ci: {K(ci_logins)}")
        for days in (1, 10, 30, 40, 80, 100, 150):
            active = len(
                [x for x in self.user_logins if x >= self.now - days * DAYSECONDS]
            )
            print(f"last {days:3} days: {K(active)} {p(active)}")
|
||||
|
||||
|
||||
def run_report(config, basedir, maxnum=None, now=None):
    """Scan the mailboxes below ``basedir`` and print a summary report.

    ``config`` is accepted but not used by this function.  ``maxnum`` is
    passed through to ``Stats``.  ``now`` is the reference timestamp for the
    report; when omitted, the current time is used, because ``Report`` does
    arithmetic on it and cannot work with ``None``.
    """
    if now is None:
        now = datetime.now().timestamp()
    stats = Stats(basedir, maxnum=maxnum)
    stats.iter_mailboxes()
    rep = Report(stats, now=now)
    rep.dump_summary()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point; expects ``CFGPATH BASEDIR MAXNUM`` as arguments.

    Reads the configuration from ``CFGPATH`` and prints the report using the
    current time as the date reference.
    """
    cfgpath, basedir, maxnum = sys.argv[1:]
    config = read_config(cfgpath)
    # Use the real clock: a fixed calendar date here would make every age
    # wrong on any other day, and utcnow().timestamp() would be shifted by
    # the local UTC offset.
    now = datetime.now().timestamp()
    run_report(config, basedir, maxnum=int(maxnum), now=now)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,4 +1,14 @@
|
||||
from chatmaild.expire import MailboxStat
|
||||
import random
|
||||
|
||||
from chatmaild.expire import FileEntry, MailboxStat
|
||||
|
||||
|
||||
def test_filentry_ordering():
    """Sorting shuffled entries by size restores the original ascending-size order."""
    entries = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)]
    expected = list(entries)
    random.shuffle(entries)
    entries.sort(key=lambda x: x.size)
    assert entries == expected
|
||||
|
||||
|
||||
def test_stats_mailbox(tmp_path):
|
||||
@@ -22,18 +32,15 @@ def test_stats_mailbox(tmp_path):
|
||||
assert mbox.last_login == password.stat().st_mtime
|
||||
assert len(mbox.messages) == 2
|
||||
|
||||
seen = mbox.get_messages("cur")
|
||||
assert len(seen) == 1
|
||||
assert seen[0].size == 3
|
||||
msgs = list(mbox.messages)
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0].size == 3 # cur
|
||||
|
||||
new = mbox.get_messages("new")
|
||||
assert len(new) == 1
|
||||
assert new[0].size == 6
|
||||
assert msgs[1].size == 6 # new
|
||||
|
||||
extra = mailboxdir.joinpath("large")
|
||||
extra.write_text("x" * 1000)
|
||||
mailboxdir.joinpath("index-something").write_text("123")
|
||||
mbox = MailboxStat(tmp_path)
|
||||
extrafiles = mbox.get_extra_files()
|
||||
assert len(extrafiles) == 3
|
||||
assert extrafiles[0].size == 1000
|
||||
assert len(mbox.extrafiles) == 3
|
||||
assert mbox.extrafiles[0].size == 1000
|
||||
|
||||
Reference in New Issue
Block a user