mirror of
https://github.com/chatmail/relay.git
synced 2026-05-19 20:38:05 +00:00
add summary reporting, rework expiry logic
This commit is contained in:
@@ -28,6 +28,7 @@ filtermail = "chatmaild.filtermail:main"
|
|||||||
echobot = "chatmaild.echo:main"
|
echobot = "chatmaild.echo:main"
|
||||||
chatmail-metrics = "chatmaild.metrics:main"
|
chatmail-metrics = "chatmaild.metrics:main"
|
||||||
expire = "chatmaild.expire:main"
|
expire = "chatmaild.expire:main"
|
||||||
|
fsreport = "chatmaild.fsreport:main"
|
||||||
lastlogin = "chatmaild.lastlogin:main"
|
lastlogin = "chatmaild.lastlogin:main"
|
||||||
turnserver = "chatmaild.turnserver:main"
|
turnserver = "chatmaild.turnserver:main"
|
||||||
|
|
||||||
|
|||||||
@@ -1,42 +1,47 @@
|
|||||||
|
"""
|
||||||
|
Expire old messages and addresses.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
from collections import namedtuple
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from stat import S_ISREG
|
from stat import S_ISREG
|
||||||
|
|
||||||
from chatmaild.config import read_config
|
from chatmaild.config import read_config
|
||||||
|
|
||||||
# delete already seen big mails after 7 days, in the INBOX
|
# XXX maildirsize (used by dovecot quota) needs to be removed after removing files
|
||||||
# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete
|
|
||||||
# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox
|
|
||||||
# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete
|
|
||||||
|
|
||||||
|
|
||||||
FileEntry = namedtuple("FileEntry", ["relpath", "mtime", "size"])
|
class FileEntry:
|
||||||
dayseconds = 24 * 60 * 60
|
def __init__(self, relpath, mtime, size):
|
||||||
monthseconds = dayseconds * 30
|
self.relpath = relpath
|
||||||
|
self.mtime = mtime
|
||||||
|
self.size = size
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<FileEntry size={self.size} '{self.relpath}'>"
|
||||||
|
|
||||||
|
def fmt_size(self):
|
||||||
|
return f"{int(self.size/1000):5.0f}K"
|
||||||
|
|
||||||
|
def fmt_since(self, now):
|
||||||
|
diff_seconds = int(now) - int(self.mtime)
|
||||||
|
return f"{int(diff_seconds / 86400):2.0f}d"
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return (
|
||||||
|
self.relpath == other.relpath
|
||||||
|
and self.size == other.size
|
||||||
|
and self.mtime == other.mtime
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def joinpath(name, extra):
|
def joinpath(name, extra):
|
||||||
return name + "/" + extra
|
return name + "/" + extra
|
||||||
|
|
||||||
|
|
||||||
def D(timestamp, now=datetime.utcnow().timestamp()):
|
|
||||||
diff_seconds = int(now) - int(timestamp)
|
|
||||||
# assert diff_seconds >= 0, (int(timestamp), int(now))
|
|
||||||
return f"{int(diff_seconds / dayseconds):2.0f}d"
|
|
||||||
|
|
||||||
|
|
||||||
def K(size):
|
|
||||||
return f"{int(size/1000):6.0f}K"
|
|
||||||
|
|
||||||
|
|
||||||
def M(size):
|
|
||||||
return f"{int(size/1000000):6.0f}M"
|
|
||||||
|
|
||||||
|
|
||||||
class Stats:
|
class Stats:
|
||||||
def __init__(self, basedir, maxnum=None):
|
def __init__(self, basedir, maxnum=None):
|
||||||
self.basedir = str(basedir)
|
self.basedir = str(basedir)
|
||||||
@@ -53,9 +58,16 @@ class Stats:
|
|||||||
class MailboxStat:
|
class MailboxStat:
|
||||||
def __init__(self, mailboxdir):
|
def __init__(self, mailboxdir):
|
||||||
self.mailboxdir = mailboxdir = str(mailboxdir)
|
self.mailboxdir = mailboxdir = str(mailboxdir)
|
||||||
|
# all detected messages in cur/new/tmp folders
|
||||||
self.messages = []
|
self.messages = []
|
||||||
|
|
||||||
|
# all detected files in mailbox top dir
|
||||||
self.extrafiles = []
|
self.extrafiles = []
|
||||||
|
|
||||||
|
# total size of all detected files
|
||||||
|
self.totalsize = 0
|
||||||
|
|
||||||
|
# scan all relevant files (without recursion)
|
||||||
for name in os.listdir(mailboxdir):
|
for name in os.listdir(mailboxdir):
|
||||||
fpath = joinpath(mailboxdir, name)
|
fpath = joinpath(mailboxdir, name)
|
||||||
if name in ("cur", "new", "tmp"):
|
if name in ("cur", "new", "tmp"):
|
||||||
@@ -66,11 +78,13 @@ class MailboxStat:
|
|||||||
self.messages.append(
|
self.messages.append(
|
||||||
FileEntry(relpath, mtime=st.st_mtime, size=st.st_size)
|
FileEntry(relpath, mtime=st.st_mtime, size=st.st_size)
|
||||||
)
|
)
|
||||||
|
self.totalsize += st.st_size
|
||||||
else:
|
else:
|
||||||
st = os.stat(fpath)
|
st = os.stat(fpath)
|
||||||
if S_ISREG(st.st_mode):
|
if S_ISREG(st.st_mode):
|
||||||
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
|
self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size))
|
||||||
self.extrafiles.sort(key=lambda x: x.size, reverse=True)
|
self.totalsize += st.st_size
|
||||||
|
self.extrafiles.sort(key=lambda x: -x.size)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def last_login(self):
|
def last_login(self):
|
||||||
@@ -78,101 +92,8 @@ class MailboxStat:
|
|||||||
if entry.relpath == "password":
|
if entry.relpath == "password":
|
||||||
return entry.mtime
|
return entry.mtime
|
||||||
|
|
||||||
def get_messages(self, prefix=""):
|
|
||||||
l = []
|
|
||||||
for entry in self.messages:
|
|
||||||
if entry.relpath.startswith(prefix):
|
|
||||||
l.append(entry)
|
|
||||||
return l
|
|
||||||
|
|
||||||
def get_extra_files(self):
|
|
||||||
return list(self.extrafiles)
|
|
||||||
|
|
||||||
def get_file_entry(self, name):
|
|
||||||
for entry in self.extrafiles:
|
|
||||||
if name == entry.relapth:
|
|
||||||
return entry
|
|
||||||
|
|
||||||
|
|
||||||
class XXXStats:
|
|
||||||
def __init__(self):
|
|
||||||
self.sum_extra = 0
|
|
||||||
self.sum_all_messages = 0
|
|
||||||
self.logins = []
|
|
||||||
self.messages = []
|
|
||||||
|
|
||||||
def analyze(self, statscache):
|
|
||||||
print("start")
|
|
||||||
for mailbox in statscache.cache:
|
|
||||||
mbox_cache = statscache.cache[mailbox]
|
|
||||||
if "password" not in mbox_cache:
|
|
||||||
continue
|
|
||||||
self.logins.append(mbox_cache["password"][0])
|
|
||||||
for relpath, (mtime, size) in mbox_cache.items():
|
|
||||||
if relpath[:4] in ("cur/", "new/", "tmp/"):
|
|
||||||
self.sum_all_messages += size
|
|
||||||
entry = FileEntry(relpath=relpath, mtime=mtime, size=size)
|
|
||||||
self.messages.append(entry)
|
|
||||||
else:
|
|
||||||
self.sum_extra += size
|
|
||||||
|
|
||||||
def dump_summary(self):
|
|
||||||
now = datetime.utcnow().timestamp()
|
|
||||||
|
|
||||||
print(f"size of everything: {M(self.sum_extra + self.sum_all_messages)}")
|
|
||||||
print(f"size all messages: {M(self.sum_all_messages)}")
|
|
||||||
percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100
|
|
||||||
print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)")
|
|
||||||
for size in (100000, 200000, 500000, 1000000, 5000000):
|
|
||||||
all_of_size = sum(
|
|
||||||
x.size
|
|
||||||
for x in self.messages
|
|
||||||
if x.size > size and x.relpath.startswith("cur")
|
|
||||||
)
|
|
||||||
percent = all_of_size / self.sum_all_messages * 100
|
|
||||||
print(f"size seen {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)")
|
|
||||||
for size in (100000, 200000, 500000, 1000000, 5000000):
|
|
||||||
all_of_size = sum(
|
|
||||||
x.size
|
|
||||||
for x in self.messages
|
|
||||||
if x.size > size and x.mtime < now - 2 * dayseconds
|
|
||||||
)
|
|
||||||
percent = all_of_size / self.sum_all_messages * 100
|
|
||||||
print(
|
|
||||||
f"size 2day-old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)"
|
|
||||||
)
|
|
||||||
for size in (100000, 200000, 500000, 1000000, 5000000):
|
|
||||||
all_of_size = sum(
|
|
||||||
x.size
|
|
||||||
for x in self.messages
|
|
||||||
if x.size > size
|
|
||||||
and x.relpath.startswith("cur")
|
|
||||||
and x.mtime < now - 7 * dayseconds
|
|
||||||
)
|
|
||||||
percent = all_of_size / self.sum_all_messages * 100
|
|
||||||
print(
|
|
||||||
f"size seen 7-day old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)"
|
|
||||||
)
|
|
||||||
|
|
||||||
print()
|
|
||||||
|
|
||||||
num_logins = len(self.logins)
|
|
||||||
monthly_active = len([x for x in self.logins if x >= now - monthseconds])
|
|
||||||
daily_active = len([x for x in self.logins if x >= now - dayseconds])
|
|
||||||
stale = num_logins - monthly_active
|
|
||||||
|
|
||||||
def p(num):
|
|
||||||
return f"({num/num_logins * 100:.2f}%)"
|
|
||||||
|
|
||||||
print(f"all logins: {K(num_logins)}")
|
|
||||||
print(f"monthly active: {K(monthly_active)} {p(monthly_active)}")
|
|
||||||
print(f">1m old logins: {K(stale)} {p(stale)}")
|
|
||||||
print(f"daily active: {K(daily_active)} {p(daily_active)}")
|
|
||||||
|
|
||||||
|
|
||||||
def run_expire(config, basedir, dry=False, maxnum=None):
|
|
||||||
now = time.time()
|
|
||||||
|
|
||||||
|
def run_expire(config, basedir, now, dry=True, maxnum=None):
|
||||||
stat = Stats(basedir, maxnum=maxnum)
|
stat = Stats(basedir, maxnum=maxnum)
|
||||||
stat.iter_mailboxes()
|
stat.iter_mailboxes()
|
||||||
cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400
|
cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400
|
||||||
@@ -188,7 +109,9 @@ def run_expire(config, basedir, dry=False, maxnum=None):
|
|||||||
def unlink(mailboxdir, message):
|
def unlink(mailboxdir, message):
|
||||||
if dry:
|
if dry:
|
||||||
relpath = os.path.basename(mailboxdir) + message.relpath
|
relpath = os.path.basename(mailboxdir) + message.relpath
|
||||||
print(f"would remove {D(message.mtime)} {K(message.size)} {relpath}")
|
print(
|
||||||
|
f"would remove {message.fmt_since(now)} {message.fmt_size()} {relpath}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
|
|
||||||
@@ -217,7 +140,9 @@ def run_expire(config, basedir, dry=False, maxnum=None):
|
|||||||
def main():
|
def main():
|
||||||
cfgpath, basedir, maxnum = sys.argv[1:]
|
cfgpath, basedir, maxnum = sys.argv[1:]
|
||||||
config = read_config(cfgpath)
|
config = read_config(cfgpath)
|
||||||
run_expire(config, basedir, dry=True, maxnum=int(maxnum))
|
now = datetime.utcnow().timestamp()
|
||||||
|
now = datetime(2025, 9, 9).timestamp()
|
||||||
|
run_expire(config, basedir, maxnum=int(maxnum), now=now)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
150
chatmaild/src/chatmaild/fsreport.py
Normal file
150
chatmaild/src/chatmaild/fsreport.py
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from chatmaild.config import read_config
|
||||||
|
from chatmaild.expire import FileEntry, Stats, joinpath
|
||||||
|
|
||||||
|
DAYSECONDS = 24 * 60 * 60
|
||||||
|
MONTHSECONDS = DAYSECONDS * 30
|
||||||
|
|
||||||
|
|
||||||
|
def D(timestamp, now=None):
    """Format the age of ``timestamp`` relative to ``now`` as whole days.

    Both arguments are epoch seconds. When ``now`` is omitted the current
    time is looked up on every call; a default computed in the signature
    would be frozen at import time.

    Returns a string such as `` 3d`` (day count right-aligned to two
    characters, truncated towards zero).
    """
    if now is None:
        # datetime.now().timestamp() yields the real epoch time; calling
        # .timestamp() on a naive utcnow() value would be skewed by the
        # local UTC offset.
        now = datetime.now().timestamp()
    diff_seconds = int(now) - int(timestamp)
    return f"{int(diff_seconds / DAYSECONDS):2.0f}d"
|
||||||
|
|
||||||
|
|
||||||
|
def K(size):
    """Format ``size`` right-aligned, switching to thousands from 1000 upwards.

    Values below 1000 are printed unchanged in a five-character field.
    Larger values are divided by 1000, truncated to an integer and
    suffixed with ``K``.
    """
    if size < 1000:
        return f"{size:5.0f}"
    return f"{int(size / 1000):5.0f}K"
|
||||||
|
|
||||||
|
|
||||||
|
def M(size):
    """Format ``size`` in millions, truncated, right-aligned with an ``M`` suffix."""
    return f"{int(size / 1000000):5.0f}M"
|
||||||
|
|
||||||
|
|
||||||
|
def H(size):
    """Format ``size`` with the most fitting unit (plain/K, M or G).

    Sizes below one million are delegated to ``K``, sizes below one
    billion to ``M``; anything larger is shown in billions with two
    decimals and a ``G`` suffix.
    """
    if size < 1000 * 1000:
        return K(size)
    if size < 1000 * 1000 * 1000:
        return M(size)
    return f"{size / 1000000000:2.2f}G"
|
||||||
|
|
||||||
|
|
||||||
|
class Report:
    """Aggregate message, extra-file and login data of scanned mailboxes.

    ``stats`` must expose a ``mailboxes`` iterable whose items provide
    ``mailboxdir``, ``last_login``, ``messages`` and ``extrafiles``.
    ``now`` is the reference time in epoch seconds used for all age and
    activity computations; ``None`` means "the current time".
    """

    def __init__(self, stats, now):
        if now is None:
            # Keep age arithmetic working when no reference time is given.
            now = datetime.now().timestamp()

        self.sum_extra = 0
        self.sum_all_messages = 0
        self.messages = []
        self.user_logins = []
        self.ci_logins = []
        self.stats = stats
        self.now = now

        for mailbox in stats.mailboxes:
            boxname = os.path.basename(mailbox.mailboxdir)
            last_login = mailbox.last_login
            if last_login:
                # Mailboxes whose directory name starts with "ci-" are
                # counted separately from regular user logins.
                if boxname[:3] == "ci-":
                    self.ci_logins.append(last_login)
                else:
                    self.user_logins.append(last_login)

            # NOTE(review): messages and extra files are counted for every
            # mailbox, also those without a recorded login -- confirm this
            # nesting matches the intended behaviour.
            for entry in mailbox.messages:
                # Prefix the mailbox name so entries from different
                # mailboxes stay distinguishable in the global list.
                new = FileEntry(
                    relpath=joinpath(boxname, entry.relpath),
                    mtime=entry.mtime,
                    size=entry.size,
                )
                self.messages.append(new)
                self.sum_all_messages += entry.size

            for entry in mailbox.extrafiles:
                self.sum_extra += entry.size

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole`` (0.0 if ``whole`` is 0)."""
        if not whole:
            return 0.0
        return part / whole * 100

    @staticmethod
    def _in_folder(entry, kind):
        """Return True if ``entry`` sits in the mailbox sub-folder ``kind``.

        Assumes report-level relpaths look like ``<mailbox>/<folder>/<file>``
        (mailbox name joined in ``__init__``) -- TODO confirm against
        ``MailboxStat``.
        """
        parts = entry.relpath.split("/")
        return len(parts) > 1 and parts[1] == kind

    def dump_summary(self):
        """Print the per-category message listings and the overall statistics."""
        # (title, total size) pairs repeated in the overall storage section.
        reports = []

        def print_messages(title, messages, num, rep=True):
            print()
            allsize = sum(x.size for x in messages)
            if rep:
                reports.append((title, allsize))

            print(f"## {title} [total: {H(allsize)}]")
            for entry in messages[:num]:
                # Ages are shown relative to the report's reference time.
                print(f"{K(entry.size)} {D(entry.mtime, self.now)} {entry.relpath}")

        for kind in ("cur", "new"):
            # Only consider messages of the folder named in the title.
            biggest = [x for x in self.messages if self._in_folder(x, kind)]
            biggest.sort(key=lambda x: (-x.size, x.mtime))
            print_messages(f"Biggest {kind} messages", biggest, 10, rep=False)

        oldest = self.messages
        mode = "cur"
        for maxsize in (160000, 500000, 2000000, 10000000):
            # Thresholds grow, so each pass can narrow the previous result.
            oldest = [
                x for x in oldest if x.size > maxsize and self._in_folder(x, mode)
            ]
            oldest.sort(key=lambda x: x.mtime)
            print_messages(f"{mode} folders oldest > {K(maxsize)} messages", oldest, 10)

        # list all 160K files of people who haven't logged in for a while
        messages = []
        cutoff_date_login = self.now - 30 * DAYSECONDS
        for mstat in self.stats.mailboxes:
            if mstat.last_login and mstat.last_login < cutoff_date_login:
                for msg in mstat.messages:
                    if msg.size > 160000:
                        messages.append(msg)

        # NOTE(review): ascending order lists the smallest matches first and
        # the entries carry no mailbox prefix -- confirm this is intended.
        messages.sort(key=lambda x: x.size)
        print_messages(">30-day last_login new >160K", messages, 10)

        print()
        print("## Overall mailbox storage use analysis")
        total = self.sum_extra + self.sum_all_messages
        print(f"Mailbox data: {M(total)}")
        print(f"Messages : {M(self.sum_all_messages)}")
        percent = self._percent(self.sum_extra, total)
        print(f"Extra files : {M(self.sum_extra)} ({percent:.2f}%)")

        for title, size in reports:
            percent = self._percent(size, self.sum_all_messages)
            print(f"{title:38} {M(size)} ({percent:.2f}%)")

        all_logins = len(self.user_logins) + len(self.ci_logins)
        num_logins = len(self.user_logins)
        ci_logins = len(self.ci_logins)

        def p(num):
            # Share of non-ci logins; safe when there are none at all.
            return f"({self._percent(num, num_logins):2.2f}%)"

        print()
        print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}")
        print(f"all: {K(all_logins)}")
        print(f"non-ci: {K(num_logins)}")
        print(f"ci: {K(ci_logins)}")
        for days in (1, 10, 30, 40, 80, 100, 150):
            active = len(
                [x for x in self.user_logins if x >= self.now - days * DAYSECONDS]
            )
            print(f"last {days:3} days: {K(active)} {p(active)}")
|
||||||
|
|
||||||
|
|
||||||
|
def run_report(config, basedir, maxnum=None, now=None):
    """Scan the mailboxes below ``basedir`` and print a storage/login report.

    ``config`` is accepted for call compatibility but is not used here.
    ``maxnum`` is handed to ``Stats`` unchanged. ``now`` is the reference
    time in epoch seconds; when omitted, the current time is used.
    """
    if now is None:
        # The report does arithmetic on the reference time, so None must
        # be resolved before it is passed on.
        now = datetime.now().timestamp()
    stats = Stats(basedir, maxnum=maxnum)
    stats.iter_mailboxes()
    rep = Report(stats, now=now)
    rep.dump_summary()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: ``fsreport <cfgpath> <basedir> [maxnum]``.

    Reads the configuration, then prints the report using the current
    time as reference date. ``maxnum`` is optional and is passed on as
    ``None`` when left out.
    """
    args = sys.argv[1:]
    if len(args) not in (2, 3):
        raise SystemExit("usage: fsreport <cfgpath> <basedir> [maxnum]")
    cfgpath, basedir = args[:2]
    maxnum = int(args[2]) if len(args) == 3 else None

    config = read_config(cfgpath)
    # Use the real clock; a pinned debug date would distort every age and
    # activity figure in the report.
    now = datetime.now().timestamp()
    run_report(config, basedir, maxnum=maxnum, now=now)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,4 +1,14 @@
|
|||||||
from chatmaild.expire import MailboxStat
|
import random
|
||||||
|
|
||||||
|
from chatmaild.expire import FileEntry, MailboxStat
|
||||||
|
|
||||||
|
|
||||||
|
def test_filentry_ordering():
    """Sorting shuffled FileEntry objects by size restores ascending order."""
    expected = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)]
    shuffled = list(expected)
    random.shuffle(shuffled)
    shuffled.sort(key=lambda x: x.size)
    # Relies on FileEntry equality comparing relpath, size and mtime.
    assert shuffled == expected
|
||||||
|
|
||||||
|
|
||||||
def test_stats_mailbox(tmp_path):
|
def test_stats_mailbox(tmp_path):
|
||||||
@@ -22,18 +32,15 @@ def test_stats_mailbox(tmp_path):
|
|||||||
assert mbox.last_login == password.stat().st_mtime
|
assert mbox.last_login == password.stat().st_mtime
|
||||||
assert len(mbox.messages) == 2
|
assert len(mbox.messages) == 2
|
||||||
|
|
||||||
seen = mbox.get_messages("cur")
|
msgs = list(mbox.messages)
|
||||||
assert len(seen) == 1
|
assert len(msgs) == 2
|
||||||
assert seen[0].size == 3
|
assert msgs[0].size == 3 # cur
|
||||||
|
|
||||||
new = mbox.get_messages("new")
|
assert msgs[1].size == 6 # new
|
||||||
assert len(new) == 1
|
|
||||||
assert new[0].size == 6
|
|
||||||
|
|
||||||
extra = mailboxdir.joinpath("large")
|
extra = mailboxdir.joinpath("large")
|
||||||
extra.write_text("x" * 1000)
|
extra.write_text("x" * 1000)
|
||||||
mailboxdir.joinpath("index-something").write_text("123")
|
mailboxdir.joinpath("index-something").write_text("123")
|
||||||
mbox = MailboxStat(tmp_path)
|
mbox = MailboxStat(tmp_path)
|
||||||
extrafiles = mbox.get_extra_files()
|
assert len(mbox.extrafiles) == 3
|
||||||
assert len(extrafiles) == 3
|
assert mbox.extrafiles[0].size == 1000
|
||||||
assert extrafiles[0].size == 1000
|
|
||||||
|
|||||||
Reference in New Issue
Block a user