feat(dovecot): add config flag to export statistics (#806)

This adds exporting of some dovecot event metrics to help debugging slow IMAP login and hibernation. For now, re-using mtail_address config flag and configure the port of the dovecot exporter to be 3904.
This commit is contained in:
j4n
2026-01-16 11:35:19 +01:00
committed by GitHub
parent 4906b82e44
commit be7aa21039
2 changed files with 133 additions and 0 deletions

View File

@@ -541,6 +541,8 @@ def deploy_chatmail(config_path: Path, disable_mail: bool, website_only: bool) -
(["master", "smtpd"], 587),
(["imap-login", "dovecot"], 993),
("iroh-relay", 3340),
("mtail", 3903),
("dovecot-stats", 3904),
("nginx", 8443),
(["master", "smtpd"], config.postfix_reinject_port),
(["master", "smtpd"], config.postfix_reinject_port_incoming),

View File

@@ -277,3 +277,134 @@ service imap-hibernate {
}
}
{% endif %}
{% if config.mtail_address %}
#
# Dovecot Statistics
#
# OpenMetrics endpoint at http://{{- config.mtail_address}}:3904/metrics
service stats {
inet_listener http {
port = 3904
address = {{- config.mtail_address}}
}
}
# IMAP Command Metrics
# - Bytes in/out for compression efficiency analysis
# - Lock wait time for contention debugging
# - Grouped by command name and reply state
metric imap_command {
filter = event=imap_command_finished
fields = bytes_in bytes_out lock_wait_usecs running_usecs
group_by = cmd_name tagged_reply_state
}
# Duration buckets for latency histograms (base 10: 10us, 100us, 1ms, 10ms, 100ms, 1s, 10s, 100s)
metric imap_command_duration {
filter = event=imap_command_finished
group_by = cmd_name duration:exponential:1:8:10
}
# Slow command outliers (>1 second = 1000000 usecs)
# Useful for alerting without high cardinality
metric imap_command_slow {
filter = event=imap_command_finished AND duration>1000000 AND NOT cmd_name=IDLE
group_by = cmd_name
}
# IDLE-specific metrics
metric imap_idle {
filter = event=imap_command_finished AND cmd_name=IDLE
fields = bytes_in bytes_out running_usecs
group_by = tagged_reply_state
}
metric imap_idle_duration {
filter = event=imap_command_finished AND cmd_name=IDLE
# Base 10: 100ms to 27h (covers short wakeups to long idle sessions)
group_by = duration:exponential:5:11:10
}
# Hibernation Metrics (requires imap_hibernate_timeout to be set)
metric imap_hibernated {
filter = event=imap_client_hibernated
# error field present = failure
group_by = mailbox
}
metric imap_hibernated_failed {
filter = event=imap_client_hibernated AND error=*
}
metric imap_unhibernated {
filter = event=imap_client_unhibernated
fields = hibernation_usecs
group_by = reason
}
metric imap_unhibernated_failed {
filter = event=imap_client_unhibernated AND error=*
}
# Hibernation duration buckets (how long clients actually stayed hibernated)
# Base 10: 100ms to 27h
metric imap_hibernation_duration {
filter = event=imap_client_unhibernated
group_by = reason duration:exponential:5:11:10
}
# Authentication / Login Metrics
metric auth_request {
filter = event=auth_request_finished
group_by = success
}
metric auth_request_duration {
filter = event=auth_request_finished
group_by = success duration:exponential:2:6:10
}
metric auth_failed {
filter = event=auth_request_finished AND success=no
}
# Passdb cache effectiveness
metric auth_passdb {
filter = event=auth_passdb_request_finished
group_by = result cache
}
# Master login (post-auth userdb lookup)
metric auth_master_login {
filter = event=auth_master_client_login_finished
}
metric auth_master_login_failed {
filter = event=auth_master_client_login_finished AND error=*
}
# Mail Delivery (LMTP) - affects IDLE wakeup latency
metric mail_delivery {
filter = event=mail_delivery_finished
}
metric mail_delivery_duration {
filter = event=mail_delivery_finished
group_by = duration:exponential:3:7:10
}
metric mail_delivery_failed {
filter = event=mail_delivery_finished AND error=*
}
# Connection Events
metric client_connected {
filter = event=client_connection_connected AND category=service:imap
}
metric client_disconnected {
filter = event=client_connection_disconnected AND category=service:imap
fields = bytes_in bytes_out
}
{% endif %}