Compare commits

...

14 Commits

Author SHA1 Message Date
holger krekel
ddc36902c8 try make opendkim reload on test1 faster 2026-03-10 21:39:08 +01:00
holger krekel
8a1c1d9288 ci: replace staging workflows with LXC-local testing
Remove the staging VPS-based test-and-deploy workflows and
replace them with LXC-local CI. Refactor image caching
to use per-container aliases instead of a single relay image.
Ensure postfix/dovecot/opendkim restarts after unbound restarts
2026-03-10 21:35:14 +01:00
holger krekel
c3fd52e5dd fix: use push_file_content for unbound and sysctl config files
fixes silent fail on invalid configuration file.
2026-03-10 21:34:13 +01:00
holger krekel
233d442bca fix: run cmdeploy dns and restart filtermail in lxc-start --run
lxc-start --run deployed the relay but never loaded DNS zones
or restarted filtermail-incoming, so DKIM verification and
outgoing mail failed.
2026-03-10 21:14:22 +01:00
holger krekel
8aabb9a955 fix: pass full config to ChatmailDeployer and ensure mailboxes_dir
ChatmailDeployer now receives the full config object
so it can access mailboxes_dir and create this directory
during deployment, preventing mail delivery failures.
2026-03-10 21:14:22 +01:00
holger krekel
9fc05646e0 fix: separate stderr capture in incus and suppress in test plugin
Capture stderr separately in Incus.run() instead of merging
it into stdout, which corrupted JSON parsing in run_json().
Add --quiet flag to reduce incus noise.
Suppress stderr in Remote subprocess to prevent pytest-xdist
communication issues.
2026-03-10 21:14:22 +01:00
holger krekel
155c1221b8 remove superflous sepchar argument 2026-03-10 19:12:31 +01:00
holger krekel
3393d071e5 remove memory limits in containers 2026-03-10 14:14:40 +01:00
holger krekel
52c73658f9 fix a potential hang because "journalctl -f" never finishes by itself, and handles might stay open if we don't close them during test teardown. 2026-03-10 13:31:53 +01:00
holger krekel
b022a61955 try to discover and use host dns settings instead of 9.9.9.9 2026-03-10 13:25:52 +01:00
holger krekel
2e383a8e94 make sure subprocesses by defaulit don't inherit stdin FD 2026-03-10 10:54:27 +01:00
holger krekel
9908a4c88c try fix some test hangs potentially caused by filtermail's DNS cache 2026-03-09 17:33:52 +01:00
holger krekel
c04f4a4b44 force tcp connections for dns resolution 2026-03-09 16:44:33 +01:00
holger krekel
6d0ce061bc change all timeouts to 60 2026-03-09 16:40:18 +01:00
13 changed files with 303 additions and 371 deletions

View File

@@ -16,27 +16,97 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
- name: download filtermail
run: curl -L https://github.com/chatmail/filtermail/releases/download/v0.5.2/filtermail-x86_64 -o /usr/local/bin/filtermail && chmod +x /usr/local/bin/filtermail
- name: run chatmaild tests
- name: run chatmaild tests
working-directory: chatmaild
run: pipx run tox
scripts:
name: deploy-chatmail tests
name: deploy-chatmail tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: initenv
- name: initenv
run: scripts/initenv.sh
- name: append venv/bin to PATH
run: echo venv/bin >>$GITHUB_PATH
- name: run formatting checks
run: cmdeploy fmt -v
- name: run formatting checks
run: cmdeploy fmt -v
- name: run deploy-chatmail offline tests
run: pytest --pyargs cmdeploy
- name: run deploy-chatmail offline tests
run: pytest --pyargs cmdeploy
# all other cmdeploy commands require a staging server
# see https://github.com/deltachat/chatmail/issues/100
lxc-test:
name: LXC deploy and test
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: install incus
run: sudo apt-get update && sudo apt-get install -y incus
- name: initialise incus
run: |
sudo systemctl stop docker.socket docker || true
sudo iptables -P FORWARD ACCEPT
sudo sysctl -w fs.inotify.max_user_instances=65535
sudo sysctl -w fs.inotify.max_user_watches=65535
sudo incus admin init --minimal --quiet
sudo usermod -aG incus-admin "$USER"
- name: initenv
run: scripts/initenv.sh
- name: append venv/bin to PATH
run: echo venv/bin >>$GITHUB_PATH
- name: restore cached images
id: cache-images
uses: actions/cache@v4
with:
path: |
/tmp/localchat-base.tar.gz
/tmp/localchat-ns.tar.gz
/tmp/localchat-test0.tar.gz
/tmp/localchat-test1.tar.gz
lxconfigs/id_localchat*
key: incus-contain-v2-${{ runner.os }}-${{ github.ref_name }}
restore-keys: |
incus-contain-v2-${{ runner.os }}-${{ github.ref_name }}-
incus-contain-v2-${{ runner.os }}-main-
incus-contain-v2-${{ runner.os }}-
- name: import cached images
run: |
for alias in localchat-base localchat-ns localchat-test0 localchat-test1; do
if [ -f /tmp/$alias.tar.gz ]; then
sg incus-admin -c "incus --quiet image import /tmp/$alias.tar.gz --alias $alias" || true
fi
done
- name: cmdeploy lxc-test
run: sg incus-admin -c 'cmdeploy lxc-test -vv'
- name: show container logs on failure
if: failure() || cancelled()
run: |
for c in test0-localchat test1-localchat; do
echo "::group::$c journal (warnings+errors)"
sg incus-admin -c "incus exec $c -- journalctl -p warning --no-pager -n200" 2>/dev/null || echo "no log"
echo "::endgroup::"
echo "::group::$c failed services"
sg incus-admin -c "incus exec $c -- systemctl --no-pager --failed" || true
echo "::endgroup::"
done
- name: export images for cache
if: always()
run: |
for alias in localchat-base localchat-ns localchat-test0 localchat-test1; do
sg incus-admin -c "incus --quiet image export $alias /tmp/$alias" || true
done

View File

@@ -1,20 +0,0 @@
;; Zone file for staging-ipv4.testrun.org
$ORIGIN staging-ipv4.testrun.org.
$TTL 300
@ IN SOA ns.testrun.org. root.nine.testrun.org (
2023010101 ; Serial
7200 ; Refresh
3600 ; Retry
1209600 ; Expire
3600 ; Negative response caching TTL
)
;; Nameservers.
@ IN NS ns.testrun.org.
;; DNS records.
@ IN A 37.27.95.249
mta-sts.staging-ipv4.testrun.org. CNAME staging-ipv4.testrun.org.
www.staging-ipv4.testrun.org. CNAME staging-ipv4.testrun.org.

View File

@@ -1,21 +0,0 @@
;; Zone file for staging2.testrun.org
$ORIGIN staging2.testrun.org.
$TTL 300
@ IN SOA ns.testrun.org. root.nine.testrun.org (
2023010101 ; Serial
7200 ; Refresh
3600 ; Retry
1209600 ; Expire
3600 ; Negative response caching TTL
)
;; Nameservers.
@ IN NS ns.testrun.org.
;; DNS records.
@ IN A 37.27.24.139
mta-sts.staging2.testrun.org. CNAME staging2.testrun.org.
www.staging2.testrun.org. CNAME staging2.testrun.org.

View File

@@ -1,104 +0,0 @@
name: deploy on staging-ipv4.testrun.org, and run tests
on:
push:
branches:
- main
pull_request:
paths-ignore:
- 'scripts/**'
- '**/README.md'
- 'CHANGELOG.md'
- 'LICENSE'
jobs:
deploy:
name: deploy on staging-ipv4.testrun.org, and run tests
runs-on: ubuntu-latest
timeout-minutes: 30
environment:
name: staging-ipv4.testrun.org
url: https://staging-ipv4.testrun.org/
concurrency: staging-ipv4.testrun.org
steps:
- uses: actions/checkout@v4
- name: prepare SSH
run: |
mkdir ~/.ssh
echo "${{ secrets.STAGING_SSH_KEY }}" >> ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan staging-ipv4.testrun.org > ~/.ssh/known_hosts
# save previous acme & dkim state
rsync -avz root@staging-ipv4.testrun.org:/var/lib/acme acme-ipv4 || true
rsync -avz root@staging-ipv4.testrun.org:/etc/dkimkeys dkimkeys-ipv4 || true
# store previous acme & dkim state on ns.testrun.org, if it contains useful certs
if [ -f dkimkeys-ipv4/dkimkeys/opendkim.private ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" dkimkeys-ipv4 root@ns.testrun.org:/tmp/ || true; fi
if [ "$(ls -A acme-ipv4/acme/certs)" ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" acme-ipv4 root@ns.testrun.org:/tmp/ || true; fi
# make sure CAA record isn't set
scp -o StrictHostKeyChecking=accept-new .github/workflows/staging-ipv4.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging-ipv4.testrun.org.zone
ssh root@ns.testrun.org sed -i '/CAA/d' /etc/nsd/staging-ipv4.testrun.org.zone
ssh root@ns.testrun.org nsd-checkzone staging-ipv4.testrun.org /etc/nsd/staging-ipv4.testrun.org.zone
ssh root@ns.testrun.org systemctl reload nsd
- name: rebuild staging-ipv4.testrun.org to have a clean VPS
run: |
curl -X POST \
-H "Authorization: Bearer ${{ secrets.HETZNER_API_TOKEN }}" \
-H "Content-Type: application/json" \
-d '{"image":"debian-12"}' \
"https://api.hetzner.cloud/v1/servers/${{ secrets.STAGING_IPV4_SERVER_ID }}/actions/rebuild"
- run: scripts/initenv.sh
- name: append venv/bin to PATH
run: echo venv/bin >>$GITHUB_PATH
- name: upload TLS cert after rebuilding
run: |
echo " --- wait until staging-ipv4.testrun.org VPS is rebuilt --- "
rm ~/.ssh/known_hosts
while ! ssh -o ConnectTimeout=180 -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org id -u ; do sleep 1 ; done
ssh -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org id -u
# download acme & dkim state from ns.testrun.org
rsync -e "ssh -o StrictHostKeyChecking=accept-new" -avz root@ns.testrun.org:/tmp/acme-ipv4/acme acme-restore || true
rsync -avz root@ns.testrun.org:/tmp/dkimkeys-ipv4/dkimkeys dkimkeys-restore || true
# restore acme & dkim state to staging2.testrun.org
rsync -avz acme-restore/acme root@staging-ipv4.testrun.org:/var/lib/ || true
rsync -avz dkimkeys-restore/dkimkeys root@staging-ipv4.testrun.org:/etc/ || true
ssh -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org chown root:root -R /var/lib/acme || true
- name: run deploy-chatmail offline tests
run: pytest --pyargs cmdeploy
- name: setup dependencies
run: |
ssh root@staging-ipv4.testrun.org apt update
ssh root@staging-ipv4.testrun.org apt install -y git python3.11-venv python3-dev gcc
ssh root@staging-ipv4.testrun.org git clone https://github.com/chatmail/relay
ssh root@staging-ipv4.testrun.org "cd relay && git checkout " ${{ github.head_ref }}
ssh root@staging-ipv4.testrun.org "cd relay && scripts/initenv.sh"
- name: initialize config
run: |
ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy init staging-ipv4.testrun.org"
ssh root@staging-ipv4.testrun.org "sed -i 's#disable_ipv6 = False#disable_ipv6 = True#' relay/chatmail.ini"
ssh root@staging-ipv4.testrun.org "sed -i 's/#\s*mtail_address/mtail_address/' relay/chatmail.ini"
- run: ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy run --verbose --skip-dns-check --ssh-host localhost"
- name: set DNS entries
run: |
ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy dns --zonefile staging-generated.zone --ssh-host localhost"
ssh root@staging-ipv4.testrun.org cat relay/staging-generated.zone >> .github/workflows/staging-ipv4.testrun.org-default.zone
cat .github/workflows/staging-ipv4.testrun.org-default.zone
scp .github/workflows/staging-ipv4.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging-ipv4.testrun.org.zone
ssh root@ns.testrun.org nsd-checkzone staging-ipv4.testrun.org /etc/nsd/staging-ipv4.testrun.org.zone
ssh root@ns.testrun.org systemctl reload nsd
- name: cmdeploy test
run: ssh root@staging-ipv4.testrun.org "cd relay && CHATMAIL_DOMAIN2=ci-chatmail.testrun.org scripts/cmdeploy test --slow --ssh-host localhost"
- name: cmdeploy dns
run: ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy dns -v --ssh-host localhost"

View File

@@ -1,97 +0,0 @@
name: deploy on staging2.testrun.org, and run tests
on:
push:
branches:
- main
pull_request:
paths-ignore:
- 'scripts/**'
- '**/README.md'
- 'CHANGELOG.md'
- 'LICENSE'
jobs:
deploy:
name: deploy on staging2.testrun.org, and run tests
runs-on: ubuntu-latest
timeout-minutes: 30
environment:
name: staging2.testrun.org
url: https://staging2.testrun.org/
concurrency: staging2.testrun.org
steps:
- uses: actions/checkout@v4
- name: prepare SSH
run: |
mkdir ~/.ssh
echo "${{ secrets.STAGING_SSH_KEY }}" >> ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan staging2.testrun.org > ~/.ssh/known_hosts
# save previous acme & dkim state
rsync -avz root@staging2.testrun.org:/var/lib/acme . || true
rsync -avz root@staging2.testrun.org:/etc/dkimkeys . || true
# store previous acme & dkim state on ns.testrun.org, if it contains useful certs
if [ -f dkimkeys/opendkim.private ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" dkimkeys root@ns.testrun.org:/tmp/ || true; fi
if [ "$(ls -A acme/certs)" ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" acme root@ns.testrun.org:/tmp/ || true; fi
# make sure CAA record isn't set
scp -o StrictHostKeyChecking=accept-new .github/workflows/staging.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging2.testrun.org.zone
ssh root@ns.testrun.org sed -i '/CAA/d' /etc/nsd/staging2.testrun.org.zone
ssh root@ns.testrun.org nsd-checkzone staging2.testrun.org /etc/nsd/staging2.testrun.org.zone
ssh root@ns.testrun.org systemctl reload nsd
- name: rebuild staging2.testrun.org to have a clean VPS
run: |
curl -X POST \
-H "Authorization: Bearer ${{ secrets.HETZNER_API_TOKEN }}" \
-H "Content-Type: application/json" \
-d '{"image":"debian-12"}' \
"https://api.hetzner.cloud/v1/servers/${{ secrets.STAGING_SERVER_ID }}/actions/rebuild"
- run: scripts/initenv.sh
- name: append venv/bin to PATH
run: echo venv/bin >>$GITHUB_PATH
- name: upload TLS cert after rebuilding
run: |
echo " --- wait until staging2.testrun.org VPS is rebuilt --- "
rm ~/.ssh/known_hosts
while ! ssh -o ConnectTimeout=180 -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org id -u ; do sleep 1 ; done
ssh -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org id -u
# download acme & dkim state from ns.testrun.org
rsync -e "ssh -o StrictHostKeyChecking=accept-new" -avz root@ns.testrun.org:/tmp/acme acme-restore || true
rsync -avz root@ns.testrun.org:/tmp/dkimkeys dkimkeys-restore || true
# restore acme & dkim state to staging2.testrun.org
rsync -avz acme-restore/acme root@staging2.testrun.org:/var/lib/ || true
rsync -avz dkimkeys-restore/dkimkeys root@staging2.testrun.org:/etc/ || true
ssh -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org chown root:root -R /var/lib/acme || true
- name: add hpk42 key to staging server
run: ssh root@staging2.testrun.org 'curl -s https://github.com/hpk42.keys >> .ssh/authorized_keys'
- name: run deploy-chatmail offline tests
run: pytest --pyargs cmdeploy
- run: |
cmdeploy init staging2.testrun.org
sed -i 's/#\s*mtail_address/mtail_address/' chatmail.ini
- run: cmdeploy run --verbose --skip-dns-check
- name: set DNS entries
run: |
cmdeploy dns --zonefile staging-generated.zone --verbose
cat staging-generated.zone >> .github/workflows/staging.testrun.org-default.zone
cat .github/workflows/staging.testrun.org-default.zone
scp .github/workflows/staging.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging2.testrun.org.zone
ssh root@ns.testrun.org nsd-checkzone staging2.testrun.org /etc/nsd/staging2.testrun.org.zone
ssh root@ns.testrun.org systemctl reload nsd
- name: cmdeploy test
run: CHATMAIL_DOMAIN2=ci-chatmail.testrun.org cmdeploy test --slow
- name: cmdeploy dns
run: cmdeploy dns -v

View File

@@ -260,10 +260,10 @@ def test_cmd(args, out):
pytest_args = [
pytest_path,
"cmdeploy/src/",
"-n4",
"-rs",
"-x",
"-v",
"-vv" if args.verbose > 1 else "-v",
"-s",
"--durations=5",
]
if args.slow:
@@ -437,7 +437,7 @@ def main(args=None):
if args.func is None:
return parser.parse_args(["-h"])
out = Out(sepchar="\u2501", verbosity=args.verbose)
out = Out(verbosity=args.verbose)
kwargs = {}
if args.inipath is not None and args.func.__name__ not in ("init_cmd", "fmt_cmd"):

View File

@@ -463,8 +463,9 @@ class ChatmailDeployer(Deployer):
("iroh", None, None),
]
def __init__(self, mail_domain):
self.mail_domain = mail_domain
def __init__(self, config):
self.config = config
self.mail_domain = config.mail_domain
def install(self):
files.put(
@@ -493,6 +494,17 @@ class ChatmailDeployer(Deployer):
)
def configure(self):
# Ensure the per-domain mailbox directory exists before
# chatmail-metadata starts (it crashes without it).
files.directory(
name="Ensure vmail mailbox directory exists",
path=f"/home/vmail/mail/{self.mail_domain}",
user="vmail",
group="vmail",
mode="700",
present=True,
)
# This file is used by auth proxy.
# https://wiki.debian.org/EtcMailName
server.shell(
@@ -502,6 +514,15 @@ class ChatmailDeployer(Deployer):
],
)
files.directory(
name=f"Ensure mailboxes directory {self.config.mailboxes_dir} exists",
path=str(self.config.mailboxes_dir),
user="vmail",
group="vmail",
mode="700",
present=True,
)
class FcgiwrapDeployer(Deployer):
def install(self):
@@ -620,7 +641,7 @@ def deploy_chatmail(config_path: Path, disable_mail: bool, website_only: bool) -
tls_deployer = get_tls_deployer(config, mail_domain)
all_deployers = [
ChatmailDeployer(mail_domain),
ChatmailDeployer(config),
LegacyRemoveDeployer(),
FiltermailDeployer(),
JournaldDeployer(),

View File

@@ -4,7 +4,7 @@ import os
import time
from ..util import get_git_hash, get_version_string, shell
from .incus import RELAY_IMAGE_ALIAS, Incus, RelayContainer
from .incus import Incus, RelayContainer
RELAY_NAMES = ("test0", "test1")
@@ -47,6 +47,7 @@ def _lxc_start_cmd(args, out):
out.green("Ensuring DNS container (ns-localchat) ...")
dns_ct = ix.get_dns_container()
dns_ct.ensure()
dns_ct.ensure_cached_as_image()
sub.print(f"DNS container IP: {dns_ct.ipv4}")
names = args.names if args.names else RELAY_NAMES
@@ -114,14 +115,53 @@ def _lxc_start_cmd(args, out):
)
sub.green(f" Include {ssh_cfg}")
# Optionally run cmdeploy run on each relay
# Optionally run cmdeploy run + dns on each relay
if args.run:
local_hash = get_git_hash()
for ct in relays:
status = _deploy_status(ct, local_hash, ix)
with out.section(f"cmdeploy run: {ct.sname} ({ct.domain})"):
ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"])
if "IN-SYNC" in status:
out.print(f"{ct.sname} is {status}, skipping")
else:
ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"])
if ret:
out.red(f"Deploy to {ct.sname} failed (exit {ret})")
return ret
# Cache a per-relay image after each successful deploy
# so the next run can launch directly from the deployed state.
with out.section(f"lxc-test: caching {ct.sname} image"):
ct.ensure_cached_as_image()
# Restart mail services to flush stale DNS state.
# Cached container images boot with a resolv.conf
# pointing to the previous run's DNS IP;
# configure_dns() already restarted unbound,
# but postfix/dovecot may hold stale results
# from the window between boot and DNS fix.
for ct in relays:
out.print(f"Restarting mail services on {ct.name} ...")
ct.bash("systemctl restart postfix dovecot opendkim")
for ct in relays:
with out.section(f"cmdeploy dns: {ct.sname} ({ct.domain})"):
ret = _run_cmdeploy(
"dns",
ct,
ix,
out,
extra=["--zonefile", str(ct.zone)],
)
if ret:
out.red(f"Deploy to {ct.sname} failed (exit {ret})")
out.red(f"DNS for {ct.sname} failed (exit {ret})")
return ret
if ct.zone.exists():
dns_ct.set_dns_records(ct.zone.read_text())
# Restart filtermail so its in-process DNS cache
# does not hold stale negative DKIM responses
# from before the zones were loaded.
out.print(f"Restarting filtermail-incoming on {ct.name} ...")
ct.bash("systemctl restart filtermail-incoming")
# -------------------------------------------------------------------
@@ -195,64 +235,26 @@ def lxc_test_cmd(args, out):
"""
ix = Incus(out)
t_total = time.time()
relay_names = list(RELAY_NAMES)
if args.one:
relay_names = relay_names[:1]
v_flag = " -" + "v" * out.verbosity if out.verbosity > 0 else ""
local_hash = get_git_hash()
ret = out.shell(f"cmdeploy lxc-start{v_flag} --run test0", cwd=str(ix.project_root))
if ret:
return ret
# Per-relay: start, deploy, then snapshot the first relay as a
# reusable image so the second relay launches pre-deployed.
ipv4_only_flags = {RELAY_NAMES[0]: False, RELAY_NAMES[1]: True}
for ct in map(ix.get_container, relay_names):
name = ct.sname
ipv4_only = ipv4_only_flags.get(name, False)
v_flag = " -" + "v" * out.verbosity if out.verbosity > 0 else ""
start_cmd = f"cmdeploy lxc-start{v_flag} {name}"
if ipv4_only:
start_cmd += " --ipv4-only"
with out.section(f"cmdeploy lxc-start: {name}"):
ret = out.shell(start_cmd, cwd=str(ix.project_root))
if ret:
return ret
status = _deploy_status(ct, local_hash, ix)
with out.section(f"cmdeploy run: {name}"):
if "IN-SYNC" in status:
out.print(f"{name} is {status}, skipping")
else:
ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"])
if ret:
out.red(f"Deploy to {name} failed (exit {ret})")
return ret
# Snapshot the first relay so subsequent ones launch pre-deployed
if not ix.find_image([RELAY_IMAGE_ALIAS]):
with out.section("lxc-test: caching relay image"):
ct.publish_as_relay_image()
for ct in map(ix.get_container, relay_names):
with out.section(f"cmdeploy dns: {ct.sname} ({ct.domain})"):
ret = _run_cmdeploy("dns", ct, ix, out, extra=["--zonefile", str(ct.zone)])
if ret:
out.red(f"DNS for {ct.sname} failed (exit {ret})")
return ret
with out.section(f"lxc-test: loading DNS zones {' & '.join(relay_names)}"):
dns_ct = ix.get_dns_container()
for ct in map(ix.get_container, relay_names):
if ct.zone.exists():
zone_data = ct.zone.read_text()
out.print(f"Loading {ct.zone} into PowerDNS ...")
dns_ct.set_dns_records(zone_data)
if not args.one:
ret = out.shell(
f"cmdeploy lxc-start{v_flag} --run test1 --ipv4-only",
cwd=str(ix.project_root),
)
if ret:
return ret
with out.section("cmdeploy test"):
first = ix.get_container(relay_names[0])
first = ix.get_container("test0")
env = None
if len(relay_names) > 1:
if not args.one:
env = os.environ.copy()
env["CHATMAIL_DOMAIN2"] = ix.get_container(relay_names[1]).domain
env["CHATMAIL_DOMAIN2"] = ix.get_container("test1").domain
ret = _run_cmdeploy("test", first, ix, out, **({"env": env} if env else {}))
if ret:
out.red(f"Tests failed (exit {ret})")

View File

@@ -14,9 +14,9 @@ DOMAIN_SUFFIX = ".localchat"
UPSTREAM_IMAGE = "images:debian/12"
BASE_IMAGE_ALIAS = "localchat-base"
BASE_SETUP_NAME = "localchat-base-setup"
RELAY_IMAGE_ALIAS = "localchat-relay"
DNS_CONTAINER_NAME = "ns-localchat"
DNS_IMAGE_ALIAS = "localchat-ns"
DNS_DOMAIN = "ns.localchat"
@@ -99,6 +99,22 @@ class Incus:
target = f"include {self.ssh_config_path}".lower()
return any(line.strip().lower() == target for line in lines)
def get_host_nameservers(self):
"""Return upstream nameservers found on the host."""
ns = []
for path in ["/run/systemd/resolve/resolv.conf", "/etc/resolv.conf"]:
p = Path(path)
if p.exists():
for line in p.read_text().splitlines():
if line.strip().startswith("nameserver "):
addr = line.split()[1]
if addr not in ("127.0.0.1", "127.0.0.53", "::1"):
if addr not in ns:
ns.append(addr)
if ns:
break
return ns
def run(self, args, check=True, capture=True, input=None):
"""Run an incus command.
@@ -106,7 +122,7 @@ class Incus:
to the terminal line-by-line while also being captured for
later return via result.stdout.
"""
cmd = ["incus"] + list(args)
cmd = ["incus", "--quiet"] + list(args)
sub = self.out.new_prefixed_out(" ")
if not capture:
@@ -128,9 +144,9 @@ class Incus:
proc = subprocess.Popen(
cmd,
text=True,
stdin=subprocess.PIPE if input else None,
stdin=subprocess.PIPE if input else subprocess.DEVNULL,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
)
stdout_lines = []
@@ -143,15 +159,17 @@ class Incus:
if sub.verbosity >= 2:
sub.print(f" > {line.rstrip()}")
stderr = proc.stderr.read()
ret = proc.wait()
stdout = "".join(stdout_lines)
if check and ret != 0:
for line in stdout.splitlines():
full_output = stdout + stderr
for line in full_output.splitlines():
if sub.verbosity < 1: # and we haven't printed it yet
sub.red(line)
raise subprocess.CalledProcessError(ret, cmd, output=stdout)
raise subprocess.CalledProcessError(ret, cmd, output=stdout, stderr=stderr)
return subprocess.CompletedProcess(cmd, ret, stdout=stdout)
return subprocess.CompletedProcess(cmd, ret, stdout=stdout, stderr=stderr)
def run_json(self, args, check=True):
"""Run an incus command with ``--format=json``.
@@ -166,7 +184,16 @@ class Incus:
)
if result.returncode != 0:
return None
return json.loads(result.stdout)
try:
return json.loads(result.stdout)
except json.JSONDecodeError as e:
msg = f"Incus JSON processing failed for {args!r}: {e!s}"
self.out.red(msg)
self.out.red(f"Captured stdout: {result.stdout!r}")
self.out.red(f"Captured stderr: {result.stderr!r}")
if check:
raise
return None
def run_output(self, args, check=True):
"""Run an incus command and return its stripped stdout.
@@ -189,8 +216,13 @@ class Incus:
return None
def delete_images(self):
"""Delete the cached base and relay images."""
for alias in (RELAY_IMAGE_ALIAS, BASE_IMAGE_ALIAS):
"""Delete localchat-base and per-container images."""
for alias in [
BASE_IMAGE_ALIAS,
DNS_IMAGE_ALIAS,
"localchat-test0",
"localchat-test1",
]:
self.run(["image", "delete", alias], check=False) # ok if absent
def list_managed(self):
@@ -220,7 +252,7 @@ class Incus:
def ensure_base_image(self):
"""Build and cache a base image with openssh and the SSH key.
The image is published as a local incus image with alias
The image is cached as a local incus image with alias
'localchat-base'. Subsequent container launches use this
image instead of the upstream Debian 12, skipping the
slow apt-get install step.
@@ -241,8 +273,10 @@ class Incus:
key_path = self.ssh_key_path
pub_key = key_path.with_suffix(".pub").read_text().strip()
host_ns = self.get_host_nameservers()
ns_lines = "\n".join(f"nameserver {n}" for n in host_ns)
ct.bash(f"""
echo 'nameserver 9.9.9.9' > /etc/resolv.conf
printf '{ns_lines}\n' > /etc/resolv.conf
apt-get -o DPkg::Lock::Timeout=60 update
DEBIAN_FRONTEND=noninteractive apt-get install -y openssh-server python3
systemctl enable ssh
@@ -279,12 +313,11 @@ class Incus:
class Container:
"""The base container handle wraps all interactions with incus."""
def __init__(self, incus, name, domain=None, memory="200MiB"):
def __init__(self, incus, name, domain=None):
self.incus = incus
self.out = incus.out
self.name = name
self.domain = domain or f"{name}{DOMAIN_SUFFIX}"
self.memory = memory
self.ipv4 = None
self.ipv6 = None
@@ -319,7 +352,7 @@ class Container:
def launch(self):
"""Launch from the best available image, return the alias used."""
image = self.incus.find_image([RELAY_IMAGE_ALIAS, BASE_IMAGE_ALIAS])
image = self.incus.find_image([BASE_IMAGE_ALIAS])
if not image:
raise RuntimeError(
f"No base image '{BASE_IMAGE_ALIAS}' found. "
@@ -329,7 +362,6 @@ class Container:
cfg = []
cfg += ("-c", f"{LABEL_KEY}=true")
cfg += ("-c", f"user.localchat-domain={self.domain}")
cfg += ("-c", f"limits.memory={self.memory}")
self.incus.run(["launch", image, self.name, *cfg])
return image
@@ -401,6 +433,18 @@ class Container:
parts = line.split()
return int(parts[2]), int(parts[1])
def ensure_cached_as_image(self):
"""Cache this container as a respective image."""
alias = self.image_alias
if self.incus.find_image([alias]):
return
self.out.print(" Cleaning apt cache before caching image ...")
self.bash("apt-get clean")
self.out.print(f" Caching {self.name!r} as '{alias}' ...")
self.incus.run(["publish", self.name, f"--alias={alias}", "--force"])
self.out.print(f" Image '{alias}' cached.")
self.wait_ready()
class RelayContainer(Container):
"""Container handle for a chatmail relay.
@@ -414,15 +458,24 @@ class RelayContainer(Container):
incus,
f"{name}-localchat",
domain=f"_{name}{DOMAIN_SUFFIX}",
memory="600MiB",
)
self.sname = name
self.image_alias = f"localchat-{name}"
self.ini = incus.lxconfigs_dir / f"chatmail-{name}.ini"
self.zone = incus.lxconfigs_dir / f"{name}.zone"
def launch(self):
"""Launch (from a potentially cached image) and clear inherited chatmail-version."""
image = super().launch()
"""Launch from localchat-{sname} if cached, else localchat-base."""
candidates = [self.image_alias]
candidates.append(BASE_IMAGE_ALIAS)
image = self.incus.find_image(candidates)
assert image, f"No deployment base, candidates: {','.join(candidates)}"
self.out.print(f" Launching from '{image}' image ...")
cfg = []
cfg += ("-c", f"{LABEL_KEY}=true")
cfg += ("-c", f"user.localchat-domain={self.domain}")
self.incus.run(["launch", image, self.name, *cfg])
self.bash("rm -f /etc/chatmail-version")
return image
@@ -439,11 +492,14 @@ class RelayContainer(Container):
self.bash("""
sysctl -w net.ipv6.conf.all.disable_ipv6=1
sysctl -w net.ipv6.conf.default.disable_ipv6=1
mkdir -p /etc/sysctl.d
printf 'net.ipv6.conf.all.disable_ipv6=1\\n
net.ipv6.conf.default.disable_ipv6=1\\n'
> /etc/sysctl.d/99-disable-ipv6.conf
""")
self.push_file_content(
"/etc/sysctl.d/99-disable-ipv6.conf",
"""
net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
""",
)
def configure_hosts(self, ip):
"""Set hostname and /etc/hosts inside the container."""
@@ -454,22 +510,6 @@ class RelayContainer(Container):
echo '{ip} {self.name} {self.domain}' >> /etc/hosts
""")
def publish_as_relay_image(self):
"""Publish this container as a reusable relay image.
Stops the container, 'publishes' it as 'localchat-relay', then restarts it.
"""
if self.incus.find_image([RELAY_IMAGE_ALIAS]):
return
self.out.print(
f" Locally caching {self.name!r} as '{RELAY_IMAGE_ALIAS}' image ..."
)
self.incus.run(
["publish", self.name, f"--alias={RELAY_IMAGE_ALIAS}", "--force"]
)
self.wait_ready()
self.out.print(f" Relay image '{RELAY_IMAGE_ALIAS}' ready.")
def deployed_version(self):
"""Read /etc/chatmail-version, or None if absent."""
return self.bash("cat /etc/chatmail-version", check=False)
@@ -483,22 +523,29 @@ class RelayContainer(Container):
def verify_ssh(self, ssh_config):
"""Verify SSH connectivity to this container."""
cmd = f"ssh -F {ssh_config} -o ConnectTimeout=10 root@{self.domain} hostname"
return shell(cmd, timeout=15).returncode == 0
cmd = f"ssh -F {ssh_config} -o ConnectTimeout=60 root@{self.domain} hostname"
return shell(cmd, timeout=60).returncode == 0
def configure_dns(self, dns_ip):
"""Point this container's resolver at *dns_ip* and verify DNS is reachable."""
self.bash(f"""
systemctl disable --now systemd-resolved 2>/dev/null || true
rm -f /etc/resolv.conf
echo 'nameserver {dns_ip}' > /etc/resolv.conf
printf 'nameserver {dns_ip}\\n' >/etc/resolv.conf
mkdir -p /etc/unbound/unbound.conf.d
printf 'server:\\n domain-insecure: "localchat"\\n\\n
forward-zone:\\n name: "localchat"\\n
forward-addr: {dns_ip}\\n'
> /etc/unbound/unbound.conf.d/localchat-forward.conf
systemctl restart unbound 2>/dev/null || true
""")
self.push_file_content(
"/etc/unbound/unbound.conf.d/localchat-forward.conf",
f"""
server:
domain-insecure: "localchat"
forward-zone:
name: "localchat"
forward-addr: {dns_ip}
""",
)
self.bash("systemctl restart unbound 2>/dev/null || true")
self._wait_dns_reachable(dns_ip)
def _wait_dns_reachable(self, dns_ip, timeout=10):
@@ -545,6 +592,21 @@ class DNSContainer(Container):
def __init__(self, incus):
super().__init__(incus, DNS_CONTAINER_NAME, domain=DNS_DOMAIN)
self.image_alias = DNS_IMAGE_ALIAS
def launch(self):
"""Launch from localchat-ns if cached, else localchat-base."""
image = self.incus.find_image([DNS_IMAGE_ALIAS, BASE_IMAGE_ALIAS])
if not image:
raise RuntimeError(
f"No base image '{BASE_IMAGE_ALIAS}' found. "
"Call ensure_base_image() before launching containers."
)
self.out.print(f" Launching from '{image}' image ...")
cfg = []
cfg += ("-c", f"{LABEL_KEY}=true")
cfg += ("-c", f"user.localchat-domain={self.domain}")
self.incus.run(["launch", image, self.name, *cfg])
def pdnsutil(self, *args, check=True):
"""Run ``pdnsutil <args>`` inside the DNS container."""
@@ -562,7 +624,7 @@ class DNSContainer(Container):
""")
self._wait_dns_ready()
def _wait_dns_ready(self, timeout=10):
def _wait_dns_ready(self, timeout=60):
"""Poll until the recursor answers a query on port 53."""
deadline = time.time() + timeout
while time.time() < deadline:
@@ -604,10 +666,13 @@ class DNSContainer(Container):
if self.run_cmd("which", "pdns_server", check=False) is not None:
return
self.bash("""
host_ns = self.incus.get_host_nameservers()
ns_lines = "\n".join(f"nameserver {n}" for n in host_ns)
self.bash(f"""
systemctl disable --now systemd-resolved 2>/dev/null || true
rm -f /etc/resolv.conf
echo 'nameserver 9.9.9.9' > /etc/resolv.conf
printf '{ns_lines}\n' > /etc/resolv.conf
# Block automatic service startup during package installation
printf '#!/bin/sh\\nexit 101\\n' > /usr/sbin/policy-rc.d
@@ -643,7 +708,6 @@ class DNSContainer(Container):
local-address=0.0.0.0
local-port=53
forward-zones=localchat=127.0.0.1:5353
forward-zones-recurse=.=9.9.9.9;149.112.112.112
allow-from=0.0.0.0/0
dont-query=
dnssec=off

View File

@@ -60,6 +60,10 @@ PidFile /run/opendkim/opendkim.pid
# by the package dns-root-data.
TrustAnchorFile /usr/share/dns/root.key
# Use the local unbound resolver rather than querying root servers directly.
# on IPv4-only hosts opendkim may otherwise try ipv6 requests and time out.
Nameservers 127.0.0.1
# Sign messages when `-o milter_macro_daemon_name=ORIGINATING` is set.
MTA ORIGINATING

View File

@@ -487,13 +487,16 @@ def cmfactory(
@pytest.fixture
def remote(sshdomain, pytestconfig):
return Remote(sshdomain, ssh_config=pytestconfig.getoption("ssh_config"))
r = Remote(sshdomain, ssh_config=pytestconfig.getoption("ssh_config"))
yield r
r.close()
class Remote:
def __init__(self, sshdomain, ssh_config=None):
self.sshdomain = sshdomain
self.ssh_config = ssh_config
self._procs = []
def iter_output(self, logcmd="", ready=None):
getjournal = "journalctl -f" if not logcmd else logcmd
@@ -509,12 +512,15 @@ class Remote:
command.extend(["-F", self.ssh_config])
command.append(f"root@{self.sshdomain}")
[command.append(arg) for arg in getjournal.split()]
self.popen = subprocess.Popen(
popen = subprocess.Popen(
command,
stdin=subprocess.DEVNULL,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
)
self._procs.append(popen)
while 1:
line = self.popen.stdout.readline()
line = popen.stdout.readline()
res = line.decode().strip().lower()
if not res:
break
@@ -523,6 +529,12 @@ class Remote:
ready = None
yield res
def close(self):
while self._procs:
proc = self._procs.pop()
proc.kill()
proc.wait()
@pytest.fixture
def lp(request):

View File

@@ -15,10 +15,10 @@ from termcolor import colored
class Out:
"""Convenience output printer providing coloring and section formatting."""
def __init__(self, sepchar="\u2501", prefix="", verbosity=0):
def __init__(self, prefix="", verbosity=0):
self.section_timings = []
self.prefix = prefix
self.sepchar = sepchar
self.sepchar = "\u2501"
self.verbosity = verbosity
env_width = os.environ.get("_CMDEPLOY_WIDTH")
if env_width:
@@ -31,7 +31,6 @@ class Out:
sharing section_timings with the parent.
"""
out = Out(
sepchar=self.sepchar,
prefix=self.prefix + newprefix,
verbosity=self.verbosity,
)
@@ -90,6 +89,7 @@ class Out:
cmd,
shell=True,
text=True,
stdin=subprocess.DEVNULL,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
env=env,
@@ -133,6 +133,7 @@ def shell(cmd, check=False, **kwargs):
"""
if "capture_output" not in kwargs and "stdout" not in kwargs:
kwargs["capture_output"] = True
kwargs.setdefault("stdin", subprocess.DEVNULL)
return subprocess.run(collapse(cmd), shell=True, text=True, check=check, **kwargs)

View File

@@ -176,7 +176,7 @@ running two `PowerDNS <https://www.powerdns.com/>`_ services:
* **pdns-recursor** (recursive) listens on the Incus
bridge so all containers can use it.
Forwards ``.localchat`` queries to the local
authoritative server and everything else to Quad9 (``9.9.9.9``).
authoritative server and resolves everything else recursively.
After the DNS container is up, ``lxc-start`` configures the Incus bridge
to advertise its IP via DHCP and disables Incus's own DNS.