diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9af1c3f5..2ee3ec80 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,27 +16,97 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: download filtermail run: curl -L https://github.com/chatmail/filtermail/releases/download/v0.5.2/filtermail-x86_64 -o /usr/local/bin/filtermail && chmod +x /usr/local/bin/filtermail - - name: run chatmaild tests + - name: run chatmaild tests working-directory: chatmaild run: pipx run tox scripts: - name: deploy-chatmail tests + name: deploy-chatmail tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: initenv + - name: initenv run: scripts/initenv.sh - name: append venv/bin to PATH run: echo venv/bin >>$GITHUB_PATH - - name: run formatting checks - run: cmdeploy fmt -v + - name: run formatting checks + run: cmdeploy fmt -v - - name: run deploy-chatmail offline tests - run: pytest --pyargs cmdeploy + - name: run deploy-chatmail offline tests + run: pytest --pyargs cmdeploy - # all other cmdeploy commands require a staging server - # see https://github.com/deltachat/chatmail/issues/100 + lxc-test: + name: LXC deploy and test + runs-on: ubuntu-24.04 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: install incus + run: sudo apt-get update && sudo apt-get install -y incus + + - name: initialise incus + run: | + sudo systemctl stop docker.socket docker || true + sudo iptables -P FORWARD ACCEPT + sudo sysctl -w fs.inotify.max_user_instances=65535 + sudo sysctl -w fs.inotify.max_user_watches=65535 + sudo incus admin init --minimal --quiet + sudo usermod -aG incus-admin "$USER" + + - name: initenv + run: scripts/initenv.sh + + - name: append venv/bin to PATH + run: echo venv/bin >>$GITHUB_PATH + + - name: restore cached images + id: cache-images + uses: actions/cache@v4 + with: + path: | + /tmp/localchat-base.tar.gz + /tmp/localchat-ns.tar.gz + /tmp/localchat-test0.tar.gz + /tmp/localchat-test1.tar.gz + lxconfigs/id_localchat* + key: incus-contain-v2-${{ runner.os }}-${{ github.ref_name }} + restore-keys: | + incus-contain-v2-${{ runner.os }}-${{ github.ref_name }}- + incus-contain-v2-${{ runner.os }}-main- + incus-contain-v2-${{ runner.os }}- + + - name: import cached images + run: | + for alias in localchat-base localchat-ns localchat-test0 localchat-test1; do + if [ -f /tmp/$alias.tar.gz ]; then + sg incus-admin -c "incus --quiet image import /tmp/$alias.tar.gz --alias $alias" || true + fi + done + + - name: cmdeploy lxc-test + run: sg incus-admin -c 'cmdeploy lxc-test -vv' + + - name: show container logs on failure + if: failure() || cancelled() + run: | + for c in test0-localchat test1-localchat; do + echo "::group::$c journal (warnings+errors)" + sg incus-admin -c "incus exec $c -- journalctl -p warning --no-pager -n200" 2>/dev/null || echo "no log" + echo "::endgroup::" + echo "::group::$c failed services" + sg incus-admin -c "incus exec $c -- systemctl --no-pager --failed" || true + echo "::endgroup::" + done + + - name: export images for cache + if: always() + run: | + for alias in localchat-base localchat-ns localchat-test0 localchat-test1; do + sg incus-admin -c "incus --quiet image export $alias /tmp/$alias" || true + done diff --git a/.github/workflows/staging-ipv4.testrun.org-default.zone b/.github/workflows/staging-ipv4.testrun.org-default.zone deleted file mode 100644 index 785b71aa..00000000 --- a/.github/workflows/staging-ipv4.testrun.org-default.zone +++ /dev/null @@ -1,20 +0,0 @@ -;; Zone file for staging-ipv4.testrun.org - -$ORIGIN staging-ipv4.testrun.org. -$TTL 300 - -@ IN SOA ns.testrun.org. root.nine.testrun.org ( - 2023010101 ; Serial - 7200 ; Refresh - 3600 ; Retry - 1209600 ; Expire - 3600 ; Negative response caching TTL -) - -;; Nameservers. -@ IN NS ns.testrun.org. - -;; DNS records. -@ IN A 37.27.95.249 -mta-sts.staging-ipv4.testrun.org. CNAME staging-ipv4.testrun.org. -www.staging-ipv4.testrun.org. CNAME staging-ipv4.testrun.org. diff --git a/.github/workflows/staging.testrun.org-default.zone b/.github/workflows/staging.testrun.org-default.zone deleted file mode 100644 index 444e4d86..00000000 --- a/.github/workflows/staging.testrun.org-default.zone +++ /dev/null @@ -1,21 +0,0 @@ -;; Zone file for staging2.testrun.org - -$ORIGIN staging2.testrun.org. -$TTL 300 - -@ IN SOA ns.testrun.org. root.nine.testrun.org ( - 2023010101 ; Serial - 7200 ; Refresh - 3600 ; Retry - 1209600 ; Expire - 3600 ; Negative response caching TTL -) - -;; Nameservers. -@ IN NS ns.testrun.org. - -;; DNS records. -@ IN A 37.27.24.139 -mta-sts.staging2.testrun.org. CNAME staging2.testrun.org. -www.staging2.testrun.org. CNAME staging2.testrun.org. - diff --git a/.github/workflows/test-and-deploy-ipv4only.yaml b/.github/workflows/test-and-deploy-ipv4only.yaml deleted file mode 100644 index 990963ec..00000000 --- a/.github/workflows/test-and-deploy-ipv4only.yaml +++ /dev/null @@ -1,104 +0,0 @@ -name: deploy on staging-ipv4.testrun.org, and run tests - -on: - push: - branches: - - main - pull_request: - paths-ignore: - - 'scripts/**' - - '**/README.md' - - 'CHANGELOG.md' - - 'LICENSE' - -jobs: - deploy: - name: deploy on staging-ipv4.testrun.org, and run tests - runs-on: ubuntu-latest - timeout-minutes: 30 - environment: - name: staging-ipv4.testrun.org - url: https://staging-ipv4.testrun.org/ - concurrency: staging-ipv4.testrun.org - steps: - - uses: actions/checkout@v4 - - - name: prepare SSH - run: | - mkdir ~/.ssh - echo "${{ secrets.STAGING_SSH_KEY }}" >> ~/.ssh/id_ed25519 - chmod 600 ~/.ssh/id_ed25519 - ssh-keyscan staging-ipv4.testrun.org > ~/.ssh/known_hosts - # save previous acme & dkim state - rsync -avz root@staging-ipv4.testrun.org:/var/lib/acme acme-ipv4 || true - rsync -avz root@staging-ipv4.testrun.org:/etc/dkimkeys dkimkeys-ipv4 || true - # store previous acme & dkim state on ns.testrun.org, if it contains useful certs - if [ -f dkimkeys-ipv4/dkimkeys/opendkim.private ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" dkimkeys-ipv4 root@ns.testrun.org:/tmp/ || true; fi - if [ "$(ls -A acme-ipv4/acme/certs)" ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" acme-ipv4 root@ns.testrun.org:/tmp/ || true; fi - # make sure CAA record isn't set - scp -o StrictHostKeyChecking=accept-new .github/workflows/staging-ipv4.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging-ipv4.testrun.org.zone - ssh root@ns.testrun.org sed -i '/CAA/d' /etc/nsd/staging-ipv4.testrun.org.zone - ssh root@ns.testrun.org nsd-checkzone staging-ipv4.testrun.org /etc/nsd/staging-ipv4.testrun.org.zone - ssh root@ns.testrun.org systemctl reload nsd - - - name: rebuild staging-ipv4.testrun.org to have a clean VPS - run: | - curl -X POST \ - -H "Authorization: Bearer ${{ secrets.HETZNER_API_TOKEN }}" \ - -H "Content-Type: application/json" \ - -d '{"image":"debian-12"}' \ - "https://api.hetzner.cloud/v1/servers/${{ secrets.STAGING_IPV4_SERVER_ID }}/actions/rebuild" - - - run: scripts/initenv.sh - - - name: append venv/bin to PATH - run: echo venv/bin >>$GITHUB_PATH - - - name: upload TLS cert after rebuilding - run: | - echo " --- wait until staging-ipv4.testrun.org VPS is rebuilt --- " - rm ~/.ssh/known_hosts - while ! ssh -o ConnectTimeout=180 -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org id -u ; do sleep 1 ; done - ssh -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org id -u - # download acme & dkim state from ns.testrun.org - rsync -e "ssh -o StrictHostKeyChecking=accept-new" -avz root@ns.testrun.org:/tmp/acme-ipv4/acme acme-restore || true - rsync -avz root@ns.testrun.org:/tmp/dkimkeys-ipv4/dkimkeys dkimkeys-restore || true - # restore acme & dkim state to staging2.testrun.org - rsync -avz acme-restore/acme root@staging-ipv4.testrun.org:/var/lib/ || true - rsync -avz dkimkeys-restore/dkimkeys root@staging-ipv4.testrun.org:/etc/ || true - ssh -o StrictHostKeyChecking=accept-new -v root@staging-ipv4.testrun.org chown root:root -R /var/lib/acme || true - - - name: run deploy-chatmail offline tests - run: pytest --pyargs cmdeploy - - - name: setup dependencies - run: | - ssh root@staging-ipv4.testrun.org apt update - ssh root@staging-ipv4.testrun.org apt install -y git python3.11-venv python3-dev gcc - ssh root@staging-ipv4.testrun.org git clone https://github.com/chatmail/relay - ssh root@staging-ipv4.testrun.org "cd relay && git checkout " ${{ github.head_ref }} - ssh root@staging-ipv4.testrun.org "cd relay && scripts/initenv.sh" - - - name: initialize config - run: | - ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy init staging-ipv4.testrun.org" - ssh root@staging-ipv4.testrun.org "sed -i 's#disable_ipv6 = False#disable_ipv6 = True#' relay/chatmail.ini" - ssh root@staging-ipv4.testrun.org "sed -i 's/#\s*mtail_address/mtail_address/' relay/chatmail.ini" - - - run: ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy run --verbose --skip-dns-check --ssh-host localhost" - - - name: set DNS entries - run: | - ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy dns --zonefile staging-generated.zone --ssh-host localhost" - ssh root@staging-ipv4.testrun.org cat relay/staging-generated.zone >> .github/workflows/staging-ipv4.testrun.org-default.zone - cat .github/workflows/staging-ipv4.testrun.org-default.zone - scp .github/workflows/staging-ipv4.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging-ipv4.testrun.org.zone - ssh root@ns.testrun.org nsd-checkzone staging-ipv4.testrun.org /etc/nsd/staging-ipv4.testrun.org.zone - ssh root@ns.testrun.org systemctl reload nsd - - - name: cmdeploy test - run: ssh root@staging-ipv4.testrun.org "cd relay && CHATMAIL_DOMAIN2=ci-chatmail.testrun.org scripts/cmdeploy test --slow --ssh-host localhost" - - - name: cmdeploy dns - run: ssh root@staging-ipv4.testrun.org "cd relay && scripts/cmdeploy dns -v --ssh-host localhost" - diff --git a/.github/workflows/test-and-deploy.yaml b/.github/workflows/test-and-deploy.yaml deleted file mode 100644 index 2f744cb8..00000000 --- a/.github/workflows/test-and-deploy.yaml +++ /dev/null @@ -1,97 +0,0 @@ -name: deploy on staging2.testrun.org, and run tests - -on: - push: - branches: - - main - pull_request: - paths-ignore: - - 'scripts/**' - - '**/README.md' - - 'CHANGELOG.md' - - 'LICENSE' - -jobs: - deploy: - name: deploy on staging2.testrun.org, and run tests - runs-on: ubuntu-latest - timeout-minutes: 30 - environment: - name: staging2.testrun.org - url: https://staging2.testrun.org/ - concurrency: staging2.testrun.org - steps: - - uses: actions/checkout@v4 - - - name: prepare SSH - run: | - mkdir ~/.ssh - echo "${{ secrets.STAGING_SSH_KEY }}" >> ~/.ssh/id_ed25519 - chmod 600 ~/.ssh/id_ed25519 - ssh-keyscan staging2.testrun.org > ~/.ssh/known_hosts - # save previous acme & dkim state - rsync -avz root@staging2.testrun.org:/var/lib/acme . || true - rsync -avz root@staging2.testrun.org:/etc/dkimkeys . || true - # store previous acme & dkim state on ns.testrun.org, if it contains useful certs - if [ -f dkimkeys/opendkim.private ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" dkimkeys root@ns.testrun.org:/tmp/ || true; fi - if [ "$(ls -A acme/certs)" ]; then rsync -avz -e "ssh -o StrictHostKeyChecking=accept-new" acme root@ns.testrun.org:/tmp/ || true; fi - # make sure CAA record isn't set - scp -o StrictHostKeyChecking=accept-new .github/workflows/staging.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging2.testrun.org.zone - ssh root@ns.testrun.org sed -i '/CAA/d' /etc/nsd/staging2.testrun.org.zone - ssh root@ns.testrun.org nsd-checkzone staging2.testrun.org /etc/nsd/staging2.testrun.org.zone - ssh root@ns.testrun.org systemctl reload nsd - - - name: rebuild staging2.testrun.org to have a clean VPS - run: | - curl -X POST \ - -H "Authorization: Bearer ${{ secrets.HETZNER_API_TOKEN }}" \ - -H "Content-Type: application/json" \ - -d '{"image":"debian-12"}' \ - "https://api.hetzner.cloud/v1/servers/${{ secrets.STAGING_SERVER_ID }}/actions/rebuild" - - - run: scripts/initenv.sh - - - name: append venv/bin to PATH - run: echo venv/bin >>$GITHUB_PATH - - - name: upload TLS cert after rebuilding - run: | - echo " --- wait until staging2.testrun.org VPS is rebuilt --- " - rm ~/.ssh/known_hosts - while ! ssh -o ConnectTimeout=180 -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org id -u ; do sleep 1 ; done - ssh -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org id -u - # download acme & dkim state from ns.testrun.org - rsync -e "ssh -o StrictHostKeyChecking=accept-new" -avz root@ns.testrun.org:/tmp/acme acme-restore || true - rsync -avz root@ns.testrun.org:/tmp/dkimkeys dkimkeys-restore || true - # restore acme & dkim state to staging2.testrun.org - rsync -avz acme-restore/acme root@staging2.testrun.org:/var/lib/ || true - rsync -avz dkimkeys-restore/dkimkeys root@staging2.testrun.org:/etc/ || true - ssh -o StrictHostKeyChecking=accept-new -v root@staging2.testrun.org chown root:root -R /var/lib/acme || true - - - name: add hpk42 key to staging server - run: ssh root@staging2.testrun.org 'curl -s https://github.com/hpk42.keys >> .ssh/authorized_keys' - - - name: run deploy-chatmail offline tests - run: pytest --pyargs cmdeploy - - - run: | - cmdeploy init staging2.testrun.org - sed -i 's/#\s*mtail_address/mtail_address/' chatmail.ini - - - run: cmdeploy run --verbose --skip-dns-check - - - name: set DNS entries - run: | - cmdeploy dns --zonefile staging-generated.zone --verbose - cat staging-generated.zone >> .github/workflows/staging.testrun.org-default.zone - cat .github/workflows/staging.testrun.org-default.zone - scp .github/workflows/staging.testrun.org-default.zone root@ns.testrun.org:/etc/nsd/staging2.testrun.org.zone - ssh root@ns.testrun.org nsd-checkzone staging2.testrun.org /etc/nsd/staging2.testrun.org.zone - ssh root@ns.testrun.org systemctl reload nsd - - - name: cmdeploy test - run: CHATMAIL_DOMAIN2=ci-chatmail.testrun.org cmdeploy test --slow - - - name: cmdeploy dns - run: cmdeploy dns -v - diff --git a/cmdeploy/src/cmdeploy/cmdeploy.py b/cmdeploy/src/cmdeploy/cmdeploy.py index f5050271..b7fb69f6 100644 --- a/cmdeploy/src/cmdeploy/cmdeploy.py +++ b/cmdeploy/src/cmdeploy/cmdeploy.py @@ -260,10 +260,10 @@ def test_cmd(args, out): pytest_args = [ pytest_path, "cmdeploy/src/", - "-n4", "-rs", "-x", - "-v", + "-vv" if args.verbose > 1 else "-v", + "-s", "--durations=5", ] if args.slow: diff --git a/cmdeploy/src/cmdeploy/lxc/cli.py b/cmdeploy/src/cmdeploy/lxc/cli.py index eddd3b5e..1257f993 100644 --- a/cmdeploy/src/cmdeploy/lxc/cli.py +++ b/cmdeploy/src/cmdeploy/lxc/cli.py @@ -4,7 +4,7 @@ import os import time from ..util import get_git_hash, get_version_string, shell -from .incus import RELAY_IMAGE_ALIAS, Incus, RelayContainer +from .incus import Incus, RelayContainer RELAY_NAMES = ("test0", "test1") @@ -47,6 +47,7 @@ def _lxc_start_cmd(args, out): out.green("Ensuring DNS container (ns-localchat) ...") dns_ct = ix.get_dns_container() dns_ct.ensure() + dns_ct.ensure_cached_as_image() sub.print(f"DNS container IP: {dns_ct.ipv4}") names = args.names if args.names else RELAY_NAMES @@ -116,17 +117,39 @@ def _lxc_start_cmd(args, out): # Optionally run cmdeploy run + dns on each relay if args.run: + local_hash = get_git_hash() for ct in relays: + status = _deploy_status(ct, local_hash, ix) with out.section(f"cmdeploy run: {ct.sname} ({ct.domain})"): - ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"]) - if ret: - out.red(f"Deploy to {ct.sname} failed (exit {ret})") - return ret + if "IN-SYNC" in status: + out.print(f"{ct.sname} is {status}, skipping") + else: + ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"]) + if ret: + out.red(f"Deploy to {ct.sname} failed (exit {ret})") + return ret + # Cache a per-relay image after each successful deploy + # so the next run can launch directly from the deployed state. + with out.section(f"lxc-test: caching {ct.sname} image"): + ct.ensure_cached_as_image() - with out.section("loading DNS zones"): - for ct in relays: + # Restart mail services to flush stale DNS state. + # Cached container images boot with a resolv.conf + # pointing to the previous run's DNS IP; + # configure_dns() already restarted unbound, + # but postfix/dovecot may hold stale results + # from the window between boot and DNS fix. + for ct in relays: + out.print(f"Restarting mail services on {ct.name} ...") + ct.bash("systemctl restart postfix dovecot opendkim") + + for ct in relays: + with out.section(f"cmdeploy dns: {ct.sname} ({ct.domain})"): ret = _run_cmdeploy( - "dns", ct, ix, out, + "dns", + ct, + ix, + out, extra=["--zonefile", str(ct.zone)], ) if ret: @@ -134,7 +157,10 @@ def _lxc_start_cmd(args, out): return ret if ct.zone.exists(): dns_ct.set_dns_records(ct.zone.read_text()) - out.print(f"Restarting filtermail-incoming on {ct.name}") + # Restart filtermail so its in-process DNS cache + # does not hold stale negative DKIM responses + # from before the zones were loaded. + out.print(f"Restarting filtermail-incoming on {ct.name} ...") ct.bash("systemctl restart filtermail-incoming") @@ -209,71 +235,26 @@ def lxc_test_cmd(args, out): """ ix = Incus(out) t_total = time.time() - relay_names = list(RELAY_NAMES) - if args.one: - relay_names = relay_names[:1] + v_flag = " -" + "v" * out.verbosity if out.verbosity > 0 else "" - local_hash = get_git_hash() + ret = out.shell(f"cmdeploy lxc-start{v_flag} --run test0", cwd=str(ix.project_root)) + if ret: + return ret - # Per-relay: start, deploy, then snapshot the first relay as a - # reusable image so the second relay launches pre-deployed. - ipv4_only_flags = {RELAY_NAMES[0]: False, RELAY_NAMES[1]: True} - - for ct in map(ix.get_container, relay_names): - name = ct.sname - ipv4_only = ipv4_only_flags.get(name, False) - v_flag = " -" + "v" * out.verbosity if out.verbosity > 0 else "" - start_cmd = f"cmdeploy lxc-start{v_flag} {name}" - if ipv4_only: - start_cmd += " --ipv4-only" - with out.section(f"cmdeploy lxc-start: {name}"): - ret = out.shell(start_cmd, cwd=str(ix.project_root)) - if ret: - return ret - - status = _deploy_status(ct, local_hash, ix) - with out.section(f"cmdeploy run: {name}"): - if "IN-SYNC" in status: - out.print(f"{name} is {status}, skipping") - else: - ret = _run_cmdeploy("run", ct, ix, out, extra=["--skip-dns-check"]) - if ret: - out.red(f"Deploy to {name} failed (exit {ret})") - return ret - - # Snapshot the first relay so subsequent ones launch pre-deployed - if not ix.find_image([RELAY_IMAGE_ALIAS]): - with out.section("lxc-test: caching relay image"): - ct.publish_as_relay_image() - - for ct in map(ix.get_container, relay_names): - with out.section(f"cmdeploy dns: {ct.sname} ({ct.domain})"): - ret = _run_cmdeploy("dns", ct, ix, out, extra=["--zonefile", str(ct.zone)]) - if ret: - out.red(f"DNS for {ct.sname} failed (exit {ret})") - return ret - - with out.section(f"lxc-test: loading DNS zones {' & '.join(relay_names)}"): - dns_ct = ix.get_dns_container() - for ct in map(ix.get_container, relay_names): - if ct.zone.exists(): - zone_data = ct.zone.read_text() - out.print(f"Loading {ct.zone} into PowerDNS ...") - dns_ct.set_dns_records(zone_data) - - # Restart filtermail so its in-process DNS cache - # does not hold stale negative DKIM responses - # from before the zones were loaded. - for ct in map(ix.get_container, relay_names): - out.print(f"Restarting filtermail-incoming on {ct.name} ...") - ct.bash("systemctl restart filtermail-incoming") + if not args.one: + ret = out.shell( + f"cmdeploy lxc-start{v_flag} --run test1 --ipv4-only", + cwd=str(ix.project_root), + ) + if ret: + return ret with out.section("cmdeploy test"): - first = ix.get_container(relay_names[0]) + first = ix.get_container("test0") env = None - if len(relay_names) > 1: + if not args.one: env = os.environ.copy() - env["CHATMAIL_DOMAIN2"] = ix.get_container(relay_names[1]).domain + env["CHATMAIL_DOMAIN2"] = ix.get_container("test1").domain ret = _run_cmdeploy("test", first, ix, out, **({"env": env} if env else {})) if ret: out.red(f"Tests failed (exit {ret})") diff --git a/cmdeploy/src/cmdeploy/lxc/incus.py b/cmdeploy/src/cmdeploy/lxc/incus.py index 36700c1d..90c331a7 100644 --- a/cmdeploy/src/cmdeploy/lxc/incus.py +++ b/cmdeploy/src/cmdeploy/lxc/incus.py @@ -14,9 +14,9 @@ DOMAIN_SUFFIX = ".localchat" UPSTREAM_IMAGE = "images:debian/12" BASE_IMAGE_ALIAS = "localchat-base" BASE_SETUP_NAME = "localchat-base-setup" -RELAY_IMAGE_ALIAS = "localchat-relay" DNS_CONTAINER_NAME = "ns-localchat" +DNS_IMAGE_ALIAS = "localchat-ns" DNS_DOMAIN = "ns.localchat" @@ -184,7 +184,16 @@ class Incus: ) if result.returncode != 0: return None - return json.loads(result.stdout) + try: + return json.loads(result.stdout) + except json.JSONDecodeError as e: + msg = f"Incus JSON processing failed for {args!r}: {e!s}" + self.out.red(msg) + self.out.red(f"Captured stdout: {result.stdout!r}") + self.out.red(f"Captured stderr: {result.stderr!r}") + if check: + raise + return None def run_output(self, args, check=True): """Run an incus command and return its stripped stdout. @@ -207,8 +216,13 @@ class Incus: return None def delete_images(self): - """Delete the cached base and relay images.""" - for alias in (RELAY_IMAGE_ALIAS, BASE_IMAGE_ALIAS): + """Delete localchat-base and per-container images.""" + for alias in [ + BASE_IMAGE_ALIAS, + DNS_IMAGE_ALIAS, + "localchat-test0", + "localchat-test1", + ]: self.run(["image", "delete", alias], check=False) # ok if absent def list_managed(self): @@ -238,7 +252,7 @@ class Incus: def ensure_base_image(self): """Build and cache a base image with openssh and the SSH key. - The image is published as a local incus image with alias + The image is cached as a local incus image with alias 'localchat-base'. Subsequent container launches use this image instead of the upstream Debian 12, skipping the slow apt-get install step. @@ -338,7 +352,7 @@ class Container: def launch(self): """Launch from the best available image, return the alias used.""" - image = self.incus.find_image([RELAY_IMAGE_ALIAS, BASE_IMAGE_ALIAS]) + image = self.incus.find_image([BASE_IMAGE_ALIAS]) if not image: raise RuntimeError( f"No base image '{BASE_IMAGE_ALIAS}' found. " @@ -419,6 +433,18 @@ class Container: parts = line.split() return int(parts[2]), int(parts[1]) + def ensure_cached_as_image(self): + """Cache this container as a respective image.""" + alias = self.image_alias + if self.incus.find_image([alias]): + return + self.out.print(" Cleaning apt cache before caching image ...") + self.bash("apt-get clean") + self.out.print(f" Caching {self.name!r} as '{alias}' ...") + self.incus.run(["publish", self.name, f"--alias={alias}", "--force"]) + self.out.print(f" Image '{alias}' cached.") + self.wait_ready() + class RelayContainer(Container): """Container handle for a chatmail relay. @@ -434,12 +460,22 @@ class RelayContainer(Container): domain=f"_{name}{DOMAIN_SUFFIX}", ) self.sname = name + self.image_alias = f"localchat-{name}" self.ini = incus.lxconfigs_dir / f"chatmail-{name}.ini" self.zone = incus.lxconfigs_dir / f"{name}.zone" def launch(self): - """Launch (from a potentially cached image) and clear inherited chatmail-version.""" - image = super().launch() + """Launch from localchat-{sname} if cached, else localchat-base.""" + + candidates = [self.image_alias] + candidates.append(BASE_IMAGE_ALIAS) + image = self.incus.find_image(candidates) + assert image, f"No deployment base, candidates: {','.join(candidates)}" + self.out.print(f" Launching from '{image}' image ...") + cfg = [] + cfg += ("-c", f"{LABEL_KEY}=true") + cfg += ("-c", f"user.localchat-domain={self.domain}") + self.incus.run(["launch", image, self.name, *cfg]) self.bash("rm -f /etc/chatmail-version") return image @@ -474,22 +510,6 @@ class RelayContainer(Container): echo '{ip} {self.name} {self.domain}' >> /etc/hosts """) - def publish_as_relay_image(self): - """Publish this container as a reusable relay image. - - Stops the container, 'publishes' it as 'localchat-relay', then restarts it. - """ - if self.incus.find_image([RELAY_IMAGE_ALIAS]): - return - self.out.print( - f" Locally caching {self.name!r} as '{RELAY_IMAGE_ALIAS}' image ..." - ) - self.incus.run( - ["publish", self.name, f"--alias={RELAY_IMAGE_ALIAS}", "--force"] - ) - self.wait_ready() - self.out.print(f" Relay image '{RELAY_IMAGE_ALIAS}' ready.") - def deployed_version(self): """Read /etc/chatmail-version, or None if absent.""" return self.bash("cat /etc/chatmail-version", check=False) @@ -572,6 +592,21 @@ class DNSContainer(Container): def __init__(self, incus): super().__init__(incus, DNS_CONTAINER_NAME, domain=DNS_DOMAIN) + self.image_alias = DNS_IMAGE_ALIAS + + def launch(self): + """Launch from localchat-ns if cached, else localchat-base.""" + image = self.incus.find_image([DNS_IMAGE_ALIAS, BASE_IMAGE_ALIAS]) + if not image: + raise RuntimeError( + f"No base image '{BASE_IMAGE_ALIAS}' found. " + "Call ensure_base_image() before launching containers." + ) + self.out.print(f" Launching from '{image}' image ...") + cfg = [] + cfg += ("-c", f"{LABEL_KEY}=true") + cfg += ("-c", f"user.localchat-domain={self.domain}") + self.incus.run(["launch", image, self.name, *cfg]) def pdnsutil(self, *args, check=True): """Run ``pdnsutil `` inside the DNS container."""