GPU fixes

This commit is contained in:
2025-10-05 14:18:54 +02:00
parent c89964694f
commit c5b92ad960
4 changed files with 34 additions and 11 deletions

View File

@@ -54,6 +54,11 @@ WHISPER_CPU_THREADS=4
# 4) Use GPU-friendly precision and device
# WHISPER_DEVICE=cuda
# WHISPER_PRECISION=float16
# 5) (Build-time) use an NVIDIA CUDA runtime base image for the app containers.
# Set an image tag that exists for your architecture (most CUDA images are amd64):
# GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
#
# If you are on ARM64 without discrete NVIDIA GPU, leave GPU_BASE_IMAGE unset and run CPU-only.
# Docker volumes paths
LIBRARY_HOST_DIR=/mnt/nfs/library

View File

@@ -27,11 +27,12 @@ To run Whisper on NVIDIA GPU:
- `NVIDIA_VISIBLE_DEVICES=all` (or a specific GPU index)
- `WHISPER_DEVICE=cuda` (or `auto`)
- `WHISPER_PRECISION=float16` (recommended for GPU)
- Rebuild and start:
- `docker compose up -d --build`
- Optional: set a GPU base image for builds (amd64 typical):
- `GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04`
- Rebuild and start: `docker compose up -d --build`
- Check logs for `device='cuda'` when the transcribe worker loads the model.
This repo's app image is based on `nvidia/cuda:12.4.1-cudnn9-runtime-ubuntu22.04`, which includes the CUDA and cuDNN user-space libraries that faster-whisper requires. On non-GPU hosts it still runs on CPU.
By default we build from `python:3.11-slim`. You can override the base image at build time via `GPU_BASE_IMAGE` to a CUDA runtime tag that exists for your architecture. If you don't specify it or you're on a non-GPU host, the containers run on CPU.
## Components Overview

View File

@@ -1,9 +1,11 @@
# GPU-ready base image with CUDA 12 + cuDNN 9 runtime
# If you don't have an NVIDIA GPU or the NVIDIA Container Toolkit, this image still runs on CPU.
# For smaller CPU-only images, you can switch back to python:3.11-slim.
# Use a widely available CUDA + cuDNN runtime tag. If you prefer newer CUDA,
# adjust this to a tag that exists on Docker Hub.
FROM nvidia/cuda:12.3.2-cudnn8-runtime-ubuntu22.04
# Base image is configurable so you can choose CPU-only or CUDA+cuDNN runtime.
# Default to Python slim for maximum compatibility; override via build-arg
# BASE_IMAGE (or docker-compose build args) to use an NVIDIA CUDA runtime.
ARG BASE_IMAGE=python:3.11-slim
FROM ${BASE_IMAGE}
# Keep python fast/quiet and pip lean
ENV PYTHONDONTWRITEBYTECODE=1 \

View File

@@ -1,6 +1,9 @@
services:
podx-web:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-web
env_file: [.env]
environment:
@@ -56,7 +59,10 @@ services:
# Main worker: handles downloads, indexing, RSS, OWUI, etc. (no heavy Whisper)
podx-worker:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker
command: ["rq", "worker", "-u", "redis://redis:6379/0", "default"]
env_file: [.env]
@@ -121,7 +127,10 @@ services:
# Transcribe-only worker: listens to the "transcribe" queue and runs Whisper jobs
podx-worker-transcribe:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker-transcribe
command: ["rq", "worker", "-u", "redis://redis:6379/0", "transcribe"]
env_file: [.env]
@@ -257,7 +266,10 @@ services:
# Scanner: watches /library and enqueues jobs (heavy jobs go to "transcribe" queue)
podx-scanner:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-scanner
command: ["python", "scanner.py"]
env_file: [.env]
@@ -282,7 +294,10 @@ services:
restart: unless-stopped
podx-rss:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-rss
command: ["python", "rss_ingest.py"]
env_file: [.env]