GPU fixes

This commit is contained in:
2025-10-05 14:18:54 +02:00
parent c89964694f
commit c5b92ad960
4 changed files with 34 additions and 11 deletions

View File

@@ -54,6 +54,11 @@ WHISPER_CPU_THREADS=4
# 4) Use GPU-friendly precision and device
# WHISPER_DEVICE=cuda
# WHISPER_PRECISION=float16
# 5) (Build-time) use an NVIDIA CUDA runtime base image for the app containers.
# Set an image tag that exists for your architecture (most CUDA images are amd64):
# GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
#
# If you are on ARM64 without discrete NVIDIA GPU, leave GPU_BASE_IMAGE unset and run CPU-only.
# Docker volumes paths
LIBRARY_HOST_DIR=/mnt/nfs/library

View File

@@ -27,11 +27,12 @@ To run Whisper on NVIDIA GPU:
- `NVIDIA_VISIBLE_DEVICES=all` (or a specific GPU index)
- `WHISPER_DEVICE=cuda` (or `auto`)
- `WHISPER_PRECISION=float16` (recommended for GPU)
- Rebuild and start:
- `docker compose up -d --build`
- Optional: set a GPU base image for builds (amd64 typical):
- `GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04`
- Rebuild and start: `docker compose up -d --build`
- Check logs for `device='cuda'` when the transcribe worker loads the model.
This repo's app image is based on `nvidia/cuda:12.4.1-cudnn9-runtime-ubuntu22.04`, which includes the CUDA and cuDNN user-space libraries that faster-whisper requires. On non-GPU hosts it still runs on CPU.
By default we build from `python:3.11-slim`. You can override the base image at build time via `GPU_BASE_IMAGE` to a CUDA runtime tag that exists for your architecture. If you don't specify it or you're on a non-GPU host, the containers run on CPU.
## Components Overview

View File

@@ -1,9 +1,11 @@
# GPU-ready base image with CUDA 12 + cuDNN 9 runtime
# If you don't have an NVIDIA GPU or the NVIDIA Container Toolkit, this image still runs on CPU.
# For smaller CPU-only images, you can switch back to python:3.11-slim.
# Use a widely available CUDA + cuDNN runtime tag. If you prefer newer CUDA,
# adjust this to a tag that exists on Docker Hub.
FROM nvidia/cuda:12.3.2-cudnn8-runtime-ubuntu22.04
# Base image is configurable so you can choose CPU-only or CUDA+cuDNN runtime.
# Default to Python slim for maximum compatibility; override via build-arg
# BASE_IMAGE (or docker-compose build args) to use an NVIDIA CUDA runtime.
ARG BASE_IMAGE=python:3.11-slim
FROM ${BASE_IMAGE}
# Keep python fast/quiet and pip lean
ENV PYTHONDONTWRITEBYTECODE=1 \

View File

@@ -1,6 +1,9 @@
services:
podx-web:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-web
env_file: [.env]
environment:
@@ -56,7 +59,10 @@ services:
# Main worker: handles downloads, indexing, RSS, OWUI, etc. (no heavy Whisper)
podx-worker:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker
command: ["rq", "worker", "-u", "redis://redis:6379/0", "default"]
env_file: [.env]
@@ -121,7 +127,10 @@ services:
# Transcribe-only worker: listens to the "transcribe" queue and runs Whisper jobs
podx-worker-transcribe:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker-transcribe
command: ["rq", "worker", "-u", "redis://redis:6379/0", "transcribe"]
env_file: [.env]
@@ -257,7 +266,10 @@ services:
# Scanner: watches /library and enqueues jobs (heavy jobs go to "transcribe" queue)
podx-scanner:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-scanner
command: ["python", "scanner.py"]
env_file: [.env]
@@ -282,7 +294,10 @@ services:
restart: unless-stopped
podx-rss:
build: ./app
build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-rss
command: ["python", "rss_ingest.py"]
env_file: [.env]