GPU fixes

This commit is contained in:
2025-10-05 14:18:54 +02:00
parent c89964694f
commit c5b92ad960
4 changed files with 34 additions and 11 deletions

View File

@@ -54,6 +54,11 @@ WHISPER_CPU_THREADS=4
# 4) Use GPU-friendly precision and device # 4) Use GPU-friendly precision and device
# WHISPER_DEVICE=cuda # WHISPER_DEVICE=cuda
# WHISPER_PRECISION=float16 # WHISPER_PRECISION=float16
# 5) (Build-time) use an NVIDIA CUDA runtime base image for the app containers.
# Set an image tag that exists for your architecture (most CUDA images are amd64):
# GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
#
# If you are on ARM64 without a discrete NVIDIA GPU, leave GPU_BASE_IMAGE unset and run CPU-only.
# Docker volumes paths # Docker volumes paths
LIBRARY_HOST_DIR=/mnt/nfs/library LIBRARY_HOST_DIR=/mnt/nfs/library

View File

@@ -27,11 +27,12 @@ To run Whisper on NVIDIA GPU:
- `NVIDIA_VISIBLE_DEVICES=all` (or a specific GPU index) - `NVIDIA_VISIBLE_DEVICES=all` (or a specific GPU index)
- `WHISPER_DEVICE=cuda` (or `auto`) - `WHISPER_DEVICE=cuda` (or `auto`)
- `WHISPER_PRECISION=float16` (recommended for GPU) - `WHISPER_PRECISION=float16` (recommended for GPU)
- Rebuild and start: - Optional: set a GPU base image for builds (CUDA images are typically amd64-only):
- `docker compose up -d --build` - `GPU_BASE_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04`
- Rebuild and start: `docker compose up -d --build`
- Check logs for `device='cuda'` when the transcribe worker loads the model. - Check logs for `device='cuda'` when the transcribe worker loads the model.
This repo's app image is based on `nvidia/cuda:12.4.1-cudnn9-runtime-ubuntu22.04`, which includes the CUDA and cuDNN user-space libraries that faster-whisper requires. On non-GPU hosts it still runs on CPU. By default we build from `python:3.11-slim`. You can override the base image at build time via `GPU_BASE_IMAGE` to a CUDA runtime tag that exists for your architecture. If you don't specify it or you're on a non-GPU host, the containers run on CPU.
## Components Overview ## Components Overview

View File

@@ -1,9 +1,11 @@
## GPU-ready base image with CUDA 12 + cuDNN 9 runtime ## GPU-ready base image with CUDA 12 + cuDNN 9 runtime
# If you don't have an NVIDIA GPU or the NVIDIA Container Toolkit, this image still runs on CPU. # If you don't have an NVIDIA GPU or the NVIDIA Container Toolkit, this image still runs on CPU.
# For smaller CPU-only images, you can switch back to python:3.11-slim. # For smaller CPU-only images, you can switch back to python:3.11-slim.
# Use a widely available CUDA + cuDNN runtime tag. If you prefer newer CUDA, # Base image is configurable so you can choose CPU-only or CUDA+cuDNN runtime.
# adjust this to a tag that exists on Docker Hub. # Default to Python slim for maximum compatibility; override via build-arg
FROM nvidia/cuda:12.3.2-cudnn8-runtime-ubuntu22.04 # BASE_IMAGE (or docker-compose build args) to use an NVIDIA CUDA runtime.
ARG BASE_IMAGE=python:3.11-slim
FROM ${BASE_IMAGE}
# Keep python fast/quiet and pip lean # Keep python fast/quiet and pip lean
ENV PYTHONDONTWRITEBYTECODE=1 \ ENV PYTHONDONTWRITEBYTECODE=1 \

View File

@@ -1,6 +1,9 @@
services: services:
podx-web: podx-web:
build: ./app build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-web container_name: podx-web
env_file: [.env] env_file: [.env]
environment: environment:
@@ -56,7 +59,10 @@ services:
# Main worker: handles downloads, indexing, RSS, OWUI, etc. (no heavy Whisper) # Main worker: handles downloads, indexing, RSS, OWUI, etc. (no heavy Whisper)
podx-worker: podx-worker:
build: ./app build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker container_name: podx-worker
command: ["rq", "worker", "-u", "redis://redis:6379/0", "default"] command: ["rq", "worker", "-u", "redis://redis:6379/0", "default"]
env_file: [.env] env_file: [.env]
@@ -121,7 +127,10 @@ services:
# Transcribe-only worker: listens to the "transcribe" queue and runs Whisper jobs # Transcribe-only worker: listens to the "transcribe" queue and runs Whisper jobs
podx-worker-transcribe: podx-worker-transcribe:
build: ./app build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-worker-transcribe container_name: podx-worker-transcribe
command: ["rq", "worker", "-u", "redis://redis:6379/0", "transcribe"] command: ["rq", "worker", "-u", "redis://redis:6379/0", "transcribe"]
env_file: [.env] env_file: [.env]
@@ -257,7 +266,10 @@ services:
# Scanner: watches /library and enqueues jobs (heavy jobs go to "transcribe" queue) # Scanner: watches /library and enqueues jobs (heavy jobs go to "transcribe" queue)
podx-scanner: podx-scanner:
build: ./app build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-scanner container_name: podx-scanner
command: ["python", "scanner.py"] command: ["python", "scanner.py"]
env_file: [.env] env_file: [.env]
@@ -282,7 +294,10 @@ services:
restart: unless-stopped restart: unless-stopped
podx-rss: podx-rss:
build: ./app build:
context: ./app
args:
BASE_IMAGE: ${GPU_BASE_IMAGE:-python:3.11-slim}
container_name: podx-rss container_name: podx-rss
command: ["python", "rss_ingest.py"] command: ["python", "rss_ingest.py"]
env_file: [.env] env_file: [.env]