verbanote-server/verbanote/loaders.py
Marty Oehme 7e91b7a1a2
Create simple loaders infrastructure
Loaders for models, necessary ffmpeg binaries and input files.
2023-08-20 13:29:13 +02:00

32 lines
840 B
Python

import locale
from pathlib import Path
from whisper import Whisper
from pyannote.audio import Pipeline
import torch
import static_ffmpeg
import gdown
def prep() -> None:
    """Prepare the process environment before any loader is used.

    Overrides ``locale.getpreferredencoding`` so that it always reports
    ``"UTF-8"`` and asks ``static_ffmpeg`` to add its ffmpeg binaries to
    the environment paths.
    """
    # Keep the stdlib signature (``do_setlocale=True``) on the replacement so
    # callers that pass the flag, e.g. ``getpreferredencoding(False)``, keep
    # working instead of raising a TypeError.
    locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"
    # download and add ffmpeg to env
    static_ffmpeg.add_paths()
def audiofile(drive_url: str, path: str) -> Path | None:
if not drive_url:
return None
fn = Path.joinpath(Path(path), "interview")
gdown.download(drive_url, str(fn))
return fn
def diarization(access_token: str | None) -> Pipeline:
    """Load the pretrained ``pyannote/speaker-diarization`` pipeline.

    Args:
        access_token: Forwarded as ``use_auth_token`` to
            ``Pipeline.from_pretrained``; may be ``None``.

    Returns:
        Whatever ``Pipeline.from_pretrained`` returns for that checkpoint.
    """
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=access_token,
    )
    return pipeline
def whisper() -> Whisper:
    """Load the ``large`` Whisper model on the best available device.

    Returns:
        The model returned by ``whisper.load_model``, placed on CUDA when
        ``torch.cuda.is_available()`` is true and on the CPU otherwise.
    """
    # This function shadows the ``whisper`` package name at module level, so
    # the bare name ``whisper`` here would resolve to this function (which has
    # no ``load_model``). Import the package locally under an alias instead.
    import whisper as whisper_pkg

    # Load the model onto the GPU if there is one, otherwise onto the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return whisper_pkg.load_model("large", device=device)