import locale
import os
from pathlib import Path

import gdown
import static_ffmpeg
import torch
import whisper
from pyannote.audio import Pipeline
from whisper import Whisper


def prep() -> None:
    # Work around environments (e.g. Colab) that report a non-UTF-8 locale.
    locale.getpreferredencoding = lambda *args: "UTF-8"
    # Download a static ffmpeg build and add it to PATH.
    static_ffmpeg.add_paths()


def audiofile(drive_url: str, path: str) -> Path | None:
    """Download the interview audio from Google Drive into `path`."""
    if not drive_url:
        return None
    fn = Path(path) / "interview"
    gdown.download(drive_url, str(fn))
    return fn


def diarization(access_token: str | None) -> Pipeline:
    """Load the pretrained pyannote speaker-diarization pipeline."""
    return Pipeline.from_pretrained(
        "pyannote/speaker-diarization", use_auth_token=access_token
    )


def whisper_model() -> Whisper:
    """Load the Whisper large model into VRAM (or RAM on CPU-only machines)."""
    # Renamed from `whisper()`: a function of that name would shadow the
    # whisper module and make the `whisper.load_model` call below recurse.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return whisper.load_model("large", device=device)
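

# A minimal sketch of how these helpers fit together, assuming a shareable
# Google Drive link and a Hugging Face token in the HF_TOKEN environment
# variable -- both placeholders here, not values from the original code.
def main() -> None:
    prep()
    # Hypothetical Drive URL; replace FILE_ID with your own file's id.
    audio = audiofile("https://drive.google.com/uc?id=FILE_ID", ".")
    if audio is None:
        return
    pipeline = diarization(os.environ.get("HF_TOKEN"))
    model = whisper_model()
    # Who spoke when: the pipeline returns an Annotation whose
    # itertracks(yield_label=True) yields (segment, track, speaker) triples.
    for turn, _, speaker in pipeline(str(audio)).itertracks(yield_label=True):
        print(f"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s")
    # Full transcript of the file (not yet aligned to the speaker turns).
    result = model.transcribe(str(audio))
    print(result["text"])


if __name__ == "__main__":
    main()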