import locale
from pathlib import Path

import static_ffmpeg
import torch
from pyannote.audio import Pipeline
from whisper import Whisper, load_model

import file_operations


def prep() -> None:
    """Prepare the runtime environment.

    Forces the preferred encoding to UTF-8 (works around locale issues in
    some containerized environments) and downloads/registers a static
    ffmpeg binary on PATH so audio conversion works without a system install.
    """
    locale.getpreferredencoding = lambda: "UTF-8"
    # Download ffmpeg if needed and add it to the process environment.
    static_ffmpeg.add_paths()


def audiofile(url: str, input_path: Path) -> Path:
    """Download the audio at *url* and return a WAV version of it.

    The originally downloaded file is deleted after conversion; only the
    converted ``.wav`` file under *input_path* is kept.

    Args:
        url: Source URL of the audio/video to fetch.
        input_path: Directory where downloaded/converted files are stored.

    Returns:
        Path to the converted WAV file.
    """
    file = file_operations.download_from_url(url, input_path)
    file_wav = file_operations.convert_to_wav(file, input_path)
    # Remove the original download; only the WAV is needed downstream.
    file.unlink()
    return file_wav


def diarization(access_token: str | None) -> Pipeline:
    """Load the pyannote speaker-diarization pipeline.

    Args:
        access_token: Hugging Face access token required to download the
            gated ``pyannote/speaker-diarization`` model, or ``None`` if
            credentials are already configured.

    Returns:
        The pipeline moved to GPU when CUDA is available, otherwise CPU.
    """
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization", use_auth_token=access_token
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return pipeline.to(device)


def whispermodel() -> Whisper:
    """Load the Whisper "large" model into VRAM (or RAM without CUDA).

    Returns:
        The loaded :class:`whisper.Whisper` model on the selected device.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return load_model("large", device=device)