31 lines
840 B
Python
31 lines
840 B
Python
import locale
|
|
from pathlib import Path
|
|
from whisper import Whisper
|
|
from pyannote.audio import Pipeline
|
|
import torch
|
|
import static_ffmpeg
|
|
import gdown
|
|
|
|
|
|
def prep() -> None:
    """Prepare the process environment before any audio work is done.

    Replaces ``locale.getpreferredencoding`` with a function that always
    reports ``"UTF-8"`` and asks ``static_ffmpeg`` to make its ffmpeg
    binaries available on the search path.
    """

    def _utf8(do_setlocale: bool = True) -> str:
        # Keep the optional ``do_setlocale`` argument of the real function so
        # callers that use ``locale.getpreferredencoding(False)`` keep working
        # instead of failing with a TypeError after the override.
        return "UTF-8"

    locale.getpreferredencoding = _utf8
    # download and add ffmpeg to env
    static_ffmpeg.add_paths()
|
|
|
|
def audiofile(drive_url: str, path: str) -> Path | None:
|
|
if not drive_url:
|
|
return None
|
|
fn = Path.joinpath(Path(path), "interview")
|
|
gdown.download(drive_url, str(fn))
|
|
return fn
|
|
|
|
def diarization(access_token: str | None) -> Pipeline:
    """Load the pretrained ``pyannote/speaker-diarization`` pipeline.

    Args:
        access_token: Value forwarded as ``use_auth_token`` to
            ``Pipeline.from_pretrained``; may be ``None``.

    Returns:
        Whatever ``Pipeline.from_pretrained`` yields for that checkpoint.
    """
    checkpoint = "pyannote/speaker-diarization"
    pipeline = Pipeline.from_pretrained(checkpoint, use_auth_token=access_token)
    return pipeline
|
|
|
|
|
|
def whisper(model_name: str = "large") -> Whisper:
    """Load a Whisper speech-recognition model onto the best available device.

    Args:
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"large"``, the value this function previously hard-coded.

    Returns:
        The loaded model, placed on CUDA when ``torch.cuda.is_available()``
        is true and on the CPU otherwise.
    """
    # This function is itself named ``whisper`` and the module only imports
    # the ``Whisper`` class, so a bare ``whisper.load_model`` would look up
    # the attribute on this function and raise AttributeError. Import the
    # loader explicitly instead.
    from whisper import load_model

    # Load the model into VRAM when a GPU is present, otherwise into RAM.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return load_model(model_name, device=device)
|