verbanote-server/verbanote/loaders.py

35 lines
985 B
Python
Raw Normal View History

import locale
from pathlib import Path
2023-08-23 11:22:55 +00:00
from whisper import Whisper, load_model
2023-08-22 12:27:52 +00:00
from pyannote.audio import Pipeline
import torch
import static_ffmpeg
import file_operations
2023-08-23 11:22:55 +00:00
def prep() -> None:
    """Prepare the process environment before any audio or model work.

    Replaces ``locale.getpreferredencoding`` with a function that always
    reports ``"UTF-8"`` and asks ``static_ffmpeg`` to put its ffmpeg
    binaries on the search path.

    The locale change is a process-wide monkeypatch and stays in effect
    after this function returns.
    """

    def _utf8_encoding(do_setlocale: bool = True) -> str:
        # Mirror the stdlib signature: callers commonly invoke
        # locale.getpreferredencoding(False), which a zero-argument
        # replacement would reject with a TypeError.
        return "UTF-8"

    # NOTE(review): presumably a workaround for hosts whose locale is not
    # UTF-8 -- confirm the motivation before removing.
    locale.getpreferredencoding = _utf8_encoding
    # download and add ffmpeg to env
    static_ffmpeg.add_paths()
2023-08-20 12:29:36 +00:00
def audiofile(url: str, input_path: Path) -> Path:
    """Fetch the audio at *url* and return the path of its WAV conversion.

    Args:
        url: Location handed to ``file_operations.download_from_url``.
        input_path: Path passed to both the download and the conversion
            helper (presumably the working directory -- verify against
            ``file_operations``).

    Returns:
        Whatever ``file_operations.convert_to_wav`` returns for the
        downloaded file.
    """
    downloaded = file_operations.download_from_url(url, input_path)
    wav_path = file_operations.convert_to_wav(downloaded, input_path)
    # The raw download is only an intermediate artifact once converted.
    downloaded.unlink()
    return wav_path
2023-08-22 12:27:52 +00:00
def diarization(access_token: str | None) -> Pipeline:
    """Load the ``pyannote/speaker-diarization`` pipeline onto a device.

    Args:
        access_token: Forwarded unchanged as ``use_auth_token`` to
            ``Pipeline.from_pretrained``; may be ``None``.

    Returns:
        The result of ``pipeline.to(device)``, where the device is CUDA
        when ``torch.cuda.is_available()`` and CPU otherwise.

    Raises:
        RuntimeError: If ``Pipeline.from_pretrained`` returned ``None``
            instead of a pipeline.
    """
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization", use_auth_token=access_token
    )
    if pipeline is None:
        # from_pretrained signals a failed download of a gated/private
        # pipeline by printing a hint and returning None; without this
        # check the failure surfaces as an opaque AttributeError on .to().
        raise RuntimeError(
            "Could not load 'pyannote/speaker-diarization': "
            "Pipeline.from_pretrained returned None. Check that the access "
            "token is valid and that the model's user conditions have been "
            "accepted."
        )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return pipeline.to(device)
2023-08-23 11:22:55 +00:00
def whispermodel() -> Whisper:
    """Load the ``"large"`` Whisper model.

    The model is placed on CUDA when ``torch.cuda.is_available()``;
    otherwise it is loaded on the CPU.
    """
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")
    return load_model("large", device=target)