# verbanote-server/verbanote/rp_handler.py

import logging
from pathlib import Path
import runpod
from runpod.serverless import os
import loaders
import file_operations

import process

# Emit everything from DEBUG upwards through the root logger.
logging.basicConfig(level=logging.DEBUG)

# Working directories; each one can be overridden through the environment.
input_path: Path = Path(os.getenv("VERBANOTE_INPUT_PATH", "/in"))
output_path: Path = Path(os.getenv("VERBANOTE_OUTPUT_PATH", "/out"))

# Token handed to the diarization loader; an empty string when unset.
access_token: str = os.getenv("VERBANOTE_HF_TOKEN", "")

# The pipeline and model are created once at import time so that every
# call to the handler reuses the same objects.
loaders.prep()
diarize_pipeline = loaders.diarization(access_token)
# TODO download model into Docker image beforehand
whisper_model = loaders.whispermodel()


def _parse_flag(value: object, default: bool) -> bool:
    """Interpret a loosely-typed job option as a boolean.

    Args:
        value: Raw option taken from the job payload. May be ``None``
            (option absent), a real boolean, a string or any other value.
        default: Result to use when the option is absent or an empty string.

    Returns:
        ``default`` for ``None`` or a blank string; ``False`` for the strings
        "0", "false", "no" and "off" (case-insensitive); otherwise the
        truthiness of ``value``.
    """
    if value is None:
        return default
    if isinstance(value, str):
        text = value.strip().lower()
        if not text:
            return default
        return text not in {"0", "false", "no", "off"}
    return bool(value)


def handler(job: dict) -> dict:
    """Diarize and transcribe the audio file referenced by a job.

    Reads ``url``, ``lang`` and ``word_timestamps`` from ``job["input"]``,
    fetches the audio, runs diarization and transcription on it, uploads the
    diarization and the text transcription, and reports the results.

    Args:
        job: Job payload. ``job["input"]`` is expected to be a dict with:
            ``url`` (required), ``lang`` (optional, falls back to ``"en"``)
            and ``word_timestamps`` (optional, falls back to ``True``).

    Returns:
        On success, a dict with the keys ``audiofile``, ``diarization_url``,
        ``diarization``, ``transcription_url`` and ``transcription_text``.
        On failure to obtain the audio, a dict with a single ``error`` key.
    """
    # A job without an "input" payload is handled like one without a URL
    # instead of failing with a KeyError/AttributeError.
    params: dict = job.get("input") or {}
    url = params.get("url")
    lang = params.get("lang")
    # ``value or True`` can never yield False, so an explicit request to turn
    # word timestamps off used to be ignored; only fall back when unset.
    word_timestamps = _parse_flag(params.get("word_timestamps"), default=True)

    if not url:
        return {"error": "no file link provided"}

    try:
        audiofile = loaders.audiofile(url, input_path=input_path)
    except Exception:
        # Keep the traceback in the worker log; the caller only receives the
        # short error message below.
        logging.exception("audiofile import failed")
        return {"error": "audiofile import failed"}

    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
    diarized_groups = process.save_diarized_audio_files(
        diarized, audiofile, output_path
    )
    # NOTE(review): the unit of ``expires`` is defined by upload_to_oxo and is
    # not visible from this module - confirm before changing it.
    uploaded_diarization: str = file_operations.upload_to_oxo(file=diarized, expires=1)
    process.transcribe(
        model=whisper_model,
        diarized_groups=diarized_groups,
        files_path=output_path,
        lang=lang or "en",
        word_timestamps=word_timestamps,
    )
    transcription: process.TxtTranscription = process.output_txt(
        diarized_groups, output_path
    )
    uploaded_transcription: str = file_operations.upload_to_oxo(
        file=transcription.file, expires=1
    )

    return {
        "audiofile": str(audiofile),
        "diarization_url": uploaded_diarization,
        "diarization": diarized_groups,
        "transcription_url": uploaded_transcription,
        "transcription_text": transcription.text,
    }


# speakers = {
#     # speaker, textboxcolor, speaker color
#     "SPEAKER_00": ("SPEAKER00", "white", "darkgreen"),
#     "SPEAKER_01": ("SPEAKER01", "white", "darkorange"),
#     "SPEAKER_02": ("SPEAKER02", "white", "darkred"),
#     "SPEAKER_03": ("SPEAKER03", "white", "darkblue"),
#     "SPEAKER_04": ("SPEAKER04", "white", "darkyellow"),
#     "SPEAKER_05": ("SPEAKER05", "white", "lightgreen"),
#     "SPEAKER_06": ("SPEAKER06", "white", "lightred"),
#     "SPEAKER_07": ("SPEAKER07", "white", "lightblue"),
# }


if __name__ == "__main__":
    # Register the job handler and start the serverless worker when this
    # file is executed as a script.
    worker_config = {"handler": handler}
    runpod.serverless.start(worker_config)