# verbanote-server/verbanote/rp_handler.py

from pathlib import Path
import runpod
from runpod.serverless import os
import loaders
import process


# Working directories are read from the environment, falling back to the
# defaults below when the variables are unset.
output_path = Path(os.getenv("VERBANOTE_OUTPUT_PATH", "/transcriptions"))
input_path = Path(os.getenv("VERBANOTE_INPUT_PATH", "/audiofiles"))

# Token passed to the diarization loader; None when the variable is unset.
access_token = os.getenv("VERBANOTE_HF_TOKEN")

# Everything below runs once at import time, so the loaded pipeline and model
# are shared by every call to the handler.
loaders.prep()
diarize_pipeline = loaders.diarization(access_token)
whisper_model = loaders.whisper()


def handler(job):
    """Process one serverless job: diarize the audio, then transcribe it.

    The audio reference is taken from ``job["input"]["file"]`` and resolved
    with ``loaders.audiofile`` relative to the module-level ``input_path``.
    The resolved audio is then handed, in order, to ``process.diarize``,
    ``process.save_diarized_audio_files`` and ``process.transcribe``, all of
    which receive the module-level ``output_path``.

    Args:
        job: Mapping with an ``"input"`` entry; that entry must support
            ``.get("file")``.

    Returns:
        ``{"error": ...}`` when ``loaders.audiofile`` yields a falsy value.
        Otherwise a dict with the keys ``speaker_timings``,
        ``transcription_text`` and ``transcription_page``; their values are
        currently fixed strings, not derived from the processing results.
    """
    payload = job["input"]
    file_reference = payload.get("file")
    audio = loaders.audiofile(file_reference, path=input_path)
    if not audio:
        return {"error": "missing audio file location"}

    diarization = process.diarize(audio, diarize_pipeline, output_path)
    speaker_groups = process.save_diarized_audio_files(
        diarization,
        audio,
        output_path,
    )
    process.transcribe(
        model=whisper_model,
        diarized_groups=speaker_groups,
        output_path=output_path,
    )

    # NOTE(review): these look like placeholders for real result locations
    # that are not wired up yet — confirm before relying on them.
    result = {
        "speaker_timings": "s3-address-to-speakers",
        "transcription_text": "s3-address-to-transcription",
        "transcription_page": "web-address-to-deployment",
    }
    return result

# speakers = {
#     # speaker, textboxcolor, speaker color
#     "SPEAKER_00": ("SPEAKER00", "white", "darkgreen"),
#     "SPEAKER_01": ("SPEAKER01", "white", "darkorange"),
#     "SPEAKER_02": ("SPEAKER02", "white", "darkred"),
#     "SPEAKER_03": ("SPEAKER03", "white", "darkblue"),
#     "SPEAKER_04": ("SPEAKER04", "white", "darkyellow"),
#     "SPEAKER_05": ("SPEAKER05", "white", "lightgreen"),
#     "SPEAKER_06": ("SPEAKER06", "white", "lightred"),
#     "SPEAKER_07": ("SPEAKER07", "white", "lightblue"),
# }


# Start the RunPod serverless worker only when this file is executed as a
# script (not when imported), passing `handler` as the job handler.
if __name__ == "__main__":
    runpod.serverless.start({"handler": handler})