"""RunPod serverless worker that diarizes and transcribes an audio file.

The models are loaded once at import time so that every job handled by
this worker process reuses them.
"""

import logging
import os
from pathlib import Path

import runpod

import loaders
import file_operations
import process

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Working directories and the Hugging Face token are taken from the
# environment, with fallbacks for when the variables are unset.
input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/in"))
output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/out"))
access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")

# Module-level on purpose: executed once per worker, not once per job.
loaders.prep()
diarize_pipeline = loaders.diarization(access_token)
whisper_model = loaders.whispermodel()


def handler(job):
    """Process one job: fetch the audio, diarize it, then transcribe it.

    Args:
        job: Job payload; ``job["input"]`` is a dict read for these keys:
            ``url`` (required) -- location of the audio file to process.
            ``lang`` (optional) -- language passed to the transcription
            step; ``"fr"`` is used when missing or empty.
            ``word_timestamps`` (optional) -- forwarded to the
            transcription step; ``True`` is used when missing or null.

    Returns:
        On success, a dict with the keys ``audiofile``,
        ``diarization_url``, ``diarization`` and ``transcription_text``.
        When ``url`` is missing or the audio file cannot be loaded, a dict
        with a single ``error`` key.
    """
    job_input: dict = job["input"]
    url: str | None = job_input.get("url")
    lang: str | None = job_input.get("lang")
    word_timestamps: bool | None = job_input.get("word_timestamps")

    if not url:
        return {"error": "no file link provided"}

    try:
        audiofile = loaders.audiofile(url, input_path=input_path)
    except Exception:
        # Keep the traceback in the worker logs; the caller only receives
        # the generic message below.
        logger.exception("audiofile import failed for url %r", url)
        return {"error": "audiofile import failed"}

    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
    diarized_groups = process.save_diarized_audio_files(
        diarized, audiofile, output_path
    )

    # NOTE(review): the unit of ``expires`` is defined by
    # file_operations.upload_to_oxo and is not visible here -- confirm.
    uploaded_diarization: str = file_operations.upload_to_oxo(
        file=diarized, expires=1
    )

    # The return value is ignored; presumably transcribe() records its
    # results in ``diarized_groups`` and/or under ``output_path`` for
    # output_txt() to pick up -- TODO confirm.
    process.transcribe(
        model=whisper_model,
        diarized_groups=diarized_groups,
        files_path=output_path,
        lang=lang or "fr",
        # ``word_timestamps or True`` was always truthy, so an explicit
        # ``False`` from the caller could never disable word timestamps.
        word_timestamps=True if word_timestamps is None else word_timestamps,
    )
    transcription = process.output_txt(diarized_groups, output_path)

    return {
        "audiofile": str(audiofile),
        "diarization_url": uploaded_diarization,
        "diarization": diarized_groups,
        "transcription_text": transcription,
    }


# Currently unused speaker table, kept for reference:
# speakers = {
#     # speaker, textboxcolor, speaker color
#     "SPEAKER_00": ("SPEAKER00", "white", "darkgreen"),
#     "SPEAKER_01": ("SPEAKER01", "white", "darkorange"),
#     "SPEAKER_02": ("SPEAKER02", "white", "darkred"),
#     "SPEAKER_03": ("SPEAKER03", "white", "darkblue"),
#     "SPEAKER_04": ("SPEAKER04", "white", "darkyellow"),
#     "SPEAKER_05": ("SPEAKER05", "white", "lightgreen"),
#     "SPEAKER_06": ("SPEAKER06", "white", "lightred"),
#     "SPEAKER_07": ("SPEAKER07", "white", "lightblue"),
# }


if __name__ == "__main__":
    runpod.serverless.start({"handler": handler})