diff --git a/verbanote/process.py b/verbanote/process.py index 5de8954..3020549 100644 --- a/verbanote/process.py +++ b/verbanote/process.py @@ -1,6 +1,7 @@ import os import re import json +from dataclasses import dataclass from pathlib import Path from pyannote.audio import Pipeline from pydub import AudioSegment @@ -9,6 +10,12 @@ from whisper import Whisper MILLISECONDS_TO_SPACE = 2000 +@dataclass +class TxtTranscription: + text: str + file: Path + + def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path: audiofile_prepended = _add_audio_silence(audiofile) @@ -44,7 +51,7 @@ def transcribe( # TODO clean up this mess -def output_txt(diarized_groups: list, transcription_path: Path) -> str: +def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscription: txt = list("") gidx = -1 for g in diarized_groups: @@ -67,11 +74,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> str: txt.append("\n") output = "".join(txt) - with open( - Path.joinpath(transcription_path, "capspeaker.txt"), "w", encoding="utf-8" - ) as file: + fname = Path.joinpath(transcription_path, "transcription_result.txt") + with open(fname, "w", encoding="utf-8") as file: file.write(output) - return output + return TxtTranscription(text=output, file=fname) def save_diarized_audio_files(