Export TxtTranscription from txt output function

This commit is contained in:
Marty Oehme 2023-08-23 15:09:53 +02:00
parent f17639a637
commit 6cccf00df4
Signed by: Marty
GPG Key ID: EDBF2ED917B2EF6A
1 changed file with 11 additions and 5 deletions

View File

@ -1,6 +1,7 @@
import os
import re
import json
from dataclasses import dataclass
from pathlib import Path
from pyannote.audio import Pipeline
from pydub import AudioSegment
@ -9,6 +10,12 @@ from whisper import Whisper
MILLISECONDS_TO_SPACE = 2000
@dataclass
class TxtTranscription:
    """Plain-text transcription result returned by ``output_txt``.

    Bundles the rendered transcription text with the location of the file
    it was written to, so callers can use either one.
    """

    # Complete transcription text, exactly as written to ``file``.
    text: str
    # Path of the text file the transcription was written to.
    file: Path
def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
audiofile_prepended = _add_audio_silence(audiofile)
@ -44,7 +51,7 @@ def transcribe(
# TODO clean up this mess
def output_txt(diarized_groups: list, transcription_path: Path) -> str:
def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscription:
txt = list("")
gidx = -1
for g in diarized_groups:
@ -67,11 +74,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> str:
txt.append("\n")
output = "".join(txt)
with open(
Path.joinpath(transcription_path, "capspeaker.txt"), "w", encoding="utf-8"
) as file:
fname = Path.joinpath(transcription_path, "transcription_result.txt")
with open(fname, "w", encoding="utf-8") as file:
file.write(output)
return output
return TxtTranscription(text=output, file=fname)
def save_diarized_audio_files(