Export TxtTranscription from txt output function

This commit is contained in:
Marty Oehme 2023-08-23 15:09:53 +02:00
parent f17639a637
commit 6cccf00df4
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@ -1,6 +1,7 @@
import os import os
import re import re
import json import json
from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from pyannote.audio import Pipeline from pyannote.audio import Pipeline
from pydub import AudioSegment from pydub import AudioSegment
@ -9,6 +10,12 @@ from whisper import Whisper
MILLISECONDS_TO_SPACE = 2000 MILLISECONDS_TO_SPACE = 2000
@dataclass
class TxtTranscription:
    """Plain-text transcription result as returned by ``output_txt``.

    Pairs the transcription text with the path of the file that text
    was written to.
    """

    # Full transcription text (the joined output that is written to disk).
    text: str
    # Path of the text file the transcription was written to.
    file: Path
def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path: def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
audiofile_prepended = _add_audio_silence(audiofile) audiofile_prepended = _add_audio_silence(audiofile)
@ -44,7 +51,7 @@ def transcribe(
# TODO clean up this mess # TODO clean up this mess
def output_txt(diarized_groups: list, transcription_path: Path) -> str: def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscription:
txt = list("") txt = list("")
gidx = -1 gidx = -1
for g in diarized_groups: for g in diarized_groups:
@ -67,11 +74,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> str:
txt.append("\n") txt.append("\n")
output = "".join(txt) output = "".join(txt)
with open( fname = Path.joinpath(transcription_path, "transcription_result.txt")
Path.joinpath(transcription_path, "capspeaker.txt"), "w", encoding="utf-8" with open(fname, "w", encoding="utf-8") as file:
) as file:
file.write(output) file.write(output)
return output return TxtTranscription(text=output, file=fname)
def save_diarized_audio_files( def save_diarized_audio_files(