Export TxtTranscription from txt output function
This commit is contained in:
parent
f17639a637
commit
6cccf00df4
1 changed file with 11 additions and 5 deletions
|
@@ -1,6 +1,7 @@
|
|||
import os
|
||||
import re
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from pyannote.audio import Pipeline
|
||||
from pydub import AudioSegment
|
||||
|
@@ -9,6 +10,12 @@ from whisper import Whisper
|
|||
MILLISECONDS_TO_SPACE = 2000
|
||||
|
||||
|
||||
@dataclass
|
||||
class TxtTranscription:
|
||||
text: str
|
||||
file: Path
|
||||
|
||||
|
||||
def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
|
||||
audiofile_prepended = _add_audio_silence(audiofile)
|
||||
|
||||
|
@@ -44,7 +51,7 @@ def transcribe(
|
|||
|
||||
|
||||
# TODO clean up this mess
|
||||
def output_txt(diarized_groups: list, transcription_path: Path) -> str:
|
||||
def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscription:
|
||||
txt = list("")
|
||||
gidx = -1
|
||||
for g in diarized_groups:
|
||||
|
@@ -67,11 +74,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> str:
|
|||
txt.append("\n")
|
||||
|
||||
output = "".join(txt)
|
||||
with open(
|
||||
Path.joinpath(transcription_path, "capspeaker.txt"), "w", encoding="utf-8"
|
||||
) as file:
|
||||
fname = Path.joinpath(transcription_path, "transcription_result.txt")
|
||||
with open(fname, "w", encoding="utf-8") as file:
|
||||
file.write(output)
|
||||
return output
|
||||
return TxtTranscription(text=output, file=fname)
|
||||
|
||||
|
||||
def save_diarized_audio_files(
|
||||
|
|
Loading…
Reference in a new issue