Export TxtTranscription from txt output function
This commit is contained in:
parent
f17639a637
commit
6cccf00df4
1 changed file with 11 additions and 5 deletions
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pyannote.audio import Pipeline
|
from pyannote.audio import Pipeline
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
|
@ -9,6 +10,12 @@ from whisper import Whisper
|
||||||
MILLISECONDS_TO_SPACE = 2000
|
MILLISECONDS_TO_SPACE = 2000
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TxtTranscription:
|
||||||
|
text: str
|
||||||
|
file: Path
|
||||||
|
|
||||||
|
|
||||||
def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
|
def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
|
||||||
audiofile_prepended = _add_audio_silence(audiofile)
|
audiofile_prepended = _add_audio_silence(audiofile)
|
||||||
|
|
||||||
|
@ -44,7 +51,7 @@ def transcribe(
|
||||||
|
|
||||||
|
|
||||||
# TODO clean up this mess
|
# TODO clean up this mess
|
||||||
def output_txt(diarized_groups: list, transcription_path: Path) -> str:
|
def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscription:
|
||||||
txt = list("")
|
txt = list("")
|
||||||
gidx = -1
|
gidx = -1
|
||||||
for g in diarized_groups:
|
for g in diarized_groups:
|
||||||
|
@ -67,11 +74,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> str:
|
||||||
txt.append("\n")
|
txt.append("\n")
|
||||||
|
|
||||||
output = "".join(txt)
|
output = "".join(txt)
|
||||||
with open(
|
fname = Path.joinpath(transcription_path, "transcription_result.txt")
|
||||||
Path.joinpath(transcription_path, "capspeaker.txt"), "w", encoding="utf-8"
|
with open(fname, "w", encoding="utf-8") as file:
|
||||||
) as file:
|
|
||||||
file.write(output)
|
file.write(output)
|
||||||
return output
|
return TxtTranscription(text=output, file=fname)
|
||||||
|
|
||||||
|
|
||||||
def save_diarized_audio_files(
|
def save_diarized_audio_files(
|
||||||
|
|
Loading…
Reference in a new issue