Fix speaker rendered throughout paragraph

Previously, each sentence (or 'segment') produced by Whisper was preceded by
a [speaker] notation. This commit changes that so the speaker is only shown
once, in front of each larger diarization group. This always works because a
new speaker starts a new diarization group.
This commit is contained in:
Marty Oehme 2023-08-23 15:13:04 +02:00
parent 13b5f22953
commit a79f825f66
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A

View file

@@ -73,9 +73,10 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscript
if captions: if captions:
speaker = g[0].split()[-1] speaker = g[0].split()[-1]
txt.append(f"[{speaker}] ")
for c in captions: for c in captions:
txt.append(f"[{speaker}] {c['text']}\n") txt.append(f"{c['text']}")
txt.append("\n") txt.append("\n\n")
output = "".join(txt) output = "".join(txt)
fname = Path.joinpath(transcription_path, "transcription_result.txt") fname = Path.joinpath(transcription_path, "transcription_result.txt")