From 13b5f229533025bbe5d3ee22352c5879a70d2cf2 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Wed, 23 Aug 2023 15:11:44 +0200 Subject: [PATCH] Fix Path classes wrongly used in string concats --- verbanote/process.py | 26 ++++++++++++++------------ verbanote/rp_handler.py | 10 ++++++++-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/verbanote/process.py b/verbanote/process.py index 8058609..fa045a4 100644 --- a/verbanote/process.py +++ b/verbanote/process.py @@ -41,12 +41,11 @@ def transcribe( word_timestamps: bool = True, ) -> None: for i in range(len(diarized_groups)): - f = {Path.joinpath(output_path, str(i))} - audio_f = f"{f}.wav" - json_f = f"{f}.json" + audio_f = Path.joinpath(files_path, f"{str(i)}.wav") + json_f = Path.joinpath(files_path, f"{str(i)}.json") logging.info(f"Starting transcription of {str(audio_f)}...") result = model.transcribe( - audio=audio_f, language=lang, word_timestamps=word_timestamps + audio=str(audio_f), language=lang, word_timestamps=word_timestamps ) with open(json_f, "w") as outfile: json.dump(result, outfile, indent=4) @@ -66,7 +65,8 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscript gidx += 1 - with open(f"{Path.joinpath(transcription_path, str(gidx))}.json") as f: + fname = Path.joinpath(transcription_path, f"{str(gidx)}.json") + with open(fname) as f: captions = json.load(f)["segments"] logging.info(f"Loaded {fname} for transcription...") @@ -81,6 +81,7 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscript fname = Path.joinpath(transcription_path, "transcription_result.txt") with open(fname, "w", encoding="utf-8") as file: file.write(output) + logging.info(f"Wrote transcription to output file {fname}.") return TxtTranscription(text=output, file=fname) @@ -88,7 +89,9 @@ def save_diarized_audio_files( diarization: Path, audiofile: Path, output_path: Path ) -> list: groups = _group_speakers(diarization) - _save_individual_audio_files(audiofile, groups, output_path) + _save_individual_audio_files( + audiofile=audiofile, groups=groups, output_path=output_path + ) return groups @@ -97,11 +100,11 @@ def _add_audio_silence(audiofile) -> Path: spacer = AudioSegment.silent(duration=spacermilli) audio = AudioSegment.from_wav(audiofile) audio = spacer.append(audio, crossfade=0) - out_file = Path.joinpath(Path(os.path.dirname(audiofile)), "interview_prepend.wav") - audio.export(out_file, format="wav") + fname = Path.joinpath(Path(os.path.dirname(audiofile)), "interview_prepend.wav") + audio.export(fname, format="wav") logging.info(f"Exported audiofile with silence prepended to {fname}.") - return out_file + return fname def _save_individual_audio_files( @@ -115,9 +118,8 @@ def _save_individual_audio_files( start = _millisec(start) # - spacermilli end = _millisec(end) # - spacermilli gidx += 1 - audio[start:end].export( - f"{Path.joinpath(output_path, str(gidx))}.wav", format="wav" - ) + fname = Path.joinpath(output_path, f"{str(gidx)}.wav") + audio[start:end].export(fname, format="wav") logging.info(f"Exported audiopart {gidx} of {len(groups)} to {fname}.") diff --git a/verbanote/rp_handler.py b/verbanote/rp_handler.py index d98e0af..b4ca710 100644 --- a/verbanote/rp_handler.py +++ b/verbanote/rp_handler.py @@ -45,13 +45,19 @@ def handler(job): lang=lang or "fr", word_timestamps=word_timestamps or True, ) - transcription = process.output_txt(diarized_groups, output_path) + transcription: process.TxtTranscription = process.output_txt( + diarized_groups, output_path + ) + uploaded_transcription: str = file_operations.upload_to_oxo( + file=transcription.file, expires=1 + ) return { "audiofile": str(audiofile), "diarization_url": uploaded_diarization, "diarization": diarized_groups, - "transcription_text": transcription, + "transcription_url": uploaded_transcription, + "transcription_text": transcription.text, }