Fix Path objects wrongly used in string concatenation
This commit is contained in:
parent
f7b0d48c6f
commit
13b5f22953
2 changed files with 22 additions and 14 deletions
|
@ -41,12 +41,11 @@ def transcribe(
|
||||||
word_timestamps: bool = True,
|
word_timestamps: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
for i in range(len(diarized_groups)):
|
for i in range(len(diarized_groups)):
|
||||||
f = {Path.joinpath(output_path, str(i))}
|
audio_f = Path.joinpath(files_path, f"{str(i)}.wav")
|
||||||
audio_f = f"{f}.wav"
|
json_f = Path.joinpath(files_path, f"{str(i)}.json")
|
||||||
json_f = f"{f}.json"
|
|
||||||
logging.info(f"Starting transcription of {str(audio_f)}...")
|
logging.info(f"Starting transcription of {str(audio_f)}...")
|
||||||
result = model.transcribe(
|
result = model.transcribe(
|
||||||
audio=audio_f, language=lang, word_timestamps=word_timestamps
|
audio=str(audio_f), language=lang, word_timestamps=word_timestamps
|
||||||
)
|
)
|
||||||
with open(json_f, "w") as outfile:
|
with open(json_f, "w") as outfile:
|
||||||
json.dump(result, outfile, indent=4)
|
json.dump(result, outfile, indent=4)
|
||||||
|
@ -66,7 +65,8 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscript
|
||||||
|
|
||||||
gidx += 1
|
gidx += 1
|
||||||
|
|
||||||
with open(f"{Path.joinpath(transcription_path, str(gidx))}.json") as f:
|
fname = Path.joinpath(transcription_path, f"{str(gidx)}.json")
|
||||||
|
with open(fname) as f:
|
||||||
captions = json.load(f)["segments"]
|
captions = json.load(f)["segments"]
|
||||||
logging.info(f"Loaded {fname} for transcription...")
|
logging.info(f"Loaded {fname} for transcription...")
|
||||||
|
|
||||||
|
@ -81,6 +81,7 @@ def output_txt(diarized_groups: list, transcription_path: Path) -> TxtTranscript
|
||||||
fname = Path.joinpath(transcription_path, "transcription_result.txt")
|
fname = Path.joinpath(transcription_path, "transcription_result.txt")
|
||||||
with open(fname, "w", encoding="utf-8") as file:
|
with open(fname, "w", encoding="utf-8") as file:
|
||||||
file.write(output)
|
file.write(output)
|
||||||
|
logging.info(f"Wrote transcription to output file {fname}.")
|
||||||
return TxtTranscription(text=output, file=fname)
|
return TxtTranscription(text=output, file=fname)
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,7 +89,9 @@ def save_diarized_audio_files(
|
||||||
diarization: Path, audiofile: Path, output_path: Path
|
diarization: Path, audiofile: Path, output_path: Path
|
||||||
) -> list:
|
) -> list:
|
||||||
groups = _group_speakers(diarization)
|
groups = _group_speakers(diarization)
|
||||||
_save_individual_audio_files(audiofile, groups, output_path)
|
_save_individual_audio_files(
|
||||||
|
audiofile=audiofile, groups=groups, output_path=output_path
|
||||||
|
)
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
|
@ -97,11 +100,11 @@ def _add_audio_silence(audiofile) -> Path:
|
||||||
spacer = AudioSegment.silent(duration=spacermilli)
|
spacer = AudioSegment.silent(duration=spacermilli)
|
||||||
audio = AudioSegment.from_wav(audiofile)
|
audio = AudioSegment.from_wav(audiofile)
|
||||||
audio = spacer.append(audio, crossfade=0)
|
audio = spacer.append(audio, crossfade=0)
|
||||||
out_file = Path.joinpath(Path(os.path.dirname(audiofile)), "interview_prepend.wav")
|
fname = Path.joinpath(Path(os.path.dirname(audiofile)), "interview_prepend.wav")
|
||||||
audio.export(out_file, format="wav")
|
audio.export(fname, format="wav")
|
||||||
logging.info(f"Exported audiofile with silence prepended to {fname}.")
|
logging.info(f"Exported audiofile with silence prepended to {fname}.")
|
||||||
|
|
||||||
return out_file
|
return fname
|
||||||
|
|
||||||
|
|
||||||
def _save_individual_audio_files(
|
def _save_individual_audio_files(
|
||||||
|
@ -115,9 +118,8 @@ def _save_individual_audio_files(
|
||||||
start = _millisec(start) # - spacermilli
|
start = _millisec(start) # - spacermilli
|
||||||
end = _millisec(end) # - spacermilli
|
end = _millisec(end) # - spacermilli
|
||||||
gidx += 1
|
gidx += 1
|
||||||
audio[start:end].export(
|
fname = Path.joinpath(output_path, f"{str(gidx)}.wav")
|
||||||
f"{Path.joinpath(output_path, str(gidx))}.wav", format="wav"
|
audio[start:end].export(fname, format="wav")
|
||||||
)
|
|
||||||
logging.info(f"Exported audiopart {gidx} of {len(groups)} to {fname}.")
|
logging.info(f"Exported audiopart {gidx} of {len(groups)} to {fname}.")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -45,13 +45,19 @@ def handler(job):
|
||||||
lang=lang or "fr",
|
lang=lang or "fr",
|
||||||
word_timestamps=word_timestamps or True,
|
word_timestamps=word_timestamps or True,
|
||||||
)
|
)
|
||||||
transcription = process.output_txt(diarized_groups, output_path)
|
transcription: process.TxtTranscription = process.output_txt(
|
||||||
|
diarized_groups, output_path
|
||||||
|
)
|
||||||
|
uploaded_transcription: str = file_operations.upload_to_oxo(
|
||||||
|
file=transcription.file, expires=1
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"audiofile": str(audiofile),
|
"audiofile": str(audiofile),
|
||||||
"diarization_url": uploaded_diarization,
|
"diarization_url": uploaded_diarization,
|
||||||
"diarization": diarized_groups,
|
"diarization": diarized_groups,
|
||||||
"transcription_text": transcription,
|
"transcription_url": uploaded_transcription,
|
||||||
|
"transcription_text": transcription.text,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue