Reinsert diarization dependencies

parent 48095a1dc9
commit 64123a29e0

8 changed files with 105 additions and 399 deletions
@@ -1,4 +1,5 @@
 from pathlib import Path
+import logging
 import requests
 import subprocess
 
@@ -6,11 +7,13 @@ import subprocess
 def download_from_url(url: str, input_path: Path) -> Path:
     resp = requests.get(url)
     if not resp.ok:
+        logging.error(f"Created error code: {resp.status_code}")
         raise requests.exceptions.HTTPError()
     # TODO think about implementing a naming scheme based on url path
     fname = Path.joinpath(input_path, "inputfile")
     with open(fname, mode="wb") as file:
         file.write(resp.content)
+    logging.info(f"Downloaded input file: {fname}")
     return fname
 
 
@@ -41,4 +44,5 @@ def convert_to_wav(file: Path, output_path: Path) -> Path:
             fn,
         ]
     )
+    logging.info(f"Converted {file} to wav format: {fn}")
     return fn
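Review note on the two helpers above: a minimal smoke test, assuming the module is importable as file_operations (the name the loaders file imports below); the URL and paths are placeholders.

# Hypothetical smoke test; the sample URL and /tmp paths are placeholders.
from pathlib import Path

import file_operations

input_path = Path("/tmp/verbanote-in")
input_path.mkdir(parents=True, exist_ok=True)

# Fetch the remote file, then normalize it to wav for the pipeline.
audio = file_operations.download_from_url("https://example.com/sample.mp3", input_path)
wav = file_operations.convert_to_wav(audio, input_path)
print(wav)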
@@ -1,8 +1,8 @@
 import locale
 from pathlib import Path
 # from whisper import Whisper
-# from pyannote.audio import Pipeline
-# import torch
+from pyannote.audio import Pipeline
+import torch
 import static_ffmpeg
 import file_operations
 
@@ -18,14 +18,14 @@ def audiofile(url: str, input_path: Path) -> Path:
         file.unlink()
     return file_wav
 
-#
-# def diarization(access_token: str | None) -> Pipeline:
-#     pipeline = Pipeline.from_pretrained(
-#         "pyannote/speaker-diarization", use_auth_token=access_token
-#     )
-#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#     return pipeline.to(device)
-#
+
+def diarization(access_token: str | None) -> Pipeline:
+    pipeline = Pipeline.from_pretrained(
+        "pyannote/speaker-diarization", use_auth_token=access_token
+    )
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    return pipeline.to(device)
+
 
 # def whisper() -> Whisper:
 #     # LOAD MODEL INTO VRAM
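With diarization() reinstated, the loader can be exercised directly; a minimal sketch, assuming a VERBANOTE_HF_TOKEN with access to pyannote/speaker-diarization, and meeting.wav as a placeholder input.

# Minimal sketch; meeting.wav is a placeholder audio file.
import os

import loaders

pipeline = loaders.diarization(os.environ.get("VERBANOTE_HF_TOKEN"))
annotation = pipeline({"uri": "demo", "audio": "meeting.wav"})

# pyannote pipelines return an Annotation; iterate speaker turns with labels.
for turn, _, speaker in annotation.itertracks(yield_label=True):
    print(f"{speaker}: {turn.start:.1f}s -> {turn.end:.1f}s")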
@@ -1,24 +0,0 @@
-## SETTINGS FOR LATER
-from pathlib import Path
-
-# @markdown Enter the URL of the YouTube video, or the path to the video/audio file you want to transcribe, give the output path, etc. and run the cell. HTML file embeds the video for YouTube, and audio for media files.
-
-Source = "Youtube" # @param ['Youtube', 'File (Google Drive)']
-# @markdown ---
-# @markdown #### **Youtube video**
-video_url = "https://youtu.be/hpZFJctBUHQ" # @param {type:"string"}
-# store_audio = True #@param {type:"boolean"}
-# @markdown ---
-# @markdown #### **Google Drive video or audio path (mp4, wav, mp3)**
-video_path = "/content/drive/MyDrive/Customer_Service.mp3" # @param {type:"string"}
-# @markdown ---
-output_path = "/content/transcript/" # @param {type:"string"}
-output_path = str(Path(output_path))
-# @markdown ---
-# @markdown #### **Title for transcription of media file**
-audio_title = "Sample Order Taking" # @param {type:"string"}
-# @markdown ---
-# @markdown #### Copy a token from your [Hugging Face tokens page](https://huggingface.co/settings/tokens) and paste it below.
-access_token = "hf_" # @param {type:"string"}
-# @markdown ---
-# @markdown **Run this cell again if you change the video.**
@@ -4,7 +4,7 @@ import json
 from pathlib import Path
 from pyannote.audio import Pipeline
 from pydub import AudioSegment
-from whisper import Whisper
+# from whisper import Whisper
 
 MILLISECONDS_TO_SPACE = 2000
 
@@ -12,8 +12,8 @@ MILLISECONDS_TO_SPACE = 2000
 def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
     audiofile_prepended = _add_audio_silence(audiofile)
 
-    DEMO_FILE = {"uri": "blabla", "audio": audiofile_prepended}
-    dz = pipeline(DEMO_FILE)
+    DIARIZE_FILE = {"uri": "not-important", "audio": audiofile_prepended}
+    dz = pipeline(DIARIZE_FILE)
 
     out_file = Path.joinpath(output_path, "diarization.txt")
     with open(out_file, "w") as text_file:
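The body that writes diarization.txt is cut off by this hunk; one plausible shape for the elided lines, assuming they iterate the Annotation held in dz (the repo's actual line format is not shown here).

# Hypothetical continuation of diarize(); the line format is a guess.
with open(out_file, "w") as text_file:
    for turn, _, speaker in dz.itertracks(yield_label=True):
        text_file.write(f"start={turn.start:.3f} stop={turn.end:.3f} {speaker}\n")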
@@ -25,22 +25,22 @@ def diarize(audiofile: Path, pipeline: Pipeline, output_path: Path) -> Path:
     return out_file
 
 
-def transcribe(
-    model: Whisper,
-    diarized_groups: list,
-    output_path: Path,
-    lang: str = "en",
-    word_timestamps: bool = True,
-):
-    for i in range(len(diarized_groups)):
-        f = {Path.joinpath(output_path, str(i))}
-        audio_f = f"{f}.wav"
-        json_f = f"{f}.json"
-        result = model.transcribe(
-            audio=audio_f, language=lang, word_timestamps=word_timestamps
-        )
-        with open(json_f, "w") as outfile:
-            json.dump(result, outfile, indent=4)
+# def transcribe(
+#     model: Whisper,
+#     diarized_groups: list,
+#     output_path: Path,
+#     lang: str = "en",
+#     word_timestamps: bool = True,
+# ):
+#     for i in range(len(diarized_groups)):
+#         f = {Path.joinpath(output_path, str(i))}
+#         audio_f = f"{f}.wav"
+#         json_f = f"{f}.json"
+#         result = model.transcribe(
+#             audio=audio_f, language=lang, word_timestamps=word_timestamps
+#         )
+#         with open(json_f, "w") as outfile:
+#             json.dump(result, outfile, indent=4)
 
 
 def save_diarized_audio_files(
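One thing to flag before transcribe() is reinstated: f = {Path.joinpath(output_path, str(i))} is a set literal, so the later f-strings render as "{PosixPath('...')}.wav" rather than a usable path. A corrected sketch of the same loop (transcribe_fixed is a placeholder name, not code from this repo):

import json
from pathlib import Path


def transcribe_fixed(model, diarized_groups: list, output_path: Path,
                     lang: str = "en", word_timestamps: bool = True):
    for i in range(len(diarized_groups)):
        f = str(Path(output_path, str(i)))  # plain string, no set literal
        result = model.transcribe(
            audio=f"{f}.wav", language=lang, word_timestamps=word_timestamps
        )
        with open(f"{f}.json", "w") as outfile:
            json.dump(result, outfile, indent=4)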
@@ -1,22 +1,26 @@
 import logging
 from pathlib import Path
 import runpod
 from runpod.serverless import os
 import loaders
-# import process
 import file_operations
+
+import process
+
-output_path:Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/in"))
-input_path:Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/out"))
 logging.basicConfig(level=logging.DEBUG)
+
+input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/in"))
+output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/out"))
 
 access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
 
 loaders.prep()
-# diarize_pipeline = loaders.diarization(access_token)
+diarize_pipeline = loaders.diarization(access_token)
 # whisper_model = loaders.whisper()
 
+
 def handler(job):
-    input:dict = job["input"]
+    input: dict = job["input"]
     url: str | None = input.get("url")
 
     if not url:
@@ -27,10 +31,11 @@ def handler(job):
     except Exception:
         return {"error": "audiofile import failed"}
 
-    # diarized = process.diarize(audiofile, diarize_pipeline, output_path)
-    # diarized_groups = process.save_diarized_audio_files(
-    #     diarized, audiofile, output_path
-    # )
+    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
+    diarized_groups = process.save_diarized_audio_files(
+        diarized, audiofile, output_path
+    )
+    uploaded_file: str = file_operations.upload_to_oxo(file=diarized, expires=1)
     # process.transcribe(
     #     model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
     # )
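upload_to_oxo() itself is not part of this diff; judging only from the name and the expires= keyword, it plausibly posts the file to a 0x0.st-style host. A hypothetical sketch of that shape:

# Hypothetical; the real file_operations.upload_to_oxo() may differ.
from pathlib import Path

import requests


def upload_to_oxo(file: Path, expires: int = 1) -> str:
    with open(file, "rb") as fh:
        resp = requests.post(
            "https://0x0.st", files={"file": fh}, data={"expires": str(expires)}
        )
    resp.raise_for_status()
    return resp.text.strip()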
@@ -39,9 +44,11 @@ def handler(job):
         "speaker_timings": "s3-address-to-speakers",
         "transcription_text": "s3-address-to-transcription",
         "transcription_page": "web-address-to-deployment",
-        "audiofile_path": str(audiofile)
+        "audiofile_path": str(audiofile),
+        "audio_url": uploaded_file,
     }
 
+
 # speakers = {
 #     # speaker, textboxcolor, speaker color
 #     "SPEAKER_00": ("SPEAKER00", "white", "darkgreen"),
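For a quick local check of handler() without the RunPod queue: a minimal sketch, assuming the worker file is saved as rp_handler.py and ends with the usual runpod.serverless.start({"handler": handler}) call.

# Hypothetical local invocation; rp_handler.py and the URL are placeholders.
# The runpod SDK can also drive it from the CLI:
#   python rp_handler.py --test_input '{"input": {"url": "https://example.com/a.mp3"}}'
import rp_handler

job = {"input": {"url": "https://example.com/meeting.mp3"}}
print(rp_handler.handler(job))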