From 66ad116802d1f61342ed8835ec9b5e561586f3c8 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Tue, 22 Aug 2023 10:32:07 +0200
Subject: [PATCH] REGRESSION: Restrict handler to basic file handling

---
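Note (below the fold, not part of the commit): with diarization and
transcription stubbed out, the handler only needs a reachable audio URL
plus writable input/output directories. A minimal local smoke test could
look like the sketch below -- the /tmp paths, the example URL, and running
it from inside verbanote/ are assumptions, not anything this patch ships:

    # smoke_test.py -- hypothetical local check for the slimmed-down handler
    import os
    from pathlib import Path

    # rp_handler reads these at import time, so set them before importing it
    os.environ["VERBANOTE_INPUT_PATH"] = "/tmp/in"
    os.environ["VERBANOTE_OUTPUT_PATH"] = "/tmp/out"
    Path("/tmp/in").mkdir(parents=True, exist_ok=True)
    Path("/tmp/out").mkdir(parents=True, exist_ok=True)

    import rp_handler  # flat import, same style as `import loaders`

    # placeholder URL -- any small, publicly reachable audio file will do
    job = {"input": {"url": "https://example.com/interview.mp3"}}
    print(rp_handler.handler(job))
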
 Dockerfile                   |  2 +-
 pyproject.toml               | 10 +++---
 verbanote/file_operations.py | 44 +++++++++++++++++++++++++
 verbanote/loaders.py         | 63 +++++++++++++-----------------------
 verbanote/rp_handler.py      | 43 +++++++++++++-----------
 5 files changed, 96 insertions(+), 66 deletions(-)
 create mode 100644 verbanote/file_operations.py

diff --git a/Dockerfile b/Dockerfile
index 51afc6b..85ab491 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,7 +42,7 @@ WORKDIR ${APP_PATH}
 RUN poetry install
 
 # installing the large models
-RUN poetry run ltt install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
+# RUN poetry run ltt install --pytorch-computation-backend=cu118 torch torchvision torchaudio
 
 COPY ./${APP_NAME} ./${APP_NAME}
 
diff --git a/pyproject.toml b/pyproject.toml
index c5c36ba..7433ee2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,14 +8,14 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.11"
+requests = "^2.31.0"
 static-ffmpeg = "^2.5"
 runpod = "^1.1.3"
 pydub = "^0.25.1"
-light-the-torch = "^0.7.5"
-openai-whisper = { git = "https://github.com/openai/whisper.git" }
-hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
-pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
-gdown = "^4.7.1"
+#light-the-torch = "^0.7.5"
+#openai-whisper = { git = "https://github.com/openai/whisper.git" }
+#hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
+#pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/verbanote/file_operations.py b/verbanote/file_operations.py
new file mode 100644
index 0000000..05fb27c
--- /dev/null
+++ b/verbanote/file_operations.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+import requests
+import subprocess
+
+
+def download_from_url(url: str, input_path: Path) -> Path:
+    resp = requests.get(url)
+    resp.raise_for_status()
+    # TODO think about implementing a naming scheme based on url path
+    fname = input_path / "inputfile"
+    with open(fname, mode="wb") as file:
+        file.write(resp.content)
+    return fname
+
+
+def upload_to_oxo(file: Path, url: str = "https://0x0.st", expires: int = 2) -> str:
+    with open(file, "rb") as f:
+        # "expires" is a plain form field, so it belongs in data, not files
+        resp = requests.post(url, files={"file": f}, data={"expires": str(expires)})
+    resp.raise_for_status()
+    # 0x0.st answers with the plain-text URL of the upload
+    return resp.text.strip()
+
+
+def convert_to_wav(file: Path, output_path: Path) -> Path:
+    fn = output_path / "interview.wav"
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-i",
+            file,
+            "-vn",
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "16000",
+            "-ac",
+            "1",
+            "-y",
+            fn,
+        ],
+        check=True,
+    )
+    return fn
diff --git a/verbanote/loaders.py b/verbanote/loaders.py
index db8dd7d..93d3584 100644
--- a/verbanote/loaders.py
+++ b/verbanote/loaders.py
@@ -1,12 +1,10 @@
 import locale
 from pathlib import Path
-import subprocess
-from whisper import Whisper
-from pyannote.audio import Pipeline
-import torch
+# from whisper import Whisper
+# from pyannote.audio import Pipeline
+# import torch
 import static_ffmpeg
-import gdown
-
+import file_operations
 
 def prep() -> None:
     locale.getpreferredencoding = lambda: "UTF-8"
@@ -14,39 +12,22 @@ def prep() -> None:
 
     static_ffmpeg.add_paths()
 
 
-def audiofile(drive_url: str, path: Path) -> Path | None:
-    if not drive_url:
-        return None
-    gdown.download(drive_url, "infile")
-    fn = Path.joinpath(path, "interview.wav")
-    subprocess.run(
-        [
-            "ffmpeg",
-            "-i",
-            "{repr(video_path)}",
-            "-vn",
-            "-acodec",
-            "pcm_s16le",
-            "-ar",
-            "16000",
-            "-ac",
-            "1",
-            "-y",
-            fn,
-        ]
-    )
-    return fn
+def audiofile(url: str, input_path: Path) -> Path:
+    file = file_operations.download_from_url(url, input_path)
+    file_wav = file_operations.convert_to_wav(file, input_path)
+    file.unlink()
+    return file_wav
 
-
-def diarization(access_token: str | None) -> Pipeline:
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/speaker-diarization", use_auth_token=access_token
-    )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return pipeline.to(device)
-
-
-def whisper() -> Whisper:
-    # LOAD MODEL INTO VRAM
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return whisper.load_model("large", device=device)
+#
+# def diarization(access_token: str | None) -> Pipeline:
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/speaker-diarization", use_auth_token=access_token
+#     )
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return pipeline.to(device)
+#
+#
+# def whisper() -> Whisper:
+#     # LOAD MODEL INTO VRAM
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return whisper.load_model("large", device=device)
diff --git a/verbanote/rp_handler.py b/verbanote/rp_handler.py
index ad42b2e..e85efd9 100644
--- a/verbanote/rp_handler.py
+++ b/verbanote/rp_handler.py
@@ -2,39 +2,44 @@ from pathlib import Path
 import runpod
 from runpod.serverless import os
 import loaders
-import process
+# import process
 
-output_path = os.environ.get("VERBANOTE_OUTPUT_PATH", "/transcriptions")
-output_path = Path(output_path)
-input_path = os.environ.get("VERBANOTE_INPUT_PATH", "/audiofiles")
-input_path = Path(input_path)
+output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/out"))
+input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/in"))
 
-access_token = os.environ.get("VERBANOTE_HF_TOKEN")
+access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
 
 loaders.prep()
-diarize_pipeline = loaders.diarization(access_token)
-whisper_model = loaders.whisper()
+# diarize_pipeline = loaders.diarization(access_token)
+# whisper_model = loaders.whisper()
 
 
 def handler(job):
-    input = job["input"]
-    audiofile = loaders.audiofile(input.get("file"), path=input_path)
-    if not audiofile:
-        return {"error": "missing audio file location"}
+    job_input: dict = job["input"]
+    url: str | None = job_input.get("url")
 
-    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
-    diarized_groups = process.save_diarized_audio_files(
-        diarized, audiofile, output_path
-    )
-    process.transcribe(
-        model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
-    )
+    if not url:
+        return {"error": "no file link provided"}
+
+    try:
+        audiofile = loaders.audiofile(url, input_path=input_path)
+    except Exception:
+        return {"error": "audiofile import failed"}
+
+    # diarized = process.diarize(audiofile, diarize_pipeline, output_path)
+    # diarized_groups = process.save_diarized_audio_files(
+    #     diarized, audiofile, output_path
+    # )
+    # process.transcribe(
+    #     model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
+    # )
 
     return {
         "speaker_timings": "s3-address-to-speakers",
         "transcription_text": "s3-address-to-transcription",
         "transcription_page": "web-address-to-deployment",
+        "audiofile_path": str(audiofile),
     }
 
 
 # speakers = {
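
Aside (reviewer note, not part of the patch): the three helpers in
file_operations.py compose into the full fetch-convert-upload round trip
that later commits can build on. A sketch, assuming ffmpeg is provided by
static-ffmpeg and with the URL and scratch directory as placeholders:

    # round_trip.py -- hypothetical usage of the new helpers
    from pathlib import Path
    import static_ffmpeg
    import file_operations

    static_ffmpeg.add_paths()  # put ffmpeg on PATH, as loaders.prep() does

    workdir = Path("/tmp/verbanote")
    workdir.mkdir(parents=True, exist_ok=True)

    src = file_operations.download_from_url("https://example.com/talk.mp3", workdir)
    wav = file_operations.convert_to_wav(src, workdir)  # 16 kHz mono PCM wav
    link = file_operations.upload_to_oxo(wav, expires=2)
    print(link)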