REGRESSION: Restrict handler to basic file handling

Marty Oehme 2023-08-22 10:32:07 +02:00
parent b28ba0c4d9
commit 66ad116802
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
5 changed files with 96 additions and 66 deletions

Dockerfile

@@ -42,7 +42,7 @@ WORKDIR ${APP_PATH}
 RUN poetry install
 
 # installing the large models
-RUN poetry run ltt install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
+# RUN poetry run ltt install --pytorch-computation-backend=cu118 torch torchvision torchaudio
 
 COPY ./${APP_NAME} ./${APP_NAME}

pyproject.toml

@@ -8,14 +8,14 @@ readme = "README.md"
 [tool.poetry.dependencies]
 python = "^3.11"
+requests = "^2.31.0"
 static-ffmpeg = "^2.5"
 runpod = "^1.1.3"
 pydub = "^0.25.1"
-light-the-torch = "^0.7.5"
-openai-whisper = { git = "https://github.com/openai/whisper.git" }
-hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
-pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
-gdown = "^4.7.1"
+#light-the-torch = "^0.7.5"
+#openai-whisper = { git = "https://github.com/openai/whisper.git" }
+#hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
+#pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
 
 [build-system]
 requires = ["poetry-core"]
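With the heavyweight ML dependencies commented out, only four runtime packages remain active. A quick, hypothetical smoke test (not part of the commit) that the trimmed dependency set still imports cleanly:

    import importlib

    # the four dependencies left active in pyproject.toml
    for mod in ("requests", "static_ffmpeg", "runpod", "pydub"):
        importlib.import_module(mod)
    print("minimal dependency set imports cleanly")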

file_operations.py

@@ -0,0 +1,44 @@
+from pathlib import Path
+import requests
+import subprocess
+
+
+def download_from_url(url: str, input_path: Path) -> Path:
+    resp = requests.get(url)
+    if not resp.ok:
+        raise requests.exceptions.HTTPError()
+    # TODO think about implementing a naming scheme based on url path
+    fname = Path.joinpath(input_path, "inputfile")
+    with open(fname, mode="wb") as file:
+        file.write(resp.content)
+    return fname
+
+
+def upload_to_oxo(file: Path, url: str = "https://0x0.st", expires: int = 2) -> str:
+    resp = requests.post(
+        url=url, files={"file": open(file, "rb"), "expires": str(expires)}
+    )
+    if not resp.ok:
+        raise requests.exceptions.HTTPError()
+    return str(resp.content)
+
+
+def convert_to_wav(file: Path, output_path: Path) -> Path:
+    fn = Path.joinpath(output_path, "interview.wav")
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-i",
+            file,
+            "-vn",
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "16000",
+            "-ac",
+            "1",
+            "-y",
+            fn,
+        ]
+    )
+    return fn
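Taken together, the new module covers the whole fetch, convert, and share round trip. A minimal usage sketch, assuming the module is on the import path and ffmpeg is available (e.g. via static-ffmpeg); the URL and directory are placeholders:

    from pathlib import Path

    import file_operations

    workdir = Path("/tmp/verbanote-in")  # placeholder working directory
    workdir.mkdir(parents=True, exist_ok=True)

    # download, convert to 16 kHz mono PCM WAV, then share via 0x0.st
    audio = file_operations.download_from_url("https://example.com/interview.mp3", workdir)
    wav = file_operations.convert_to_wav(audio, workdir)
    link = file_operations.upload_to_oxo(wav)
    print(wav, link)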

loaders.py

@@ -1,12 +1,10 @@
 import locale
 from pathlib import Path
-import subprocess
-from whisper import Whisper
-from pyannote.audio import Pipeline
-import torch
+# from whisper import Whisper
+# from pyannote.audio import Pipeline
+# import torch
 import static_ffmpeg
-import gdown
+import file_operations
 
 
 def prep() -> None:
     locale.getpreferredencoding = lambda: "UTF-8"
@@ -14,39 +12,22 @@ def prep() -> None:
     static_ffmpeg.add_paths()
 
 
-def audiofile(drive_url: str, path: Path) -> Path | None:
-    if not drive_url:
-        return None
-    gdown.download(drive_url, "infile")
-    fn = Path.joinpath(path, "interview.wav")
-    subprocess.run(
-        [
-            "ffmpeg",
-            "-i",
-            "{repr(video_path)}",
-            "-vn",
-            "-acodec",
-            "pcm_s16le",
-            "-ar",
-            "16000",
-            "-ac",
-            "1",
-            "-y",
-            fn,
-        ]
-    )
-    return fn
+def audiofile(url: str, input_path: Path) -> Path:
+    file = file_operations.download_from_url(url, input_path)
+    file_wav = file_operations.convert_to_wav(file, input_path)
+    file.unlink()
+    return file_wav
 
 
-def diarization(access_token: str | None) -> Pipeline:
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/speaker-diarization", use_auth_token=access_token
-    )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return pipeline.to(device)
+# def diarization(access_token: str | None) -> Pipeline:
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/speaker-diarization", use_auth_token=access_token
+#     )
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return pipeline.to(device)
 
 
-def whisper() -> Whisper:
-    # LOAD MODEL INTO VRAM
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return whisper.load_model("large", device=device)
+# def whisper() -> Whisper:
+#     # LOAD MODEL INTO VRAM
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return whisper.load_model("large", device=device)
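After this regression, audiofile() is a thin wrapper over the file_operations helpers: download, convert, delete the original, return the WAV path. A hypothetical local smoke test with placeholder URL and directory:

    from pathlib import Path

    import loaders

    indir = Path("/tmp/verbanote-in")  # placeholder input directory
    indir.mkdir(parents=True, exist_ok=True)

    loaders.prep()  # fixes the locale and puts the static-ffmpeg binaries on PATH
    wav = loaders.audiofile("https://example.com/interview.mp3", indir)
    print(wav)  # expected: /tmp/verbanote-in/interview.wav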

handler.py

@@ -2,39 +2,44 @@ from pathlib import Path
 
 import runpod
 from runpod.serverless import os
 
 import loaders
-import process
+# import process
 
-output_path = os.environ.get("VERBANOTE_OUTPUT_PATH", "/transcriptions")
-output_path = Path(output_path)
-input_path = os.environ.get("VERBANOTE_INPUT_PATH", "/audiofiles")
-input_path = Path(input_path)
+output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/in"))
+input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/out"))
 
-access_token = os.environ.get("VERBANOTE_HF_TOKEN")
+access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
 
 loaders.prep()
-diarize_pipeline = loaders.diarization(access_token)
-whisper_model = loaders.whisper()
+# diarize_pipeline = loaders.diarization(access_token)
+# whisper_model = loaders.whisper()
 
 
 def handler(job):
-    input = job["input"]
-    audiofile = loaders.audiofile(input.get("file"), path=input_path)
-    if not audiofile:
-        return {"error": "missing audio file location"}
+    input: dict = job["input"]
+    url: str | None = input.get("url")
 
-    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
-    diarized_groups = process.save_diarized_audio_files(
-        diarized, audiofile, output_path
-    )
-    process.transcribe(
-        model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
-    )
+    if not url:
+        return {"error": "no file link provided"}
+
+    try:
+        audiofile = loaders.audiofile(url, input_path=input_path)
+    except Exception:
+        return {"error": "audiofile import failed"}
+
+    # diarized = process.diarize(audiofile, diarize_pipeline, output_path)
+    # diarized_groups = process.save_diarized_audio_files(
+    #     diarized, audiofile, output_path
+    # )
+    # process.transcribe(
+    #     model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
+    # )
 
     return {
         "speaker_timings": "s3-address-to-speakers",
         "transcription_text": "s3-address-to-transcription",
         "transcription_page": "web-address-to-deployment",
+        "audiofile_path": str(audiofile)
     }
 
 # speakers = {
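The restricted handler now only validates the link, imports the audio, and returns placeholder addresses plus the local WAV path. A hypothetical local invocation outside the runpod runtime, assuming the entrypoint module is named handler.py (the diff does not show the filename):

    import handler

    # happy path: a reachable audio URL (placeholder)
    print(handler.handler({"input": {"url": "https://example.com/interview.mp3"}}))

    # missing link: the early-return error dict
    print(handler.handler({"input": {}}))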