REGRESSION: Restrict handler to basic file handling
This commit is contained in:
parent
b28ba0c4d9
commit
66ad116802
5 changed files with 96 additions and 66 deletions
|
@ -42,7 +42,7 @@ WORKDIR ${APP_PATH}
|
||||||
RUN poetry install
|
RUN poetry install
|
||||||
|
|
||||||
# installing the large models
|
# installing the large models
|
||||||
RUN poetry run ltt install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
|
# RUN poetry run ltt install --pytorch-computation-backend=cu118 torch torchvision torchaudio
|
||||||
|
|
||||||
COPY ./${APP_NAME} ./${APP_NAME}
|
COPY ./${APP_NAME} ./${APP_NAME}
|
||||||
|
|
||||||
|
|
|
@ -8,14 +8,14 @@ readme = "README.md"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.11"
|
python = "^3.11"
|
||||||
|
requests = "^2.31.0"
|
||||||
static-ffmpeg = "^2.5"
|
static-ffmpeg = "^2.5"
|
||||||
runpod = "^1.1.3"
|
runpod = "^1.1.3"
|
||||||
pydub = "^0.25.1"
|
pydub = "^0.25.1"
|
||||||
light-the-torch = "^0.7.5"
|
#light-the-torch = "^0.7.5"
|
||||||
openai-whisper = { git = "https://github.com/openai/whisper.git" }
|
#openai-whisper = { git = "https://github.com/openai/whisper.git" }
|
||||||
hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
|
#hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
|
||||||
pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
|
#pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
|
||||||
gdown = "^4.7.1"
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
|
|
44
verbanote/file_operations.py
Normal file
44
verbanote/file_operations.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import requests
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
def download_from_url(url: str, input_path: Path, *, timeout: float = 30.0) -> Path:
    """Download ``url`` and store the payload as ``input_path / "inputfile"``.

    The response is streamed to disk in chunks, so the whole body is never
    held in memory at once.

    Args:
        url: Location fetched with an HTTP GET request.
        input_path: Directory that receives the downloaded file. It is
            created (including parents) when it does not exist yet.
        timeout: Seconds to wait for the server to connect or to send data
            before the request is aborted.

    Returns:
        Path of the file that was written.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
        requests.exceptions.RequestException: The connection failed or the
            timeout expired.
    """
    input_path.mkdir(parents=True, exist_ok=True)
    # TODO think about implementing a naming scheme based on url path
    fname = input_path / "inputfile"
    with requests.get(url, stream=True, timeout=timeout) as resp:
        # Fail before touching the disk; the raised error carries the status
        # code and the response object for the caller to inspect.
        resp.raise_for_status()
        with open(fname, mode="wb") as file:
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
    return fname
|
||||||
|
|
||||||
|
|
||||||
|
def upload_to_oxo(
    file: Path,
    url: str = "https://0x0.st",
    expires: int = 2,
    *,
    timeout: float = 60.0,
) -> str:
    """Upload ``file`` via multipart POST and return the server's text reply.

    Args:
        file: Path of the local file to upload.
        url: Upload endpoint; defaults to the public 0x0.st instance.
        expires: Value sent as the ``expires`` form field.
            NOTE(review): 0x0.st appears to interpret this as hours (or an
            epoch timestamp in milliseconds) - confirm against its docs.
        timeout: Seconds to wait on the connection before aborting.

    Returns:
        The response body decoded as text with surrounding whitespace
        removed (for 0x0.st this is presumably the URL of the upload).

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
        requests.exceptions.RequestException: The connection failed or the
            timeout expired.
    """
    # Keep the handle inside a context manager so it is closed even when the
    # request raises.
    with open(file, "rb") as handle:
        resp = requests.post(
            url=url,
            files={"file": handle},
            # Plain form fields belong in ``data``; entries in ``files`` are
            # transmitted as file parts.
            data={"expires": str(expires)},
            timeout=timeout,
        )
    resp.raise_for_status()
    # ``str(resp.content)`` would yield the bytes repr ("b'...'"); decode it.
    return resp.text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_wav(
    file: Path,
    output_path: Path,
    *,
    sample_rate: int = 16000,
    channels: int = 1,
) -> Path:
    """Convert ``file`` to a PCM WAV file using the ``ffmpeg`` executable.

    The result is always written to ``output_path / "interview.wav"`` and an
    existing file of that name is overwritten.

    Args:
        file: Source media file handed to ``ffmpeg -i``.
        output_path: Directory that receives ``interview.wav``. It is created
            (including parents) when it does not exist yet.
        sample_rate: Output sample rate in Hz (``-ar``).
        channels: Number of output audio channels (``-ac``).

    Returns:
        Path of the written WAV file.

    Raises:
        subprocess.CalledProcessError: ``ffmpeg`` exited with a non-zero
            status, i.e. the conversion failed.
        FileNotFoundError: No ``ffmpeg`` executable was found on ``PATH``.
    """
    output_path.mkdir(parents=True, exist_ok=True)
    fn = output_path / "interview.wav"
    command = [
        "ffmpeg",
        "-i",
        str(file),
        "-vn",  # ignore any video stream
        "-acodec",
        "pcm_s16le",  # 16-bit signed little-endian PCM
        "-ar",
        str(sample_rate),
        "-ac",
        str(channels),
        "-y",  # overwrite the output without prompting
        str(fn),
    ]
    # check=True turns a failed conversion into an exception instead of
    # silently returning a path to a missing or truncated file.
    subprocess.run(command, check=True)
    return fn
|
|
@ -1,12 +1,10 @@
|
||||||
import locale
|
import locale
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import subprocess
|
# from whisper import Whisper
|
||||||
from whisper import Whisper
|
# from pyannote.audio import Pipeline
|
||||||
from pyannote.audio import Pipeline
|
# import torch
|
||||||
import torch
|
|
||||||
import static_ffmpeg
|
import static_ffmpeg
|
||||||
import gdown
|
import file_operations
|
||||||
|
|
||||||
|
|
||||||
def prep() -> None:
|
def prep() -> None:
|
||||||
locale.getpreferredencoding = lambda: "UTF-8"
|
locale.getpreferredencoding = lambda: "UTF-8"
|
||||||
|
@ -14,39 +12,22 @@ def prep() -> None:
|
||||||
static_ffmpeg.add_paths()
|
static_ffmpeg.add_paths()
|
||||||
|
|
||||||
|
|
||||||
def audiofile(drive_url: str, path: Path) -> Path | None:
|
def audiofile(url: str, input_path: Path) -> Path:
|
||||||
if not drive_url:
|
file = file_operations.download_from_url(url, input_path)
|
||||||
return None
|
file_wav = file_operations.convert_to_wav(file, input_path)
|
||||||
gdown.download(drive_url, "infile")
|
file.unlink()
|
||||||
fn = Path.joinpath(path, "interview.wav")
|
return file_wav
|
||||||
subprocess.run(
|
|
||||||
[
|
|
||||||
"ffmpeg",
|
|
||||||
"-i",
|
|
||||||
"{repr(video_path)}",
|
|
||||||
"-vn",
|
|
||||||
"-acodec",
|
|
||||||
"pcm_s16le",
|
|
||||||
"-ar",
|
|
||||||
"16000",
|
|
||||||
"-ac",
|
|
||||||
"1",
|
|
||||||
"-y",
|
|
||||||
fn,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
return fn
|
|
||||||
|
|
||||||
|
#
|
||||||
def diarization(access_token: str | None) -> Pipeline:
|
# def diarization(access_token: str | None) -> Pipeline:
|
||||||
pipeline = Pipeline.from_pretrained(
|
# pipeline = Pipeline.from_pretrained(
|
||||||
"pyannote/speaker-diarization", use_auth_token=access_token
|
# "pyannote/speaker-diarization", use_auth_token=access_token
|
||||||
)
|
# )
|
||||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
return pipeline.to(device)
|
# return pipeline.to(device)
|
||||||
|
#
|
||||||
|
#
|
||||||
def whisper() -> Whisper:
|
# def whisper() -> Whisper:
|
||||||
# LOAD MODEL INTO VRAM
|
# # LOAD MODEL INTO VRAM
|
||||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
return whisper.load_model("large", device=device)
|
# return whisper.load_model("large", device=device)
|
||||||
|
|
|
@ -2,39 +2,44 @@ from pathlib import Path
|
||||||
import runpod
|
import runpod
|
||||||
from runpod.serverless import os
|
from runpod.serverless import os
|
||||||
import loaders
|
import loaders
|
||||||
import process
|
# import process
|
||||||
|
|
||||||
|
|
||||||
output_path = os.environ.get("VERBANOTE_OUTPUT_PATH", "/transcriptions")
|
output_path:Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/in"))
|
||||||
output_path = Path(output_path)
|
input_path:Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/out"))
|
||||||
input_path = os.environ.get("VERBANOTE_INPUT_PATH", "/audiofiles")
|
|
||||||
input_path = Path(input_path)
|
|
||||||
|
|
||||||
access_token = os.environ.get("VERBANOTE_HF_TOKEN")
|
access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
|
||||||
|
|
||||||
loaders.prep()
|
loaders.prep()
|
||||||
diarize_pipeline = loaders.diarization(access_token)
|
# diarize_pipeline = loaders.diarization(access_token)
|
||||||
whisper_model = loaders.whisper()
|
# whisper_model = loaders.whisper()
|
||||||
|
|
||||||
|
|
||||||
def handler(job):
|
def handler(job):
|
||||||
input = job["input"]
|
input:dict = job["input"]
|
||||||
audiofile = loaders.audiofile(input.get("file"), path=input_path)
|
url: str | None = input.get("url")
|
||||||
if not audiofile:
|
|
||||||
return {"error": "missing audio file location"}
|
|
||||||
|
|
||||||
diarized = process.diarize(audiofile, diarize_pipeline, output_path)
|
if not url:
|
||||||
diarized_groups = process.save_diarized_audio_files(
|
return {"error": "no file link provided"}
|
||||||
diarized, audiofile, output_path
|
|
||||||
)
|
try:
|
||||||
process.transcribe(
|
audiofile = loaders.audiofile(url, input_path=input_path)
|
||||||
model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
|
except Exception:
|
||||||
)
|
return {"error": "audiofile import failed"}
|
||||||
|
|
||||||
|
# diarized = process.diarize(audiofile, diarize_pipeline, output_path)
|
||||||
|
# diarized_groups = process.save_diarized_audio_files(
|
||||||
|
# diarized, audiofile, output_path
|
||||||
|
# )
|
||||||
|
# process.transcribe(
|
||||||
|
# model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
|
||||||
|
# )
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"speaker_timings": "s3-address-to-speakers",
|
"speaker_timings": "s3-address-to-speakers",
|
||||||
"transcription_text": "s3-address-to-transcription",
|
"transcription_text": "s3-address-to-transcription",
|
||||||
"transcription_page": "web-address-to-deployment",
|
"transcription_page": "web-address-to-deployment",
|
||||||
|
"audiofile_path": str(audiofile)
|
||||||
}
|
}
|
||||||
|
|
||||||
# speakers = {
|
# speakers = {
|
||||||
|
|
Loading…
Reference in a new issue