REGRESSION: Restrict handler to basic file handling

Marty Oehme 2023-08-22 10:32:07 +02:00
parent b28ba0c4d9
commit 66ad116802
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
5 changed files with 96 additions and 66 deletions

Dockerfile

@@ -42,7 +42,7 @@ WORKDIR ${APP_PATH}
 RUN poetry install
 
 # installing the large models
-RUN poetry run ltt install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
+# RUN poetry run ltt install --pytorch-computation-backend=cu118 torch torchvision torchaudio
 
 COPY ./${APP_NAME} ./${APP_NAME}

pyproject.toml

@@ -8,14 +8,14 @@ readme = "README.md"
 [tool.poetry.dependencies]
 python = "^3.11"
+requests = "^2.31.0"
 static-ffmpeg = "^2.5"
 runpod = "^1.1.3"
 pydub = "^0.25.1"
-light-the-torch = "^0.7.5"
-openai-whisper = { git = "https://github.com/openai/whisper.git" }
-hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
-pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
-gdown = "^4.7.1"
+#light-the-torch = "^0.7.5"
+#openai-whisper = { git = "https://github.com/openai/whisper.git" }
+#hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
+#pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
 
 [build-system]
 requires = ["poetry-core"]
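With the heavyweight ML dependencies commented out, only four runtime packages remain active. A quick, hypothetical smoke test (not part of the commit) that the trimmed dependency set still imports cleanly:

    import importlib

    # the four dependencies left active in pyproject.toml
    for mod in ("requests", "static_ffmpeg", "runpod", "pydub"):
        importlib.import_module(mod)
    print("minimal dependency set imports cleanly")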

file_operations.py

@@ -0,0 +1,44 @@
+from pathlib import Path
+import requests
+import subprocess
+
+
+def download_from_url(url: str, input_path: Path) -> Path:
+    resp = requests.get(url)
+    if not resp.ok:
+        raise requests.exceptions.HTTPError()
+    # TODO think about implementing a naming scheme based on url path
+    fname = Path.joinpath(input_path, "inputfile")
+    with open(fname, mode="wb") as file:
+        file.write(resp.content)
+    return fname
+
+
+def upload_to_oxo(file: Path, url: str = "https://0x0.st", expires: int = 2) -> str:
+    resp = requests.post(
+        url=url, files={"file": open(file, "rb"), "expires": str(expires)}
+    )
+    if not resp.ok:
+        raise requests.exceptions.HTTPError()
+    return str(resp.content)
+
+
+def convert_to_wav(file: Path, output_path: Path) -> Path:
+    fn = Path.joinpath(output_path, "interview.wav")
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-i",
+            file,
+            "-vn",
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "16000",
+            "-ac",
+            "1",
+            "-y",
+            fn,
+        ]
+    )
+    return fn
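Taken together, the new module covers the whole fetch, convert, and share round trip. A minimal usage sketch, assuming the module is on the import path and ffmpeg is available (e.g. via static-ffmpeg); the URL and directory are placeholders:

    from pathlib import Path

    import file_operations

    workdir = Path("/tmp/verbanote-in")  # placeholder working directory
    workdir.mkdir(parents=True, exist_ok=True)

    # download, convert to 16 kHz mono PCM WAV, then share via 0x0.st
    audio = file_operations.download_from_url("https://example.com/interview.mp3", workdir)
    wav = file_operations.convert_to_wav(audio, workdir)
    link = file_operations.upload_to_oxo(wav)
    print(wav, link)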

loaders.py

@@ -1,12 +1,10 @@
 import locale
 from pathlib import Path
-import subprocess
-from whisper import Whisper
-from pyannote.audio import Pipeline
-import torch
+# from whisper import Whisper
+# from pyannote.audio import Pipeline
+# import torch
 import static_ffmpeg
-import gdown
+import file_operations
 
 
 def prep() -> None:
     locale.getpreferredencoding = lambda: "UTF-8"
@@ -14,39 +12,22 @@ def prep() -> None:
     static_ffmpeg.add_paths()
 
 
-def audiofile(drive_url: str, path: Path) -> Path | None:
-    if not drive_url:
-        return None
-    gdown.download(drive_url, "infile")
-    fn = Path.joinpath(path, "interview.wav")
-    subprocess.run(
-        [
-            "ffmpeg",
-            "-i",
-            "{repr(video_path)}",
-            "-vn",
-            "-acodec",
-            "pcm_s16le",
-            "-ar",
-            "16000",
-            "-ac",
-            "1",
-            "-y",
-            fn,
-        ]
-    )
-    return fn
+def audiofile(url: str, input_path: Path) -> Path:
+    file = file_operations.download_from_url(url, input_path)
+    file_wav = file_operations.convert_to_wav(file, input_path)
+    file.unlink()
+    return file_wav
 
 
-def diarization(access_token: str | None) -> Pipeline:
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/speaker-diarization", use_auth_token=access_token
-    )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return pipeline.to(device)
+# def diarization(access_token: str | None) -> Pipeline:
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/speaker-diarization", use_auth_token=access_token
+#     )
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return pipeline.to(device)
 
 
-def whisper() -> Whisper:
-    # LOAD MODEL INTO VRAM
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return whisper.load_model("large", device=device)
+# def whisper() -> Whisper:
+#     # LOAD MODEL INTO VRAM
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return whisper.load_model("large", device=device)
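After this regression, audiofile() is a thin wrapper over the file_operations helpers: download, convert, delete the original, return the WAV path. A hypothetical local smoke test with placeholder URL and directory:

    from pathlib import Path

    import loaders

    indir = Path("/tmp/verbanote-in")  # placeholder input directory
    indir.mkdir(parents=True, exist_ok=True)

    loaders.prep()  # fixes the locale and puts the static-ffmpeg binaries on PATH
    wav = loaders.audiofile("https://example.com/interview.mp3", indir)
    print(wav)  # expected: /tmp/verbanote-in/interview.wav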

handler.py

@@ -2,39 +2,44 @@ from pathlib import Path
 
 import runpod
 from runpod.serverless import os
 
 import loaders
-import process
+# import process
 
-output_path = os.environ.get("VERBANOTE_OUTPUT_PATH", "/transcriptions")
-output_path = Path(output_path)
-input_path = os.environ.get("VERBANOTE_INPUT_PATH", "/audiofiles")
-input_path = Path(input_path)
+output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/in"))
+input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/out"))
 
-access_token = os.environ.get("VERBANOTE_HF_TOKEN")
+access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
 
 loaders.prep()
-diarize_pipeline = loaders.diarization(access_token)
-whisper_model = loaders.whisper()
+# diarize_pipeline = loaders.diarization(access_token)
+# whisper_model = loaders.whisper()
 
 
 def handler(job):
-    input = job["input"]
-    audiofile = loaders.audiofile(input.get("file"), path=input_path)
-    if not audiofile:
-        return {"error": "missing audio file location"}
+    input: dict = job["input"]
+    url: str | None = input.get("url")
 
-    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
-    diarized_groups = process.save_diarized_audio_files(
-        diarized, audiofile, output_path
-    )
-    process.transcribe(
-        model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
-    )
+    if not url:
+        return {"error": "no file link provided"}
+
+    try:
+        audiofile = loaders.audiofile(url, input_path=input_path)
+    except Exception:
+        return {"error": "audiofile import failed"}
+
+    # diarized = process.diarize(audiofile, diarize_pipeline, output_path)
+    # diarized_groups = process.save_diarized_audio_files(
+    #     diarized, audiofile, output_path
+    # )
+    # process.transcribe(
+    #     model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
+    # )
 
     return {
         "speaker_timings": "s3-address-to-speakers",
         "transcription_text": "s3-address-to-transcription",
         "transcription_page": "web-address-to-deployment",
+        "audiofile_path": str(audiofile)
     }
 
 # speakers = {
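The restricted handler now only validates the link, imports the audio, and returns placeholder addresses plus the local WAV path. A hypothetical local invocation outside the runpod runtime, assuming the entrypoint module is named handler.py (the diff does not show the filename):

    import handler

    # happy path: a reachable audio URL (placeholder)
    print(handler.handler({"input": {"url": "https://example.com/interview.mp3"}}))

    # missing link: the early-return error dict
    print(handler.handler({"input": {}}))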