From 66ad116802d1f61342ed8835ec9b5e561586f3c8 Mon Sep 17 00:00:00 2001
From: Marty Oehme
Date: Tue, 22 Aug 2023 10:32:07 +0200
Subject: [PATCH] REGRESSION: Restrict handler to basic file handling

---
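Note (below the fold, not part of the commit): with diarization and
transcription stubbed out, the handler only needs a reachable audio URL
plus writable input/output directories. A minimal local smoke test could
look like the sketch below -- the /tmp paths, the example URL, and running
it from inside verbanote/ are assumptions, not anything this patch ships:

    # smoke_test.py -- hypothetical local check for the slimmed-down handler
    import os
    from pathlib import Path

    # rp_handler reads these at import time, so set them before importing it
    os.environ["VERBANOTE_INPUT_PATH"] = "/tmp/in"
    os.environ["VERBANOTE_OUTPUT_PATH"] = "/tmp/out"
    Path("/tmp/in").mkdir(parents=True, exist_ok=True)
    Path("/tmp/out").mkdir(parents=True, exist_ok=True)

    import rp_handler  # flat import, same style as `import loaders`

    # placeholder URL -- any small, publicly reachable audio file will do
    job = {"input": {"url": "https://example.com/interview.mp3"}}
    print(rp_handler.handler(job))
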
 Dockerfile                   |  2 +-
 pyproject.toml               | 10 +++---
 verbanote/file_operations.py | 44 +++++++++++++++++++++++++
 verbanote/loaders.py         | 63 +++++++++++++-----------------------
 verbanote/rp_handler.py      | 43 +++++++++++++-----------
 5 files changed, 96 insertions(+), 66 deletions(-)
 create mode 100644 verbanote/file_operations.py

diff --git a/Dockerfile b/Dockerfile
index 51afc6b..85ab491 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,7 +42,7 @@ WORKDIR ${APP_PATH}
 RUN poetry install
 
 # installing the large models
-RUN poetry run ltt install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
+# RUN poetry run ltt install --pytorch-computation-backend=cu118 torch torchvision torchaudio
 
 COPY ./${APP_NAME} ./${APP_NAME}
 
diff --git a/pyproject.toml b/pyproject.toml
index c5c36ba..7433ee2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,14 +8,14 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.11"
+requests = "^2.31.0"
 static-ffmpeg = "^2.5"
 runpod = "^1.1.3"
 pydub = "^0.25.1"
-light-the-torch = "^0.7.5"
-openai-whisper = { git = "https://github.com/openai/whisper.git" }
-hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
-pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
-gdown = "^4.7.1"
+#light-the-torch = "^0.7.5"
+#openai-whisper = { git = "https://github.com/openai/whisper.git" }
+#hmmlearn = {git = "https://github.com/hmmlearn/hmmlearn.git"}
+#pyannote-audio = {git = "https://github.com/pyannote/pyannote-audio.git", rev = "develop"}
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/verbanote/file_operations.py b/verbanote/file_operations.py
new file mode 100644
index 0000000..05fb27c
--- /dev/null
+++ b/verbanote/file_operations.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+import requests
+import subprocess
+
+
+def download_from_url(url: str, input_path: Path) -> Path:
+    resp = requests.get(url)
+    resp.raise_for_status()
+    # TODO think about implementing a naming scheme based on url path
+    fname = input_path / "inputfile"
+    with open(fname, mode="wb") as file:
+        file.write(resp.content)
+    return fname
+
+
+def upload_to_oxo(file: Path, url: str = "https://0x0.st", expires: int = 2) -> str:
+    with open(file, "rb") as f:
+        # "expires" is a plain form field, so it belongs in data, not files
+        resp = requests.post(url, files={"file": f}, data={"expires": str(expires)})
+    resp.raise_for_status()
+    # 0x0.st answers with the plain-text URL of the upload
+    return resp.text.strip()
+
+
+def convert_to_wav(file: Path, output_path: Path) -> Path:
+    fn = output_path / "interview.wav"
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-i",
+            file,
+            "-vn",
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "16000",
+            "-ac",
+            "1",
+            "-y",
+            fn,
+        ],
+        check=True,
+    )
+    return fn
diff --git a/verbanote/loaders.py b/verbanote/loaders.py
index db8dd7d..93d3584 100644
--- a/verbanote/loaders.py
+++ b/verbanote/loaders.py
@@ -1,12 +1,10 @@
 import locale
 from pathlib import Path
-import subprocess
-from whisper import Whisper
-from pyannote.audio import Pipeline
-import torch
+# from whisper import Whisper
+# from pyannote.audio import Pipeline
+# import torch
 import static_ffmpeg
-import gdown
-
+import file_operations
 
 def prep() -> None:
     locale.getpreferredencoding = lambda: "UTF-8"
@@ -14,39 +12,22 @@ def prep() -> None:
 
     static_ffmpeg.add_paths()
 
 
-def audiofile(drive_url: str, path: Path) -> Path | None:
-    if not drive_url:
-        return None
-    gdown.download(drive_url, "infile")
-    fn = Path.joinpath(path, "interview.wav")
-    subprocess.run(
-        [
-            "ffmpeg",
-            "-i",
-            "{repr(video_path)}",
-            "-vn",
-            "-acodec",
-            "pcm_s16le",
-            "-ar",
-            "16000",
-            "-ac",
-            "1",
-            "-y",
-            fn,
-        ]
-    )
-    return fn
+def audiofile(url: str, input_path: Path) -> Path:
+    file = file_operations.download_from_url(url, input_path)
+    file_wav = file_operations.convert_to_wav(file, input_path)
+    file.unlink()
+    return file_wav
 
-
-def diarization(access_token: str | None) -> Pipeline:
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/speaker-diarization", use_auth_token=access_token
-    )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return pipeline.to(device)
-
-
-def whisper() -> Whisper:
-    # LOAD MODEL INTO VRAM
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    return whisper.load_model("large", device=device)
+#
+# def diarization(access_token: str | None) -> Pipeline:
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/speaker-diarization", use_auth_token=access_token
+#     )
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return pipeline.to(device)
+#
+#
+# def whisper() -> Whisper:
+#     # LOAD MODEL INTO VRAM
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     return whisper.load_model("large", device=device)
diff --git a/verbanote/rp_handler.py b/verbanote/rp_handler.py
index ad42b2e..e85efd9 100644
--- a/verbanote/rp_handler.py
+++ b/verbanote/rp_handler.py
@@ -2,39 +2,44 @@ from pathlib import Path
 import runpod
 from runpod.serverless import os
 import loaders
-import process
+# import process
 
-output_path = os.environ.get("VERBANOTE_OUTPUT_PATH", "/transcriptions")
-output_path = Path(output_path)
-input_path = os.environ.get("VERBANOTE_INPUT_PATH", "/audiofiles")
-input_path = Path(input_path)
+output_path: Path = Path(os.environ.get("VERBANOTE_OUTPUT_PATH", "/out"))
+input_path: Path = Path(os.environ.get("VERBANOTE_INPUT_PATH", "/in"))
 
-access_token = os.environ.get("VERBANOTE_HF_TOKEN")
+access_token: str = os.environ.get("VERBANOTE_HF_TOKEN", "")
 
 loaders.prep()
-diarize_pipeline = loaders.diarization(access_token)
-whisper_model = loaders.whisper()
+# diarize_pipeline = loaders.diarization(access_token)
+# whisper_model = loaders.whisper()
 
 
 def handler(job):
-    input = job["input"]
-    audiofile = loaders.audiofile(input.get("file"), path=input_path)
-    if not audiofile:
-        return {"error": "missing audio file location"}
+    job_input: dict = job["input"]
+    url: str | None = job_input.get("url")
 
-    diarized = process.diarize(audiofile, diarize_pipeline, output_path)
-    diarized_groups = process.save_diarized_audio_files(
-        diarized, audiofile, output_path
-    )
-    process.transcribe(
-        model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
-    )
+    if not url:
+        return {"error": "no file link provided"}
+
+    try:
+        audiofile = loaders.audiofile(url, input_path=input_path)
+    except Exception:
+        return {"error": "audiofile import failed"}
+
+    # diarized = process.diarize(audiofile, diarize_pipeline, output_path)
+    # diarized_groups = process.save_diarized_audio_files(
+    #     diarized, audiofile, output_path
+    # )
+    # process.transcribe(
+    #     model=whisper_model, diarized_groups=diarized_groups, output_path=output_path
+    # )
 
     return {
         "speaker_timings": "s3-address-to-speakers",
         "transcription_text": "s3-address-to-transcription",
         "transcription_page": "web-address-to-deployment",
+        "audiofile_path": str(audiofile),
     }
 
 
 # speakers = {
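
Aside (reviewer note, not part of the patch): the three helpers in
file_operations.py compose into the full fetch-convert-upload round trip
that later commits can build on. A sketch, assuming ffmpeg is provided by
static-ffmpeg and with the URL and scratch directory as placeholders:

    # round_trip.py -- hypothetical usage of the new helpers
    from pathlib import Path
    import static_ffmpeg
    import file_operations

    static_ffmpeg.add_paths()  # put ffmpeg on PATH, as loaders.prep() does

    workdir = Path("/tmp/verbanote")
    workdir.mkdir(parents=True, exist_ok=True)

    src = file_operations.download_from_url("https://example.com/talk.mp3", workdir)
    wav = file_operations.convert_to_wav(src, workdir)  # 16 kHz mono PCM wav
    link = file_operations.upload_to_oxo(wav, expires=2)
    print(link)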