"""Summarize a text file, a YouTube json3 transcript, or a YouTube video's subtitles."""

import atexit
import json
import shutil
import subprocess  # noqa: F401 - unused since the jq call was replaced; kept so the import set is unchanged
import tempfile
from pathlib import Path
from typing import cast  # noqa: F401 - no longer needed by cli(); kept so the import set is unchanged

import click
import pytgpt.phind as phind

# Single source of truth for the prompt used when the caller supplies none.
DEFAULT_PROMPT = "Please summarize the following transcript:"

# Module-wide debug switch; flipped by cli() and read by dprint().
debug = False


def summarize_text_file(content: str, prompt: str | None) -> str:
    """Ask the Phind backend to summarize *content*.

    Args:
        content: Text to summarize.
        prompt: Instruction placed in front of the text. An empty string or
            ``None`` falls back to ``DEFAULT_PROMPT``.

    Returns:
        Whatever ``PHIND.chat`` returns for ``"<prompt> <content>"``.
    """
    prompt = prompt or DEFAULT_PROMPT
    bot = phind.PHIND()
    return bot.chat(f"{prompt} {content}")


def extract_transcript_contents(content: str, keep_newlines: bool = False) -> str:
    """Pull the spoken text out of a json3 transcript document.

    Collects every ``events[].segs[].utf8`` string, concatenates them in
    document order, and normalizes whitespace line by line.

    The parsing is done with the standard-library ``json`` module rather than
    an external ``jq`` process, so no extra binary is required and JSON string
    escapes (``\\"``, ``\\t``, ``\\uXXXX`` ...) are decoded correctly.

    Args:
        content: The raw JSON text of the transcript.
        keep_newlines: When true, newlines found in the segments are kept as
            line breaks; otherwise each one becomes a single space.

    Returns:
        The transcript text, or ``""`` when *content* is not valid JSON or
        does not have the expected ``events`` list.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        # Callers treat an empty result as "no usable content".
        return ""

    events = data.get("events") if isinstance(data, dict) else None
    if not isinstance(events, list):
        return ""

    pieces: list[str] = []
    for event in events:
        # Events without a "segs" list carry no text and are skipped.
        segs = event.get("segs") if isinstance(event, dict) else None
        if not isinstance(segs, list):
            continue
        for seg in segs:
            text = seg.get("utf8") if isinstance(seg, dict) else None
            if isinstance(text, str):
                pieces.append(text)

    # Segments are concatenated as-is (they carry their own spacing); runs of
    # whitespace inside each line are then collapsed to a single space.
    lines = [" ".join(line.split()) for line in "".join(pieces).split("\n")]
    separator = "\n" if keep_newlines else " "
    return separator.join(lines)


def grab_subtitles(url: str | Path) -> Path:
    """Download the automatic json3 subtitles for *url* and return their path.

    Args:
        url: Address of the video, handed to ``yt_dlp`` as a string.

    Returns:
        Path of a ``.json3`` file written below the temporary directory.

    Raises:
        ValueError: If no ``.json3`` file was produced.
    """
    # Imported here rather than at module level, so yt_dlp is only loaded
    # when a video URL is actually processed.
    import yt_dlp

    # A fresh sub-directory per call, so a file left by an earlier call in the
    # same process can never be mistaken for this call's result.
    work_dir = Path(tempfile.mkdtemp(dir=get_temp_dir()))
    ydl_opts = {
        "outtmpl": f"{work_dir}/subs",
        "writeautomaticsub": True,
        "subtitlesformat": "json3",
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # NOTE(review): download=True together with skip_download appears to
        # be what makes yt_dlp write the subtitle file without fetching the
        # media itself - confirm against the yt_dlp documentation.
        info = ydl.extract_info(str(url), download=True)
        filename = ydl.prepare_filename(info)
        dprint(f"Subtitle file saved as: {filename}")

    # Sorted so the choice is deterministic if several files were written.
    candidates = sorted(p for p in work_dir.rglob("*.json3") if p.is_file())
    if candidates:
        return candidates[0]
    raise ValueError("No correct json3 transcript object found.")


INPUT_FORMATS = ["json3", "yt", "txt"]


@click.command()
# Existence of FILE_PATH is checked inside cli(), and only for non-YouTube
# input, because for "--from yt" the argument is a URL rather than a file.
# TODO: Implement some way to allow content passed in on stdin instead of file (- as arg?)
@click.argument("file_path", type=click.Path(exists=False))
@click.option(
    "--from",
    "-f",
    "get_from",
    type=click.Choice(INPUT_FORMATS),
    default="txt",
    help="Choose format to process between json transcript, yt link or txt file.",
)
@click.option(
    "--prompt",
    "-p",
    default=DEFAULT_PROMPT,
    type=str,
    help="Use custom prompt.",
)
@click.option(
    "--log-level",
    "-l",
    default=0,
    help="Set log level to 1 for debug.",
)
def cli(
    file_path: Path | str,
    prompt: str,
    log_level: int,
    get_from: str,
):
    """Provide summary for a file at the specified path or a youtube video at the specified url."""
    # NOTE: the docstring above is shown verbatim by ``--help``; keep
    # implementation notes in comments instead.
    global debug
    if log_level:
        debug = True

    if get_from == "yt":
        # YouTube link: download the transcript first, then treat it as json3.
        source_path = grab_subtitles(file_path)
    else:
        source_path = Path(file_path)
        if not source_path.is_file():
            raise click.BadParameter(
                f"File '{file_path}' does not exist.", param_hint="file_path"
            )
    dprint(f"file path = {source_path}")

    is_transcript = get_from in ("json3", "yt")
    # JSON transcripts are read as UTF-8; plain text keeps the platform
    # default encoding, as before.
    content = source_path.read_text(encoding="utf-8" if is_transcript else None)
    if is_transcript:
        content = extract_transcript_contents(content)
    dprint(f"content = {content}")

    if not content.strip():
        # Stop here: sending an empty prompt to the summarizer is pointless.
        raise click.ClickException("Please provide a file with valid content.")

    print(summarize_text_file(content, prompt))


# Temporary directory shared by the whole process; created on first use.
cached_dir: Path | None = None


def _cleanup_temp_dir(path: Path) -> None:
    """Remove *path* at interpreter exit, unless debugging is enabled."""
    if debug:
        # Keep the files around so they can be inspected after a debug run.
        dprint(f"Keeping temp dir {path}")
        return
    shutil.rmtree(path, ignore_errors=True)


def get_temp_dir() -> Path:
    """Return the process-wide temporary directory, creating it on first call.

    The directory is registered for removal at interpreter exit so repeated
    runs do not accumulate leftover directories.
    """
    global cached_dir
    if cached_dir is None:
        cached_dir = Path(tempfile.mkdtemp())
        atexit.register(_cleanup_temp_dir, cached_dir)
        dprint(f"Created and cached temp dir {cached_dir}")
    return cached_dir


def dprint(content: str | None):
    """Print *content* with a ``[DEBUG]`` prefix when debugging is enabled."""
    if debug:
        print(f"[DEBUG] {content}")


if __name__ == "__main__":
    cli()