import json
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import cast

import click
import pytgpt.phind as phind


def summarize_text_file(content: str, prompt: str | None) -> str:
    """Ask the Phind chat bot to summarize *content*.

    Args:
        content: Text to summarize.
        prompt: Instruction placed in front of the content. A generic
            summary request is used when it is ``None`` or empty.

    Returns:
        The value returned by ``phind.PHIND().chat`` for the combined message.
    """
    prompt = prompt or "Please summarize the following transcript:"
    bot = phind.PHIND()
    return bot.chat(f"{prompt} {content}")


def extract_transcript_contents(content: str, keep_newlines: bool = False) -> str:
    """Extract the caption text from a json3 transcript document.

    Collects every ``events[].segs[].utf8`` string. Segments of one event are
    concatenated as-is; events are separated by a space so that consecutive
    captions never run into each other. Runs of whitespace are collapsed to a
    single space.

    Args:
        content: The json3 document as text.
        keep_newlines: When true, newlines found in the caption text are
            preserved as line breaks; otherwise they become spaces.

    Returns:
        The caption text, or ``""`` when *content* is not valid JSON or does
        not contain an ``events`` list.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return ""

    events = data.get("events") if isinstance(data, dict) else None
    if not isinstance(events, list):
        return ""

    captions = []
    for event in events:
        segs = event.get("segs") if isinstance(event, dict) else None
        if not isinstance(segs, list):
            # Events without segments carry no text.
            continue
        captions.append(
            "".join(
                seg["utf8"]
                for seg in segs
                if isinstance(seg, dict) and isinstance(seg.get("utf8"), str)
            )
        )

    text = " ".join(captions)
    if not keep_newlines:
        return " ".join(text.split())

    # Normalise whitespace line by line so the line breaks survive.
    lines = (" ".join(line.split()) for line in text.split("\n"))
    return "\n".join(lines).strip()


def grab_subtitles(url: str | Path) -> Path:
    """Download the automatic subtitles for *url* in json3 format.

    The subtitles are written into a fresh temporary directory, which the
    caller is responsible for removing once the returned file has been read.

    Args:
        url: Video URL handed to yt-dlp.

    Returns:
        Path of the downloaded ``.json3`` subtitle file.

    Raises:
        ValueError: If yt-dlp did not produce any ``.json3`` file.
    """
    # Function-scope import: yt_dlp is only loaded when this function runs.
    import yt_dlp

    temp_dir = get_temp_dir()
    ydl_opts = {
        "outtmpl": f"{temp_dir}/subs",
        "writeautomaticsub": True,
        "subtitlesformat": "json3",
        "skip_download": True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(str(url), download=True)

        candidates = sorted(
            path for path in Path(temp_dir).rglob("*.json3") if path.is_file()
        )
        if not candidates:
            raise ValueError("No correct json3 transcript object found.")
    except Exception:
        # Nothing usable was produced, so do not leave the directory behind.
        rm_dir(temp_dir)
        raise

    subtitle_file = candidates[0]
    print(f"Subtitle file saved as: {subtitle_file}")
    return subtitle_file


def get_temp_dir() -> str:
    """Create a new temporary directory, announce it, and return its path."""
    temp_dir = tempfile.mkdtemp()
    print(f"Creating temp dir {temp_dir}")
    return temp_dir


def rm_dir(dir: Path | str) -> None:
    """Recursively delete *dir* if it is an existing directory."""
    if Path(dir).is_dir():
        shutil.rmtree(dir)


@click.command()
# Existence of the argument is checked inside cli() instead of by click.Path,
# because with --youtube the argument is a URL rather than a local file.
@click.argument("file_path", type=click.Path(exists=False))
@click.option(
    "--json-transcript/--no-json-transcript",
    "-j",
    default=False,
    help="Use downloaded json3 transcript.",
)
@click.option(
    "--youtube/--no-youtube",
    "-t",
    default=False,
    help="Get (english) transcript from youtube link.",
)
@click.option(
    "--prompt",
    "-p",
    default="Please provide a detailed but concise summary for the following transcript:",
    type=str,
    help="Use custom prompt.",
)
def cli(
    file_path: Path | str, json_transcript: bool, youtube: bool, prompt: str
) -> None:
    """Provide summary for a file at the specified path or a youtube video at the specified url."""
    download_dir = None

    if youtube:
        # The argument is a URL: download its transcript first.
        try:
            file_path = grab_subtitles(file_path)
        except ValueError as err:
            raise click.ClickException(str(err)) from err
        download_dir = Path(file_path).parent
    elif not Path(file_path).is_file():
        raise click.BadParameter(
            f"File '{file_path}' does not exist.", param_hint="FILE_PATH"
        )
    print(f"DEBUG: file path = {file_path}")

    try:
        content = Path(file_path).read_text(encoding="utf-8")
    finally:
        # The downloaded transcript is only needed until it has been read.
        if download_dir is not None:
            rm_dir(download_dir)

    if json_transcript or youtube:
        content = extract_transcript_contents(content)

    if not content.strip():
        # Stop here instead of asking the bot to summarize nothing.
        raise click.ClickException("Please provide a file with valid content.")

    print(summarize_text_file(content, prompt))


if __name__ == "__main__":
    cli()