import pickle
from datetime import datetime
from pathlib import Path

import feedparser
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi_utils.tasks import repeat_every

from prophet.data import Improvement, Original
from prophet.llm import LLMClient

BEE_FEED = "https://babylonbee.com/feed"
BEE_FEED_TEST = "test/resources/feed_short.atom"  # NOTE: Switch out when done testing
PICKLE_DIR = "/tmp/pollenprophet"
REFRESH_PERIOD = 3600  # between fetching articles, in seconds

llm: LLMClient = LLMClient()


def grab_latest_originals() -> list[Original]:
    # TODO: Implement skipping any we already have
    feed: feedparser.FeedParserDict = feedparser.parse(BEE_FEED)
    results: list[Original] = []
    for entry in feed.entries:
        o = Original(
            title=entry.title,
            summary=entry.summary,
            link=entry.link,
            date=datetime.strptime(entry.published, "%a, %d %b %Y %H:%M:%S %z"),
        )
        results.append(o)
    return results


def save_new_improvements(improvements: list[Improvement]) -> None:
    # Pickle each improvement under "<original timestamp>_<id>" so filenames
    # are unique and sortable by publication date.
    save_dir = Path(PICKLE_DIR)
    save_dir.mkdir(parents=True, exist_ok=True)
    for imp in improvements:
        fname = save_dir / f"{int(imp.original.date.timestamp())}_{imp.id}"
        try:
            with open(fname, "wb") as f:
                pickle.dump(imp, f)
            print(f"Saved {fname}")
        except Exception as e:
            print(f"Error saving file {fname}: {e}")


def load_existing_improvements() -> list[Improvement]:
    improvements: list[Improvement] = []
    save_dir = Path(PICKLE_DIR)
    if not save_dir.is_dir():
        # Nothing has been saved yet.
        return improvements
    for fname in save_dir.iterdir():
        if not fname.is_file():
            continue
        try:
            with open(fname, "rb") as f:
                obj: Improvement = pickle.load(f)
            improvements.append(obj)
        except (OSError, pickle.UnpicklingError) as e:
            print(f"Error loading file {fname}: {e}")
    return improvements


def keep_only_new_originals(
    additional: list[Original], existing: list[Original] | None = None
) -> list[Original]:
    # Drop any originals whose id is already present in the pickle store.
    if existing is None:
        existing = [e.original for e in load_existing_improvements()]
    existing_hashes = {e.id for e in existing}
    remaining: list[Original] = []
    for new in additional:
        if new.id not in existing_hashes:
            remaining.append(new)
    return remaining


def improve_originals(originals: list[Original]) -> list[Improvement]:
    improvements: list[Improvement] = []
    for orig in originals:
        new_title = llm.rewrite_title_with_groq(orig.title)
        new_summary = llm.rewrite_summary_with_groq(orig, new_title)
        improvements.append(
            Improvement(original=orig, title=new_title, summary=new_summary)
        )
    return improvements


app = FastAPI()

origins = [
    "http://localhost",
    "http://localhost:8080",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/improve-title")
def improve_headline(content: str):
    return llm.rewrite_title_with_groq(content)


@app.get("/improve-summary")
def improve_summary(original_title: str, new_title: str, original_summary: str):
    o = Original(
        title=original_title, summary=original_summary, link="", date=datetime.now()
    )
    return llm.rewrite_summary_with_groq(o, new_title)


def refresh_articles() -> list[Improvement]:
    # Fetch, deduplicate, rewrite, and persist; returns what was added so
    # callers can report on it.
    adding = keep_only_new_originals(grab_latest_originals())
    improved = improve_originals(adding)
    save_new_improvements(improved)
    print(f"Updated articles. Added {len(improved)} new ones.")
    return improved


@app.on_event("startup")
@repeat_every(seconds=REFRESH_PERIOD)
def scheduled_refresh() -> None:
    # @repeat_every wraps this in a background loop started at app startup.
    refresh_articles()


@app.get("/update")
def fetch_update():
    # Run one refresh on demand. Calling the @repeat_every-wrapped handler
    # here would spawn another background loop, so call the plain function
    # and return a JSON-serializable summary rather than json.dumps-ing the
    # Improvement objects themselves.
    improved = refresh_articles()
    return {"added": len(improved), "titles": [imp.title for imp in improved]}


@app.get("/improvements", response_class=HTMLResponse)
def list_improvements():
    improved = load_existing_improvements()
    # Render each stored improvement as a simple HTML article.
    return (
        "<html><body>"
        + "\n".join(
            f"<article><h2>{imp.title}</h2>"
            f"<p>{imp.summary}</p>"
            f'<a href="{imp.original.link}">original</a></article>'
            for imp in improved
        )
        + "</body></html>"
    )
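

# A minimal local-run sketch, assuming uvicorn is installed (the usual ASGI
# server for FastAPI); the host/port values here are arbitrary examples.
# Running the module directly serves the API and, via the startup hook above,
# kicks off the periodic refresh loop.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)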