diff --git a/prophet/app.py b/prophet/app.py index 78c12b1..b7839b8 100644 --- a/prophet/app.py +++ b/prophet/app.py @@ -1,29 +1,68 @@ +import hashlib import json +import os +import pickle +import re +from dataclasses import dataclass, field from datetime import datetime +from pathlib import Path +from uuid import uuid4 import feedparser from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from fastapi.staticfiles import StaticFiles +from fastapi.responses import HTMLResponse from fastapi_utils.tasks import repeat_every - -from prophet import view -from prophet.domain.improvement import Improvement -from prophet.domain.improvement_repo import IImprovementRepo -from prophet.domain.original import Original -from prophet.infra.improvement_pickle_repo import ImprovementPickleRepo -from prophet.llm import LLMClient +from groq import Groq BEE_FEED = "https://babylonbee.com/feed" BEE_FEED_TEST = "test/resources/feed_short.atom" # NOTE: Switch out when done testing +PICKLE_DIR = "/tmp/pollenprophet" + REFRESH_PERIOD = 3600 # between fetching articles, in seconds -llm: LLMClient = LLMClient() -repo: IImprovementRepo = ImprovementPickleRepo() + +@dataclass +class Original: # BadJoke: Sting + title: str + summary: str + link: str + date: datetime + image_link: str | None = None + id: str = field(init=False) + + def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string] + img: str + m = re.match(r'", "", s) + return (img, rest) + + def __post_init__(self): + self.id = hashlib.sha256(self.link.encode()).hexdigest() + + extracted = self._extract_img(self.summary) + if extracted[0]: + self.image_link = extracted[0] + self.summary = extracted[1] + + +@dataclass +class Improvement: # GoodJoke: Queen + original: Original + title: str + summary: str + id: str = str(uuid4()) def grab_latest_originals() -> list[Original]: + # TODO: Implement skipping any we already have feed: feedparser.FeedParserDict = feedparser.parse(BEE_FEED) # noqa: F841 results: list[Original] = [] for entry in feed.entries: @@ -37,11 +76,39 @@ def grab_latest_originals() -> list[Original]: return results +def save_new_improvements(improvements: list[Improvement]) -> None: + save_dir = Path(PICKLE_DIR) + save_dir.mkdir(parents=True, exist_ok=True) + for imp in improvements: + fname = save_dir / f"{int(imp.original.date.timestamp())}_{imp.id}" + try: + with open(fname, "wb") as f: + pickle.dump(imp, f) + print(f"Saved {fname}") + except Exception as e: + print(f"Error saving file {fname}: {e}") + + +def load_existing_improvements() -> list[Improvement]: + improvements: list[Improvement] = [] + for fname in Path(PICKLE_DIR).iterdir(): + if not fname.is_file(): + continue + + try: + with open(fname, "rb") as f: + obj: Improvement = pickle.load(f) + improvements.append(obj) + except FileNotFoundError as e: + print(f"Error loading file {fname}: {e}") + return improvements + + def keep_only_new_originals( additional: list[Original], existing: list[Original] | None = None ): if not existing: - existing = [e.original for e in repo.get_all()] + existing = [e.original for e in load_existing_improvements()] existing_hashes = set([e.id for e in existing]) @@ -56,8 +123,8 @@ def keep_only_new_originals( def improve_originals(originals: list[Original]) -> list[Improvement]: improvements: list[Improvement] = [] for orig in originals: - new_title = llm.rewrite_title(orig.title) - new_summary = llm.rewrite_summary(orig, new_title) + new_title = rewrite_title_with_groq(orig.title) + new_summary = rewrite_summary_with_groq(orig, new_title) improvements.append( Improvement(original=orig, title=new_title, summary=new_summary) @@ -65,32 +132,84 @@ def improve_originals(originals: list[Original]) -> list[Improvement]: return improvements -def init() -> FastAPI: - app = FastAPI() - app.mount("/static", StaticFiles(directory="static"), name="static") +def rewrite_title_with_groq(original_content: str) -> str: + client = Groq(api_key=os.getenv("GROQ_API_KEY", "NO_API_KEY_FOUND")) - origins = [ - "http://localhost", - "http://localhost:8080", - ] - - app.add_middleware( - CORSMiddleware, - allow_origins=origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], + suggestions = client.chat.completions.create( + messages=[ + { + "role": "system", + "content": "You are a comedy writer at a satirical newspaper. Improve on the following satirical headline. Your new headline is funny, can involve current political events and has an edge to it. Print only the suggestions, with one suggestion on each line.", + }, + { + "role": "user", + "content": original_content, + }, + ], + model="llama-3.3-70b-versatile", ) - view.define_routes(app) - return app + suggestions_str = suggestions.choices[0].message.content + if not suggestions_str: + raise ValueError + print("Suggestions: ", suggestions_str) + winner = client.chat.completions.create( + messages=[ + { + "role": "system", + "content": "You are an editor at a satirical newspaper. Improve on the following satirical headline. For a given headline, you diligently evaluate: (1) Whether the headline is funny; (2) Whether the headline follows a clear satirical goal; (3) Whether the headline has sufficient substance and bite. Based on the outcomes of your review, you pick your favorite headline from the given suggestions and you make targeted revisions to it. Your output consists solely of the revised headline.", + }, + { + "role": "user", + "content": suggestions_str, + }, + ], + model="llama-3.3-70b-versatile", + ) + print("Winner: ", winner.choices[0].message.content) + winner_str = winner.choices[0].message.content + if not winner_str: + raise ValueError + return winner_str.strip(" \"'") -app = init() +def rewrite_summary_with_groq(orig: Original, improved_title: str) -> str: + client = Groq(api_key=os.getenv("GROQ_API_KEY", "NO_API_KEY_FOUND")) + + summary = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": f"Below there is an original title and an original summary. Then follows an improved title. Write an improved summary based on the original summary which fits to the improved title. Only output the improved summary.\n\nTitle:{orig.title}\nSummary:{orig.summary}\n---\nTitle:{improved_title}\nSummary:", + } + ], + model="llama-3.3-70b-versatile", + ) + summary_str = summary.choices[0].message.content + if not summary_str: + raise ValueError + print("Improved summary", summary_str) + return summary_str.strip(" \"'") + + +app = FastAPI() + +origins = [ + "http://localhost", + "http://localhost:8080", +] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) @app.get("/improve-title") def improve_headline(content: str): - return llm.rewrite_title(content) + return rewrite_title_with_groq(content) @app.get("/improve-summary") @@ -98,26 +217,99 @@ def improve_summary(original_title: str, new_title: str, original_summary: str): o = Original( title=original_title, summary=original_summary, link="", date=datetime.now() ) - return llm.rewrite_summary(o, new_title) + return rewrite_summary_with_groq(o, new_title) -# TODO: Switch to lifecycle events to avoid deprecated method @app.on_event("startup") @repeat_every(seconds=REFRESH_PERIOD) -async def refresh_articles(): - _ = await fetch_update() +def refresh_articles(): + adding = keep_only_new_originals(grab_latest_originals()) + improved = improve_originals(adding) + save_new_improvements(improved) + print(f"Updated articles. Added {len(improved)} new ones.") @app.get("/update") -async def fetch_update(debug_print: bool = True): - adding = keep_only_new_originals(grab_latest_originals()) - improved = improve_originals(adding) - repo.add_all(improved) - if debug_print: - print(f"Updated articles. Added {len(improved)} new ones.") +async def fetch_update(): + await refresh_articles() return json.dumps(improved) +@app.get("/improvements", response_class=HTMLResponse) +def list_improvements(): + improved = load_existing_improvements() + return ( + """ """ + + "\n".join( + f""" +
+
+ +
+
{item.title}
+
{item.summary}
+
""" + for item in sorted(improved, key=lambda i: i.original.date, reverse=True) + ) + ) + + +@app.get("/originals", response_class=HTMLResponse) +def list_originals(): + improved = load_existing_improvements() + return ( + """ """ + + "\n".join( + f""" +
+
+ +
+
{item.original.title}
+
{item.original.summary}
+
""" + for item in sorted(improved, key=lambda i: i.original.date, reverse=True) + ) + ) + + +style = """ +.card { + border: 1px solid #ccc; + padding: 10px; + margin: auto; + margin-bottom: 40px; + width: 600px; +} + +.card-title { + font-size: 24px; + margin-bottom: 5px; +} +""" + + +@app.get("/", response_class=HTMLResponse) +def root_route(): + return f""" + + + + The Pollen Prophet + + + + +

The Pollen Prophet

+

Making funny since 2025 what ought not bee.

+
+ + + """ + + def start() -> None: from uvicorn import run @@ -132,7 +324,7 @@ if __name__ == "__main__": # save_new_improvements(improved) # migrate to newer version - improved = repo.get_all() + improved = load_existing_improvements() for imp in improved: imp.original.__post_init__() print(f"Old Title: {imp.original.title}") @@ -143,4 +335,4 @@ if __name__ == "__main__": print(f"Summary: {imp.summary}") print("-" * 50) - repo.add_all(improved) + save_new_improvements(improved) diff --git a/prophet/config.py b/prophet/config.py deleted file mode 100644 index 55a8702..0000000 --- a/prophet/config.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -# Load environment variables from .env -from dataclasses import dataclass - -from dotenv import load_dotenv - -_ = load_dotenv() - - -@dataclass -class AiConfig: - API_KEY: str - - @classmethod - def from_env(cls) -> "AiConfig": - API_KEY = os.getenv("GROQ_API_KEY", "") - - if not API_KEY: - raise ValueError(f"{API_KEY} cannot be empty") - - return cls(**{"API_KEY": API_KEY}) diff --git a/prophet/domain/improvement.py b/prophet/domain/improvement.py deleted file mode 100644 index 50c7618..0000000 --- a/prophet/domain/improvement.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass -from uuid import uuid4 - -from prophet.domain.original import Original - - -@dataclass -class Improvement: # GoodJoke: Queen - original: Original - title: str - summary: str - id: str = str(uuid4()) diff --git a/prophet/domain/improvement_repo.py b/prophet/domain/improvement_repo.py deleted file mode 100644 index 7081e34..0000000 --- a/prophet/domain/improvement_repo.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Protocol - -from prophet.domain.improvement import Improvement - - -class ImprovementNotFoundError(Exception): - pass - - -class IImprovementRepo(Protocol): - def add(self, improvement: Improvement) -> None: - raise NotImplementedError - - def add_all(self, improvements: list[Improvement]) -> None: - raise NotImplementedError - - def get(self, id: str) -> Improvement: - raise NotImplementedError - - def get_all(self) -> list[Improvement]: - raise NotImplementedError diff --git a/prophet/domain/original.py b/prophet/domain/original.py deleted file mode 100644 index 87dda00..0000000 --- a/prophet/domain/original.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import re -from dataclasses import dataclass, field -from datetime import datetime - - -@dataclass -class Original: # BadJoke: Sting - title: str - summary: str - link: str - date: datetime - image_link: str | None = None - id: str = field(init=False) - - def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string] - img: str - m = re.match(r'", "", s) - return (img, rest) - - def __post_init__(self): - self.id = hashlib.sha256(self.link.encode()).hexdigest() - - extracted = self._extract_img(self.summary) - if extracted[0]: - self.image_link = extracted[0] - self.summary = extracted[1] diff --git a/prophet/infra/improvement_pickle_repo.py b/prophet/infra/improvement_pickle_repo.py deleted file mode 100644 index 7c59ec8..0000000 --- a/prophet/infra/improvement_pickle_repo.py +++ /dev/null @@ -1,49 +0,0 @@ -import pickle -from pathlib import Path -from typing import override - -from prophet.domain.improvement import Improvement -from prophet.domain.improvement_repo import IImprovementRepo, ImprovementNotFoundError - - -class ImprovementPickleRepo(IImprovementRepo): - pickle_dir: Path - - def __init__(self, pickle_dir: str | Path = "/tmp/pollenprophet") -> None: - self.pickle_dir = Path(pickle_dir) - self.pickle_dir.mkdir(parents=True, exist_ok=True) - - @override - def add(self, improvement: Improvement) -> None: - fname = self.pickle_dir / improvement.id - try: - with open(fname, "wb") as f: - pickle.dump(improvement, f) - print(f"Saved {fname}") - except FileExistsError: - print(f"Error saving file {fname}") - - @override - def add_all(self, improvements: list[Improvement]) -> None: - for imp in improvements: - self.add(imp) - - @override - def get(self, id: str) -> Improvement: - try: - with open(self.pickle_dir / id, "rb") as f: - improvement: Improvement = pickle.load(f) - except FileNotFoundError: - raise ImprovementNotFoundError - - return improvement - - @override - def get_all(self) -> list[Improvement]: - improvements: list[Improvement] = [] - for fname in Path(self.pickle_dir).iterdir(): - try: - improvements.append(self.get(fname.name)) - except ImprovementNotFoundError: - print(f"File {fname.absolute()} is not a valid Improvement.") - return improvements diff --git a/prophet/llm.py b/prophet/llm.py deleted file mode 100644 index 473dcca..0000000 --- a/prophet/llm.py +++ /dev/null @@ -1,68 +0,0 @@ -from groq import Groq - -from prophet.config import AiConfig -from prophet.domain.original import Original - - -class LLMClient: - config_ai: AiConfig - client: Groq - - def __init__( - self, config_ai: AiConfig | None = None, client: Groq | None = None - ) -> None: - self.config_ai = config_ai if config_ai else AiConfig.from_env() - self.client = client if client else Groq(api_key=self.config_ai.API_KEY) - - def rewrite_title(self, original_content: str) -> str: - suggestions = self.client.chat.completions.create( - messages=[ - { - "role": "system", - "content": "You are a comedy writer at a satirical newspaper. Improve on the following satirical headline. Your new headline is funny, can involve current political events and has an edge to it. Print only the suggestions, with one suggestion on each line.", - }, - { - "role": "user", - "content": original_content, - }, - ], - model="llama-3.3-70b-versatile", - ) - suggestions_str = suggestions.choices[0].message.content - if not suggestions_str: - raise ValueError - print("Suggestions: ", suggestions_str) - winner = self.client.chat.completions.create( - messages=[ - { - "role": "system", - "content": "You are an editor at a satirical newspaper. Improve on the following satirical headline. For a given headline, you diligently evaluate: (1) Whether the headline is funny; (2) Whether the headline follows a clear satirical goal; (3) Whether the headline has sufficient substance and bite. Based on the outcomes of your review, you pick your favorite headline from the given suggestions and you make targeted revisions to it. Your output consists solely of the revised headline.", - }, - { - "role": "user", - "content": suggestions_str, - }, - ], - model="llama-3.3-70b-versatile", - ) - print("Winner: ", winner.choices[0].message.content) - winner_str = winner.choices[0].message.content - if not winner_str: - raise ValueError - return winner_str.strip(" \"'") - - def rewrite_summary(self, orig: Original, improved_title: str) -> str: - summary = self.client.chat.completions.create( - messages=[ - { - "role": "user", - "content": f"Below there is an original title and an original summary. Then follows an improved title. Write an improved summary based on the original summary which fits to the improved title. Only output the improved summary.\n\nTitle:{orig.title}\nSummary:{orig.summary}\n---\nTitle:{improved_title}\nSummary:", - } - ], - model="llama-3.3-70b-versatile", - ) - summary_str = summary.choices[0].message.content - if not summary_str: - raise ValueError - print("Improved summary", summary_str) - return summary_str.strip(" \"'") diff --git a/prophet/view.py b/prophet/view.py deleted file mode 100644 index fbfcf59..0000000 --- a/prophet/view.py +++ /dev/null @@ -1,69 +0,0 @@ -# pyright: reportUnusedFunction=false - -from fastapi import FastAPI -from fastapi.responses import HTMLResponse - -from prophet.domain.improvement_repo import IImprovementRepo -from prophet.infra.improvement_pickle_repo import ImprovementPickleRepo - -repo: IImprovementRepo = ImprovementPickleRepo() - - -def define_routes(app: FastAPI): - @app.get("/improvements", response_class=HTMLResponse) - def list_improvements(): - improved = repo.get_all() - return ( - """ """ - + "\n".join( - f""" -
-
- -
-
{item.title}
-
{item.summary}
-
""" - for item in sorted( - improved, key=lambda i: i.original.date, reverse=True - ) - ) - ) - - @app.get("/originals", response_class=HTMLResponse) - def list_originals(): - improved = repo.get_all() - return ( - """ """ - + "\n".join( - f""" -
-
- -
-
{item.original.title}
-
{item.original.summary}
-
""" - for item in sorted( - improved, key=lambda i: i.original.date, reverse=True - ) - ) - ) - - @app.get("/", response_class=HTMLResponse) - def root_route(): - return """ - - - - The Pollen Prophet - - - -

The Pollen Prophet

-

Making funny since 2025 what ought not bee.

-
- - - """ diff --git a/static/me.png b/static/me.png deleted file mode 100644 index 76a7056..0000000 Binary files a/static/me.png and /dev/null differ diff --git a/static/style.css b/static/style.css deleted file mode 100644 index c731e2a..0000000 --- a/static/style.css +++ /dev/null @@ -1,12 +0,0 @@ -.card { - border: 1px solid #ccc; - padding: 10px; - margin: auto; - margin-bottom: 40px; - width: 600px; -} - -.card-title { - font-size: 24px; - margin-bottom: 5px; -}