Compare commits

..

No commits in common. "c8d218a16fa4cdc8640967657de9bbc3ec46c0f0" and "3668cc3cbd667a6204157497e58d0414fa0d8431" have entirely different histories.

10 changed files with 235 additions and 330 deletions

View file

@ -1,29 +1,68 @@
import hashlib
import json
import os
import pickle
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from uuid import uuid4
import feedparser
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
from fastapi_utils.tasks import repeat_every
from prophet import view
from prophet.domain.improvement import Improvement
from prophet.domain.improvement_repo import IImprovementRepo
from prophet.domain.original import Original
from prophet.infra.improvement_pickle_repo import ImprovementPickleRepo
from prophet.llm import LLMClient
from groq import Groq
# Feed URL that grab_latest_originals() hands to feedparser.
BEE_FEED = "https://babylonbee.com/feed"
# Alternative feed source for testing (presumably a local fixture file - confirm it exists in the repo).
BEE_FEED_TEST = "test/resources/feed_short.atom" # NOTE: Switch out when done testing
# Directory holding one pickle file per stored Improvement.
PICKLE_DIR = "/tmp/pollenprophet"
REFRESH_PERIOD = 3600 # between fetching articles, in seconds
# Module-level singletons, created at import time.
# NOTE(review): LLMClient() without arguments builds its config from the environment,
# so importing this module appears to require GROQ_API_KEY to be set - confirm.
llm: LLMClient = LLMClient()
repo: IImprovementRepo = ImprovementPickleRepo()
@dataclass
class Original: # BadJoke: Sting
title: str
summary: str
link: str
date: datetime
image_link: str | None = None
id: str = field(init=False)
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
img: str
m = re.match(r'<img src="(?P<img>.+?)"', s)
try:
img = m.group("img")
except (IndexError, NameError):
return ("", s)
if img:
rest = re.sub(r"<img src=.+?>", "", s)
return (img, rest)
def __post_init__(self):
self.id = hashlib.sha256(self.link.encode()).hexdigest()
extracted = self._extract_img(self.summary)
if extracted[0]:
self.image_link = extracted[0]
self.summary = extracted[1]
@dataclass
class Improvement:  # GoodJoke: Queen
    """A rewritten title and summary together with the Original they were derived from."""

    original: Original
    title: str
    summary: str
    # default_factory runs per instance. A plain ``str(uuid4())`` default would
    # be evaluated once at class creation and shared by every Improvement.
    id: str = field(default_factory=lambda: str(uuid4()))
def grab_latest_originals() -> list[Original]:
# TODO: Implement skipping any we already have
feed: feedparser.FeedParserDict = feedparser.parse(BEE_FEED) # noqa: F841
results: list[Original] = []
for entry in feed.entries:
@ -37,11 +76,39 @@ def grab_latest_originals() -> list[Original]:
return results
def save_new_improvements(improvements: list[Improvement]) -> None:
    """Pickle each improvement into its own file under ``PICKLE_DIR``.

    The directory is created if needed. Files are named
    ``<int timestamp of original.date>_<improvement id>``. A failure to write
    one file is printed and does not stop the remaining ones.
    """
    target_dir = Path(PICKLE_DIR)
    target_dir.mkdir(parents=True, exist_ok=True)
    for improvement in improvements:
        stamp = int(improvement.original.date.timestamp())
        destination = target_dir / f"{stamp}_{improvement.id}"
        try:
            with destination.open("wb") as handle:
                pickle.dump(improvement, handle)
            print(f"Saved {destination}")
        except Exception as err:
            print(f"Error saving file {destination}: {err}")
def load_existing_improvements() -> list[Improvement]:
    """Load every pickled Improvement found directly inside ``PICKLE_DIR``.

    Returns:
        The unpickled objects in directory-listing order. An empty list is
        returned when the directory does not exist yet. Entries that are not
        regular files are skipped, and files that cannot be read or unpickled
        are reported with ``print`` and skipped.
    """
    pickle_dir = Path(PICKLE_DIR)
    if not pickle_dir.is_dir():
        # Nothing has been saved yet; iterdir() would raise FileNotFoundError
        # here, outside of any try block.
        return []

    improvements: list[Improvement] = []
    for fname in pickle_dir.iterdir():
        if not fname.is_file():
            continue
        try:
            with open(fname, "rb") as f:
                # SECURITY NOTE(review): pickle.load executes code embedded in
                # the file. PICKLE_DIR lives under /tmp, so make sure no other
                # user can write there, or switch to a data-only format.
                obj: Improvement = pickle.load(f)
        except (OSError, pickle.UnpicklingError, EOFError) as e:
            # OSError covers FileNotFoundError (file removed between listing
            # and opening); the other two cover truncated or corrupt files.
            print(f"Error loading file {fname}: {e}")
            continue
        improvements.append(obj)
    return improvements
def keep_only_new_originals(
additional: list[Original], existing: list[Original] | None = None
):
if not existing:
existing = [e.original for e in repo.get_all()]
existing = [e.original for e in load_existing_improvements()]
existing_hashes = set([e.id for e in existing])
@ -56,8 +123,8 @@ def keep_only_new_originals(
def improve_originals(originals: list[Original]) -> list[Improvement]:
improvements: list[Improvement] = []
for orig in originals:
new_title = llm.rewrite_title(orig.title)
new_summary = llm.rewrite_summary(orig, new_title)
new_title = rewrite_title_with_groq(orig.title)
new_summary = rewrite_summary_with_groq(orig, new_title)
improvements.append(
Improvement(original=orig, title=new_title, summary=new_summary)
@ -65,32 +132,84 @@ def improve_originals(originals: list[Original]) -> list[Improvement]:
return improvements
def init() -> FastAPI:
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
def rewrite_title_with_groq(original_content: str) -> str:
client = Groq(api_key=os.getenv("GROQ_API_KEY", "NO_API_KEY_FOUND"))
origins = [
"http://localhost",
"http://localhost:8080",
]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
suggestions = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are a comedy writer at a satirical newspaper. Improve on the following satirical headline. Your new headline is funny, can involve current political events and has an edge to it. Print only the suggestions, with one suggestion on each line.",
},
{
"role": "user",
"content": original_content,
},
],
model="llama-3.3-70b-versatile",
)
view.define_routes(app)
return app
suggestions_str = suggestions.choices[0].message.content
if not suggestions_str:
raise ValueError
print("Suggestions: ", suggestions_str)
winner = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are an editor at a satirical newspaper. Improve on the following satirical headline. For a given headline, you diligently evaluate: (1) Whether the headline is funny; (2) Whether the headline follows a clear satirical goal; (3) Whether the headline has sufficient substance and bite. Based on the outcomes of your review, you pick your favorite headline from the given suggestions and you make targeted revisions to it. Your output consists solely of the revised headline.",
},
{
"role": "user",
"content": suggestions_str,
},
],
model="llama-3.3-70b-versatile",
)
print("Winner: ", winner.choices[0].message.content)
winner_str = winner.choices[0].message.content
if not winner_str:
raise ValueError
return winner_str.strip(" \"'")
app = init()
def rewrite_summary_with_groq(orig: Original, improved_title: str) -> str:
    """Ask the Groq chat model for a summary that matches ``improved_title``.

    Args:
        orig: Article whose ``title`` and ``summary`` are put into the prompt.
        improved_title: The rewritten title the new summary should fit.

    Returns:
        The model's reply with surrounding spaces and quote characters removed.

    Raises:
        ValueError: If the reply content is empty.
    """
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY", "NO_API_KEY_FOUND"))
    prompt = f"Below there is an original title and an original summary. Then follows an improved title. Write an improved summary based on the original summary which fits to the improved title. Only output the improved summary.\n\nTitle:{orig.title}\nSummary:{orig.summary}\n---\nTitle:{improved_title}\nSummary:"
    conversation = [{"role": "user", "content": prompt}]
    reply = groq_client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
    )
    text = reply.choices[0].message.content
    if text:
        print("Improved summary", text)
        return text.strip(" \"'")
    raise ValueError
# Application instance that the route decorators in this module attach to.
app = FastAPI()
# Browser origins allowed to make cross-origin requests to this API.
origins = [
"http://localhost",
"http://localhost:8080",
]
# CORS: restrict origins to the list above; any method and any header is accepted from them.
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/improve-title")
def improve_headline(content: str):
return llm.rewrite_title(content)
return rewrite_title_with_groq(content)
@app.get("/improve-summary")
@ -98,26 +217,99 @@ def improve_summary(original_title: str, new_title: str, original_summary: str):
o = Original(
title=original_title, summary=original_summary, link="", date=datetime.now()
)
return llm.rewrite_summary(o, new_title)
return rewrite_summary_with_groq(o, new_title)
# TODO: Switch to lifecycle events to avoid deprecated method
@app.on_event("startup")
@repeat_every(seconds=REFRESH_PERIOD)
async def refresh_articles():
_ = await fetch_update()
def refresh_articles():
adding = keep_only_new_originals(grab_latest_originals())
improved = improve_originals(adding)
save_new_improvements(improved)
print(f"Updated articles. Added {len(improved)} new ones.")
@app.get("/update")
async def fetch_update(debug_print: bool = True):
adding = keep_only_new_originals(grab_latest_originals())
improved = improve_originals(adding)
repo.add_all(improved)
if debug_print:
print(f"Updated articles. Added {len(improved)} new ones.")
async def fetch_update():
await refresh_articles()
return json.dumps(improved)
@app.get("/improvements", response_class=HTMLResponse)
def list_improvements():
    """Render all stored improvements as HTML cards, newest original first.

    Returns:
        An HTML fragment: a button that swaps in ``/originals`` followed by
        one card (image, rewritten title, rewritten summary) per improvement.
    """
    # Local import keeps this function self-contained; html is stdlib.
    from html import escape

    improved = load_existing_improvements()
    cards = []
    for item in sorted(improved, key=lambda i: i.original.date, reverse=True):
        image = item.original.image_link or "https://placehold.co/300x200"
        # Title and summary come from the language model and the image URL
        # from the feed; escape all three so stray markup or quotes cannot
        # break out of the element or attribute they are placed in.
        cards.append(
            "\n".join(
                (
                    "",
                    '<div class="card">',
                    '<div class="card-img">',
                    f'<img src="{escape(image)}" width="600">',
                    "</div>",
                    f'<div class="card-title">{escape(item.title)}</div>',
                    f'<div class="card-summary">{escape(item.summary)}</div>',
                    "</div>",
                )
            )
        )
    return (
        """<button hx-get="/originals" hx-target="#content">Originals</button> """
        + "\n".join(cards)
    )
@app.get("/originals", response_class=HTMLResponse)
def list_originals():
    """Render the original articles of all stored improvements as HTML cards, newest first.

    Returns:
        An HTML fragment: a button that swaps in ``/improvements`` followed by
        one card (image, original title, original summary) per improvement.
    """
    # Local import keeps this function self-contained; html is stdlib.
    from html import escape

    improved = load_existing_improvements()
    cards = []
    for item in sorted(improved, key=lambda i: i.original.date, reverse=True):
        original = item.original
        image = original.image_link or "https://placehold.co/300x200"
        # The title and image URL originate from the external feed; escape
        # them so they cannot break out of their element or attribute.
        # NOTE(review): the summary is inserted as-is because Original only
        # strips <img> tags from it, so it may carry other feed markup that is
        # meant to render - confirm the feed content is sanitised upstream.
        cards.append(
            "\n".join(
                (
                    "",
                    '<div class="card">',
                    '<div class="card-img">',
                    f'<img src="{escape(image)}" width="600">',
                    "</div>",
                    f'<div class="card-title">{escape(original.title)}</div>',
                    f'<div class="card-summary">{original.summary}</div>',
                    "</div>",
                )
            )
        )
    return (
        """<button hx-get="/improvements" hx-target="#content">Improvements</button> """
        + "\n".join(cards)
    )
# Inline stylesheet for the article cards; root_route() interpolates it into the page's <style> element.
style = """
.card {
border: 1px solid #ccc;
padding: 10px;
margin: auto;
margin-bottom: 40px;
width: 600px;
}
.card-title {
font-size: 24px;
margin-bottom: 5px;
}
"""
@app.get("/", response_class=HTMLResponse)
def root_route():
    """Return the landing page.

    The page embeds the module-level ``style`` sheet and contains a
    ``#content`` div that requests ``/improvements`` via htmx on load.
    """
    before_style = """
<!DOCTYPE html>
<html>
<head>
<title>The Pollen Prophet</title>
<script src="https://unpkg.com/htmx.org@1.6.1"></script>
<style>
"""
    after_style = """
</style>
</head>
<body>
<h1>The Pollen Prophet</h1>
<h2>Making funny since 2025 what ought not bee.</h2>
<div hx-get="/improvements" hx-target="#content" hx-trigger="load" id="content"></div>
</body>
</html>
"""
    return f"{before_style}{style}{after_style}"
def start() -> None:
from uvicorn import run
@ -132,7 +324,7 @@ if __name__ == "__main__":
# save_new_improvements(improved)
# migrate to newer version
improved = repo.get_all()
improved = load_existing_improvements()
for imp in improved:
imp.original.__post_init__()
print(f"Old Title: {imp.original.title}")
@ -143,4 +335,4 @@ if __name__ == "__main__":
print(f"Summary: {imp.summary}")
print("-" * 50)
repo.add_all(improved)
save_new_improvements(improved)

View file

@ -1,22 +0,0 @@
import os
# Load environment variables from .env
from dataclasses import dataclass
from dotenv import load_dotenv
_ = load_dotenv()
@dataclass
class AiConfig:
    """Settings needed to talk to the LLM backend."""

    # API key, read from the GROQ_API_KEY environment variable by from_env().
    API_KEY: str

    @classmethod
    def from_env(cls) -> "AiConfig":
        """Build the config from the ``GROQ_API_KEY`` environment variable.

        Returns:
            An ``AiConfig`` holding the key.

        Raises:
            ValueError: If the variable is unset or empty.
        """
        api_key = os.getenv("GROQ_API_KEY", "")
        if not api_key:
            # Name the variable itself: interpolating the (empty) value gave
            # the unhelpful message " cannot be empty".
            raise ValueError("GROQ_API_KEY cannot be empty")
        return cls(API_KEY=api_key)

View file

@ -1,12 +0,0 @@
from dataclasses import dataclass, field
from uuid import uuid4

from prophet.domain.original import Original
@dataclass
class Improvement: # GoodJoke: Queen
original: Original
title: str
summary: str
id: str = str(uuid4())

View file

@ -1,21 +0,0 @@
from typing import Protocol
from prophet.domain.improvement import Improvement
class ImprovementNotFoundError(Exception):
    """Signals that a requested Improvement could not be found in a repository."""
class IImprovementRepo(Protocol):
    """Structural interface for storing and retrieving Improvement objects.

    Every method here only raises ``NotImplementedError``; concrete
    repositories supply the behaviour.
    """

    def add(self, improvement: Improvement) -> None:
        """Store one improvement."""
        raise NotImplementedError()

    def add_all(self, improvements: list[Improvement]) -> None:
        """Store several improvements."""
        raise NotImplementedError()

    def get(self, id: str) -> Improvement:
        """Return the improvement stored under ``id``."""
        raise NotImplementedError()

    def get_all(self) -> list[Improvement]:
        """Return every stored improvement."""
        raise NotImplementedError()

View file

@ -1,34 +0,0 @@
import hashlib
import re
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class Original: # BadJoke: Sting
title: str
summary: str
link: str
date: datetime
image_link: str | None = None
id: str = field(init=False)
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
img: str
m = re.match(r'<img src="(?P<img>.+?)"', s)
try:
img = m.group("img")
except (IndexError, NameError):
return ("", s)
if img:
rest = re.sub(r"<img src=.+?>", "", s)
return (img, rest)
def __post_init__(self):
self.id = hashlib.sha256(self.link.encode()).hexdigest()
extracted = self._extract_img(self.summary)
if extracted[0]:
self.image_link = extracted[0]
self.summary = extracted[1]

View file

@ -1,49 +0,0 @@
import pickle
from pathlib import Path
from typing import override
from prophet.domain.improvement import Improvement
from prophet.domain.improvement_repo import IImprovementRepo, ImprovementNotFoundError
class ImprovementPickleRepo(IImprovementRepo):
    """Improvement repository that keeps one pickle file per improvement.

    Each file lives directly in ``pickle_dir`` and is named after the
    improvement's ``id``.
    """

    pickle_dir: Path

    def __init__(self, pickle_dir: str | Path = "/tmp/pollenprophet") -> None:
        """Remember ``pickle_dir`` and create it (with parents) if it is missing."""
        self.pickle_dir = Path(pickle_dir)
        self.pickle_dir.mkdir(parents=True, exist_ok=True)

    @override
    def add(self, improvement: Improvement) -> None:
        """Pickle ``improvement`` to ``pickle_dir/<id>``, replacing any existing file.

        Write failures are printed rather than raised.
        """
        fname = self.pickle_dir / improvement.id
        try:
            with open(fname, "wb") as f:
                pickle.dump(improvement, f)
            print(f"Saved {fname}")
        except OSError as e:
            # Mode "wb" truncates an existing file and never raises
            # FileExistsError; OSError is what a failed open/write produces.
            print(f"Error saving file {fname}: {e}")

    @override
    def add_all(self, improvements: list[Improvement]) -> None:
        """Store every improvement in ``improvements`` via ``add``."""
        for imp in improvements:
            self.add(imp)

    @override
    def get(self, id: str) -> Improvement:
        """Load the improvement stored under ``id``.

        Raises:
            ImprovementNotFoundError: If no file with that name exists.
        """
        try:
            with open(self.pickle_dir / id, "rb") as f:
                # SECURITY NOTE(review): pickle.load executes code embedded in
                # the file; only point pickle_dir at a trusted location.
                improvement: Improvement = pickle.load(f)
        except FileNotFoundError as e:
            raise ImprovementNotFoundError(id) from e
        return improvement

    @override
    def get_all(self) -> list[Improvement]:
        """Load every improvement in ``pickle_dir``.

        Entries that are not regular files are skipped; files that are missing
        by the time they are read, or that cannot be unpickled, are reported
        with ``print`` and skipped.
        """
        improvements: list[Improvement] = []
        for fname in self.pickle_dir.iterdir():
            if not fname.is_file():
                # Opening a directory would raise IsADirectoryError.
                continue
            try:
                improvements.append(self.get(fname.name))
            except (ImprovementNotFoundError, pickle.UnpicklingError, EOFError):
                print(f"File {fname.absolute()} is not a valid Improvement.")
        return improvements

View file

@ -1,68 +0,0 @@
from groq import Groq
from prophet.config import AiConfig
from prophet.domain.original import Original
class LLMClient:
    """Thin wrapper around a Groq chat-completions client for rewriting titles and summaries."""

    config_ai: AiConfig
    client: Groq

    def __init__(
        self, config_ai: AiConfig | None = None, client: Groq | None = None
    ) -> None:
        """Use the given config and client, or build them from the environment when omitted."""
        if not config_ai:
            config_ai = AiConfig.from_env()
        self.config_ai = config_ai
        if not client:
            client = Groq(api_key=self.config_ai.API_KEY)
        self.client = client

    def rewrite_title(self, original_content: str) -> str:
        """Produce a rewritten headline in two model calls.

        The first call asks for several suggestions; the second asks the model
        to pick one of them and revise it.

        Returns:
            The revised headline with surrounding spaces and quotes removed.

        Raises:
            ValueError: If either model reply has empty content.
        """
        writer_messages = [
            {
                "role": "system",
                "content": "You are a comedy writer at a satirical newspaper. Improve on the following satirical headline. Your new headline is funny, can involve current political events and has an edge to it. Print only the suggestions, with one suggestion on each line.",
            },
            {
                "role": "user",
                "content": original_content,
            },
        ]
        writer_reply = self.client.chat.completions.create(
            messages=writer_messages,
            model="llama-3.3-70b-versatile",
        )
        candidates = writer_reply.choices[0].message.content
        if not candidates:
            raise ValueError
        print("Suggestions: ", candidates)

        editor_messages = [
            {
                "role": "system",
                "content": "You are an editor at a satirical newspaper. Improve on the following satirical headline. For a given headline, you diligently evaluate: (1) Whether the headline is funny; (2) Whether the headline follows a clear satirical goal; (3) Whether the headline has sufficient substance and bite. Based on the outcomes of your review, you pick your favorite headline from the given suggestions and you make targeted revisions to it. Your output consists solely of the revised headline.",
            },
            {
                "role": "user",
                "content": candidates,
            },
        ]
        editor_reply = self.client.chat.completions.create(
            messages=editor_messages,
            model="llama-3.3-70b-versatile",
        )
        chosen = editor_reply.choices[0].message.content
        # Printed before the emptiness check, so an empty reply is still logged.
        print("Winner: ", chosen)
        if not chosen:
            raise ValueError
        return chosen.strip(" \"'")

    def rewrite_summary(self, orig: Original, improved_title: str) -> str:
        """Ask the model for a summary that fits ``improved_title``.

        Args:
            orig: Article whose ``title`` and ``summary`` are put into the prompt.
            improved_title: The rewritten title the new summary should fit.

        Returns:
            The model's reply with surrounding spaces and quotes removed.

        Raises:
            ValueError: If the reply content is empty.
        """
        prompt = f"Below there is an original title and an original summary. Then follows an improved title. Write an improved summary based on the original summary which fits to the improved title. Only output the improved summary.\n\nTitle:{orig.title}\nSummary:{orig.summary}\n---\nTitle:{improved_title}\nSummary:"
        reply = self.client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        text = reply.choices[0].message.content
        if text:
            print("Improved summary", text)
            return text.strip(" \"'")
        raise ValueError

View file

@ -1,69 +0,0 @@
# pyright: reportUnusedFunction=false
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from prophet.domain.improvement_repo import IImprovementRepo
from prophet.infra.improvement_pickle_repo import ImprovementPickleRepo
repo: IImprovementRepo = ImprovementPickleRepo()
def define_routes(app: FastAPI):
    """Register the HTML routes ``/improvements``, ``/originals`` and ``/`` on ``app``.

    The list routes read from the module-level ``repo`` and return HTML
    fragments; the root route returns the page that loads them via htmx.
    """

    # NOTE(review): titles, summaries and image links are interpolated into
    # the markup without HTML escaping - confirm the stored content is trusted.
    @app.get("/improvements", response_class=HTMLResponse)
    def list_improvements():
        """Return one card per stored improvement, newest original first."""
        improved = repo.get_all()
        return (
            """<button hx-get="/originals" hx-target="#content">Originals</button> """
            + "\n".join(
                f"""
<div class="card">
<div class="card-img">
<img src="{item.original.image_link if item.original.image_link else "https://placehold.co/300x200"}" width="600">
</div>
<div class="card-title">{item.title}</div>
<div class="card-summary">{item.summary}</div>
</div>"""
                for item in sorted(
                    improved, key=lambda i: i.original.date, reverse=True
                )
            )
        )

    @app.get("/originals", response_class=HTMLResponse)
    def list_originals():
        """Return one card per original article, newest first."""
        improved = repo.get_all()
        return (
            """<button hx-get="/improvements" hx-target="#content">Improvements</button> """
            + "\n".join(
                f"""
<div class="card">
<div class="card-img">
<img src="{item.original.image_link if item.original.image_link else "https://placehold.co/300x200"}" width="600">
</div>
<div class="card-title">{item.original.title}</div>
<div class="card-summary">{item.original.summary}</div>
</div>"""
                for item in sorted(
                    improved, key=lambda i: i.original.date, reverse=True
                )
            )
        )

    @app.get("/", response_class=HTMLResponse)
    def root_route():
        """Return the landing page whose ``#content`` div loads ``/improvements`` on page load."""
        # The stylesheet <link> tag is closed with ">" here; it was previously
        # left open, which produced malformed markup in <head>.
        return """
<!DOCTYPE html>
<html>
<head>
<title>The Pollen Prophet</title>
<script src="https://unpkg.com/htmx.org@1.6.1"></script>
<link href="static/style.css" rel="stylesheet">
</head>
<body>
<h1>The Pollen Prophet</h1>
<h2>Making funny since 2025 what ought not bee.</h2>
<div hx-get="/improvements" hx-target="#content" hx-trigger="load" id="content"></div>
</body>
</html>
"""

Binary file not shown.

Before

Width:  |  Height:  |  Size: 288 KiB

View file

@ -1,12 +0,0 @@
/* Container for one article card: bordered, fixed width, centred horizontally. */
.card {
border: 1px solid #ccc;
padding: 10px;
margin: auto;
margin-bottom: 40px;
width: 600px;
}
/* Headline inside a card. */
.card-title {
font-size: 24px;
margin-bottom: 5px;
}