feat: Remove old articles from database
Some checks failed
Create and publish a Docker image / build-and-push-image (push) Has been cancelled

This commit is contained in:
Marty Oehme 2025-06-19 17:57:52 +02:00
parent 90a419eebe
commit 434773025b
Signed by: Marty
GPG key ID: 4E535BC19C61886E
3 changed files with 50 additions and 4 deletions

View file

@ -19,6 +19,7 @@ BEE_FEED = "https://babylonbee.com/feed"
BEE_FEED_TEST = "test/resources/feed_short.atom" # NOTE: Switch out when done testing BEE_FEED_TEST = "test/resources/feed_short.atom" # NOTE: Switch out when done testing
REFRESH_PERIOD = 3600 # between fetching articles, in seconds REFRESH_PERIOD = 3600 # between fetching articles, in seconds
NUM_ARTICLES_TO_KEEP = 50
llm: GroqClient = GroqClient() llm: GroqClient = GroqClient()
repo: IImprovementRepo = ImprovementSupaRepo() repo: IImprovementRepo = ImprovementSupaRepo()
@ -107,6 +108,18 @@ def improve_summary(original_title: str, new_title: str, original_summary: str):
@repeat_every(seconds=REFRESH_PERIOD) @repeat_every(seconds=REFRESH_PERIOD)
async def refresh_articles(): async def refresh_articles():
_ = await fetch_update() _ = await fetch_update()
truncate_to(NUM_ARTICLES_TO_KEEP)
def truncate_to(max_num: int = 50) -> None:
    """Trim the stored improvements down to at most ``max_num`` entries.

    Every item past the first ``max_num`` returned by ``repo.get_all()`` is
    removed with a single ``repo.remove_all`` call. A ``ValueError`` raised
    by the repository is reported on stdout instead of being propagated, so
    the caller is not interrupted by a failed clean-up.

    NOTE(review): which entries are dropped depends entirely on the order in
    which ``repo.get_all()`` returns them -- presumably newest first; confirm
    against the repository implementation.

    Args:
        max_num: Number of leading entries to keep.
    """
    improvements = repo.get_all()
    if len(improvements) <= max_num:
        # Nothing beyond the limit, so there is nothing to delete.
        return

    to_delete_ids = [imp.id for imp in improvements[max_num:]]
    try:
        _ = repo.remove_all(to_delete_ids)
    except ValueError:
        # Show the IDs that were handed to remove_all so the failure can be
        # traced back to concrete rows.
        print(f"Error deleting articles with IDs: {to_delete_ids}")
@app.get("/update") @app.get("/update")
@ -128,11 +141,12 @@ def start() -> None:
if __name__ == "__main__": if __name__ == "__main__":
# start() # start()
## ADD MANUALLY
# adding = keep_only_new_originals(grab_latest_originals()) # adding = keep_only_new_originals(grab_latest_originals())
# improved = improve_originals(adding) # improved = improve_originals(adding)
# save_new_improvements(improved) # save_new_improvements(improved)
# migrate to newer version ## SHOW ALL
improved = repo.get_all() improved = repo.get_all()
for imp in improved: for imp in improved:
imp.original.__post_init__() imp.original.__post_init__()
@ -144,4 +158,7 @@ if __name__ == "__main__":
print(f"Summary: {imp.summary}") print(f"Summary: {imp.summary}")
print("-" * 50) print("-" * 50)
repo.add_all(improved) # repo.add_all(improved)
## DELETE TOO_MANY
# truncate_to(48)

View file

@ -19,3 +19,11 @@ class IImprovementRepo(Protocol):
def get_all(self) -> list[Improvement]: def get_all(self) -> list[Improvement]:
raise NotImplementedError raise NotImplementedError
def remove(self, id: str) -> Improvement:
    """Delete the improvement identified by ``id``.

    Returns:
        The single improvement that was deleted.
    """
    raise NotImplementedError()
def remove_all(self, ids: list[str]) -> list[Improvement]:
    """Delete every improvement whose identifier is listed in ``ids``.

    Returns:
        The list of improvements that were deleted.
    """
    raise NotImplementedError()

View file

@ -1,7 +1,6 @@
from datetime import timezone from datetime import datetime, timezone
from typing import override from typing import override
from datetime import datetime
from supabase import Client from supabase import Client
from prophet.config import SupaConfig from prophet.config import SupaConfig
@ -59,6 +58,28 @@ class ImprovementSupaRepo(IImprovementRepo):
.data .data
] ]
@override
def remove(self, id: str) -> Improvement:
    """Delete the row whose ``uuid`` column equals ``id`` and return it.

    Returns:
        The improvement built from the first row in the delete response.

    Raises:
        ValueError: If the delete response contains no rows.
    """
    deleted_rows = (
        self.client.table(self.config.TABLE).delete().eq("uuid", id).execute().data
    )
    if not deleted_rows:
        # Name the offending uuid so callers can log something actionable.
        raise ValueError(f"No improvement was deleted for uuid {id!r}")
    return self._from_tbl_row(deleted_rows[0])
@override
def remove_all(self, ids: list[str]) -> list[Improvement]:
    """Delete all rows whose ``uuid`` column is contained in ``ids``.

    Only a completely empty delete response is treated as an error; when
    just some of the requested uuids come back, the returned list is simply
    shorter than ``ids``.

    Returns:
        One improvement per row in the delete response.

    Raises:
        ValueError: If the delete response contains no rows at all.
    """
    deleted_rows = (
        self.client.table(self.config.TABLE)
        .delete()
        .in_("uuid", ids)
        .execute()
        .data
    )
    if not deleted_rows:
        # Include the requested uuids so the failure is diagnosable.
        raise ValueError(f"No improvements were deleted for uuids {ids!r}")
    return [self._from_tbl_row(row) for row in deleted_rows]
def _to_tbl_row(self, imp: Improvement) -> dict[str, str | int]: def _to_tbl_row(self, imp: Improvement) -> dict[str, str | int]:
return { return {
"uuid": imp.id, "uuid": imp.id,