Add filter function to avoid adding existing articles

This commit is contained in:
Marty Oehme 2025-06-05 19:05:17 +02:00
parent 32f7298517
commit fed9f0a3d8
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -4,7 +4,6 @@ import pickle
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from pprint import pprint
from uuid import uuid4 from uuid import uuid4
import feedparser import feedparser
@ -81,6 +80,22 @@ def load_existing_improvements() -> list[Improvement]:
return improvements return improvements
def keep_only_new_originals(
    additional: list[Original], existing: list[Original] | None = None
) -> list[Original]:
    """Return the items of *additional* whose id is not already known.

    Args:
        additional: Candidate originals to be added.
        existing: Known originals to deduplicate against. When None (or
            empty), the originals are loaded from the saved improvements.

    Returns:
        The subset of ``additional`` whose ``id`` does not appear among
        the existing originals, preserving input order.
    """
    if not existing:
        # NOTE(review): a falsy check means an explicitly-passed empty list
        # also triggers the disk load — presumably intended; confirm.
        existing = [e.original for e in load_existing_improvements()]
    # Set membership gives O(1) lookups instead of scanning a list per item.
    existing_hashes = {e.id for e in existing}
    return [new for new in additional if new.id not in existing_hashes]
def improve_originals(originals: list[Original]) -> list[Improvement]: def improve_originals(originals: list[Original]) -> list[Improvement]:
improvements: list[Improvement] = [] improvements: list[Improvement] = []
for orig in originals: for orig in originals:
@ -179,8 +194,8 @@ def start() -> None:
if __name__ == "__main__": if __name__ == "__main__":
# start() # start()
orig = grab_latest_originals() adding = keep_only_new_originals(grab_latest_originals())
improved = improve_originals(orig) improved = improve_originals(adding)
save_new_improvements(improved) save_new_improvements(improved)
improved = load_existing_improvements() improved = load_existing_improvements()