Add filter function to avoid adding existing articles

Marty Oehme 2025-06-05 19:05:17 +02:00
parent 32f7298517
commit fed9f0a3d8
Signed by: Marty
GPG key ID: 4E535BC19C61886E


@@ -4,7 +4,6 @@ import pickle
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from pprint import pprint
 from uuid import uuid4
 import feedparser
@@ -81,6 +80,22 @@ def load_existing_improvements() -> list[Improvement]:
     return improvements
 
 
+def keep_only_new_originals(
+    additional: list[Original], existing: list[Original] | None = None
+):
+    if not existing:
+        existing = [e.original for e in load_existing_improvements()]
+    existing_hashes = set([e.id for e in existing])
+
+    remaining: list[Original] = []
+    for new in additional:
+        if new.id not in existing_hashes:
+            remaining.append(new)
+    return remaining
+
+
 def improve_originals(originals: list[Original]) -> list[Improvement]:
     improvements: list[Improvement] = []
     for orig in originals:
@@ -179,8 +194,8 @@ def start() -> None:
 if __name__ == "__main__":
     # start()
-    orig = grab_latest_originals()
-    improved = improve_originals(orig)
+    adding = keep_only_new_originals(grab_latest_originals())
+    improved = improve_originals(adding)
     save_new_improvements(improved)
     improved = load_existing_improvements()
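
For context on what the new filter does: keep_only_new_originals deduplicates freshly fetched articles by comparing each Original.id against the ids already wrapped in stored Improvements, so repeated runs only pass genuinely new articles on to improve_originals. A minimal sketch of that behaviour, using a stripped-down Original stand-in (the real dataclass in this repository carries more fields) and an explicit existing list instead of loading saved improvements from disk:

from dataclasses import dataclass

@dataclass
class Original:
    id: str  # stand-in; the real Original also holds the article content

def keep_only_new_originals(additional, existing):
    # Same core idea as the committed function: keep only ids not seen before.
    existing_hashes = set(e.id for e in existing)
    return [new for new in additional if new.id not in existing_hashes]

already_saved = [Original(id="abc"), Original(id="def")]
freshly_fetched = [Original(id="def"), Original(id="ghi")]

print([o.id for o in keep_only_new_originals(freshly_fetched, already_saved)])
# -> ['ghi']  ("def" is filtered out because it was already saved)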