Add filter function to avoid adding existing articles
This commit is contained in:
parent
32f7298517
commit
fed9f0a3d8
1 changed file with 18 additions and 3 deletions
|
|
@@ -4,7 +4,6 @@ import pickle
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pprint import pprint
|
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
|
|
@@ -81,6 +80,22 @@ def load_existing_improvements() -> list[Improvement]:
|
||||||
return improvements
|
return improvements
|
||||||
|
|
||||||
|
|
||||||
|
def keep_only_new_originals(
    additional: list[Original], existing: list[Original] | None = None
) -> list[Original]:
    """Return the entries of ``additional`` whose ``id`` is not already known.

    Args:
        additional: Candidate originals to filter.
        existing: Originals to compare against. When omitted (``None``), the
            ``original`` of every improvement returned by
            ``load_existing_improvements()`` is used instead.

    Returns:
        A new list holding the entries of ``additional`` whose ``id`` does
        not occur among ``existing``, in their original order.
    """
    # Compare against None instead of relying on truthiness: an explicitly
    # passed empty list means "nothing exists yet" and must not trigger a
    # reload of the stored improvements.
    if existing is None:
        existing = [imp.original for imp in load_existing_improvements()]

    # Build the id set once so each membership test below is O(1).
    known_ids = {orig.id for orig in existing}
    return [new for new in additional if new.id not in known_ids]
|
||||||
|
|
||||||
|
|
||||||
def improve_originals(originals: list[Original]) -> list[Improvement]:
|
def improve_originals(originals: list[Original]) -> list[Improvement]:
|
||||||
improvements: list[Improvement] = []
|
improvements: list[Improvement] = []
|
||||||
for orig in originals:
|
for orig in originals:
|
||||||
|
|
@@ -179,8 +194,8 @@ def start() -> None:
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# start()
|
# start()
|
||||||
|
|
||||||
orig = grab_latest_originals()
|
adding = keep_only_new_originals(grab_latest_originals())
|
||||||
improved = improve_originals(orig)
|
improved = improve_originals(adding)
|
||||||
save_new_improvements(improved)
|
save_new_improvements(improved)
|
||||||
|
|
||||||
improved = load_existing_improvements()
|
improved = load_existing_improvements()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue