Extract data classes and llm class

This commit is contained in:
Marty Oehme 2025-06-06 10:26:44 +02:00
parent f96b6413e2
commit c537b1e750
Signed by: Marty
GPG key ID: 4E535BC19C61886E
3 changed files with 118 additions and 108 deletions

43
prophet/data.py Normal file
View file

@ -0,0 +1,43 @@
import hashlib
import re
from dataclasses import dataclass, field
from datetime import datetime
from uuid import uuid4
@dataclass
class Original: # BadJoke: Sting
title: str
summary: str
link: str
date: datetime
image_link: str | None = None
id: str = field(init=False)
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
img: str
m = re.match(r'<img src="(?P<img>.+?)"', s)
try:
img = m.group("img")
except (IndexError, NameError):
return ("", s)
if img:
rest = re.sub(r"<img src=.+?>", "", s)
return (img, rest)
def __post_init__(self):
self.id = hashlib.sha256(self.link.encode()).hexdigest()
extracted = self._extract_img(self.summary)
if extracted[0]:
self.image_link = extracted[0]
self.summary = extracted[1]
@dataclass
class Improvement:  # GoodJoke: Queen
    """A rewritten title and summary attached to the ``Original`` it is based on.

    ``id`` defaults to a freshly generated UUID4 string for each instance; a
    caller may still pass an explicit ``id``.
    """

    original: Original
    title: str
    summary: str
    # A plain `= str(uuid4())` default is evaluated once when the class is
    # defined, so every instance would share the same id. default_factory
    # runs per instance instead.
    id: str = field(default_factory=lambda: str(uuid4()))