Extract data classes and llm class
This commit is contained in:
parent
f96b6413e2
commit
c537b1e750
3 changed files with 118 additions and 108 deletions
43
prophet/data.py
Normal file
43
prophet/data.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
@dataclass
|
||||
class Original: # BadJoke: Sting
|
||||
title: str
|
||||
summary: str
|
||||
link: str
|
||||
date: datetime
|
||||
image_link: str | None = None
|
||||
id: str = field(init=False)
|
||||
|
||||
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
|
||||
img: str
|
||||
m = re.match(r'<img src="(?P<img>.+?)"', s)
|
||||
try:
|
||||
img = m.group("img")
|
||||
except (IndexError, NameError):
|
||||
return ("", s)
|
||||
|
||||
if img:
|
||||
rest = re.sub(r"<img src=.+?>", "", s)
|
||||
return (img, rest)
|
||||
|
||||
def __post_init__(self):
|
||||
self.id = hashlib.sha256(self.link.encode()).hexdigest()
|
||||
|
||||
extracted = self._extract_img(self.summary)
|
||||
if extracted[0]:
|
||||
self.image_link = extracted[0]
|
||||
self.summary = extracted[1]
|
||||
|
||||
|
||||
@dataclass
class Improvement:  # GoodJoke: Queen
    """A rewritten title and summary paired with the ``Original`` it derives from.

    Each instance receives its own random UUID string as ``id`` unless one
    is passed explicitly.
    """

    original: Original
    title: str
    summary: str
    # default_factory runs per instance; a plain ``str(uuid4())`` default
    # would be evaluated once at class definition and shared by all instances.
    id: str = field(default_factory=lambda: str(uuid4()))
|
||||
Loading…
Add table
Add a link
Reference in a new issue