Extract images from summary in Original dataclasses

This commit is contained in:
Marty Oehme 2025-06-05 22:33:48 +02:00
parent 7a11e45d67
commit 742845a329
Signed by: Marty
GPG key ID: 4E535BC19C61886E

View file

@ -2,6 +2,7 @@ import hashlib
import json
import os
import pickle
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
@ -28,11 +29,29 @@ class Original: # BadJoke: Sting
# Summary text; __post_init__ replaces it with the image-stripped text when
# an image link is extracted from it.
summary: str
# Hashed (SHA-256) in __post_init__ to produce `id`.
link: str
date: datetime
# None unless __post_init__ extracts an image link from `summary`.
image_link: str | None = None
# Not an __init__ argument (init=False); assigned in __post_init__.
id: str = field(init=False)
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
img: str
m = re.match(r'<img src="(?P<img>.+?)"', s)
try:
img = m.group("img")
except (IndexError, NameError):
return ("", s)
if img:
rest = re.sub(r"<img src=.+?>", "", s)
return (img, rest)
def __post_init__(self):
self.id = hashlib.sha256(self.link.encode()).hexdigest()
extracted = self._extract_img(self.summary)
if extracted[0]:
self.image_link = extracted[0]
self.summary = extracted[1]
@dataclass
class Improvement: # GoodJoke: Queen