From 3948b9060c27ba68582ab1f339f6b4dd282aee97 Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Mon, 9 Jun 2025 22:16:00 +0200 Subject: [PATCH] feat: Remove html tags from original summary --- prophet/domain/original.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/prophet/domain/original.py b/prophet/domain/original.py index 9a5de11..cc8357d 100644 --- a/prophet/domain/original.py +++ b/prophet/domain/original.py @@ -26,10 +26,13 @@ class Original: # BadJoke: Sting return (img, rest) return ("", s) + def _remove_html_tags(self, s: str) -> str: + return re.sub(r"<.*?>", "", s) + def __post_init__(self): self.id = hashlib.sha256(self.link.encode()).hexdigest() extracted = self._extract_img(self.summary) if extracted[0]: self.image_link = extracted[0] - self.summary = extracted[1] + self.summary = self._remove_html_tags(extracted[1])