Extract images from summary in Original dataclasses
This commit is contained in:
parent
7a11e45d67
commit
742845a329
1 changed files with 19 additions and 0 deletions
|
|
@ -2,6 +2,7 @@ import hashlib
|
|||
import json
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
|
@ -28,11 +29,29 @@ class Original: # BadJoke: Sting
|
|||
summary: str
|
||||
link: str
|
||||
date: datetime
|
||||
image_link: str | None = None
|
||||
id: str = field(init=False)
|
||||
|
||||
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
|
||||
img: str
|
||||
m = re.match(r'<img src="(?P<img>.+?)"', s)
|
||||
try:
|
||||
img = m.group("img")
|
||||
except (IndexError, NameError):
|
||||
return ("", s)
|
||||
|
||||
if img:
|
||||
rest = re.sub(r"<img src=.+?>", "", s)
|
||||
return (img, rest)
|
||||
|
||||
def __post_init__(self):
|
||||
self.id = hashlib.sha256(self.link.encode()).hexdigest()
|
||||
|
||||
extracted = self._extract_img(self.summary)
|
||||
if extracted[0]:
|
||||
self.image_link = extracted[0]
|
||||
self.summary = extracted[1]
|
||||
|
||||
|
||||
@dataclass
|
||||
class Improvement: # GoodJoke: Queen
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue