Extract images from summary in Original dataclasses
This commit is contained in:
parent
7a11e45d67
commit
742845a329
1 changed files with 19 additions and 0 deletions
|
|
@ -2,6 +2,7 @@ import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -28,11 +29,29 @@ class Original: # BadJoke: Sting
|
||||||
summary: str
|
summary: str
|
||||||
link: str
|
link: str
|
||||||
date: datetime
|
date: datetime
|
||||||
|
image_link: str | None = None
|
||||||
id: str = field(init=False)
|
id: str = field(init=False)
|
||||||
|
|
||||||
|
def _extract_img(self, s: str) -> tuple[str, str]: # [img_link, rest of string]
|
||||||
|
img: str
|
||||||
|
m = re.match(r'<img src="(?P<img>.+?)"', s)
|
||||||
|
try:
|
||||||
|
img = m.group("img")
|
||||||
|
except (IndexError, NameError):
|
||||||
|
return ("", s)
|
||||||
|
|
||||||
|
if img:
|
||||||
|
rest = re.sub(r"<img src=.+?>", "", s)
|
||||||
|
return (img, rest)
|
||||||
|
|
||||||
def __post_init__(self):
    """Fill in ``id`` and move an image link out of ``summary``.

    ``id`` is set to the hex SHA-256 digest of the encoded ``link``. The
    summary is then passed through ``_extract_img``; when that yields a
    non-empty image link, ``image_link`` and ``summary`` are overwritten
    with the two returned values.
    """
    link_bytes = self.link.encode()
    self.id = hashlib.sha256(link_bytes).hexdigest()

    img_link, remainder = self._extract_img(self.summary)
    if not img_link:
        # Nothing extracted: leave image_link and summary as they were.
        return
    self.image_link = img_link
    self.summary = remainder
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Improvement: # GoodJoke: Queen
|
class Improvement: # GoodJoke: Queen
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue