From f8d9ae4d4b2451e53b10280b95c79bec27041c2b Mon Sep 17 00:00:00 2001 From: Marty Oehme Date: Thu, 14 Sep 2023 22:16:45 +0200 Subject: [PATCH] Add title word and char length options --- README.md | 47 ++++++++++++++++++++++++----- papis_bbt_formatter/__init__.py | 53 +++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 6985727..e12e26a 100644 --- a/README.md +++ b/README.md @@ -19,24 +19,57 @@ formater = bbt ref-format = bbt ``` -For now, the ref-format is simply `bbt` as well, though should I further develop this plugin (with additional options), -that settings will surely change. +For now, the ref-format *has* to start with `bbt`. -Currently, you can change the length that the `TitleShort` in `Name2008TitleShort` will be cut down to by setting: +### Title length + +Currently, you can change the length that the `TitleShort` in `Name2008TitleShort` will be cut down to by setting +the maximum length in words or in characters. + +To set a maximum word length, do: ```toml [settings] formater = bbt -ref-format = bbt[4] +ref-format = bbt[title-words=4] ``` -In this case, the title will be shortened to 4 words maximum (the default), change the number to shorten/lengthen to your preference. +In this case, the title will be shortened to 4 words maximum (the default), +change the number to shorten/lengthen to your preference. +Same idea for maximum character length: -This plugin is a rather simple adaption from [this](https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8) commit, -turned into an installable papis plugin and extended slightly for now. +```toml +[settings] +formater = bbt +ref-format = bbt[title-chars=10] +``` + +This will allow a maximum of 10 characters for the title. 
+Using both: + +```toml +[settings] +formater = bbt +ref-format = bbt[title-words=4][title-chars=15] +``` + +This will ensure a maximum of 4 words, however if they exceed 15 characters they will be cut off mid-word. +You can set either option to `-1` to turn it off: + +```toml +[settings] +formater = bbt +ref-format = bbt[title-words=4][title-chars=-1] +``` + +This will ensure that a maximum of 4 words will be placed in the ref, but they do not have a maximum character length, +so will always be fully written out (the default behavior without title length options provided). --- +For now this plugin is a rather simple adaption from [this](https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8) commit, +turned into an installable papis plugin and extended slightly. + If you spot a bug or have an idea feel free to open an issue.\ I might be slow to respond but will consider them all! diff --git a/papis_bbt_formatter/__init__.py b/papis_bbt_formatter/__init__.py index ab3a39e..4513627 100644 --- a/papis_bbt_formatter/__init__.py +++ b/papis_bbt_formatter/__init__.py @@ -1,5 +1,3 @@ -# adapted from https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8 -# with much gratitude. 
import re from typing import Any import papis.format @@ -10,7 +8,9 @@ import papis.logging logger = papis.logging.get_logger(__name__) -DEFAULT_TITLE_LENGTH=4 +DEFAULT_TITLE_LENGTH_WORDS = 3 +DEFAULT_TITLE_LENGTH_CHARS = -1 + class BBTFormatter(papis.format.Formater): """Provides zotero better-bibtex-like keys.""" @@ -28,32 +28,47 @@ class BBTFormatter(papis.format.Formater): if "author_list" in doc else doc["author"].split(maxsplit=1)[0] if "author" in doc - else "Unkown" + else "UNKNOWN" ) - title_unfmt = doc["title"] if "title" in doc else "No title" + title_unfmt = doc["title"] if "title" in doc else "NO TITLE" year_unfmt = str(doc["year"]) if "year" in doc else "0000" author = re.sub("[^a-z]+", "", author_unfmt.lower()) year = year_unfmt[-2:] - title = re.sub("-", " ", title_unfmt.lower()) - title = re.sub("[^0-9a-z ]+", "", title) - title = list( - map( - str.capitalize, - filter(lambda word: word and word not in SKIP_WORDS, title.split()), - ) - ) - title_len = self._title_length(fmt) - title = "".join(title[:title_len]) - return f"{author}{year}_{title}" + title = self.get_title(title_unfmt, fmt) + return f"{author}{year}{title}" else: return papis.format.PythonFormater().format(fmt, doc, doc_key, additional) - def _title_length(self, fmt: str) -> int: + def get_title(self, title: str, fmt: str) -> str: + title = re.sub("[^0-9a-z ]+", "", title.lower()) + title_words = list( + map( + str.capitalize, + filter(lambda word: word and word not in SKIP_WORDS, title.split()), + ) + ) + wlen = self._title_length_words(fmt) + clen = self._title_length_chars(fmt) + wlen = None if wlen == -1 else wlen + clen = None if clen == -1 else clen + title = "".join(title_words[:wlen])[:clen] + return title + + def _title_length_words(self, fmt: str) -> int: """Returns the length (in words) the title should be shortened to.""" - if match:=re.match(r'^bbt\[(\d+)\]', fmt): + if match := re.search(r"\[title-words=(-?\d+)\]", fmt): + logger.debug(f"Found title length: 
{match.group(1)} words.") return int(match.group(1)) - return DEFAULT_TITLE_LENGTH + return DEFAULT_TITLE_LENGTH_WORDS + + def _title_length_chars(self, fmt: str) -> int: + """Returns the length (in characters) the title should be shortened to.""" + if match := re.search(r"\[title-chars=(-?\d+)\]", fmt): + logger.debug(f"Found title length: {match.group(1)} chars.") + return int(match.group(1)) + return DEFAULT_TITLE_LENGTH_CHARS + SKIP_WORDS = set( [