Add title word and char length options

This commit is contained in:
Marty Oehme 2023-09-14 22:16:45 +02:00
parent 87a3ffdfaf
commit f8d9ae4d4b
Signed by: Marty
GPG key ID: EDBF2ED917B2EF6A
2 changed files with 74 additions and 26 deletions

View file

@ -19,24 +19,57 @@ formater = bbt
ref-format = bbt
```
For now, the ref-format is simply `bbt` as well, though should I further develop this plugin (with additional options),
that settings will surely change.
For now, the ref-format *has* to start with `bbt`.
Currently, you can change the length that the `TitleShort` in `Name2008TitleShort` will be cut down to by setting:
### Title length
Currently, you can change the length that the `TitleShort` in `Name2008TitleShort` will be cut down to by setting
the maximum length in words or in characters.
To set a maximum word length, do:
```toml
[settings]
formater = bbt
ref-format = bbt[4]
ref-format = bbt[title-words=4]
```
In this case, the title will be shortened to 4 words maximum (the default), change the number to shorten/lengthen to your preference.
In this case, the title will be shortened to 4 words maximum (the default),
change the number to shorten/lengthen to your preference.
Same idea for maximum character length:
This plugin is a rather simple adaption from [this](https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8) commit,
turned into an installable papis plugin and extended slightly for now.
```toml
[settings]
formater = bbt
ref-format = bbt[title-chars=10]
```
This will allow a maximum of 10 characters for the title.
Using both:
```toml
[settings]
formater = bbt
ref-format = bbt[title-words=4][title-chars=15]
```
This will ensure a maximum of 4 words, however if they go more than 20 characters they will be cut off mid-word.
You can set either option to `-1` to turn it off:
```toml
[settings]
formater = bbt
ref-format = bbt[title-words=4][title-chars=-1]
```
This will ensure that a maximum of 4 words will be placed in the ref, but they do not have a maximum character length,
so will always be fully written out (the default behavior without title length options provided).
---
For now this plugin is a rather simple adaption from [this](https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8) commit,
turned into an installable papis plugin and extended slightly.
If you spot a bug or have an idea feel free to open an issue.\
I might be slow to respond but will consider them all!

View file

@ -1,5 +1,3 @@
# adapted from https://github.com/hrdl-github/papis/commit/b9b9c6eaa3de159e1b210174ef49e90a89271eb8
# with much gratitude.
import re
from typing import Any
import papis.format
@ -10,7 +8,9 @@ import papis.logging
logger = papis.logging.get_logger(__name__)
DEFAULT_TITLE_LENGTH=4
DEFAULT_TITLE_LENGTH_WORDS = 3
DEFAULT_TITLE_LENGTH_CHARS = -1
class BBTFormatter(papis.format.Formater):
"""Provides zotero better-bibtex-like keys."""
@ -28,32 +28,47 @@ class BBTFormatter(papis.format.Formater):
if "author_list" in doc
else doc["author"].split(maxsplit=1)[0]
if "author" in doc
else "Unkown"
else "UNKNOWN"
)
title_unfmt = doc["title"] if "title" in doc else "No title"
title_unfmt = doc["title"] if "title" in doc else "NO TITLE"
year_unfmt = str(doc["year"]) if "year" in doc else "0000"
author = re.sub("[^a-z]+", "", author_unfmt.lower())
year = year_unfmt[-2:]
title = re.sub("-", " ", title_unfmt.lower())
title = re.sub("[^0-9a-z ]+", "", title)
title = list(
map(
str.capitalize,
filter(lambda word: word and word not in SKIP_WORDS, title.split()),
)
)
title_len = self._title_length(fmt)
title = "".join(title[:title_len])
return f"{author}{year}_{title}"
title = self.get_title(title_unfmt, fmt)
return f"{author}{year}{title}"
else:
return papis.format.PythonFormater().format(fmt, doc, doc_key, additional)
def _title_length(self, fmt: str) -> int:
def get_title(self, title: str, fmt: str) -> str:
title = re.sub("[^0-9a-z ]+", "", title.lower())
title_words = list(
map(
str.capitalize,
filter(lambda word: word and word not in SKIP_WORDS, title.split()),
)
)
wlen = self._title_length_words(fmt)
clen = self._title_length_chars(fmt)
wlen = None if wlen == -1 else wlen
clen = None if clen == -1 else clen
title = "".join(title_words[:wlen])[:clen]
return title
def _title_length_words(self, fmt: str) -> int:
"""Returns the length (in words) the title should be shortened to."""
if match:=re.match(r'^bbt\[(\d+)\]', fmt):
if match := re.search(r"\[title-words=(-?\d+)\]", fmt):
logger.debug(f"Found title length: {match.group(1)} words.")
return int(match.group(1))
return DEFAULT_TITLE_LENGTH
return DEFAULT_TITLE_LENGTH_WORDS
def _title_length_chars(self, fmt: str) -> int:
"""Returns the length (in characters) the title should be shortened to."""
if match := re.search(r"\[title-chars=(-?\d+)\]", fmt):
logger.debug(f"Found title length: {match.group(1)} chars.")
return int(match.group(1))
return DEFAULT_TITLE_LENGTH_CHARS
SKIP_WORDS = set(
[