local pandoc = require('pandoc')
print('zotero-live-citations f47be20')

-- Best-effort update check: fetch the latest published revision of this filter
-- and print a hint when it differs from the bundled one ('f47be20').
-- The lookup must never abort the document conversion, so a fetch that raises
-- (e.g. no network) or yields no body is reported on stderr and then ignored.
-- `mt` and `latest` stay file-level locals; both remain nil when the lookup fails.
local mt, latest
do
  local ok, mime, body = pcall(pandoc.mediabag.fetch,
    'https://retorque.re/zotero-better-bibtex/exporting/zotero.lua.revision')
  if ok and type(body) == 'string' then
    mt, latest = mime, string.sub(body, 1, 10)
  else
    io.stderr:write('zotero-live-citations: could not check for a newer version\n')
  end
end
if latest and 'f47be20' ~= latest then
  print('new version "' .. latest .. '" available at https://retorque.re/zotero-better-bibtex/exporting')
end

do
  local _ENV = _ENV
  --- Loader for the `locator` module: splits a citation suffix into a locator
  -- label, the locator itself and the remaining suffix text.
  -- Relies on `require('utils')` (defined elsewhere in this bundle) and on a
  -- global `lpeg`; the explicit `require('lpeg')` is left commented out, so the
  -- host is presumably expected to provide it (pandoc does for Lua filters).
  package.preload["locator"] = function(...)
    local utils = require('utils')
    -- local lpeg = require('lpeg')

    -- Each label pattern accepts the long form and its abbreviations and
    -- captures the canonical label name as a constant string.
    local book = (lpeg.P('book') + lpeg.P('bk.') + lpeg.P('bks.')) / 'book'
    local chapter = (lpeg.P('chapter') + lpeg.P('chap.') + lpeg.P('chaps.')) / 'chapter'
    local column = (lpeg.P('column') + lpeg.P('col.') + lpeg.P('cols.')) / 'column'
    local figure = (lpeg.P('figure') + lpeg.P('fig.') + lpeg.P('figs.')) / 'figure'
    local folio = (lpeg.P('folio') + lpeg.P('fol.') + lpeg.P('fols.')) / 'folio'
    local number = (lpeg.P('number') + lpeg.P('no.') + lpeg.P('nos.')) / 'number'
    local line = (lpeg.P('line') + lpeg.P('l.') + lpeg.P('ll.')) / 'line'
    local note = (lpeg.P('note') + lpeg.P('n.') + lpeg.P('nn.')) / 'note'
    local opus = (lpeg.P('opus') + lpeg.P('op.') + lpeg.P('opp.')) / 'opus'
    local page = (lpeg.P('page') + lpeg.P('p.') + lpeg.P('pp.')) / 'page'
    local paragraph = (lpeg.P('paragraph') + lpeg.P('para.') + lpeg.P('paras.') + lpeg.P('¶¶') + lpeg.P('¶')) /
        'paragraph'
    local part = (lpeg.P('part') + lpeg.P('pt.') + lpeg.P('pts.')) / 'part'
    local section = (lpeg.P('section') + lpeg.P('sec.') + lpeg.P('secs.') + lpeg.P('§§') + lpeg.P('§')) /
        'section'
    local subverbo = (lpeg.P('sub verbo') + lpeg.P('s.v.') + lpeg.P('s.vv.')) / 'sub verbo'
    local verse = (lpeg.P('verse') + lpeg.P('v.') + lpeg.P('vv.')) / 'verse'
    local volume = (lpeg.P('volume') + lpeg.P('vol.') + lpeg.P('vols.')) / 'volume'
    local label = book + chapter + column + figure + folio + number + line + note + opus + page + paragraph + part +
        section + subverbo + verse + volume

    local whitespace = lpeg.P(' ') ^ 0
    local nonspace = lpeg.P(1) - lpeg.S(' ')
    local nonbrace = lpeg.P(1) - lpeg.S('{}')

    local word = nonspace ^ 1 / 1
    -- local roman = lpeg.S('IiVvXxLlCcDdMm]')^1
    -- Named `digits` so it no longer shadows the `number` label pattern above.
    local digits = lpeg.R('09') ^ 1 -- + roman

    -- A single number or a dash-separated range, then a comma-separated list of those.
    local numbers = digits * (whitespace * lpeg.S('-') ^ 1 * whitespace * digits) ^ -1
    local ranges = (numbers * (whitespace * lpeg.P(',') * whitespace * numbers) ^ 0) / 1

    -- local braced_locator = lpeg.P('{') * lpeg.Cs(label + lpeg.Cc('page')) * whitespace * lpeg.C(nonbrace^1) * lpeg.P('}')
    local braced_locator = lpeg.P('{') * label * whitespace * lpeg.C(nonbrace ^ 1) * lpeg.P('}')
    local braced_implicit_locator = lpeg.P('{') * lpeg.Cc('page') * lpeg.Cs(numbers) * lpeg.P('}')
    local locator = braced_locator + braced_implicit_locator + (label * whitespace * ranges) +
        (label * whitespace * word) + (lpeg.Cc('page') * ranges)
    local remainder = lpeg.C(lpeg.P(1) ^ 0)

    -- Captures, in order: leading ","/spaces, label, locator, rest of the input.
    local suffix = lpeg.C(lpeg.P(',') ^ -1 * whitespace) * locator * remainder

    -- Fallback: any "{...}" group; captures leading ","/spaces, brace content, rest.
    local pseudo_locator = lpeg.C(lpeg.P(',') ^ -1 * whitespace) * lpeg.P('{') * lpeg.C(nonbrace ^ 0) * lpeg.P('}') *
        remainder

    -- Table-capture wrappers are built once here instead of on every parse call.
    local suffix_captures = lpeg.Ct(suffix)
    local pseudo_captures = lpeg.Ct(pseudo_locator)

    local module = {}

    --- Split a citation suffix into label, locator and remaining text.
    -- @param input      suffix text, matched from its first character
    -- @param shortlabel accepted for interface compatibility; not used here
    -- @return label   canonical label name ('page', 'chapter', ...), or nil when
    --                 no locator was recognised
    -- @return locator locator text, or nil when no locator was recognised
    -- @return rest    leading text (dropped when it is only a comma) followed by
    --                 whatever came after the locator; the untouched input when
    --                 nothing matched
    function module.parse(input, shortlabel)
      local parsed = suffix_captures:match(input)
      if parsed then
        local _prefix, _label, _locator, _suffix = table.unpack(parsed)
        if utils.trim(_prefix) == ',' then _prefix = '' end
        return _label, _locator, _prefix .. _suffix
      end

      parsed = pseudo_captures:match(input)
      if parsed then
        local _prefix, _locator, _suffix = table.unpack(parsed)
        if utils.trim(_prefix) == ',' then _prefix = '' end
        -- return nil, nil, _prefix .. _locator .. _suffix
        -- A braced group without a recognised label is treated as a page locator.
        return 'page', _locator, _prefix .. _suffix
      end

      return nil, nil, input
    end

    return module
  end
end

do
  local _ENV = _ENV
  --- Loader for the `lunajson` facade: one shared decoder and encoder instance
  -- plus the SAX parser constructors.
  package.preload["lunajson"] = function(...)
    local newdecoder = require 'lunajson.decoder'
    local newencoder = require 'lunajson.encoder'
    local sax = require 'lunajson.sax'
    -- If you need multiple contexts of decoder and/or encoder,
    -- you can require lunajson.decoder and/or lunajson.encoder directly.
    return {
      decode = newdecoder(),
      encode = newencoder(),
      newparser = sax.newparser,
      newfileparser = sax.newfileparser,
    }
  end
end
do
  local _ENV = _ENV
  --- Loader for `lunajson.decoder`; the module value is the `newdecoder` factory.
  package.preload["lunajson.decoder"] = function(...)
    local setmetatable, tonumber, tostring =
        setmetatable, tonumber, tostring
    local floor, inf =
        math.floor, math.huge
    local mininteger = math.mininteger or nil
    local byte, char, find, gsub, match, sub =
        string.byte, string.char, string.find, string.gsub, string.match, string.sub

    local function _decode_error(pos, errmsg)
      error("parse error at " .. pos .. ": " .. errmsg, 2)
    end

    local f_str_ctrl_pat
    if _VERSION == "Lua 5.1" then
      -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
      f_str_ctrl_pat = '[^\32-\255]'
    else
      f_str_ctrl_pat = '[\0-\31]'
    end

    -- From here on no global can be reached; everything needed was localised above.
    local _ENV = nil


    --- Create an independent decoder.
    -- @return decode(json, pos, nullv, arraylen)
    --   json     string to parse
    --   pos      optional start position; when given, decode returns the value
    --            and the position just past it and does not check trailing text
    --   nullv    value returned for JSON `null`
    --   arraylen when truthy, every decoded array stores its length at index 0
    --   Without `pos`, anything but whitespace after the value is an error.
    local function newdecoder()
      local json, pos, nullv, arraylen, rec_depth

      -- `f` is the temporary for dispatcher[c] and
      -- the dummy for the first return value of `find`
      local dispatcher, f

      --[[
        Helper
      --]]
      local function decode_error(errmsg)
        return _decode_error(pos, errmsg)
      end

      --[[
        Invalid
      --]]
      local function f_err()
        decode_error('invalid value')
      end

      --[[
        Constants
        Each handler runs with `pos` just past the first character of the literal.
      --]]
      -- null
      local function f_nul()
        if sub(json, pos, pos + 2) == 'ull' then
          pos = pos + 3
          return nullv
        end
        decode_error('invalid value')
      end

      -- false
      local function f_fls()
        if sub(json, pos, pos + 3) == 'alse' then
          pos = pos + 4
          return false
        end
        decode_error('invalid value')
      end

      -- true
      local function f_tru()
        if sub(json, pos, pos + 2) == 'rue' then
          pos = pos + 3
          return true
        end
        decode_error('invalid value')
      end

      --[[
        Numbers
        Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
        is captured as a number and its conformance to the JSON spec is checked.
      --]]
      -- deal with non-standard locales: when tostring() shows a radix mark other
      -- than '.', rewrite the JSON '.' to that mark before calling tonumber.
      local radixmark = match(tostring(0.5), '[^0-9]')
      local fixedtonumber = tonumber
      if radixmark ~= '.' then
        -- '%' is the only character that is special in a gsub replacement string.
        if radixmark == '%' then
          radixmark = '%%'
        end
        fixedtonumber = function(s)
          -- '%.' matches a literal dot only. The result goes through a local so
          -- gsub's second return value (the match count) is not handed to
          -- tonumber as a base.
          local localized = gsub(s, '%.', radixmark)
          return tonumber(localized)
        end
      end

      local function number_error()
        return decode_error('invalid number')
      end

      -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
      -- Runs with `pos` just past the leading '0'; `mns` is true after a minus sign.
      local function f_zro(mns)
        local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0

        if num == '' then
          if c == '' then
            if mns then
              return -0.0
            end
            return 0
          end

          if c == 'e' or c == 'E' then
            num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
            if c == '' then
              pos = pos + #num
              if mns then
                return -0.0
              end
              return 0.0
            end
          end
          number_error()
        end

        -- the fraction must start with '.' and must not end with it
        if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
          number_error()
        end

        if c ~= '' then
          if c == 'e' or c == 'E' then
            num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
          end
          if c ~= '' then
            number_error()
          end
        end

        pos = pos + #num
        c = fixedtonumber(num)

        if mns then
          c = -c
        end
        return c
      end

      -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
      -- Runs with `pos` just past the first digit, hence the step back.
      local function f_num(mns)
        pos = pos - 1
        local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos)
        if byte(num, -1) == 0x2E then -- error if ended with period
          number_error()
        end

        if c ~= '' then
          if c ~= 'e' and c ~= 'E' then
            number_error()
          end
          num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
          if not num or c ~= '' then
            number_error()
          end
        end

        pos = pos + #num
        c = fixedtonumber(num)

        if mns then
          c = -c
          -- a digits-only literal that compares equal to mininteger is returned
          -- as mininteger itself
          if c == mininteger and not find(num, '[^0-9]') then
            c = mininteger
          end
        end
        return c
      end

      -- skip minus sign
      local function f_mns()
        local c = byte(json, pos)
        if c then
          pos = pos + 1
          if c > 0x30 then
            if c < 0x3A then
              return f_num(true)
            end
          else
            if c > 0x2F then
              return f_zro(true)
            end
          end
        end
        decode_error('invalid number')
      end

      --[[
        Strings
      --]]
      -- Hex digit values indexed by (byte - 47), i.e. '0' -> 1 ... 'f' -> 55;
      -- every non-hex slot and every index outside the table yields inf.
      local f_str_hextbl = {
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, -- '0'..'9'
        inf, inf, inf, inf, inf, inf, inf,                -- ':'..'@'
        0xA, 0xB, 0xC, 0xD, 0xE, 0xF,                     -- 'A'..'F'
        inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, -- 'G'..'P'
        inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, -- 'Q'..'Z'
        inf, inf, inf, inf, inf, inf,                     -- '['..'`'
        0xA, 0xB, 0xC, 0xD, 0xE, 0xF,                     -- 'a'..'f'
        __index = function()
          return inf
        end
      }
      setmetatable(f_str_hextbl, f_str_hextbl)

      local f_str_escapetbl = {
        ['"'] = '"',
        ['\\'] = '\\',
        ['/'] = '/',
        ['b'] = '\b',
        ['f'] = '\f',
        ['n'] = '\n',
        ['r'] = '\r',
        ['t'] = '\t',
        __index = function()
          decode_error("invalid escape sequence")
        end
      }
      setmetatable(f_str_escapetbl, f_str_escapetbl)

      local function surrogate_first_error()
        return decode_error("1st surrogate pair byte not continued by 2nd")
      end

      -- gsub callback for one escape sequence: `ch` is the character after the
      -- backslash, `ucode` the (up to five) non-backslash characters following it.
      local f_str_surrogate_prev = 0
      local function f_str_subst(ch, ucode)
        if ch == 'u' then
          local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
          if not c4 then
            -- fewer than four characters after \u: report a parse error instead
            -- of failing on nil arithmetic below
            decode_error("invalid unicode codepoint literal")
          end
          ucode = f_str_hextbl[c1 - 47] * 0x1000 +
              f_str_hextbl[c2 - 47] * 0x100 +
              f_str_hextbl[c3 - 47] * 0x10 +
              f_str_hextbl[c4 - 47]
          if ucode ~= inf then
            if ucode < 0x80 then -- 1byte
              if rest then
                return char(ucode, rest)
              end
              return char(ucode)
            elseif ucode < 0x800 then -- 2bytes
              c1 = floor(ucode / 0x40)
              c2 = ucode - c1 * 0x40
              c1 = c1 + 0xC0
              c2 = c2 + 0x80
              if rest then
                return char(c1, c2, rest)
              end
              return char(c1, c2)
            elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
              c1 = floor(ucode / 0x1000)
              ucode = ucode - c1 * 0x1000
              c2 = floor(ucode / 0x40)
              c3 = ucode - c2 * 0x40
              c1 = c1 + 0xE0
              c2 = c2 + 0x80
              c3 = c3 + 0x80
              if rest then
                return char(c1, c2, c3, rest)
              end
              return char(c1, c2, c3)
            elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
              if f_str_surrogate_prev == 0 then
                f_str_surrogate_prev = ucode
                if not rest then
                  return ''
                end
                surrogate_first_error()
              end
              f_str_surrogate_prev = 0
              surrogate_first_error()
            else -- surrogate pair 2nd
              if f_str_surrogate_prev ~= 0 then
                ucode = 0x10000 +
                    (f_str_surrogate_prev - 0xD800) * 0x400 +
                    (ucode - 0xDC00)
                f_str_surrogate_prev = 0
                c1 = floor(ucode / 0x40000)
                ucode = ucode - c1 * 0x40000
                c2 = floor(ucode / 0x1000)
                ucode = ucode - c2 * 0x1000
                c3 = floor(ucode / 0x40)
                c4 = ucode - c3 * 0x40
                c1 = c1 + 0xF0
                c2 = c2 + 0x80
                c3 = c3 + 0x80
                c4 = c4 + 0x80
                if rest then
                  return char(c1, c2, c3, c4, rest)
                end
                return char(c1, c2, c3, c4)
              end
              decode_error("2nd surrogate pair byte appeared without 1st")
            end
          end
          decode_error("invalid unicode codepoint literal")
        end
        if f_str_surrogate_prev ~= 0 then
          f_str_surrogate_prev = 0
          surrogate_first_error()
        end
        return f_str_escapetbl[ch] .. ucode
      end

      -- caching interpreted keys for speed
      local f_str_keycache = setmetatable({}, { __mode = "v" })

      -- Runs with `pos` just past the opening quote; `iskey` enables the key cache.
      local function f_str(iskey)
        local newpos = pos
        local tmppos, c1, c2
        repeat
          newpos = find(json, '"', newpos, true) -- search '"'
          if not newpos then
            decode_error("unterminated string")
          end
          tmppos = newpos - 1
          newpos = newpos + 1
          c1, c2 = byte(json, tmppos - 1, tmppos)
          if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s
            repeat
              tmppos = tmppos - 2
              c1, c2 = byte(json, tmppos - 1, tmppos)
            until c2 ~= 0x5C or c1 ~= 0x5C
            tmppos = newpos - 2
          end
        until c2 ~= 0x5C -- leave if '"' is not preceded by '\'

        local str = sub(json, pos, tmppos)
        pos = newpos

        if iskey then -- check key cache
          tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val
          if tmppos then
            return tmppos
          end
          tmppos = str
        end

        if find(str, f_str_ctrl_pat) then
          decode_error("unescaped control string")
        end
        if find(str, '\\', 1, true) then -- check whether a backslash exists
          -- We need to grab 4 characters after the escape char,
          -- for encoding unicode codepoint to UTF-8.
          -- As we need to ensure that every first surrogate pair byte is
          -- immediately followed by second one, we grab upto 5 characters and
          -- check the last for this purpose.
          str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst)
          if f_str_surrogate_prev ~= 0 then
            f_str_surrogate_prev = 0
            decode_error("1st surrogate pair byte not continued by 2nd")
          end
        end
        if iskey then -- commit key cache
          f_str_keycache[tmppos] = str
        end
        return str
      end

      --[[
        Arrays, Objects
      --]]
      -- array
      local function f_ary()
        rec_depth = rec_depth + 1
        if rec_depth > 1000 then
          decode_error('too deeply nested json (> 1000)')
        end
        local ary = {}

        pos = match(json, '^[ \n\r\t]*()', pos)

        local i = 0
        if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty
          pos = pos + 1
        else
          local newpos = pos
          repeat
            i = i + 1
            f = dispatcher[byte(json, newpos)] -- parse value
            pos = newpos + 1
            ary[i] = f()
            newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
          until not newpos

          newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket
          if not newpos then
            decode_error("no closing bracket of an array")
          end
          pos = newpos
        end

        if arraylen then -- commit the length of the array if `arraylen` is set
          ary[0] = i
        end
        rec_depth = rec_depth - 1
        return ary
      end

      -- objects
      local function f_obj()
        rec_depth = rec_depth + 1
        if rec_depth > 1000 then
          decode_error('too deeply nested json (> 1000)')
        end
        local obj = {}

        pos = match(json, '^[ \n\r\t]*()', pos)
        if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty
          pos = pos + 1
        else
          local newpos = pos

          repeat
            if byte(json, newpos) ~= 0x22 then -- check '"'
              decode_error("not key")
            end
            pos = newpos + 1
            local key = f_str(true) -- parse key

            -- optimized for compact json: try a ':' directly followed by the
            -- value, or by exactly one space and then the value
            f = f_err
            local c1, c2, c3 = byte(json, pos, pos + 3)
            if c1 == 0x3A then
              if c2 ~= 0x20 then
                f = dispatcher[c2]
                newpos = pos + 2
              else
                f = dispatcher[c3]
                newpos = pos + 3
              end
            end
            if f == f_err then -- read a colon and arbitrary number of spaces
              newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos)
              if not newpos then
                decode_error("no colon after a key")
              end
              f = dispatcher[byte(json, newpos)]
              newpos = newpos + 1
            end
            pos = newpos
            obj[key] = f() -- parse value
            newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos)
          until not newpos

          newpos = match(json, '^[ \n\r\t]*}()', pos)
          if not newpos then
            decode_error("no closing bracket of an object")
          end
          pos = newpos
        end

        rec_depth = rec_depth - 1
        return obj
      end

      --[[
        The jump table to dispatch a parser for a value,
        indexed by the code of the value's first char.
        Codes 0..127 without a dedicated parser map to f_err; any other key
        (nil at the end of json, or a code above 127) falls through to __index.
      --]]
      dispatcher = {
        __index = function()
          decode_error("unexpected termination")
        end
      }
      for code = 0, 127 do
        dispatcher[code] = f_err
      end
      dispatcher[0x22] = f_str -- '"'
      dispatcher[0x2D] = f_mns -- '-'
      dispatcher[0x30] = f_zro -- '0'
      for code = 0x31, 0x39 do -- '1'..'9'
        dispatcher[code] = f_num
      end
      dispatcher[0x5B] = f_ary -- '['
      dispatcher[0x66] = f_fls -- 'f'
      dispatcher[0x6E] = f_nul -- 'n'
      dispatcher[0x74] = f_tru -- 't'
      dispatcher[0x7B] = f_obj -- '{'
      setmetatable(dispatcher, dispatcher)

      --[[
        run decoder
      --]]
      local function decode(json_, pos_, nullv_, arraylen_)
        json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_
        rec_depth = 0

        pos = match(json, '^[ \n\r\t]*()', pos)

        f = dispatcher[byte(json, pos)]
        pos = pos + 1
        local v = f()

        if pos_ then
          return v, pos
        else
          -- only trailing whitespace may follow the value
          f, pos = find(json, '^[ \n\r\t]*', pos)
          if pos ~= #json then
            decode_error('json ended')
          end
          return v
        end
      end

      return decode
    end

    return newdecoder
  end
end
do
  local _ENV = _ENV
  --- Loader for `lunajson.encoder`; the module value is the `newencoder` factory.
  package.preload["lunajson.encoder"] = function(...)
    local arg = _G.arg;
    local error = error
    local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match
    local concat = table.concat
    local tostring = tostring
    local pairs, type = pairs, type
    local setmetatable = setmetatable
    local pos_inf, neg_inf = 1 / 0, -1 / 0

    -- Character class of everything that must be escaped inside a JSON string.
    -- Lua 5.1 gets the cluttered variant because it does not handle \0 in a
    -- pattern correctly.
    local escape_class = _VERSION == "Lua 5.1" and '[^ -!#-[%]^-\255]' or '[\0-\31"\\]'

    -- No globals are reachable past this point.
    local _ENV = nil


    --- Create an independent encoder.
    -- @return encode(value, nullv): JSON text for `value`; any value equal to
    --   `nullv` is written as `null`.
    local function newencoder()
      local root, nullv
      -- `out` collects output fragments, `n` is the next free slot in it and
      -- `seen` holds the tables currently being written (for loop detection).
      local n, out, seen

      -- Booleans are written via tostring.
      local function put_plain(value)
        out[n] = tostring(value)
        n = n + 1
      end

      -- Find out how tostring() renders the radix mark and whether it inserts a
      -- digit-group delimiter; both are stored as patterns for gsub.
      local radix = match(tostring(0.5), '[^0-9]')
      local delim = match(tostring(12345.12345), '[^0-9' .. radix .. ']')
      if radix == '.' then
        radix = nil
      end

      local needs_fixup
      if radix or delim then
        needs_fixup = true
        if radix and find(radix, '%W') then
          radix = '%' .. radix
        end
        if delim and find(delim, '%W') then
          delim = '%' .. delim
        end
      end

      -- Numbers are written with 17 significant digits; anything that is not
      -- strictly between -inf and +inf is rejected.
      local function put_number(num)
        if not (neg_inf < num and num < pos_inf) then
          error('invalid number')
        end
        local text = format("%.17g", num)
        if needs_fixup then
          if delim then
            text = gsub(text, delim, '')
          end
          if radix then
            text = gsub(text, radix, '.')
          end
        end
        out[n] = text
        n = n + 1
      end

      local encode_value

      -- Replacement table for gsub: named escapes first, \u00XX for the rest.
      local escapes = {
        ['"'] = '\\"',
        ['\\'] = '\\\\',
        ['\b'] = '\\b',
        ['\f'] = '\\f',
        ['\n'] = '\\n',
        ['\r'] = '\\r',
        ['\t'] = '\\t',
        __index = function(_, c)
          return format('\\u00%02X', byte(c))
        end
      }
      setmetatable(escapes, escapes)

      local function put_string(s)
        local body = s
        if find(s, escape_class) then
          body = gsub(s, escape_class, escapes)
        end
        out[n] = '"'
        out[n + 1] = body
        out[n + 2] = '"'
        n = n + 3
      end

      -- Tables are written as an array when index 0 holds a number (taken as
      -- the length) or when index 1 is non-nil (elements up to the first nil);
      -- otherwise as an object, which requires string keys.
      local function put_table(o)
        if seen[o] then
          error("loop detected")
        end
        seen[o] = true

        local len = o[0]
        if type(len) == 'number' then -- arraylen available
          out[n] = '['
          n = n + 1
          for idx = 1, len do
            encode_value(o[idx])
            out[n] = ','
            n = n + 1
          end
          if len > 0 then
            n = n - 1 -- the closing bracket overwrites the last comma
          end
          out[n] = ']'
        else
          local first = o[1]
          if first ~= nil then -- detected as array
            out[n] = '['
            n = n + 1
            encode_value(first)
            local idx = 2
            local item = o[idx]
            while item ~= nil do
              out[n] = ','
              n = n + 1
              encode_value(item)
              idx = idx + 1
              item = o[idx]
            end
            out[n] = ']'
          else -- detected as object
            out[n] = '{'
            n = n + 1
            local start = n
            for key, member in pairs(o) do
              if type(key) ~= 'string' then
                error("non-string key")
              end
              put_string(key)
              out[n] = ':'
              n = n + 1
              encode_value(member)
              out[n] = ','
              n = n + 1
            end
            if n > start then
              n = n - 1 -- the closing brace overwrites the last comma
            end
            out[n] = '}'
          end
        end

        n = n + 1
        seen[o] = nil
      end

      -- Writer per Lua type; any other type is an error.
      local by_type = {
        boolean = put_plain,
        number = put_number,
        string = put_string,
        table = put_table,
        __index = function()
          error("invalid type value")
        end
      }
      setmetatable(by_type, by_type)

      encode_value = function(value)
        if value ~= nullv then
          return by_type[type(value)](value)
        end
        out[n] = 'null'
        n = n + 1
      end

      return function(v_, nullv_)
        root, nullv = v_, nullv_
        n, out, seen = 1, {}, {}

        encode_value(root)
        return concat(out)
      end
    end

    return newencoder
  end
end
dispatcher) + + function doencode(v) + if v == nullv then + builder[i] = 'null' + i = i + 1 + return + end + return dispatcher[type(v)](v) + end + + local function encode(v_, nullv_) + v, nullv = v_, nullv_ + i, builder, visited = 1, {}, {} + + doencode(v) + return concat(builder) + end + + return encode + end + + return newencoder + end +end + +do + local _ENV = _ENV + package.preload["lunajson.sax"] = function(...) + local arg = _G.arg; + local setmetatable, tonumber, tostring = + setmetatable, tonumber, tostring + local floor, inf = + math.floor, math.huge + local mininteger, tointeger = + math.mininteger or nil, math.tointeger or nil + local byte, char, find, gsub, match, sub = + string.byte, string.char, string.find, string.gsub, string.match, string.sub + + local function _parse_error(pos, errmsg) + error("parse error at " .. pos .. ": " .. errmsg, 2) + end + + local f_str_ctrl_pat + if _VERSION == "Lua 5.1" then + -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly + f_str_ctrl_pat = '[^\32-\255]' + else + f_str_ctrl_pat = '[\0-\31]' + end + + local type, unpack = type, table.unpack or unpack + local open = io.open + + local _ENV = nil + + + local function nop() end + + local function newparser(src, saxtbl) + local json, jsonnxt, rec_depth + local jsonlen, pos, acc = 0, 1, 0 + + -- `f` is the temporary for dispatcher[c] and + -- the dummy for the first return value of `find` + local dispatcher, f + + -- initialize + if type(src) == 'string' then + json = src + jsonlen = #json + jsonnxt = function() + json = '' + jsonlen = 0 + jsonnxt = nop + end + else + jsonnxt = function() + acc = acc + jsonlen + pos = 1 + repeat + json = src() + if not json then + json = '' + jsonlen = 0 + jsonnxt = nop + return + end + jsonlen = #json + until jsonlen > 0 + end + jsonnxt() + end + + local sax_startobject = saxtbl.startobject or nop + local sax_key = saxtbl.key or nop + local sax_endobject = saxtbl.endobject or nop + local sax_startarray 
= saxtbl.startarray or nop + local sax_endarray = saxtbl.endarray or nop + local sax_string = saxtbl.string or nop + local sax_number = saxtbl.number or nop + local sax_boolean = saxtbl.boolean or nop + local sax_null = saxtbl.null or nop + + --[[ + Helper + --]] + local function tryc() + local c = byte(json, pos) + if not c then + jsonnxt() + c = byte(json, pos) + end + return c + end + + local function parse_error(errmsg) + return _parse_error(acc + pos, errmsg) + end + + local function tellc() + return tryc() or parse_error("unexpected termination") + end + + local function spaces() -- skip spaces and prepare the next char + while true do + pos = match(json, '^[ \n\r\t]*()', pos) + if pos <= jsonlen then + return + end + if jsonlen == 0 then + parse_error("unexpected termination") + end + jsonnxt() + end + end + + --[[ + Invalid + --]] + local function f_err() + parse_error('invalid value') + end + + --[[ + Constants + --]] + -- fallback slow constants parser + local function generic_constant(target, targetlen, ret, sax_f) + for i = 1, targetlen do + local c = tellc() + if byte(target, i) ~= c then + parse_error("invalid char") + end + pos = pos + 1 + end + return sax_f(ret) + end + + -- null + local function f_nul() + if sub(json, pos, pos + 2) == 'ull' then + pos = pos + 3 + return sax_null(nil) + end + return generic_constant('ull', 3, nil, sax_null) + end + + -- false + local function f_fls() + if sub(json, pos, pos + 3) == 'alse' then + pos = pos + 4 + return sax_boolean(false) + end + return generic_constant('alse', 4, false, sax_boolean) + end + + -- true + local function f_tru() + if sub(json, pos, pos + 2) == 'rue' then + pos = pos + 3 + return sax_boolean(true) + end + return generic_constant('rue', 3, true, sax_boolean) + end + + --[[ + Numbers + Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp) + is captured as a number and its conformance to the JSON spec is checked. 
+ --]] + -- deal with non-standard locales + local radixmark = match(tostring(0.5), '[^0-9]') + local fixedtonumber = tonumber + if radixmark ~= '.' then + if find(radixmark, '%W') then + radixmark = '%' .. radixmark + end + fixedtonumber = function(s) + return tonumber(gsub(s, '.', radixmark)) + end + end + + local function number_error() + return parse_error('invalid number') + end + + -- fallback slow parser + local function generic_number(mns) + local buf = {} + local i = 1 + local is_int = true + + local c = byte(json, pos) + pos = pos + 1 + + local function nxt() + buf[i] = c + i = i + 1 + c = tryc() + pos = pos + 1 + end + + if c == 0x30 then + nxt() + if c and 0x30 <= c and c < 0x3A then + number_error() + end + else + repeat nxt() until not (c and 0x30 <= c and c < 0x3A) + end + if c == 0x2E then + is_int = false + nxt() + if not (c and 0x30 <= c and c < 0x3A) then + number_error() + end + repeat nxt() until not (c and 0x30 <= c and c < 0x3A) + end + if c == 0x45 or c == 0x65 then + is_int = false + nxt() + if c == 0x2B or c == 0x2D then + nxt() + end + if not (c and 0x30 <= c and c < 0x3A) then + number_error() + end + repeat nxt() until not (c and 0x30 <= c and c < 0x3A) + end + if c and (0x41 <= c and c <= 0x5B or + 0x61 <= c and c <= 0x7B or + c == 0x2B or c == 0x2D or c == 0x2E) then + number_error() + end + pos = pos - 1 + + local num = char(unpack(buf)) + num = fixedtonumber(num) + if mns then + num = -num + if num == mininteger and is_int then + num = mininteger + end + end + return sax_number(num) + end + + -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_zro(mns) + local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0 + + if num == '' then + if pos > jsonlen then + pos = pos - 1 + return generic_number(mns) + end + if c == '' then + if mns then + return sax_number(-0.0) + end + return sax_number(0) + end + + if c == 'e' or c == 'E' then + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + if c == '' 
then + pos = pos + #num + if pos > jsonlen then + pos = pos - #num - 1 + return generic_number(mns) + end + if mns then + return sax_number(-0.0) + end + return sax_number(0.0) + end + end + pos = pos - 1 + return generic_number(mns) + end + + if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then + pos = pos - 1 + return generic_number(mns) + end + + if c ~= '' then + if c == 'e' or c == 'E' then + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + end + if c ~= '' then + pos = pos - 1 + return generic_number(mns) + end + end + + pos = pos + #num + if pos > jsonlen then + pos = pos - #num - 1 + return generic_number(mns) + end + c = fixedtonumber(num) + + if mns then + c = -c + end + return sax_number(c) + end + + -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_num(mns) + pos = pos - 1 + local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos) + if byte(num, -1) == 0x2E then -- error if ended with period + return generic_number(mns) + end + + if c ~= '' then + if c ~= 'e' and c ~= 'E' then + return generic_number(mns) + end + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + if not num or c ~= '' then + return generic_number(mns) + end + end + + pos = pos + #num + if pos > jsonlen then + pos = pos - #num + return generic_number(mns) + end + c = fixedtonumber(num) + + if mns then + c = -c + if c == mininteger and not find(num, '[^0-9]') then + c = mininteger + end + end + return sax_number(c) + end + + -- skip minus sign + local function f_mns() + local c = byte(json, pos) or tellc() + if c then + pos = pos + 1 + if c > 0x30 then + if c < 0x3A then + return f_num(true) + end + else + if c > 0x2F then + return f_zro(true) + end + end + end + parse_error("invalid number") + end + + --[[ + Strings + --]] + local f_str_hextbl = { + 0x0, + 0x1, + 0x2, + 0x3, + 0x4, + 0x5, + 0x6, + 0x7, + 0x8, + 0x9, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + inf, + inf, + 
inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + __index = function() + return inf + end + } + setmetatable(f_str_hextbl, f_str_hextbl) + + local f_str_escapetbl = { + ['"'] = '"', + ['\\'] = '\\', + ['/'] = '/', + ['b'] = '\b', + ['f'] = '\f', + ['n'] = '\n', + ['r'] = '\r', + ['t'] = '\t', + __index = function() + parse_error("invalid escape sequence") + end + } + setmetatable(f_str_escapetbl, f_str_escapetbl) + + local function surrogate_first_error() + return parse_error("1st surrogate pair byte not continued by 2nd") + end + + local f_str_surrogate_prev = 0 + local function f_str_subst(ch, ucode) + if ch == 'u' then + local c1, c2, c3, c4, rest = byte(ucode, 1, 5) + ucode = f_str_hextbl[c1 - 47] * 0x1000 + + f_str_hextbl[c2 - 47] * 0x100 + + f_str_hextbl[c3 - 47] * 0x10 + + f_str_hextbl[c4 - 47] + if ucode ~= inf then + if ucode < 0x80 then -- 1byte + if rest then + return char(ucode, rest) + end + return char(ucode) + elseif ucode < 0x800 then -- 2bytes + c1 = floor(ucode / 0x40) + c2 = ucode - c1 * 0x40 + c1 = c1 + 0xC0 + c2 = c2 + 0x80 + if rest then + return char(c1, c2, rest) + end + return char(c1, c2) + elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes + c1 = floor(ucode / 0x1000) + ucode = ucode - c1 * 0x1000 + c2 = floor(ucode / 0x40) + c3 = ucode - c2 * 0x40 + c1 = c1 + 0xE0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + if rest then + return char(c1, c2, c3, rest) + end + return char(c1, c2, c3) + elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st + if f_str_surrogate_prev == 0 then + f_str_surrogate_prev = ucode + if not rest then + return '' + end + surrogate_first_error() + end + f_str_surrogate_prev = 0 + surrogate_first_error() + else -- surrogate pair 2nd + if f_str_surrogate_prev ~= 0 then + ucode = 0x10000 + + (f_str_surrogate_prev - 0xD800) * 0x400 + + (ucode - 0xDC00) 
+ f_str_surrogate_prev = 0 + c1 = floor(ucode / 0x40000) + ucode = ucode - c1 * 0x40000 + c2 = floor(ucode / 0x1000) + ucode = ucode - c2 * 0x1000 + c3 = floor(ucode / 0x40) + c4 = ucode - c3 * 0x40 + c1 = c1 + 0xF0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + c4 = c4 + 0x80 + if rest then + return char(c1, c2, c3, c4, rest) + end + return char(c1, c2, c3, c4) + end + parse_error("2nd surrogate pair byte appeared without 1st") + end + end + parse_error("invalid unicode codepoint literal") + end + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + surrogate_first_error() + end + return f_str_escapetbl[ch] .. ucode + end + + local function f_str(iskey) + local pos2 = pos + local newpos + local str = '' + local bs + while true do + while true do -- search '\' or '"' + newpos = find(json, '[\\"]', pos2) + if newpos then + break + end + str = str .. sub(json, pos, jsonlen) + if pos2 == jsonlen + 2 then + pos2 = 2 + else + pos2 = 1 + end + jsonnxt() + if jsonlen == 0 then + parse_error("unterminated string") + end + end + if byte(json, newpos) == 0x22 then -- break if '"' + break + end + pos2 = newpos + 2 -- skip '\' + bs = true -- mark the existence of a backslash + end + str = str .. sub(json, pos, newpos - 1) + pos = newpos + 1 + + if find(str, f_str_ctrl_pat) then + parse_error("unescaped control string") + end + if bs then -- a backslash exists + -- We need to grab 4 characters after the escape char, + -- for encoding unicode codepoint to UTF-8. + -- As we need to ensure that every first surrogate pair byte is + -- immediately followed by second one, we grab upto 5 characters and + -- check the last for this purpose. 
+ str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst) + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + parse_error("1st surrogate pair byte not continued by 2nd") + end + end + + if iskey then + return sax_key(str) + end + return sax_string(str) + end + + --[[ + Arrays, Objects + --]] + -- arrays + local function f_ary() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + parse_error('too deeply nested json (> 1000)') + end + sax_startarray() + + spaces() + if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty + pos = pos + 1 + else + local newpos + while true do + f = dispatcher[byte(json, pos)] -- parse value + pos = pos + 1 + f() + newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma + if newpos then + pos = newpos + else + newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket + if newpos then + pos = newpos + break + end + spaces() -- since the current chunk can be ended, skip spaces toward following chunks + local c = byte(json, pos) + pos = pos + 1 + if c == 0x2C then -- check comma again + spaces() + elseif c == 0x5D then -- check closing bracket again + break + else + parse_error("no closing bracket of an array") + end + end + if pos > jsonlen then + spaces() + end + end + end + + rec_depth = rec_depth - 1 + return sax_endarray() + end + + -- objects + local function f_obj() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + parse_error('too deeply nested json (> 1000)') + end + sax_startobject() + + spaces() + if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty + pos = pos + 1 + else + local newpos + while true do + if byte(json, pos) ~= 0x22 then + parse_error("not key") + end + pos = pos + 1 + f_str(true) -- parse key + newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) -- check colon + if newpos then + pos = newpos + else + spaces() -- read spaces through chunks + if byte(json, pos) ~= 0x3A then -- 
check colon again + parse_error("no colon after a key") + end + pos = pos + 1 + spaces() + end + if pos > jsonlen then + spaces() + end + f = dispatcher[byte(json, pos)] + pos = pos + 1 + f() -- parse value + newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma + if newpos then + pos = newpos + else + newpos = match(json, '^[ \n\r\t]*}()', pos) -- check closing bracket + if newpos then + pos = newpos + break + end + spaces() -- read spaces through chunks + local c = byte(json, pos) + pos = pos + 1 + if c == 0x2C then -- check comma again + spaces() + elseif c == 0x7D then -- check closing bracket again + break + else + parse_error("no closing bracket of an object") + end + end + if pos > jsonlen then + spaces() + end + end + end + + rec_depth = rec_depth - 1 + return sax_endobject() + end + + --[[ + The jump table to dispatch a parser for a value, + indexed by the code of the value's first char. + Key should be non-nil. + --]] + dispatcher = { + [0] = + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_str, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_mns, + f_err, + f_err, + f_zro, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_ary, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_fls, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_nul, + f_err, + f_err, 
+ f_err, + f_err, + f_err, + f_tru, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_obj, + f_err, + f_err, + f_err, + f_err, + } + + --[[ + public funcitons + --]] + local function run() + rec_depth = 0 + spaces() + f = dispatcher[byte(json, pos)] + pos = pos + 1 + f() + end + + local function read(n) + if n < 0 then + error("the argument must be non-negative") + end + local pos2 = (pos - 1) + n + local str = sub(json, pos, pos2) + while pos2 > jsonlen and jsonlen ~= 0 do + jsonnxt() + pos2 = pos2 - (jsonlen - (pos - 1)) + str = str .. sub(json, pos, pos2) + end + if jsonlen ~= 0 then + pos = pos2 + 1 + end + return str + end + + local function tellpos() + return acc + pos + end + + return { + run = run, + tryc = tryc, + read = read, + tellpos = tellpos, + } + end + + local function newfileparser(fn, saxtbl) + local fp = open(fn) + local function gen() + local s + if fp then + s = fp:read(8192) + if not s then + fp:close() + fp = nil + end + end + return s + end + return newparser(gen, saxtbl) + end + + return { + newparser = newparser, + newfileparser = newfileparser + } + end +end + +do + local _ENV = _ENV + package.preload["utils"] = function(...) 
-- Body of the preloaded "utils" module: small, dependency-free helpers shared
-- by the other bundled modules. The enclosing
-- `package.preload["utils"] = function(...)` header precedes this span.
local arg = _G.arg;
local module = {}

--- Count every key in a table (array part and hash part alike).
-- @param T table to measure
-- @return number of key/value pairs found by `pairs`
function module.tablelength(T)
    local count = 0
    for _ in pairs(T) do
        count = count + 1
    end
    return count
end

-- Monotonic counter backing next_id; shared by every caller of this module.
module.id_number = 0

--- Return the next sequential id as a zero-padded decimal string.
-- @param length minimum width of the returned string
-- @return e.g. "0007" for length 4 on the seventh call
function module.next_id(length)
    module.id_number = module.id_number + 1
    -- The inner format builds the outer format string, e.g. '%04d'.
    return string.format(string.format('%%0%dd', length), module.id_number)
end

-- Percent-encode a single byte. Two hex digits are mandatory: a bare '%X'
-- would turn bytes below 0x10 (tab, newline, ...) into an invalid
-- one-digit escape such as '%A'.
local function url_encode_char(chr)
    return string.format("%%%02X", string.byte(chr))
end

--- Percent-encode every character of `str` that `%w` does not match.
-- @param str string to encode
-- @return the encoded string (the substitution count is dropped)
function module.urlencode(str)
    return (string.gsub(str, "[^%w]", url_encode_char))
end

-- Replacement table for xmlescape: XML special character -> entity.
local xml_entities = {
    ['&'] = '&amp;',
    ['<'] = '&lt;',
    ['>'] = '&gt;',
    ['"'] = '&quot;',
}

--- Escape the characters that are special in XML text and attribute values.
-- @param str string to escape
-- @return the escaped string only; the parentheses drop gsub's match count so
--         the result is safe as the last argument of another call
function module.xmlescape(str)
    return (string.gsub(str, '["<>&]', xml_entities))
end

--- Recursively copy a value.
-- Tables are copied key by key (keys are deep-copied too) and receive a deep
-- copy of the original's metatable; every other type is returned as is.
-- NOTE(review): there is no cycle detection, so a self-referencing table
-- recurses without bound.
-- @param orig value to copy
-- @return the copy
function module.deepcopy(orig)
    local orig_type = type(orig)
    local copy
    if orig_type == 'table' then
        copy = {}
        for orig_key, orig_value in next, orig, nil do
            copy[module.deepcopy(orig_key)] = module.deepcopy(orig_value)
        end
        setmetatable(copy, module.deepcopy(getmetatable(orig)))
    else -- number, string, boolean, etc
        copy = orig
    end
    return copy
end

--- Strip leading and trailing whitespace.
-- This is the single, nil-tolerant definition; an earlier duplicate that
-- raised on nil was dead code because this one overwrote it.
-- @param s string, or nil
-- @return the trimmed string, or nil when `s` is nil
function module.trim(s)
    if s == nil then
        return s
    end
    return (s:gsub("^%s*(.-)%s*$", "%1"))
end

return module
end -- package.preload["utils"]
end -- do

do
    local _ENV = _ENV
    package.preload["zotero"] = function(...)
-- Body of the preloaded "zotero" module: looks up cited items through the
-- endpoint stored in `module.url` and caches the answer for the whole run.
-- The enclosing `package.preload["zotero"] = function(...)` header precedes
-- this span.
local arg = _G.arg;
local module = {}

local utils = require('utils')
local json = require('lunajson')
-- local pl = require('pl.pretty') -- for pl.pretty.dump

-- state.reported: citekeys for which a problem was already printed
-- state.fetched:  nil until the first lookup, afterwards a table with the
--                 sub-tables `items`, `errors` and `zotero`, keyed by citekey
local state = {
    reported = {},
}

-- Set of citekeys to request; the keys of this table are the citekeys.
module.citekeys = {}

--- Build a short author string for a CSL item or a Zotero item.
-- Reads `author` (literal or family name), else `creators` (name or
-- lastName), else `reporter`.
-- @param csl_or_item table describing the item
-- @return nil when no name is found, the name itself for one author,
--         otherwise "A, B and C"
function module.authors(csl_or_item)
    local authors = {}

    if csl_or_item.author ~= nil then
        for _, author in ipairs(csl_or_item.author) do
            if author.literal ~= nil then
                table.insert(authors, author.literal)
            elseif author.family ~= nil then
                table.insert(authors, author.family)
            end
        end
    elseif csl_or_item.creators ~= nil then
        for _, creator in ipairs(csl_or_item.creators) do
            if creator.name ~= nil then
                table.insert(authors, creator.name)
            elseif creator.lastName ~= nil then
                table.insert(authors, creator.lastName)
            end
        end
    elseif csl_or_item.reporter ~= nil then
        table.insert(authors, csl_or_item.reporter)
    end

    if #authors == 0 then
        return nil
    end

    -- Split off the final name so it can be joined with " and ".
    local last = table.remove(authors)
    if #authors == 0 then
        return last
    end
    return table.concat({ table.concat(authors, ', '), last }, ' and ')
end

-- Return `value` when it is a table, otherwise a fresh empty table.
local function as_table(value)
    if type(value) == 'table' then
        return value
    end
    return {}
end

-- Fetch all requested citekeys in a single request and cache the result in
-- state.fetched. Runs at most once. Any failure (transport error, invalid
-- JSON, unexpected payload) is reported with print() and leaves the empty
-- result in place, so later lookups report the key as missing instead of
-- raising.
local function load_items()
    if state.fetched ~= nil then
        return
    end

    -- Install the empty result first; every early return below keeps it.
    state.fetched = {
        items = {},
        errors = {},
        zotero = {},
    }

    local citekeys = {}
    for k, _ in pairs(module.citekeys) do
        table.insert(citekeys, k)
    end

    if #citekeys == 0 then
        return
    end

    local url = module.url .. utils.urlencode(table.concat(citekeys, ','))

    -- The fetch raises when the endpoint cannot be reached; treat that the
    -- same way as an undecodable response rather than aborting the filter.
    local reached, mt, contents = pcall(pandoc.mediabag.fetch, url, '.')
    if not reached then
        -- On failure pcall's second result is the error value.
        print('could not fetch Zotero items: ' .. tostring(mt))
        return
    end

    local ok, fetched = pcall(json.decode, contents)
    if not ok or type(fetched) ~= 'table' then
        print('could not fetch Zotero items: ' .. tostring(contents))
        return
    end

    -- module.get indexes all three sub-tables unconditionally.
    fetched.items = as_table(fetched.items)
    fetched.errors = as_table(fetched.errors)
    fetched.zotero = as_table(fetched.zotero)
    state.fetched = fetched
end

--- Look up one cited item.
-- Each problematic citekey is reported on stdout once; later calls for the
-- same key return nil silently.
-- @param citekey citation key without the leading '@'
-- @return the entry from `items` and the entry from `zotero` for this key,
--         or nil when the key failed or is unknown
function module.get(citekey)
    load_items()

    if state.reported[citekey] ~= nil then
        return nil
    end

    if state.fetched.errors[citekey] ~= nil then
        state.reported[citekey] = true
        print('@' .. citekey .. ': ' .. tostring(state.fetched.errors[citekey]))
        return nil
    end

    if state.fetched.items[citekey] == nil then
        state.reported[citekey] = true
        print('@' .. citekey .. ' not in Zotero')
        return nil
    end

    return state.fetched.items[citekey], state.fetched.zotero[citekey]
end

return module
end -- package.preload["zotero"]
end -- do

--
-- bbt-to-live-doc
--
-- Copyright (c) 2020 Emiliano Heyns
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
-- this software and associated documentation files (the "Software"), to deal in
-- the Software without restriction, including without limitation the rights to
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-- of the Software, and to permit persons to whom the Software is furnished to do
-- so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.
+-- + +if lpeg == nil then + print('upgrade pandoc to version 2.16.2 or later') + os.exit() +end + +local json = require('lunajson') +local csl_locator = require('locator') +local utils = require('utils') +local zotero = require('zotero') + +-- -- global state -- -- +local config = { + client = 'zotero', + scannable_cite = false, + csl_style = 'apa7', + format = nil, -- more to document than anything else -- Lua does not store nils in tables + transferable = false +} + +-- -- -- bibliography marker generator -- -- -- +function zotero_docpreferences_odt(csl_style) + return string.format( + '' + .. ' ' + .. '