Marty Oehme
8cd6dc790d
Instead of having it lie in the src directory (where, reasonably, only Python data extraction, processing, and modelling code should live), I turned the retorque filter into a simple Quarto extension. It could reasonably be repackaged separately from this repo, since I believe other people could benefit from it.
2524 lines
57 KiB
Lua
local pandoc = require("pandoc")
print("zotero-live-citations cd1ef3f")
local mt, latest = pandoc.mediabag.fetch("https://retorque.re/zotero-better-bibtex/exporting/zotero.lua.revision")
latest = string.sub(latest, 1, 10)
if "cd1ef3f" ~= latest then
print('new version "' .. latest .. '" available at https://retorque.re/zotero-better-bibtex/exporting')
end
do
|
|
local _ENV = _ENV
|
|
package.preload["locator"] = function(...)
|
|
local arg = _G.arg
|
|
local utils = require("utils")
|
|
-- local lpeg = require('lpeg')
|
|
|
|
local book = (lpeg.P("book") + lpeg.P("bk.") + lpeg.P("bks.")) / "book"
|
|
local chapter = (lpeg.P("chapter") + lpeg.P("chap.") + lpeg.P("chaps.")) / "chapter"
|
|
local column = (lpeg.P("column") + lpeg.P("col.") + lpeg.P("cols.")) / "column"
|
|
local figure = (lpeg.P("figure") + lpeg.P("fig.") + lpeg.P("figs.")) / "figure"
|
|
local folio = (lpeg.P("folio") + lpeg.P("fol.") + lpeg.P("fols.")) / "folio"
|
|
local number = (lpeg.P("number") + lpeg.P("no.") + lpeg.P("nos.")) / "number"
|
|
local line = (lpeg.P("line") + lpeg.P("l.") + lpeg.P("ll.")) / "line"
|
|
local note = (lpeg.P("note") + lpeg.P("n.") + lpeg.P("nn.")) / "note"
|
|
local opus = (lpeg.P("opus") + lpeg.P("op.") + lpeg.P("opp.")) / "opus"
|
|
local page = (lpeg.P("page") + lpeg.P("p.") + lpeg.P("pp.")) / "page"
|
|
local paragraph = (
|
|
lpeg.P("paragraph")
|
|
+ lpeg.P("para.")
|
|
+ lpeg.P("paras.")
|
|
+ lpeg.P("¶¶")
|
|
+ lpeg.P("¶")
|
|
) / "paragraph"
|
|
local part = (lpeg.P("part") + lpeg.P("pt.") + lpeg.P("pts.")) / "part"
|
|
local section = (lpeg.P("section") + lpeg.P("sec.") + lpeg.P("secs.") + lpeg.P("§§") + lpeg.P("§"))
|
|
/ "section"
|
|
local subverbo = (lpeg.P("sub verbo") + lpeg.P("s.v.") + lpeg.P("s.vv.")) / "sub verbo"
|
|
local verse = (lpeg.P("verse") + lpeg.P("v.") + lpeg.P("vv.")) / "verse"
|
|
local volume = (lpeg.P("volume") + lpeg.P("vol.") + lpeg.P("vols.")) / "volume"
|
|
local label = book
|
|
+ chapter
|
|
+ column
|
|
+ figure
|
|
+ folio
|
|
+ number
|
|
+ line
|
|
+ note
|
|
+ opus
|
|
+ page
|
|
+ paragraph
|
|
+ part
|
|
+ section
|
|
+ subverbo
|
|
+ verse
|
|
+ volume
|
|
|
|
local whitespace = lpeg.P(" ") ^ 0
|
|
local nonspace = lpeg.P(1) - lpeg.S(" ")
|
|
local nonbrace = lpeg.P(1) - lpeg.S("{}")
|
|
|
|
local word = nonspace ^ 1 / 1
|
|
-- local roman = lpeg.S('IiVvXxLlCcDdMm]')^1
|
|
local number = lpeg.R("09") ^ 1 -- + roman
|
|
|
|
local numbers = number * (whitespace * lpeg.S("-") ^ 1 * whitespace * number) ^ -1
|
|
local ranges = (numbers * (whitespace * lpeg.P(",") * whitespace * numbers) ^ 0) / 1
|
|
|
|
-- local braced_locator = lpeg.P('{') * lpeg.Cs(label + lpeg.Cc('page')) * whitespace * lpeg.C(nonbrace^1) * lpeg.P('}')
|
|
local braced_locator = lpeg.P("{") * label * whitespace * lpeg.C(nonbrace ^ 1) * lpeg.P("}")
|
|
local braced_implicit_locator = lpeg.P("{") * lpeg.Cc("page") * lpeg.Cs(numbers) * lpeg.P("}")
|
|
local locator = braced_locator
|
|
+ braced_implicit_locator
|
|
+ (label * whitespace * ranges)
|
|
+ (label * whitespace * word)
|
|
+ (lpeg.Cc("page") * ranges)
|
|
local remainder = lpeg.C(lpeg.P(1) ^ 0)
|
|
|
|
local suffix = lpeg.C(lpeg.P(",") ^ -1 * whitespace) * locator * remainder
|
|
|
|
local pseudo_locator = lpeg.C(lpeg.P(",") ^ -1 * whitespace)
|
|
* lpeg.P("{")
|
|
* lpeg.C(nonbrace ^ 0)
|
|
* lpeg.P("}")
|
|
* remainder
|
|
|
|
local module = {}
|
|
|
|
function module.parse(input)
|
|
local parsed, _prefix, _label, _locator, _suffix
|
|
|
|
parsed = lpeg.Ct(suffix):match(input)
|
|
if parsed then
|
|
_prefix, _label, _locator, _suffix = table.unpack(parsed)
|
|
else
|
|
parsed = lpeg.Ct(pseudo_locator):match(input)
|
|
if parsed then
|
|
_label = "page"
|
|
_prefix, _locator, _suffix = table.unpack(parsed)
|
|
else
|
|
return nil, nil, input
|
|
end
|
|
end
|
|
|
|
if utils.trim(_prefix) == "," then
|
|
_prefix = ""
|
|
end
|
|
local _space = ""
|
|
if utils.trim(_prefix) ~= _prefix then
|
|
_space = " "
|
|
end
|
|
|
|
_prefix = utils.trim(_prefix)
|
|
_label = utils.trim(_label)
|
|
_locator = utils.trim(_locator)
|
|
_suffix = utils.trim(_suffix)
|
|
|
|
return _label, _locator, utils.trim(_prefix .. _space .. _suffix)
|
|
end
|
|
|
|
return module
|
|
end
|
|
end
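-- Illustrative sketch (not part of the upstream filter): the "locator" module
-- above parses a pandoc citation suffix into a CSL label, a locator, and any
-- leftover suffix text. Assuming the grammar behaves as intended, a call looks
-- roughly like this:
--
--   local csl_locator = require("locator")
--   local label, locator, suffix = csl_locator.parse(", pp. 23-25")
--   -- expected (approximately): label == "page", locator == "23-25", suffix == ""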
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["lunajson"] = function(...)
|
|
local arg = _G.arg
|
|
local newdecoder = require("lunajson.decoder")
|
|
local newencoder = require("lunajson.encoder")
|
|
local sax = require("lunajson.sax")
|
|
-- If you need multiple contexts of decoder and/or encoder,
|
|
-- you can require lunajson.decoder and/or lunajson.encoder directly.
|
|
return {
|
|
decode = newdecoder(),
|
|
encode = newencoder(),
|
|
newparser = sax.newparser,
|
|
newfileparser = sax.newfileparser,
|
|
}
|
|
end
|
|
end
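-- Illustrative sketch (not part of the upstream filter): the vendored lunajson
-- module above exposes plain encode/decode helpers, which the zotero module
-- further below uses for the Better BibTeX JSON-RPC payloads, e.g.:
--
--   local json = require("lunajson")
--   local tbl = json.decode('{"jsonrpc":"2.0","result":{"items":{},"errors":{}}}')
--   local str = json.encode({ jsonrpc = "2.0", method = "item.pandoc_filter" })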
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["lunajson.decoder"] = function(...)
|
|
local arg = _G.arg
|
|
local setmetatable, tonumber, tostring = setmetatable, tonumber, tostring
|
|
local floor, inf = math.floor, math.huge
|
|
local mininteger, tointeger = math.mininteger or nil, math.tointeger or nil
|
|
local byte, char, find, gsub, match, sub =
|
|
string.byte, string.char, string.find, string.gsub, string.match, string.sub
|
|
|
|
local function _decode_error(pos, errmsg)
|
|
error("parse error at " .. pos .. ": " .. errmsg, 2)
|
|
end
|
|
|
|
local f_str_ctrl_pat
|
|
if _VERSION == "Lua 5.1" then
|
|
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
|
|
f_str_ctrl_pat = "[^\32-\255]"
|
|
else
|
|
f_str_ctrl_pat = "[\0-\31]"
|
|
end
|
|
|
|
local _ENV = nil
|
|
|
|
local function newdecoder()
|
|
local json, pos, nullv, arraylen, rec_depth
|
|
|
|
-- `f` is the temporary for dispatcher[c] and
|
|
-- the dummy for the first return value of `find`
|
|
local dispatcher, f
|
|
|
|
--[[
|
|
Helper
|
|
--]]
|
|
local function decode_error(errmsg)
|
|
return _decode_error(pos, errmsg)
|
|
end
|
|
|
|
--[[
|
|
Invalid
|
|
--]]
|
|
local function f_err()
|
|
decode_error("invalid value")
|
|
end
|
|
|
|
--[[
|
|
Constants
|
|
--]]
|
|
-- null
|
|
local function f_nul()
|
|
if sub(json, pos, pos + 2) == "ull" then
|
|
pos = pos + 3
|
|
return nullv
|
|
end
|
|
decode_error("invalid value")
|
|
end
|
|
|
|
-- false
|
|
local function f_fls()
|
|
if sub(json, pos, pos + 3) == "alse" then
|
|
pos = pos + 4
|
|
return false
|
|
end
|
|
decode_error("invalid value")
|
|
end
|
|
|
|
-- true
|
|
local function f_tru()
|
|
if sub(json, pos, pos + 2) == "rue" then
|
|
pos = pos + 3
|
|
return true
|
|
end
|
|
decode_error("invalid value")
|
|
end
|
|
|
|
--[[
|
|
Numbers
|
|
Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
|
|
is captured as a number and its conformance to the JSON spec is checked.
|
|
--]]
|
|
-- deal with non-standard locales
|
|
local radixmark = match(tostring(0.5), "[^0-9]")
|
|
local fixedtonumber = tonumber
|
|
if radixmark ~= "." then
|
|
if find(radixmark, "%W") then
|
|
radixmark = "%" .. radixmark
|
|
end
|
|
fixedtonumber = function(s)
|
|
return tonumber(gsub(s, ".", radixmark))
|
|
end
|
|
end
|
|
|
|
local function number_error()
|
|
return decode_error("invalid number")
|
|
end
|
|
|
|
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
|
local function f_zro(mns)
|
|
local num, c = match(json, "^(%.?[0-9]*)([-+.A-Za-z]?)", pos) -- skipping 0
|
|
|
|
if num == "" then
|
|
if c == "" then
|
|
if mns then
|
|
return -0.0
|
|
end
|
|
return 0
|
|
end
|
|
|
|
if c == "e" or c == "E" then
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
if c == "" then
|
|
pos = pos + #num
|
|
if mns then
|
|
return -0.0
|
|
end
|
|
return 0.0
|
|
end
|
|
end
|
|
number_error()
|
|
end
|
|
|
|
if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
|
|
number_error()
|
|
end
|
|
|
|
if c ~= "" then
|
|
if c == "e" or c == "E" then
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
end
|
|
if c ~= "" then
|
|
number_error()
|
|
end
|
|
end
|
|
|
|
pos = pos + #num
|
|
c = fixedtonumber(num)
|
|
|
|
if mns then
|
|
c = -c
|
|
end
|
|
return c
|
|
end
|
|
|
|
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
|
local function f_num(mns)
|
|
pos = pos - 1
|
|
local num, c = match(json, "^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)", pos)
|
|
if byte(num, -1) == 0x2E then -- error if ended with period
|
|
number_error()
|
|
end
|
|
|
|
if c ~= "" then
|
|
if c ~= "e" and c ~= "E" then
|
|
number_error()
|
|
end
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
if not num or c ~= "" then
|
|
number_error()
|
|
end
|
|
end
|
|
|
|
pos = pos + #num
|
|
c = fixedtonumber(num)
|
|
|
|
if mns then
|
|
c = -c
|
|
if c == mininteger and not find(num, "[^0-9]") then
|
|
c = mininteger
|
|
end
|
|
end
|
|
return c
|
|
end
|
|
|
|
-- skip minus sign
|
|
local function f_mns()
|
|
local c = byte(json, pos)
|
|
if c then
|
|
pos = pos + 1
|
|
if c > 0x30 then
|
|
if c < 0x3A then
|
|
return f_num(true)
|
|
end
|
|
else
|
|
if c > 0x2F then
|
|
return f_zro(true)
|
|
end
|
|
end
|
|
end
|
|
decode_error("invalid number")
|
|
end
|
|
|
|
--[[
|
|
Strings
|
|
--]]
|
|
local f_str_hextbl = {
|
|
0x0,
|
|
0x1,
|
|
0x2,
|
|
0x3,
|
|
0x4,
|
|
0x5,
|
|
0x6,
|
|
0x7,
|
|
0x8,
|
|
0x9,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
0xA,
|
|
0xB,
|
|
0xC,
|
|
0xD,
|
|
0xE,
|
|
0xF,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
0xA,
|
|
0xB,
|
|
0xC,
|
|
0xD,
|
|
0xE,
|
|
0xF,
|
|
__index = function()
|
|
return inf
|
|
end,
|
|
}
|
|
setmetatable(f_str_hextbl, f_str_hextbl)
|
|
|
|
local f_str_escapetbl = {
|
|
['"'] = '"',
|
|
["\\"] = "\\",
|
|
["/"] = "/",
|
|
["b"] = "\b",
|
|
["f"] = "\f",
|
|
["n"] = "\n",
|
|
["r"] = "\r",
|
|
["t"] = "\t",
|
|
__index = function()
|
|
decode_error("invalid escape sequence")
|
|
end,
|
|
}
|
|
setmetatable(f_str_escapetbl, f_str_escapetbl)
|
|
|
|
local function surrogate_first_error()
|
|
return decode_error("1st surrogate pair byte not continued by 2nd")
|
|
end
|
|
|
|
local f_str_surrogate_prev = 0
|
|
local function f_str_subst(ch, ucode)
|
|
if ch == "u" then
|
|
local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
|
|
ucode = f_str_hextbl[c1 - 47] * 0x1000
|
|
+ f_str_hextbl[c2 - 47] * 0x100
|
|
+ f_str_hextbl[c3 - 47] * 0x10
|
|
+ f_str_hextbl[c4 - 47]
|
|
if ucode ~= inf then
|
|
if ucode < 0x80 then -- 1byte
|
|
if rest then
|
|
return char(ucode, rest)
|
|
end
|
|
return char(ucode)
|
|
elseif ucode < 0x800 then -- 2bytes
|
|
c1 = floor(ucode / 0x40)
|
|
c2 = ucode - c1 * 0x40
|
|
c1 = c1 + 0xC0
|
|
c2 = c2 + 0x80
|
|
if rest then
|
|
return char(c1, c2, rest)
|
|
end
|
|
return char(c1, c2)
|
|
elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
|
|
c1 = floor(ucode / 0x1000)
|
|
ucode = ucode - c1 * 0x1000
|
|
c2 = floor(ucode / 0x40)
|
|
c3 = ucode - c2 * 0x40
|
|
c1 = c1 + 0xE0
|
|
c2 = c2 + 0x80
|
|
c3 = c3 + 0x80
|
|
if rest then
|
|
return char(c1, c2, c3, rest)
|
|
end
|
|
return char(c1, c2, c3)
|
|
elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
|
|
if f_str_surrogate_prev == 0 then
|
|
f_str_surrogate_prev = ucode
|
|
if not rest then
|
|
return ""
|
|
end
|
|
surrogate_first_error()
|
|
end
|
|
f_str_surrogate_prev = 0
|
|
surrogate_first_error()
|
|
else -- surrogate pair 2nd
|
|
if f_str_surrogate_prev ~= 0 then
|
|
ucode = 0x10000 + (f_str_surrogate_prev - 0xD800) * 0x400 + (ucode - 0xDC00)
|
|
f_str_surrogate_prev = 0
|
|
c1 = floor(ucode / 0x40000)
|
|
ucode = ucode - c1 * 0x40000
|
|
c2 = floor(ucode / 0x1000)
|
|
ucode = ucode - c2 * 0x1000
|
|
c3 = floor(ucode / 0x40)
|
|
c4 = ucode - c3 * 0x40
|
|
c1 = c1 + 0xF0
|
|
c2 = c2 + 0x80
|
|
c3 = c3 + 0x80
|
|
c4 = c4 + 0x80
|
|
if rest then
|
|
return char(c1, c2, c3, c4, rest)
|
|
end
|
|
return char(c1, c2, c3, c4)
|
|
end
|
|
decode_error("2nd surrogate pair byte appeared without 1st")
|
|
end
|
|
end
|
|
decode_error("invalid unicode codepoint literal")
|
|
end
|
|
if f_str_surrogate_prev ~= 0 then
|
|
f_str_surrogate_prev = 0
|
|
surrogate_first_error()
|
|
end
|
|
return f_str_escapetbl[ch] .. ucode
|
|
end
|
|
|
|
-- caching interpreted keys for speed
|
|
local f_str_keycache = setmetatable({}, { __mode = "v" })
|
|
|
|
local function f_str(iskey)
|
|
local newpos = pos
|
|
local tmppos, c1, c2
|
|
repeat
|
|
newpos = find(json, '"', newpos, true) -- search '"'
|
|
if not newpos then
|
|
decode_error("unterminated string")
|
|
end
|
|
tmppos = newpos - 1
|
|
newpos = newpos + 1
|
|
c1, c2 = byte(json, tmppos - 1, tmppos)
|
|
if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s
|
|
repeat
|
|
tmppos = tmppos - 2
|
|
c1, c2 = byte(json, tmppos - 1, tmppos)
|
|
until c2 ~= 0x5C or c1 ~= 0x5C
|
|
tmppos = newpos - 2
|
|
end
|
|
until c2 ~= 0x5C -- leave if '"' is not preceded by '\'
|
|
|
|
local str = sub(json, pos, tmppos)
|
|
pos = newpos
|
|
|
|
if iskey then -- check key cache
|
|
tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val
|
|
if tmppos then
|
|
return tmppos
|
|
end
|
|
tmppos = str
|
|
end
|
|
|
|
if find(str, f_str_ctrl_pat) then
|
|
decode_error("unescaped control string")
|
|
end
|
|
if find(str, "\\", 1, true) then -- check whether a backslash exists
|
|
-- We need to grab 4 characters after the escape char,
|
|
-- for encoding unicode codepoint to UTF-8.
|
|
-- As we need to ensure that every first surrogate pair byte is
|
|
-- immediately followed by second one, we grab upto 5 characters and
|
|
-- check the last for this purpose.
|
|
str = gsub(str, "\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)", f_str_subst)
|
|
if f_str_surrogate_prev ~= 0 then
|
|
f_str_surrogate_prev = 0
|
|
decode_error("1st surrogate pair byte not continued by 2nd")
|
|
end
|
|
end
|
|
if iskey then -- commit key cache
|
|
f_str_keycache[tmppos] = str
|
|
end
|
|
return str
|
|
end
|
|
|
|
--[[
|
|
Arrays, Objects
|
|
--]]
|
|
-- array
|
|
local function f_ary()
|
|
rec_depth = rec_depth + 1
|
|
if rec_depth > 1000 then
|
|
decode_error("too deeply nested json (> 1000)")
|
|
end
|
|
local ary = {}
|
|
|
|
pos = match(json, "^[ \n\r\t]*()", pos)
|
|
|
|
local i = 0
|
|
if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty
|
|
pos = pos + 1
|
|
else
|
|
local newpos = pos
|
|
repeat
|
|
i = i + 1
|
|
f = dispatcher[byte(json, newpos)] -- parse value
|
|
pos = newpos + 1
|
|
ary[i] = f()
|
|
newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma
|
|
until not newpos
|
|
|
|
newpos = match(json, "^[ \n\r\t]*%]()", pos) -- check closing bracket
|
|
if not newpos then
|
|
decode_error("no closing bracket of an array")
|
|
end
|
|
pos = newpos
|
|
end
|
|
|
|
if arraylen then -- commit the length of the array if `arraylen` is set
|
|
ary[0] = i
|
|
end
|
|
rec_depth = rec_depth - 1
|
|
return ary
|
|
end
|
|
|
|
-- objects
|
|
local function f_obj()
|
|
rec_depth = rec_depth + 1
|
|
if rec_depth > 1000 then
|
|
decode_error("too deeply nested json (> 1000)")
|
|
end
|
|
local obj = {}
|
|
|
|
pos = match(json, "^[ \n\r\t]*()", pos)
|
|
if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty
|
|
pos = pos + 1
|
|
else
|
|
local newpos = pos
|
|
|
|
repeat
|
|
if byte(json, newpos) ~= 0x22 then -- check '"'
|
|
decode_error("not key")
|
|
end
|
|
pos = newpos + 1
|
|
local key = f_str(true) -- parse key
|
|
|
|
-- optimized for compact json
|
|
-- c1, c2 == ':', <the first char of the value> or
|
|
-- c1, c2, c3 == ':', ' ', <the first char of the value>
|
|
f = f_err
|
|
local c1, c2, c3 = byte(json, pos, pos + 3)
|
|
if c1 == 0x3A then
|
|
if c2 ~= 0x20 then
|
|
f = dispatcher[c2]
|
|
newpos = pos + 2
|
|
else
|
|
f = dispatcher[c3]
|
|
newpos = pos + 3
|
|
end
|
|
end
|
|
if f == f_err then -- read a colon and arbitrary number of spaces
|
|
newpos = match(json, "^[ \n\r\t]*:[ \n\r\t]*()", pos)
|
|
if not newpos then
|
|
decode_error("no colon after a key")
|
|
end
|
|
f = dispatcher[byte(json, newpos)]
|
|
newpos = newpos + 1
|
|
end
|
|
pos = newpos
|
|
obj[key] = f() -- parse value
|
|
newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos)
|
|
until not newpos
|
|
|
|
newpos = match(json, "^[ \n\r\t]*}()", pos)
|
|
if not newpos then
|
|
decode_error("no closing bracket of an object")
|
|
end
|
|
pos = newpos
|
|
end
|
|
|
|
rec_depth = rec_depth - 1
|
|
return obj
|
|
end
|
|
|
|
--[[
|
|
The jump table to dispatch a parser for a value,
|
|
indexed by the code of the value's first char.
|
|
Nil key means the end of json.
|
|
--]]
|
|
dispatcher = {
|
|
[0] = f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_str,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_mns,
|
|
f_err,
|
|
f_err,
|
|
f_zro,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_ary,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_fls,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_nul,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_tru,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_obj,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
__index = function()
|
|
decode_error("unexpected termination")
|
|
end,
|
|
}
|
|
setmetatable(dispatcher, dispatcher)
|
|
|
|
--[[
|
|
run decoder
|
|
--]]
|
|
local function decode(json_, pos_, nullv_, arraylen_)
|
|
json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_
|
|
rec_depth = 0
|
|
|
|
pos = match(json, "^[ \n\r\t]*()", pos)
|
|
|
|
f = dispatcher[byte(json, pos)]
|
|
pos = pos + 1
|
|
local v = f()
|
|
|
|
if pos_ then
|
|
return v, pos
|
|
else
|
|
f, pos = find(json, "^[ \n\r\t]*", pos)
|
|
if pos ~= #json then
|
|
decode_error("json ended")
|
|
end
|
|
return v
|
|
end
|
|
end
|
|
|
|
return decode
|
|
end
|
|
|
|
return newdecoder
|
|
end
|
|
end
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["lunajson.encoder"] = function(...)
|
|
local arg = _G.arg
|
|
local error = error
|
|
local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match
|
|
local concat = table.concat
|
|
local tostring = tostring
|
|
local pairs, type = pairs, type
|
|
local setmetatable = setmetatable
|
|
local huge, tiny = 1 / 0, -1 / 0
|
|
|
|
local f_string_esc_pat
|
|
if _VERSION == "Lua 5.1" then
|
|
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
|
|
f_string_esc_pat = "[^ -!#-[%]^-\255]"
|
|
else
|
|
f_string_esc_pat = '[\0-\31"\\]'
|
|
end
|
|
|
|
local _ENV = nil
|
|
|
|
local function newencoder()
|
|
local v, nullv
|
|
local i, builder, visited
|
|
|
|
local function f_tostring(v)
|
|
builder[i] = tostring(v)
|
|
i = i + 1
|
|
end
|
|
|
|
local radixmark = match(tostring(0.5), "[^0-9]")
|
|
local delimmark = match(tostring(12345.12345), "[^0-9" .. radixmark .. "]")
|
|
if radixmark == "." then
|
|
radixmark = nil
|
|
end
|
|
|
|
local radixordelim
|
|
if radixmark or delimmark then
|
|
radixordelim = true
|
|
if radixmark and find(radixmark, "%W") then
|
|
radixmark = "%" .. radixmark
|
|
end
|
|
if delimmark and find(delimmark, "%W") then
|
|
delimmark = "%" .. delimmark
|
|
end
|
|
end
|
|
|
|
local f_number = function(n)
|
|
if tiny < n and n < huge then
|
|
local s = format("%.17g", n)
|
|
if radixordelim then
|
|
if delimmark then
|
|
s = gsub(s, delimmark, "")
|
|
end
|
|
if radixmark then
|
|
s = gsub(s, radixmark, ".")
|
|
end
|
|
end
|
|
builder[i] = s
|
|
i = i + 1
|
|
return
|
|
end
|
|
error("invalid number")
|
|
end
|
|
|
|
local doencode
|
|
|
|
local f_string_subst = {
|
|
['"'] = '\\"',
|
|
["\\"] = "\\\\",
|
|
["\b"] = "\\b",
|
|
["\f"] = "\\f",
|
|
["\n"] = "\\n",
|
|
["\r"] = "\\r",
|
|
["\t"] = "\\t",
|
|
__index = function(_, c)
|
|
return format("\\u00%02X", byte(c))
|
|
end,
|
|
}
|
|
setmetatable(f_string_subst, f_string_subst)
|
|
|
|
local function f_string(s)
|
|
builder[i] = '"'
|
|
if find(s, f_string_esc_pat) then
|
|
s = gsub(s, f_string_esc_pat, f_string_subst)
|
|
end
|
|
builder[i + 1] = s
|
|
builder[i + 2] = '"'
|
|
i = i + 3
|
|
end
|
|
|
|
local function f_table(o)
|
|
if visited[o] then
|
|
error("loop detected")
|
|
end
|
|
visited[o] = true
|
|
|
|
local tmp = o[0]
|
|
if type(tmp) == "number" then -- arraylen available
|
|
builder[i] = "["
|
|
i = i + 1
|
|
for j = 1, tmp do
|
|
doencode(o[j])
|
|
builder[i] = ","
|
|
i = i + 1
|
|
end
|
|
if tmp > 0 then
|
|
i = i - 1
|
|
end
|
|
builder[i] = "]"
|
|
else
|
|
tmp = o[1]
|
|
if tmp ~= nil then -- detected as array
|
|
builder[i] = "["
|
|
i = i + 1
|
|
local j = 2
|
|
repeat
|
|
doencode(tmp)
|
|
tmp = o[j]
|
|
if tmp == nil then
|
|
break
|
|
end
|
|
j = j + 1
|
|
builder[i] = ","
|
|
i = i + 1
|
|
until false
|
|
builder[i] = "]"
|
|
else -- detected as object
|
|
builder[i] = "{"
|
|
i = i + 1
|
|
local tmp = i
|
|
for k, v in pairs(o) do
|
|
if type(k) ~= "string" then
|
|
error("non-string key")
|
|
end
|
|
f_string(k)
|
|
builder[i] = ":"
|
|
i = i + 1
|
|
doencode(v)
|
|
builder[i] = ","
|
|
i = i + 1
|
|
end
|
|
if i > tmp then
|
|
i = i - 1
|
|
end
|
|
builder[i] = "}"
|
|
end
|
|
end
|
|
|
|
i = i + 1
|
|
visited[o] = nil
|
|
end
|
|
|
|
local dispatcher = {
|
|
boolean = f_tostring,
|
|
number = f_number,
|
|
string = f_string,
|
|
table = f_table,
|
|
__index = function()
|
|
error("invalid type value")
|
|
end,
|
|
}
|
|
setmetatable(dispatcher, dispatcher)
|
|
|
|
function doencode(v)
|
|
if v == nullv then
|
|
builder[i] = "null"
|
|
i = i + 1
|
|
return
|
|
end
|
|
return dispatcher[type(v)](v)
|
|
end
|
|
|
|
local function encode(v_, nullv_)
|
|
v, nullv = v_, nullv_
|
|
i, builder, visited = 1, {}, {}
|
|
|
|
doencode(v)
|
|
return concat(builder)
|
|
end
|
|
|
|
return encode
|
|
end
|
|
|
|
return newencoder
|
|
end
|
|
end
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["lunajson.sax"] = function(...)
|
|
local arg = _G.arg
|
|
local setmetatable, tonumber, tostring = setmetatable, tonumber, tostring
|
|
local floor, inf = math.floor, math.huge
|
|
local mininteger, tointeger = math.mininteger or nil, math.tointeger or nil
|
|
local byte, char, find, gsub, match, sub =
|
|
string.byte, string.char, string.find, string.gsub, string.match, string.sub
|
|
|
|
local function _parse_error(pos, errmsg)
|
|
error("parse error at " .. pos .. ": " .. errmsg, 2)
|
|
end
|
|
|
|
local f_str_ctrl_pat
|
|
if _VERSION == "Lua 5.1" then
|
|
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
|
|
f_str_ctrl_pat = "[^\32-\255]"
|
|
else
|
|
f_str_ctrl_pat = "[\0-\31]"
|
|
end
|
|
|
|
local type, unpack = type, table.unpack or unpack
|
|
local open = io.open
|
|
|
|
local _ENV = nil
|
|
|
|
local function nop() end
|
|
|
|
local function newparser(src, saxtbl)
|
|
local json, jsonnxt, rec_depth
|
|
local jsonlen, pos, acc = 0, 1, 0
|
|
|
|
-- `f` is the temporary for dispatcher[c] and
|
|
-- the dummy for the first return value of `find`
|
|
local dispatcher, f
|
|
|
|
-- initialize
|
|
if type(src) == "string" then
|
|
json = src
|
|
jsonlen = #json
|
|
jsonnxt = function()
|
|
json = ""
|
|
jsonlen = 0
|
|
jsonnxt = nop
|
|
end
|
|
else
|
|
jsonnxt = function()
|
|
acc = acc + jsonlen
|
|
pos = 1
|
|
repeat
|
|
json = src()
|
|
if not json then
|
|
json = ""
|
|
jsonlen = 0
|
|
jsonnxt = nop
|
|
return
|
|
end
|
|
jsonlen = #json
|
|
until jsonlen > 0
|
|
end
|
|
jsonnxt()
|
|
end
|
|
|
|
local sax_startobject = saxtbl.startobject or nop
|
|
local sax_key = saxtbl.key or nop
|
|
local sax_endobject = saxtbl.endobject or nop
|
|
local sax_startarray = saxtbl.startarray or nop
|
|
local sax_endarray = saxtbl.endarray or nop
|
|
local sax_string = saxtbl.string or nop
|
|
local sax_number = saxtbl.number or nop
|
|
local sax_boolean = saxtbl.boolean or nop
|
|
local sax_null = saxtbl.null or nop
|
|
|
|
--[[
|
|
Helper
|
|
--]]
|
|
local function tryc()
|
|
local c = byte(json, pos)
|
|
if not c then
|
|
jsonnxt()
|
|
c = byte(json, pos)
|
|
end
|
|
return c
|
|
end
|
|
|
|
local function parse_error(errmsg)
|
|
return _parse_error(acc + pos, errmsg)
|
|
end
|
|
|
|
local function tellc()
|
|
return tryc() or parse_error("unexpected termination")
|
|
end
|
|
|
|
local function spaces() -- skip spaces and prepare the next char
|
|
while true do
|
|
pos = match(json, "^[ \n\r\t]*()", pos)
|
|
if pos <= jsonlen then
|
|
return
|
|
end
|
|
if jsonlen == 0 then
|
|
parse_error("unexpected termination")
|
|
end
|
|
jsonnxt()
|
|
end
|
|
end
|
|
|
|
--[[
|
|
Invalid
|
|
--]]
|
|
local function f_err()
|
|
parse_error("invalid value")
|
|
end
|
|
|
|
--[[
|
|
Constants
|
|
--]]
|
|
-- fallback slow constants parser
|
|
local function generic_constant(target, targetlen, ret, sax_f)
|
|
for i = 1, targetlen do
|
|
local c = tellc()
|
|
if byte(target, i) ~= c then
|
|
parse_error("invalid char")
|
|
end
|
|
pos = pos + 1
|
|
end
|
|
return sax_f(ret)
|
|
end
|
|
|
|
-- null
|
|
local function f_nul()
|
|
if sub(json, pos, pos + 2) == "ull" then
|
|
pos = pos + 3
|
|
return sax_null(nil)
|
|
end
|
|
return generic_constant("ull", 3, nil, sax_null)
|
|
end
|
|
|
|
-- false
|
|
local function f_fls()
|
|
if sub(json, pos, pos + 3) == "alse" then
|
|
pos = pos + 4
|
|
return sax_boolean(false)
|
|
end
|
|
return generic_constant("alse", 4, false, sax_boolean)
|
|
end
|
|
|
|
-- true
|
|
local function f_tru()
|
|
if sub(json, pos, pos + 2) == "rue" then
|
|
pos = pos + 3
|
|
return sax_boolean(true)
|
|
end
|
|
return generic_constant("rue", 3, true, sax_boolean)
|
|
end
|
|
|
|
--[[
|
|
Numbers
|
|
Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
|
|
is captured as a number and its conformance to the JSON spec is checked.
|
|
--]]
|
|
-- deal with non-standard locales
|
|
local radixmark = match(tostring(0.5), "[^0-9]")
|
|
local fixedtonumber = tonumber
|
|
if radixmark ~= "." then
|
|
if find(radixmark, "%W") then
|
|
radixmark = "%" .. radixmark
|
|
end
|
|
fixedtonumber = function(s)
|
|
return tonumber(gsub(s, ".", radixmark))
|
|
end
|
|
end
|
|
|
|
local function number_error()
|
|
return parse_error("invalid number")
|
|
end
|
|
|
|
-- fallback slow parser
|
|
local function generic_number(mns)
|
|
local buf = {}
|
|
local i = 1
|
|
local is_int = true
|
|
|
|
local c = byte(json, pos)
|
|
pos = pos + 1
|
|
|
|
local function nxt()
|
|
buf[i] = c
|
|
i = i + 1
|
|
c = tryc()
|
|
pos = pos + 1
|
|
end
|
|
|
|
if c == 0x30 then
|
|
nxt()
|
|
if c and 0x30 <= c and c < 0x3A then
|
|
number_error()
|
|
end
|
|
else
|
|
repeat
|
|
nxt()
|
|
until not (c and 0x30 <= c and c < 0x3A)
|
|
end
|
|
if c == 0x2E then
|
|
is_int = false
|
|
nxt()
|
|
if not (c and 0x30 <= c and c < 0x3A) then
|
|
number_error()
|
|
end
|
|
repeat
|
|
nxt()
|
|
until not (c and 0x30 <= c and c < 0x3A)
|
|
end
|
|
if c == 0x45 or c == 0x65 then
|
|
is_int = false
|
|
nxt()
|
|
if c == 0x2B or c == 0x2D then
|
|
nxt()
|
|
end
|
|
if not (c and 0x30 <= c and c < 0x3A) then
|
|
number_error()
|
|
end
|
|
repeat
|
|
nxt()
|
|
until not (c and 0x30 <= c and c < 0x3A)
|
|
end
|
|
if
|
|
c
|
|
and (0x41 <= c and c <= 0x5B or 0x61 <= c and c <= 0x7B or c == 0x2B or c == 0x2D or c == 0x2E)
|
|
then
|
|
number_error()
|
|
end
|
|
pos = pos - 1
|
|
|
|
local num = char(unpack(buf))
|
|
num = fixedtonumber(num)
|
|
if mns then
|
|
num = -num
|
|
if num == mininteger and is_int then
|
|
num = mininteger
|
|
end
|
|
end
|
|
return sax_number(num)
|
|
end
|
|
|
|
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
|
local function f_zro(mns)
|
|
local num, c = match(json, "^(%.?[0-9]*)([-+.A-Za-z]?)", pos) -- skipping 0
|
|
|
|
if num == "" then
|
|
if pos > jsonlen then
|
|
pos = pos - 1
|
|
return generic_number(mns)
|
|
end
|
|
if c == "" then
|
|
if mns then
|
|
return sax_number(-0.0)
|
|
end
|
|
return sax_number(0)
|
|
end
|
|
|
|
if c == "e" or c == "E" then
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
if c == "" then
|
|
pos = pos + #num
|
|
if pos > jsonlen then
|
|
pos = pos - #num - 1
|
|
return generic_number(mns)
|
|
end
|
|
if mns then
|
|
return sax_number(-0.0)
|
|
end
|
|
return sax_number(0.0)
|
|
end
|
|
end
|
|
pos = pos - 1
|
|
return generic_number(mns)
|
|
end
|
|
|
|
if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
|
|
pos = pos - 1
|
|
return generic_number(mns)
|
|
end
|
|
|
|
if c ~= "" then
|
|
if c == "e" or c == "E" then
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
end
|
|
if c ~= "" then
|
|
pos = pos - 1
|
|
return generic_number(mns)
|
|
end
|
|
end
|
|
|
|
pos = pos + #num
|
|
if pos > jsonlen then
|
|
pos = pos - #num - 1
|
|
return generic_number(mns)
|
|
end
|
|
c = fixedtonumber(num)
|
|
|
|
if mns then
|
|
c = -c
|
|
end
|
|
return sax_number(c)
|
|
end
|
|
|
|
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
|
local function f_num(mns)
|
|
pos = pos - 1
|
|
local num, c = match(json, "^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)", pos)
|
|
if byte(num, -1) == 0x2E then -- error if ended with period
|
|
return generic_number(mns)
|
|
end
|
|
|
|
if c ~= "" then
|
|
if c ~= "e" and c ~= "E" then
|
|
return generic_number(mns)
|
|
end
|
|
num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos)
|
|
if not num or c ~= "" then
|
|
return generic_number(mns)
|
|
end
|
|
end
|
|
|
|
pos = pos + #num
|
|
if pos > jsonlen then
|
|
pos = pos - #num
|
|
return generic_number(mns)
|
|
end
|
|
c = fixedtonumber(num)
|
|
|
|
if mns then
|
|
c = -c
|
|
if c == mininteger and not find(num, "[^0-9]") then
|
|
c = mininteger
|
|
end
|
|
end
|
|
return sax_number(c)
|
|
end
|
|
|
|
-- skip minus sign
|
|
local function f_mns()
|
|
local c = byte(json, pos) or tellc()
|
|
if c then
|
|
pos = pos + 1
|
|
if c > 0x30 then
|
|
if c < 0x3A then
|
|
return f_num(true)
|
|
end
|
|
else
|
|
if c > 0x2F then
|
|
return f_zro(true)
|
|
end
|
|
end
|
|
end
|
|
parse_error("invalid number")
|
|
end
|
|
|
|
--[[
|
|
Strings
|
|
--]]
|
|
local f_str_hextbl = {
|
|
0x0,
|
|
0x1,
|
|
0x2,
|
|
0x3,
|
|
0x4,
|
|
0x5,
|
|
0x6,
|
|
0x7,
|
|
0x8,
|
|
0x9,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
0xA,
|
|
0xB,
|
|
0xC,
|
|
0xD,
|
|
0xE,
|
|
0xF,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
inf,
|
|
0xA,
|
|
0xB,
|
|
0xC,
|
|
0xD,
|
|
0xE,
|
|
0xF,
|
|
__index = function()
|
|
return inf
|
|
end,
|
|
}
|
|
setmetatable(f_str_hextbl, f_str_hextbl)
|
|
|
|
local f_str_escapetbl = {
|
|
['"'] = '"',
|
|
["\\"] = "\\",
|
|
["/"] = "/",
|
|
["b"] = "\b",
|
|
["f"] = "\f",
|
|
["n"] = "\n",
|
|
["r"] = "\r",
|
|
["t"] = "\t",
|
|
__index = function()
|
|
parse_error("invalid escape sequence")
|
|
end,
|
|
}
|
|
setmetatable(f_str_escapetbl, f_str_escapetbl)
|
|
|
|
local function surrogate_first_error()
|
|
return parse_error("1st surrogate pair byte not continued by 2nd")
|
|
end
|
|
|
|
local f_str_surrogate_prev = 0
|
|
local function f_str_subst(ch, ucode)
|
|
if ch == "u" then
|
|
local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
|
|
ucode = f_str_hextbl[c1 - 47] * 0x1000
|
|
+ f_str_hextbl[c2 - 47] * 0x100
|
|
+ f_str_hextbl[c3 - 47] * 0x10
|
|
+ f_str_hextbl[c4 - 47]
|
|
if ucode ~= inf then
|
|
if ucode < 0x80 then -- 1byte
|
|
if rest then
|
|
return char(ucode, rest)
|
|
end
|
|
return char(ucode)
|
|
elseif ucode < 0x800 then -- 2bytes
|
|
c1 = floor(ucode / 0x40)
|
|
c2 = ucode - c1 * 0x40
|
|
c1 = c1 + 0xC0
|
|
c2 = c2 + 0x80
|
|
if rest then
|
|
return char(c1, c2, rest)
|
|
end
|
|
return char(c1, c2)
|
|
elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
|
|
c1 = floor(ucode / 0x1000)
|
|
ucode = ucode - c1 * 0x1000
|
|
c2 = floor(ucode / 0x40)
|
|
c3 = ucode - c2 * 0x40
|
|
c1 = c1 + 0xE0
|
|
c2 = c2 + 0x80
|
|
c3 = c3 + 0x80
|
|
if rest then
|
|
return char(c1, c2, c3, rest)
|
|
end
|
|
return char(c1, c2, c3)
|
|
elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
|
|
if f_str_surrogate_prev == 0 then
|
|
f_str_surrogate_prev = ucode
|
|
if not rest then
|
|
return ""
|
|
end
|
|
surrogate_first_error()
|
|
end
|
|
f_str_surrogate_prev = 0
|
|
surrogate_first_error()
|
|
else -- surrogate pair 2nd
|
|
if f_str_surrogate_prev ~= 0 then
|
|
ucode = 0x10000 + (f_str_surrogate_prev - 0xD800) * 0x400 + (ucode - 0xDC00)
|
|
f_str_surrogate_prev = 0
|
|
c1 = floor(ucode / 0x40000)
|
|
ucode = ucode - c1 * 0x40000
|
|
c2 = floor(ucode / 0x1000)
|
|
ucode = ucode - c2 * 0x1000
|
|
c3 = floor(ucode / 0x40)
|
|
c4 = ucode - c3 * 0x40
|
|
c1 = c1 + 0xF0
|
|
c2 = c2 + 0x80
|
|
c3 = c3 + 0x80
|
|
c4 = c4 + 0x80
|
|
if rest then
|
|
return char(c1, c2, c3, c4, rest)
|
|
end
|
|
return char(c1, c2, c3, c4)
|
|
end
|
|
parse_error("2nd surrogate pair byte appeared without 1st")
|
|
end
|
|
end
|
|
parse_error("invalid unicode codepoint literal")
|
|
end
|
|
if f_str_surrogate_prev ~= 0 then
|
|
f_str_surrogate_prev = 0
|
|
surrogate_first_error()
|
|
end
|
|
return f_str_escapetbl[ch] .. ucode
|
|
end
|
|
|
|
local function f_str(iskey)
|
|
local pos2 = pos
|
|
local newpos
|
|
local str = ""
|
|
local bs
|
|
while true do
|
|
while true do -- search '\' or '"'
|
|
newpos = find(json, '[\\"]', pos2)
|
|
if newpos then
|
|
break
|
|
end
|
|
str = str .. sub(json, pos, jsonlen)
|
|
if pos2 == jsonlen + 2 then
|
|
pos2 = 2
|
|
else
|
|
pos2 = 1
|
|
end
|
|
jsonnxt()
|
|
if jsonlen == 0 then
|
|
parse_error("unterminated string")
|
|
end
|
|
end
|
|
if byte(json, newpos) == 0x22 then -- break if '"'
|
|
break
|
|
end
|
|
pos2 = newpos + 2 -- skip '\<char>'
|
|
bs = true -- mark the existence of a backslash
|
|
end
|
|
str = str .. sub(json, pos, newpos - 1)
|
|
pos = newpos + 1
|
|
|
|
if find(str, f_str_ctrl_pat) then
|
|
parse_error("unescaped control string")
|
|
end
|
|
if bs then -- a backslash exists
|
|
-- We need to grab 4 characters after the escape char,
|
|
-- for encoding unicode codepoint to UTF-8.
|
|
-- As we need to ensure that every first surrogate pair byte is
|
|
-- immediately followed by second one, we grab upto 5 characters and
|
|
-- check the last for this purpose.
|
|
str = gsub(str, "\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)", f_str_subst)
|
|
if f_str_surrogate_prev ~= 0 then
|
|
f_str_surrogate_prev = 0
|
|
parse_error("1st surrogate pair byte not continued by 2nd")
|
|
end
|
|
end
|
|
|
|
if iskey then
|
|
return sax_key(str)
|
|
end
|
|
return sax_string(str)
|
|
end
|
|
|
|
--[[
|
|
Arrays, Objects
|
|
--]]
|
|
-- arrays
|
|
local function f_ary()
|
|
rec_depth = rec_depth + 1
|
|
if rec_depth > 1000 then
|
|
parse_error("too deeply nested json (> 1000)")
|
|
end
|
|
sax_startarray()
|
|
|
|
spaces()
|
|
if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty
|
|
pos = pos + 1
|
|
else
|
|
local newpos
|
|
while true do
|
|
f = dispatcher[byte(json, pos)] -- parse value
|
|
pos = pos + 1
|
|
f()
|
|
newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma
|
|
if newpos then
|
|
pos = newpos
|
|
else
|
|
newpos = match(json, "^[ \n\r\t]*%]()", pos) -- check closing bracket
|
|
if newpos then
|
|
pos = newpos
|
|
break
|
|
end
|
|
spaces() -- since the current chunk can be ended, skip spaces toward following chunks
|
|
local c = byte(json, pos)
|
|
pos = pos + 1
|
|
if c == 0x2C then -- check comma again
|
|
spaces()
|
|
elseif c == 0x5D then -- check closing bracket again
|
|
break
|
|
else
|
|
parse_error("no closing bracket of an array")
|
|
end
|
|
end
|
|
if pos > jsonlen then
|
|
spaces()
|
|
end
|
|
end
|
|
end
|
|
|
|
rec_depth = rec_depth - 1
|
|
return sax_endarray()
|
|
end
|
|
|
|
-- objects
|
|
local function f_obj()
|
|
rec_depth = rec_depth + 1
|
|
if rec_depth > 1000 then
|
|
parse_error("too deeply nested json (> 1000)")
|
|
end
|
|
sax_startobject()
|
|
|
|
spaces()
|
|
if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty
|
|
pos = pos + 1
|
|
else
|
|
local newpos
|
|
while true do
|
|
if byte(json, pos) ~= 0x22 then
|
|
parse_error("not key")
|
|
end
|
|
pos = pos + 1
|
|
f_str(true) -- parse key
|
|
newpos = match(json, "^[ \n\r\t]*:[ \n\r\t]*()", pos) -- check colon
|
|
if newpos then
|
|
pos = newpos
|
|
else
|
|
spaces() -- read spaces through chunks
|
|
if byte(json, pos) ~= 0x3A then -- check colon again
|
|
parse_error("no colon after a key")
|
|
end
|
|
pos = pos + 1
|
|
spaces()
|
|
end
|
|
if pos > jsonlen then
|
|
spaces()
|
|
end
|
|
f = dispatcher[byte(json, pos)]
|
|
pos = pos + 1
|
|
f() -- parse value
|
|
newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma
|
|
if newpos then
|
|
pos = newpos
|
|
else
|
|
newpos = match(json, "^[ \n\r\t]*}()", pos) -- check closing bracket
|
|
if newpos then
|
|
pos = newpos
|
|
break
|
|
end
|
|
spaces() -- read spaces through chunks
|
|
local c = byte(json, pos)
|
|
pos = pos + 1
|
|
if c == 0x2C then -- check comma again
|
|
spaces()
|
|
elseif c == 0x7D then -- check closing bracket again
|
|
break
|
|
else
|
|
parse_error("no closing bracket of an object")
|
|
end
|
|
end
|
|
if pos > jsonlen then
|
|
spaces()
|
|
end
|
|
end
|
|
end
|
|
|
|
rec_depth = rec_depth - 1
|
|
return sax_endobject()
|
|
end
|
|
|
|
--[[
|
|
The jump table to dispatch a parser for a value,
|
|
indexed by the code of the value's first char.
|
|
Key should be non-nil.
|
|
--]]
|
|
dispatcher = {
|
|
[0] = f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_str,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_mns,
|
|
f_err,
|
|
f_err,
|
|
f_zro,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_num,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_ary,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_fls,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_nul,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_tru,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_obj,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
f_err,
|
|
}
|
|
|
|
--[[
|
|
public functions
|
|
--]]
|
|
local function run()
|
|
rec_depth = 0
|
|
spaces()
|
|
f = dispatcher[byte(json, pos)]
|
|
pos = pos + 1
|
|
f()
|
|
end
|
|
|
|
local function read(n)
|
|
if n < 0 then
|
|
error("the argument must be non-negative")
|
|
end
|
|
local pos2 = (pos - 1) + n
|
|
local str = sub(json, pos, pos2)
|
|
while pos2 > jsonlen and jsonlen ~= 0 do
|
|
jsonnxt()
|
|
pos2 = pos2 - (jsonlen - (pos - 1))
|
|
str = str .. sub(json, pos, pos2)
|
|
end
|
|
if jsonlen ~= 0 then
|
|
pos = pos2 + 1
|
|
end
|
|
return str
|
|
end
|
|
|
|
local function tellpos()
|
|
return acc + pos
|
|
end
|
|
|
|
return {
|
|
run = run,
|
|
tryc = tryc,
|
|
read = read,
|
|
tellpos = tellpos,
|
|
}
|
|
end
|
|
|
|
local function newfileparser(fn, saxtbl)
|
|
local fp = open(fn)
|
|
local function gen()
|
|
local s
|
|
if fp then
|
|
s = fp:read(8192)
|
|
if not s then
|
|
fp:close()
|
|
fp = nil
|
|
end
|
|
end
|
|
return s
|
|
end
|
|
return newparser(gen, saxtbl)
|
|
end
|
|
|
|
return {
|
|
newparser = newparser,
|
|
newfileparser = newfileparser,
|
|
}
|
|
end
|
|
end
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["utils"] = function(...)
|
|
local arg = _G.arg
|
|
local module = {}
|
|
|
|
function module.tablelength(T)
|
|
local count = 0
|
|
for _ in pairs(T) do
|
|
count = count + 1
|
|
end
|
|
return count
|
|
end
|
|
|
|
module.id_number = 0
|
|
function module.next_id(length)
|
|
module.id_number = module.id_number + 1
|
|
return string.format(string.format("%%0%dd", length), module.id_number)
|
|
end
|
|
|
|
local function url_encode_char(chr)
|
|
return string.format("%%%X", string.byte(chr))
|
|
end
|
|
|
|
function module.urlencode(str)
|
|
local output, t = string.gsub(str, "[^%w]", url_encode_char)
|
|
return output
|
|
end
|
|
|
|
function module.xmlescape(str)
|
|
return string.gsub(str, "[<>&]", { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" })
|
|
end
|
|
|
|
function module.xmlattr(str)
|
|
return string.gsub(str, '["<>&]', { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;", ['"'] = "&quot;" })
|
|
end
|
|
|
|
function module.trim(s)
|
|
return s:gsub("^%s*(.-)%s*$", "%1")
|
|
end
|
|
|
|
function module.deepcopy(orig)
|
|
local orig_type = type(orig)
|
|
local copy
|
|
if orig_type == "table" then
|
|
copy = {}
|
|
for orig_key, orig_value in next, orig, nil do
|
|
copy[module.deepcopy(orig_key)] = module.deepcopy(orig_value)
|
|
end
|
|
setmetatable(copy, module.deepcopy(getmetatable(orig)))
|
|
else -- number, string, boolean, etc
|
|
copy = orig
|
|
end
|
|
return copy
|
|
end
|
|
|
|
function module.dump(o)
|
|
if type(o) == "table" then
|
|
local s = "{ "
|
|
for k, v in pairs(o) do
|
|
if type(k) ~= "number" then
|
|
k = '"' .. k .. '"'
|
|
end
|
|
s = s .. "[" .. k .. "] = " .. module.dump(v) .. ","
|
|
end
|
|
return s .. "} "
|
|
else
|
|
return tostring(o)
|
|
end
|
|
end
|
|
|
|
function module.trim(s)
|
|
if s == nil then
|
|
return s
|
|
end
|
|
return (s:gsub("^%s*(.-)%s*$", "%1"))
|
|
end
|
|
|
|
return module
|
|
end
|
|
end
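-- Illustrative sketch (not part of the upstream filter): the "utils" module
-- above bundles the small string helpers used throughout the filter, e.g.:
--
--   local utils = require("utils")
--   utils.trim("  p. 3  ")        --> "p. 3"
--   utils.urlencode("a b")        --> "a%20b"
--   utils.xmlescape("a < b & c")  --> "a &lt; b &amp; c"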
|
|
|
|
do
|
|
local _ENV = _ENV
|
|
package.preload["zotero"] = function(...)
|
|
local arg = _G.arg
|
|
local module = {}
|
|
|
|
local utils = require("utils")
|
|
local json = require("lunajson")
|
|
-- local pl = require('pl.pretty') -- for pl.pretty.dump
|
|
|
|
local state = {
|
|
reported = {},
|
|
}
|
|
|
|
module.citekeys = {}
|
|
|
|
local function load_items()
|
|
if state.fetched ~= nil then
|
|
return
|
|
end
|
|
|
|
state.fetched = {
|
|
items = {},
|
|
errors = {},
|
|
}
|
|
|
|
local citekeys = {}
|
|
for k, _ in pairs(module.citekeys) do
|
|
table.insert(citekeys, k)
|
|
end
|
|
|
|
if utils.tablelength(citekeys) == 0 then
|
|
return
|
|
end
|
|
|
|
module.request.params.citekeys = citekeys
|
|
local url = module.url .. utils.urlencode(json.encode(module.request))
|
|
local mt, body = pandoc.mediabag.fetch(url, ".")
|
|
local ok, response = pcall(json.decode, body)
|
|
if not ok then
|
|
print("could not fetch Zotero items: " .. response .. "(" .. body .. ")")
|
|
return
|
|
end
|
|
if response.error ~= nil then
|
|
print("could not fetch Zotero items: " .. response.error.message)
|
|
return
|
|
end
|
|
state.fetched = response.result
|
|
end
|
|
|
|
function module.get(citekey)
|
|
load_items()
|
|
|
|
if state.reported[citekey] ~= nil then
|
|
return nil
|
|
end
|
|
|
|
if state.fetched.errors[citekey] ~= nil then
|
|
state.reported[citekey] = true
|
|
if state.fetched.errors[citekey] == 0 then
|
|
print("@" .. citekey .. ": not found")
|
|
else
|
|
print("@" .. citekey .. ": duplicates found")
|
|
end
|
|
return nil
|
|
end
|
|
|
|
if state.fetched.items[citekey] == nil then
|
|
state.reported[citekey] = true
|
|
print("@" .. citekey .. " not in Zotero")
|
|
return nil
|
|
end
|
|
|
|
return state.fetched.items[citekey]
|
|
end
|
|
|
|
return module
|
|
end
|
|
end
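-- Illustrative sketch (not part of the upstream filter): the "zotero" module
-- above lazily fetches all collected citekeys from Better BibTeX in a single
-- JSON-RPC call. zotero.url, zotero.request and zotero.citekeys are filled in
-- by Meta() and Cite_collect() further below; roughly:
--
--   zotero.url = "http://127.0.0.1:23119/better-bibtex/json-rpc?"
--   zotero.request = { jsonrpc = "2.0", method = "item.pandoc_filter", params = { style = "apa" } }
--   zotero.citekeys["someCiteKey2020"] = true   -- hypothetical citekey
--   local item = zotero.get("someCiteKey2020")  -- nil if not found or ambiguous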
|
|
|
|
--
|
|
-- bbt-to-live-doc
|
|
--
|
|
-- Copyright (c) 2020 Emiliano Heyns
|
|
--
|
|
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
-- this software and associated documentation files (the "Software"), to deal in
|
|
-- the Software without restriction, including without limitation the rights to
|
|
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
-- of the Software, and to permit persons to whom the Software is furnished to do
|
|
-- so, subject to the following conditions:
|
|
--
|
|
-- The above copyright notice and this permission notice shall be included in all
|
|
-- copies or substantial portions of the Software.
|
|
--
|
|
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
-- SOFTWARE.
|
|
--
|
|
|
|
if lpeg == nil then
|
|
print("upgrade pandoc to version 2.16.2 or later")
|
|
os.exit()
|
|
end
|
|
|
|
local json = require("lunajson")
|
|
local csl_locator = require("locator")
|
|
local utils = require("utils")
|
|
local zotero = require("zotero")
|
|
|
|
-- -- global state -- --
|
|
local config = {
|
|
client = "zotero",
|
|
scannable_cite = false,
|
|
csl_style = "apa",
|
|
format = nil, -- more to document than anything else -- Lua does not store nils in tables
|
|
transferable = false,
|
|
sorted = true,
|
|
}
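-- Illustrative sketch (not part of the upstream filter): these defaults can be
-- overridden from the document metadata that Meta() reads further below, e.g.
-- in a document's YAML header (keys shown are the ones Meta() looks for):
--
--   zotero:
--     client: zotero            # or jurism
--     csl-style: apa
--     transferable: false
--     scannable-cite: false
--     author-in-text: false
--     library: 1                # optional (hypothetical value), passed through as libraryID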
|
|
|
|
-- -- bibliography marker generator -- --
|
|
function zotero_docpreferences_odt(csl_style)
|
|
return string.format(
|
|
'<data data-version="3" zotero-version="5.0.89">'
|
|
.. ' <session id="OGe1IYVe"/>'
|
|
.. ' <style id="http://www.zotero.org/styles/%s" locale="en-US" hasBibliography="1" bibliographyStyleHasBeenSet="0"/>'
|
|
.. " <prefs>"
|
|
.. ' <pref name="fieldType" value="ReferenceMark"/>'
|
|
-- .. ' <pref name="delayCitationUpdates" value="true"/>'
|
|
.. " </prefs>"
|
|
.. "</data>",
|
|
csl_style
|
|
)
|
|
end
|
|
|
|
local function zotero_bibl_odt_banner()
|
|
if not (config.format == "odt" and config.csl_style and config.transferable) then
|
|
error("zotero_bibl_odt_banner: This should not happen")
|
|
end
|
|
|
|
local banner = ""
|
|
.. '<text:p text:style-name="Bibliography_20_1">'
|
|
.. "ZOTERO_TRANSFER_DOCUMENT"
|
|
.. "</text:p>"
|
|
.. '<text:p text:style-name="Bibliography_20_1">'
|
|
.. "The Zotero citations in this document have been converted to a format"
|
|
.. "that can be safely transferred between word processors. Open this"
|
|
.. "document in a supported word processor and press Refresh in the "
|
|
.. config.client
|
|
.. "plugin to continue working with the citations."
|
|
.. "</text:p>"
|
|
|
|
local doc_preferences = ""
|
|
.. '<text:p text:style-name="Text_20_body">'
|
|
.. '<text:a xlink:type="simple" xlink:href="https://www.zotero.org/" text:style-name="Internet_20_link">'
|
|
.. "DOCUMENT_PREFERENCES "
|
|
.. utils.xmlescape(zotero_docpreferences_odt(config.csl_style))
|
|
.. "</text:a>"
|
|
.. "</text:p>"
|
|
|
|
return banner .. doc_preferences
|
|
end
|
|
|
|
local function zotero_bibl_odt()
|
|
if config.format ~= "odt" or not config.csl_style then
|
|
error("zotero_bibl_odt: This should not happen")
|
|
end
|
|
|
|
local message = "<Bibliography: Do " .. config.client .. " Refresh>"
|
|
local bib_settings = '{"uncited":[],"omitted":[],"custom":[]}'
|
|
|
|
if config.transferable then
|
|
return '<text:p text:style-name="Text_20_body">'
|
|
.. '<text:a xlink:type="simple" xlink:href="https://www.zotero.org/" text:style-name="Internet_20_link">'
|
|
.. "BIBL "
|
|
.. utils.xmlescape(bib_settings)
|
|
.. " "
|
|
.. "CSL_BIBLIOGRAPHY"
|
|
.. "</text:a>"
|
|
.. "</text:p>"
|
|
end
|
|
|
|
return string.format(
|
|
'<text:section text:name=" %s">'
|
|
.. '<text:p text:style-name="Bibliography_20_1">'
|
|
.. utils.xmlescape(message)
|
|
.. "</text:p>"
|
|
.. "</text:section>",
|
|
"ZOTERO_BIBL " .. utils.xmlattr(bib_settings) .. " CSL_BIBLIOGRAPHY" .. " RND" .. utils.next_id(10)
|
|
)
|
|
end
|
|
|
|
-- -- -- citation marker generators -- -- --
|
|
|
|
function clean_csl(item)
|
|
local cleaned = {}
|
|
for k, v in pairs(item) do
|
|
cleaned[k] = v
|
|
end
|
|
cleaned.custom = nil
|
|
return setmetatable(cleaned, getmetatable(item))
|
|
end
|
|
|
|
local function zotero_ref(cite)
|
|
local content = pandoc.utils.stringify(cite.content)
|
|
local csl = {
|
|
citationID = utils.next_id(8),
|
|
properties = {
|
|
unsorted = not config.sorted,
|
|
formattedCitation = content,
|
|
plainCitation = nil, -- otherwise we get a barrage of "you have edited this citation" popups
|
|
-- dontUpdate = false,
|
|
noteIndex = 0,
|
|
},
|
|
citationItems = {},
|
|
schema = "https://github.com/citation-style-language/schema/raw/master/csl-citation.json",
|
|
}
|
|
local author_in_text = ""
|
|
|
|
notfound = false
|
|
for k, item in pairs(cite.citations) do
|
|
local itemData = zotero.get(item.id)
|
|
if itemData == nil then
|
|
notfound = true
|
|
else
|
|
local citation = {
|
|
id = itemData.custom.itemID,
|
|
uris = { itemData.custom.uri },
|
|
-- uri = { zoteroData.uri },
|
|
itemData = clean_csl(itemData),
|
|
}
|
|
|
|
if item.mode == "AuthorInText" then -- not formally supported in Zotero
|
|
if config.author_in_text then
|
|
local authors = itemData.custom.author
|
|
if authors == nil or authors == "" then
|
|
return cite
|
|
else
|
|
author_in_text = pandoc.utils.stringify(pandoc.Str(authors)) .. " "
|
|
author_in_text = '<w:r><w:t xml:space="preserve">'
|
|
.. utils.xmlescape(author_in_text)
|
|
.. "</w:t></w:r>"
|
|
citation["suppress-author"] = true
|
|
end
|
|
else
|
|
return cite
|
|
end
|
|
end
|
|
|
|
if item.mode == "SuppressAuthor" then
|
|
citation["suppress-author"] = true
|
|
end
|
|
citation.prefix = pandoc.utils.stringify(item.prefix):gsub("\194\160", " ")
|
|
local label, locator, suffix = csl_locator.parse(pandoc.utils.stringify(item.suffix):gsub("\194\160", " "))
|
|
if suffix and suffix ~= "" then
|
|
citation.suffix = suffix
|
|
end
|
|
if label and label ~= "" then
|
|
citation.label = label
|
|
end
|
|
if locator and locator ~= "" then
|
|
citation.locator = locator
|
|
end
|
|
|
|
table.insert(csl.citationItems, citation)
|
|
end
|
|
end
|
|
|
|
if notfound then
|
|
return cite
|
|
end
|
|
|
|
local message = "<Do Zotero Refresh: " .. content .. ">"
|
|
|
|
if config.format == "docx" then
|
|
local field = author_in_text
|
|
.. '<w:r><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:instrText xml:space="preserve">'
|
|
field = field .. " ADDIN ZOTERO_ITEM CSL_CITATION " .. utils.xmlescape(json.encode(csl)) .. " "
|
|
field = field
|
|
.. '</w:instrText></w:r><w:r><w:fldChar w:fldCharType="separate"/></w:r><w:r><w:rPr><w:noProof/></w:rPr><w:t>'
|
|
field = field .. utils.xmlescape(message)
|
|
field = field .. '</w:t></w:r><w:r><w:fldChar w:fldCharType="end"/></w:r>'
|
|
|
|
return pandoc.RawInline("openxml", field)
|
|
else
|
|
if config.transferable then
|
|
local field = author_in_text
|
|
.. '<text:a xlink:type="simple" xlink:href="https://www.zotero.org/" text:style-name="Internet_20_link">'
|
|
.. "ITEM CSL_CITATION "
|
|
.. utils.xmlescape(json.encode(csl))
|
|
.. "</text:a>"
|
|
return pandoc.RawInline("opendocument", field)
|
|
end
|
|
|
|
csl = "ZOTERO_ITEM CSL_CITATION " .. utils.xmlattr(json.encode(csl)) .. " RND" .. utils.next_id(10)
|
|
local field = author_in_text .. '<text:reference-mark-start text:name="' .. csl .. '"/>'
|
|
field = field .. utils.xmlescape(message)
|
|
field = field .. '<text:reference-mark-end text:name="' .. csl .. '"/>'
|
|
|
|
return pandoc.RawInline("opendocument", field)
|
|
end
|
|
end
|
|
|
|
local function scannable_cite(cite)
|
|
local citations = ""
|
|
for k, item in pairs(cite.citations) do
|
|
citation = zotero.get(item.id)
|
|
if citation == nil then
|
|
return cite
|
|
end
|
|
|
|
if item.mode == "AuthorInText" then -- not formally supported in Zotero
|
|
if config.author_in_text then
|
|
local authors = zotero.authors(citation)
|
|
if authors == nil then
|
|
return cite
|
|
else
|
|
return pandoc.Str(authors)
|
|
end
|
|
else
|
|
return cite
|
|
end
|
|
end
|
|
|
|
local suppress = (item.mode == "SuppressAuthor" and "-" or "")
|
|
local s, e, ug, id, key
|
|
s, e, key = string.find(citation.uri, "http://zotero.org/users/local/%w+/items/(%w+)")
|
|
if key then
|
|
ug = "users"
|
|
id = "0"
|
|
else
|
|
s, e, ug, id, key = string.find(citation.uri, "http://zotero.org/(%w+)/(%w+)/items/(%w+)")
|
|
end
|
|
|
|
local shortlabel = {
|
|
book = "bk.",
|
|
chapter = "chap.",
|
|
column = "col.",
|
|
figure = "fig.",
|
|
folio = "fol.",
|
|
number = "no.",
|
|
line = "l.",
|
|
note = "n.",
|
|
opus = "op.",
|
|
page = "p.",
|
|
paragraph = "para.",
|
|
part = "pt.",
|
|
section = "sec.",
|
|
["sub verbo"] = "s.v.",
|
|
verse = "v.",
|
|
volume = "vol.",
|
|
}
|
|
local label, locator, suffix = csl_locator.parse(pandoc.utils.stringify(item.suffix))
|
|
if label then
|
|
locator = shortlabel[label] .. " " .. locator
|
|
else
|
|
locator = ""
|
|
end
|
|
|
|
citations = citations
|
|
.. "{ "
|
|
.. (pandoc.utils.stringify(item.prefix) or "")
|
|
.. " | "
|
|
.. suppress
|
|
.. utils.trim(string.gsub(pandoc.utils.stringify(cite.content) or "", "[|{}]", ""))
|
|
.. " | "
|
|
.. locator
|
|
.. " | "
|
|
.. (suffix or "")
|
|
.. " | "
|
|
.. (ug == "groups" and "zg:" or "zu:")
|
|
.. id
|
|
.. ":"
|
|
.. key
|
|
.. " }"
|
|
end
|
|
|
|
return pandoc.Str(citations)
|
|
end
|
|
|
|
-- -- -- get config -- -- --
|
|
local function test_enum(k, v, values)
|
|
for _, valid in ipairs(values) do
|
|
if type(v) ~= type(valid) then
|
|
error(k .. " expects an " .. type(valid) .. ", got an " .. type(v))
|
|
end
|
|
|
|
if v == valid then
|
|
return v
|
|
end
|
|
end
|
|
|
|
error(k .. " expects one of " .. table.concat(values, ", ") .. ", got " .. v)
|
|
end
|
|
local function test_boolean(k, v)
|
|
if type(v) == "boolean" then
|
|
return v
|
|
elseif type(v) == "nil" then
|
|
return false
|
|
end
|
|
return (test_enum(k, v, { "true", "false" }) == "true")
|
|
end
|
|
|
|
function Meta(meta)
|
|
-- create meta.zotero if it does not exist
|
|
if not meta.zotero then
|
|
-- ADDED FOR QUARTO 1.4 COMPATIBILITY
|
|
meta.zotero = meta.zoterolive or {}
|
|
end
|
|
|
|
-- copy meta.zotero_<key>, which are likely command line params and take precedence, over to meta.zotero
|
|
for k, v in pairs(meta) do
|
|
local s, e, key = string.find(k, "^zotero[-_](.*)")
|
|
if key then
|
|
meta.zotero[key:gsub("_", "-")] = v
|
|
end
|
|
end
|
|
|
|
-- normalize values
|
|
for k, v in pairs(meta.zotero) do
|
|
meta.zotero[k] = pandoc.utils.stringify(v)
|
|
end
|
|
|
|
config.scannable_cite = test_boolean("scannable-cite", meta.zotero["scannable-cite"])
|
|
config.author_in_text = test_boolean("author-in-text", meta.zotero["author-in-text"])
|
|
|
|
if meta.zotero["csl-style"] ~= nil then
|
|
config.csl_style = pandoc.utils.stringify(meta.zotero["csl-style"])
|
|
if config.csl_style == "apa7" then
|
|
config.csl_style = "apa"
|
|
end
|
|
end
|
|
|
|
config.transferable = test_boolean("transferable", meta.zotero["transferable"])
|
|
|
|
-- refuse to create a transferable document, when csl style is not specified
|
|
if config.transferable and not config.csl_style then
|
|
error("Transferable documents need a CSL style")
|
|
end
|
|
if config.transferable and not config.scannable_cite then
|
|
error("Scannable-cite documents are not transferable")
|
|
end
|
|
|
|
if type(meta.zotero.client) == "nil" then -- should never happen as the default is 'zotero'
|
|
meta.zotero.client = "zotero"
|
|
else
|
|
test_enum("client", meta.zotero.client, { "zotero", "jurism" })
|
|
end
|
|
config.client = meta.zotero.client
|
|
|
|
if config.client == "zotero" then
|
|
zotero.url = "http://127.0.0.1:23119/better-bibtex/json-rpc?"
|
|
elseif config.client == "jurism" then
|
|
zotero.url = "http://127.0.0.1:24119/better-bibtex/json-rpc?"
|
|
end
|
|
|
|
zotero.request = {
|
|
jsonrpc = "2.0",
|
|
method = "item.pandoc_filter",
|
|
params = {
|
|
style = config.csl_style or "apa",
|
|
},
|
|
}
|
|
if string.match(FORMAT, "odt") and config.scannable_cite then
|
|
-- scannable-cite takes precedence over csl-style
|
|
config.format = "scannable-cite"
|
|
zotero.request.params.asCSL = false
|
|
elseif string.match(FORMAT, "odt") or string.match(FORMAT, "docx") then
|
|
config.format = FORMAT
|
|
zotero.request.params.asCSL = true
|
|
end
|
|
|
|
if type(meta.zotero.library) ~= "nil" then
|
|
zotero.request.params.libraryID = meta.zotero.library
|
|
end
|
|
|
|
if config.format == "odt" and config.csl_style then
|
|
-- These will be added to the document metadata by pandoc automatically
|
|
meta.ZOTERO_PREF_1 = zotero_docpreferences_odt(config.csl_style)
|
|
meta.ZOTERO_PREF_2 = ""
|
|
end
|
|
|
|
return meta
|
|
end
|
|
|
|
-- -- -- replace citations -- -- --
|
|
function Cite_collect(cite)
|
|
if not config.format then
|
|
return nil
|
|
end
|
|
|
|
for _, item in pairs(cite.citations) do
|
|
zotero.citekeys[item.id] = true
|
|
end
|
|
|
|
return nil
|
|
end
|
|
|
|
function Cite_replace(cite)
|
|
if not config.format then
|
|
return nil
|
|
end
|
|
|
|
if config.format == "scannable-cite" then
|
|
return scannable_cite(cite)
|
|
else
|
|
return zotero_ref(cite)
|
|
end
|
|
end
|
|
|
|
local refsDivSeen = false
|
|
function Div(div)
|
|
if not div.attr or div.attr.identifier ~= "refs" then
|
|
return nil
|
|
end
|
|
if config.format ~= "odt" or not config.csl_style then
|
|
return nil
|
|
end
|
|
|
|
refsDivSeen = true
|
|
return pandoc.RawBlock("opendocument", zotero_bibl_odt())
|
|
end
|
|
|
|
function Doc(doc)
|
|
if config.format ~= "odt" then
|
|
return nil
|
|
end
|
|
|
|
if config.transferable then
|
|
table.insert(doc.blocks, 1, pandoc.RawBlock("opendocument", zotero_bibl_odt_banner()))
|
|
end
|
|
|
|
if config.csl_style and not refsDivSeen then
|
|
table.insert(doc.blocks, pandoc.RawBlock("opendocument", zotero_bibl_odt()))
|
|
end
|
|
|
|
return pandoc.Pandoc(doc.blocks, doc.meta)
|
|
end
|
|
|
|
return {
|
|
{ Meta = Meta },
|
|
{ Cite = Cite_collect },
|
|
{ Cite = Cite_replace },
|
|
{ Div = Div },
|
|
{ Doc = Doc },
|
|
}
|
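-- Illustrative usage note (assumption, not stated in this file): as a pandoc
-- Lua filter the script can be run directly, e.g.
--   pandoc --lua-filter zotero.lua -o out.odt in.md
-- and, per the commit message above, it is packaged as a Quarto extension so
-- documents only need to list it among their filters.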