diff --git a/01-scripts/pandoc-to-zotero-live.lua b/01-scripts/pandoc-to-zotero-live.lua index 0bfe046..d9ff0b9 100644 --- a/01-scripts/pandoc-to-zotero-live.lua +++ b/01-scripts/pandoc-to-zotero-live.lua @@ -1,2032 +1,2068 @@ -local pandoc = require('pandoc') -print('zotero-live-citations f47be20') -local mt, latest = pandoc.mediabag.fetch('https://retorque.re/zotero-better-bibtex/exporting/zotero.lua.revision') +local pandoc = require("pandoc") +print("zotero-live-citations caf41cc") +local mt, latest = pandoc.mediabag.fetch("https://retorque.re/zotero-better-bibtex/exporting/zotero.lua.revision") latest = string.sub(latest, 1, 10) -if 'f47be20' ~= latest then - print('new version "' .. latest .. '" available at https://retorque.re/zotero-better-bibtex/exporting') +if "caf41cc" ~= latest then + print('new version "' .. latest .. '" available at https://retorque.re/zotero-better-bibtex/exporting') end do - local _ENV = _ENV - package.preload["locator"] = function(...) - local arg = _G.arg; - local utils = require('utils') - -- local lpeg = require('lpeg') + local _ENV = _ENV + package.preload["locator"] = function(...) + local arg = _G.arg + local utils = require("utils") + -- local lpeg = require('lpeg') - local book = (lpeg.P('book') + lpeg.P('bk.') + lpeg.P('bks.')) / 'book' - local chapter = (lpeg.P('chapter') + lpeg.P('chap.') + lpeg.P('chaps.')) / 'chapter' - local column = (lpeg.P('column') + lpeg.P('col.') + lpeg.P('cols.')) / 'column' - local figure = (lpeg.P('figure') + lpeg.P('fig.') + lpeg.P('figs.')) / 'figure' - local folio = (lpeg.P('folio') + lpeg.P('fol.') + lpeg.P('fols.')) / 'folio' - local number = (lpeg.P('number') + lpeg.P('no.') + lpeg.P('nos.')) / 'number' - local line = (lpeg.P('line') + lpeg.P('l.') + lpeg.P('ll.')) / 'line' - local note = (lpeg.P('note') + lpeg.P('n.') + lpeg.P('nn.')) / 'note' - local opus = (lpeg.P('opus') + lpeg.P('op.') + lpeg.P('opp.')) / 'opus' - local page = (lpeg.P('page') + lpeg.P('p.') + lpeg.P('pp.')) / 'page' - local paragraph = (lpeg.P('paragraph') + lpeg.P('para.') + lpeg.P('paras.') + lpeg.P('¶¶') + lpeg.P('¶')) / - 'paragraph' - local part = (lpeg.P('part') + lpeg.P('pt.') + lpeg.P('pts.')) / 'part' - local section = (lpeg.P('section') + lpeg.P('sec.') + lpeg.P('secs.') + lpeg.P('§§') + lpeg.P('§')) / - 'section' - local subverbo = (lpeg.P('sub verbo') + lpeg.P('s.v.') + lpeg.P('s.vv.')) / 'sub verbo' - local verse = (lpeg.P('verse') + lpeg.P('v.') + lpeg.P('vv.')) / 'verse' - local volume = (lpeg.P('volume') + lpeg.P('vol.') + lpeg.P('vols.')) / 'volume' - local label = book + chapter + column + figure + folio + number + line + note + opus + page + paragraph + part + - section + subverbo + verse + volume + local book = (lpeg.P("book") + lpeg.P("bk.") + lpeg.P("bks.")) / "book" + local chapter = (lpeg.P("chapter") + lpeg.P("chap.") + lpeg.P("chaps.")) / "chapter" + local column = (lpeg.P("column") + lpeg.P("col.") + lpeg.P("cols.")) / "column" + local figure = (lpeg.P("figure") + lpeg.P("fig.") + lpeg.P("figs.")) / "figure" + local folio = (lpeg.P("folio") + lpeg.P("fol.") + lpeg.P("fols.")) / "folio" + local number = (lpeg.P("number") + lpeg.P("no.") + lpeg.P("nos.")) / "number" + local line = (lpeg.P("line") + lpeg.P("l.") + lpeg.P("ll.")) / "line" + local note = (lpeg.P("note") + lpeg.P("n.") + lpeg.P("nn.")) / "note" + local opus = (lpeg.P("opus") + lpeg.P("op.") + lpeg.P("opp.")) / "opus" + local page = (lpeg.P("page") + lpeg.P("p.") + lpeg.P("pp.")) / "page" + local paragraph = ( + lpeg.P("paragraph") + + lpeg.P("para.") + + lpeg.P("paras.") + + lpeg.P("¶¶") + + lpeg.P("¶") + ) / "paragraph" + local part = (lpeg.P("part") + lpeg.P("pt.") + lpeg.P("pts.")) / "part" + local section = (lpeg.P("section") + lpeg.P("sec.") + lpeg.P("secs.") + lpeg.P("§§") + lpeg.P("§")) + / "section" + local subverbo = (lpeg.P("sub verbo") + lpeg.P("s.v.") + lpeg.P("s.vv.")) / "sub verbo" + local verse = (lpeg.P("verse") + lpeg.P("v.") + lpeg.P("vv.")) / "verse" + local volume = (lpeg.P("volume") + lpeg.P("vol.") + lpeg.P("vols.")) / "volume" + local label = book + + chapter + + column + + figure + + folio + + number + + line + + note + + opus + + page + + paragraph + + part + + section + + subverbo + + verse + + volume - local whitespace = lpeg.P(' ') ^ 0 - local nonspace = lpeg.P(1) - lpeg.S(' ') - local nonbrace = lpeg.P(1) - lpeg.S('{}') + local whitespace = lpeg.P(" ") ^ 0 + local nonspace = lpeg.P(1) - lpeg.S(" ") + local nonbrace = lpeg.P(1) - lpeg.S("{}") - local word = nonspace ^ 1 / 1 - -- local roman = lpeg.S('IiVvXxLlCcDdMm]')^1 - local number = lpeg.R('09') ^ 1 -- + roman + local word = nonspace ^ 1 / 1 + -- local roman = lpeg.S('IiVvXxLlCcDdMm]')^1 + local number = lpeg.R("09") ^ 1 -- + roman - local numbers = number * (whitespace * lpeg.S('-') ^ 1 * whitespace * number) ^ -1 - local ranges = (numbers * (whitespace * lpeg.P(',') * whitespace * numbers) ^ 0) / 1 + local numbers = number * (whitespace * lpeg.S("-") ^ 1 * whitespace * number) ^ -1 + local ranges = (numbers * (whitespace * lpeg.P(",") * whitespace * numbers) ^ 0) / 1 - -- local braced_locator = lpeg.P('{') * lpeg.Cs(label + lpeg.Cc('page')) * whitespace * lpeg.C(nonbrace^1) * lpeg.P('}') - local braced_locator = lpeg.P('{') * label * whitespace * lpeg.C(nonbrace ^ 1) * lpeg.P('}') - local braced_implicit_locator = lpeg.P('{') * lpeg.Cc('page') * lpeg.Cs(numbers) * lpeg.P('}') - local locator = braced_locator + braced_implicit_locator + (label * whitespace * ranges) + - (label * whitespace * word) + (lpeg.Cc('page') * ranges) - local remainder = lpeg.C(lpeg.P(1) ^ 0) + -- local braced_locator = lpeg.P('{') * lpeg.Cs(label + lpeg.Cc('page')) * whitespace * lpeg.C(nonbrace^1) * lpeg.P('}') + local braced_locator = lpeg.P("{") * label * whitespace * lpeg.C(nonbrace ^ 1) * lpeg.P("}") + local braced_implicit_locator = lpeg.P("{") * lpeg.Cc("page") * lpeg.Cs(numbers) * lpeg.P("}") + local locator = braced_locator + + braced_implicit_locator + + (label * whitespace * ranges) + + (label * whitespace * word) + + (lpeg.Cc("page") * ranges) + local remainder = lpeg.C(lpeg.P(1) ^ 0) - local suffix = lpeg.C(lpeg.P(',') ^ -1 * whitespace) * locator * remainder + local suffix = lpeg.C(lpeg.P(",") ^ -1 * whitespace) * locator * remainder - local pseudo_locator = lpeg.C(lpeg.P(',') ^ -1 * whitespace) * lpeg.P('{') * lpeg.C(nonbrace ^ 0) * lpeg.P('}') * - remainder + local pseudo_locator = lpeg.C(lpeg.P(",") ^ -1 * whitespace) + * lpeg.P("{") + * lpeg.C(nonbrace ^ 0) + * lpeg.P("}") + * remainder - local module = {} + local module = {} - function module.parse(input, shortlabel) - local parsed = lpeg.Ct(suffix):match(input) - if parsed then - local _prefix, _label, _locator, _suffix = table.unpack(parsed) - if utils.trim(_prefix) == ',' then _prefix = '' end - return _label, _locator, _prefix .. _suffix - end + function module.parse(input) + local parsed, _prefix, _label, _locator, _suffix - parsed = lpeg.Ct(pseudo_locator):match(input) - if parsed then - local _prefix, _locator, _suffix = table.unpack(parsed) - if utils.trim(_prefix) == ',' then _prefix = '' end - -- return nil, nil, _prefix .. _locator .. _suffix - return 'page', _locator, _prefix .. _suffix - end + parsed = lpeg.Ct(suffix):match(input) + if parsed then + _prefix, _label, _locator, _suffix = table.unpack(parsed) + else + parsed = lpeg.Ct(pseudo_locator):match(input) + if parsed then + _label = "page" + _prefix, _locator, _suffix = table.unpack(parsed) + else + return nil, nil, input + end + end - return nil, nil, input - end + if utils.trim(_prefix) == "," then + _prefix = "" + end + local _space = "" + if utils.trim(_prefix) ~= _prefix then + _space = " " + end - return module - end + _prefix = utils.trim(_prefix) + _label = utils.trim(_label) + _locator = utils.trim(_locator) + _suffix = utils.trim(_suffix) + + return _label, _locator, utils.trim(_prefix .. _space .. _suffix) + end + + return module + end end do - local _ENV = _ENV - package.preload["lunajson"] = function(...) - local arg = _G.arg; - local newdecoder = require 'lunajson.decoder' - local newencoder = require 'lunajson.encoder' - local sax = require 'lunajson.sax' - -- If you need multiple contexts of decoder and/or encoder, - -- you can require lunajson.decoder and/or lunajson.encoder directly. - return { - decode = newdecoder(), - encode = newencoder(), - newparser = sax.newparser, - newfileparser = sax.newfileparser, - } - end + local _ENV = _ENV + package.preload["lunajson"] = function(...) + local arg = _G.arg + local newdecoder = require("lunajson.decoder") + local newencoder = require("lunajson.encoder") + local sax = require("lunajson.sax") + -- If you need multiple contexts of decoder and/or encoder, + -- you can require lunajson.decoder and/or lunajson.encoder directly. + return { + decode = newdecoder(), + encode = newencoder(), + newparser = sax.newparser, + newfileparser = sax.newfileparser, + } + end end do - local _ENV = _ENV - package.preload["lunajson.decoder"] = function(...) - local arg = _G.arg; - local setmetatable, tonumber, tostring = - setmetatable, tonumber, tostring - local floor, inf = - math.floor, math.huge - local mininteger, tointeger = - math.mininteger or nil, math.tointeger or nil - local byte, char, find, gsub, match, sub = - string.byte, string.char, string.find, string.gsub, string.match, string.sub + local _ENV = _ENV + package.preload["lunajson.decoder"] = function(...) + local arg = _G.arg + local setmetatable, tonumber, tostring = setmetatable, tonumber, tostring + local floor, inf = math.floor, math.huge + local mininteger, tointeger = math.mininteger or nil, math.tointeger or nil + local byte, char, find, gsub, match, sub = + string.byte, string.char, string.find, string.gsub, string.match, string.sub - local function _decode_error(pos, errmsg) - error("parse error at " .. pos .. ": " .. errmsg, 2) - end + local function _decode_error(pos, errmsg) + error("parse error at " .. pos .. ": " .. errmsg, 2) + end - local f_str_ctrl_pat - if _VERSION == "Lua 5.1" then - -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly - f_str_ctrl_pat = '[^\32-\255]' - else - f_str_ctrl_pat = '[\0-\31]' - end + local f_str_ctrl_pat + if _VERSION == "Lua 5.1" then + -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly + f_str_ctrl_pat = "[^\32-\255]" + else + f_str_ctrl_pat = "[\0-\31]" + end - local _ENV = nil + local _ENV = nil + local function newdecoder() + local json, pos, nullv, arraylen, rec_depth - local function newdecoder() - local json, pos, nullv, arraylen, rec_depth + -- `f` is the temporary for dispatcher[c] and + -- the dummy for the first return value of `find` + local dispatcher, f - -- `f` is the temporary for dispatcher[c] and - -- the dummy for the first return value of `find` - local dispatcher, f - - --[[ + --[[ Helper --]] - local function decode_error(errmsg) - return _decode_error(pos, errmsg) - end + local function decode_error(errmsg) + return _decode_error(pos, errmsg) + end - --[[ + --[[ Invalid --]] - local function f_err() - decode_error('invalid value') - end + local function f_err() + decode_error("invalid value") + end - --[[ + --[[ Constants --]] - -- null - local function f_nul() - if sub(json, pos, pos + 2) == 'ull' then - pos = pos + 3 - return nullv - end - decode_error('invalid value') - end + -- null + local function f_nul() + if sub(json, pos, pos + 2) == "ull" then + pos = pos + 3 + return nullv + end + decode_error("invalid value") + end - -- false - local function f_fls() - if sub(json, pos, pos + 3) == 'alse' then - pos = pos + 4 - return false - end - decode_error('invalid value') - end + -- false + local function f_fls() + if sub(json, pos, pos + 3) == "alse" then + pos = pos + 4 + return false + end + decode_error("invalid value") + end - -- true - local function f_tru() - if sub(json, pos, pos + 2) == 'rue' then - pos = pos + 3 - return true - end - decode_error('invalid value') - end + -- true + local function f_tru() + if sub(json, pos, pos + 2) == "rue" then + pos = pos + 3 + return true + end + decode_error("invalid value") + end - --[[ + --[[ Numbers Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp) is captured as a number and its conformance to the JSON spec is checked. --]] - -- deal with non-standard locales - local radixmark = match(tostring(0.5), '[^0-9]') - local fixedtonumber = tonumber - if radixmark ~= '.' then - if find(radixmark, '%W') then - radixmark = '%' .. radixmark - end - fixedtonumber = function(s) - return tonumber(gsub(s, '.', radixmark)) - end - end + -- deal with non-standard locales + local radixmark = match(tostring(0.5), "[^0-9]") + local fixedtonumber = tonumber + if radixmark ~= "." then + if find(radixmark, "%W") then + radixmark = "%" .. radixmark + end + fixedtonumber = function(s) + return tonumber(gsub(s, ".", radixmark)) + end + end - local function number_error() - return decode_error('invalid number') - end + local function number_error() + return decode_error("invalid number") + end - -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` - local function f_zro(mns) - local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0 + -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_zro(mns) + local num, c = match(json, "^(%.?[0-9]*)([-+.A-Za-z]?)", pos) -- skipping 0 - if num == '' then - if c == '' then - if mns then - return -0.0 - end - return 0 - end + if num == "" then + if c == "" then + if mns then + return -0.0 + end + return 0 + end - if c == 'e' or c == 'E' then - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - if c == '' then - pos = pos + #num - if mns then - return -0.0 - end - return 0.0 - end - end - number_error() - end + if c == "e" or c == "E" then + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + if c == "" then + pos = pos + #num + if mns then + return -0.0 + end + return 0.0 + end + end + number_error() + end - if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then - number_error() - end + if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then + number_error() + end - if c ~= '' then - if c == 'e' or c == 'E' then - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - end - if c ~= '' then - number_error() - end - end + if c ~= "" then + if c == "e" or c == "E" then + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + end + if c ~= "" then + number_error() + end + end - pos = pos + #num - c = fixedtonumber(num) + pos = pos + #num + c = fixedtonumber(num) - if mns then - c = -c - end - return c - end + if mns then + c = -c + end + return c + end - -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` - local function f_num(mns) - pos = pos - 1 - local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos) - if byte(num, -1) == 0x2E then -- error if ended with period - number_error() - end + -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_num(mns) + pos = pos - 1 + local num, c = match(json, "^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)", pos) + if byte(num, -1) == 0x2E then -- error if ended with period + number_error() + end - if c ~= '' then - if c ~= 'e' and c ~= 'E' then - number_error() - end - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - if not num or c ~= '' then - number_error() - end - end + if c ~= "" then + if c ~= "e" and c ~= "E" then + number_error() + end + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + if not num or c ~= "" then + number_error() + end + end - pos = pos + #num - c = fixedtonumber(num) + pos = pos + #num + c = fixedtonumber(num) - if mns then - c = -c - if c == mininteger and not find(num, '[^0-9]') then - c = mininteger - end - end - return c - end + if mns then + c = -c + if c == mininteger and not find(num, "[^0-9]") then + c = mininteger + end + end + return c + end - -- skip minus sign - local function f_mns() - local c = byte(json, pos) - if c then - pos = pos + 1 - if c > 0x30 then - if c < 0x3A then - return f_num(true) - end - else - if c > 0x2F then - return f_zro(true) - end - end - end - decode_error('invalid number') - end + -- skip minus sign + local function f_mns() + local c = byte(json, pos) + if c then + pos = pos + 1 + if c > 0x30 then + if c < 0x3A then + return f_num(true) + end + else + if c > 0x2F then + return f_zro(true) + end + end + end + decode_error("invalid number") + end - --[[ + --[[ Strings --]] - local f_str_hextbl = { - 0x0, - 0x1, - 0x2, - 0x3, - 0x4, - 0x5, - 0x6, - 0x7, - 0x8, - 0x9, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - 0xA, - 0xB, - 0xC, - 0xD, - 0xE, - 0xF, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - 0xA, - 0xB, - 0xC, - 0xD, - 0xE, - 0xF, - __index = function() - return inf - end - } - setmetatable(f_str_hextbl, f_str_hextbl) + local f_str_hextbl = { + 0x0, + 0x1, + 0x2, + 0x3, + 0x4, + 0x5, + 0x6, + 0x7, + 0x8, + 0x9, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + __index = function() + return inf + end, + } + setmetatable(f_str_hextbl, f_str_hextbl) - local f_str_escapetbl = { - ['"'] = '"', - ['\\'] = '\\', - ['/'] = '/', - ['b'] = '\b', - ['f'] = '\f', - ['n'] = '\n', - ['r'] = '\r', - ['t'] = '\t', - __index = function() - decode_error("invalid escape sequence") - end - } - setmetatable(f_str_escapetbl, f_str_escapetbl) + local f_str_escapetbl = { + ['"'] = '"', + ["\\"] = "\\", + ["/"] = "/", + ["b"] = "\b", + ["f"] = "\f", + ["n"] = "\n", + ["r"] = "\r", + ["t"] = "\t", + __index = function() + decode_error("invalid escape sequence") + end, + } + setmetatable(f_str_escapetbl, f_str_escapetbl) - local function surrogate_first_error() - return decode_error("1st surrogate pair byte not continued by 2nd") - end + local function surrogate_first_error() + return decode_error("1st surrogate pair byte not continued by 2nd") + end - local f_str_surrogate_prev = 0 - local function f_str_subst(ch, ucode) - if ch == 'u' then - local c1, c2, c3, c4, rest = byte(ucode, 1, 5) - ucode = f_str_hextbl[c1 - 47] * 0x1000 + - f_str_hextbl[c2 - 47] * 0x100 + - f_str_hextbl[c3 - 47] * 0x10 + - f_str_hextbl[c4 - 47] - if ucode ~= inf then - if ucode < 0x80 then -- 1byte - if rest then - return char(ucode, rest) - end - return char(ucode) - elseif ucode < 0x800 then -- 2bytes - c1 = floor(ucode / 0x40) - c2 = ucode - c1 * 0x40 - c1 = c1 + 0xC0 - c2 = c2 + 0x80 - if rest then - return char(c1, c2, rest) - end - return char(c1, c2) - elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes - c1 = floor(ucode / 0x1000) - ucode = ucode - c1 * 0x1000 - c2 = floor(ucode / 0x40) - c3 = ucode - c2 * 0x40 - c1 = c1 + 0xE0 - c2 = c2 + 0x80 - c3 = c3 + 0x80 - if rest then - return char(c1, c2, c3, rest) - end - return char(c1, c2, c3) - elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st - if f_str_surrogate_prev == 0 then - f_str_surrogate_prev = ucode - if not rest then - return '' - end - surrogate_first_error() - end - f_str_surrogate_prev = 0 - surrogate_first_error() - else -- surrogate pair 2nd - if f_str_surrogate_prev ~= 0 then - ucode = 0x10000 + - (f_str_surrogate_prev - 0xD800) * 0x400 + - (ucode - 0xDC00) - f_str_surrogate_prev = 0 - c1 = floor(ucode / 0x40000) - ucode = ucode - c1 * 0x40000 - c2 = floor(ucode / 0x1000) - ucode = ucode - c2 * 0x1000 - c3 = floor(ucode / 0x40) - c4 = ucode - c3 * 0x40 - c1 = c1 + 0xF0 - c2 = c2 + 0x80 - c3 = c3 + 0x80 - c4 = c4 + 0x80 - if rest then - return char(c1, c2, c3, c4, rest) - end - return char(c1, c2, c3, c4) - end - decode_error("2nd surrogate pair byte appeared without 1st") - end - end - decode_error("invalid unicode codepoint literal") - end - if f_str_surrogate_prev ~= 0 then - f_str_surrogate_prev = 0 - surrogate_first_error() - end - return f_str_escapetbl[ch] .. ucode - end + local f_str_surrogate_prev = 0 + local function f_str_subst(ch, ucode) + if ch == "u" then + local c1, c2, c3, c4, rest = byte(ucode, 1, 5) + ucode = f_str_hextbl[c1 - 47] * 0x1000 + + f_str_hextbl[c2 - 47] * 0x100 + + f_str_hextbl[c3 - 47] * 0x10 + + f_str_hextbl[c4 - 47] + if ucode ~= inf then + if ucode < 0x80 then -- 1byte + if rest then + return char(ucode, rest) + end + return char(ucode) + elseif ucode < 0x800 then -- 2bytes + c1 = floor(ucode / 0x40) + c2 = ucode - c1 * 0x40 + c1 = c1 + 0xC0 + c2 = c2 + 0x80 + if rest then + return char(c1, c2, rest) + end + return char(c1, c2) + elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes + c1 = floor(ucode / 0x1000) + ucode = ucode - c1 * 0x1000 + c2 = floor(ucode / 0x40) + c3 = ucode - c2 * 0x40 + c1 = c1 + 0xE0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + if rest then + return char(c1, c2, c3, rest) + end + return char(c1, c2, c3) + elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st + if f_str_surrogate_prev == 0 then + f_str_surrogate_prev = ucode + if not rest then + return "" + end + surrogate_first_error() + end + f_str_surrogate_prev = 0 + surrogate_first_error() + else -- surrogate pair 2nd + if f_str_surrogate_prev ~= 0 then + ucode = 0x10000 + (f_str_surrogate_prev - 0xD800) * 0x400 + (ucode - 0xDC00) + f_str_surrogate_prev = 0 + c1 = floor(ucode / 0x40000) + ucode = ucode - c1 * 0x40000 + c2 = floor(ucode / 0x1000) + ucode = ucode - c2 * 0x1000 + c3 = floor(ucode / 0x40) + c4 = ucode - c3 * 0x40 + c1 = c1 + 0xF0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + c4 = c4 + 0x80 + if rest then + return char(c1, c2, c3, c4, rest) + end + return char(c1, c2, c3, c4) + end + decode_error("2nd surrogate pair byte appeared without 1st") + end + end + decode_error("invalid unicode codepoint literal") + end + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + surrogate_first_error() + end + return f_str_escapetbl[ch] .. ucode + end - -- caching interpreted keys for speed - local f_str_keycache = setmetatable({}, { __mode = "v" }) + -- caching interpreted keys for speed + local f_str_keycache = setmetatable({}, { __mode = "v" }) - local function f_str(iskey) - local newpos = pos - local tmppos, c1, c2 - repeat - newpos = find(json, '"', newpos, true) -- search '"' - if not newpos then - decode_error("unterminated string") - end - tmppos = newpos - 1 - newpos = newpos + 1 - c1, c2 = byte(json, tmppos - 1, tmppos) - if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s - repeat - tmppos = tmppos - 2 - c1, c2 = byte(json, tmppos - 1, tmppos) - until c2 ~= 0x5C or c1 ~= 0x5C - tmppos = newpos - 2 - end - until c2 ~= 0x5C -- leave if '"' is not preceded by '\' + local function f_str(iskey) + local newpos = pos + local tmppos, c1, c2 + repeat + newpos = find(json, '"', newpos, true) -- search '"' + if not newpos then + decode_error("unterminated string") + end + tmppos = newpos - 1 + newpos = newpos + 1 + c1, c2 = byte(json, tmppos - 1, tmppos) + if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s + repeat + tmppos = tmppos - 2 + c1, c2 = byte(json, tmppos - 1, tmppos) + until c2 ~= 0x5C or c1 ~= 0x5C + tmppos = newpos - 2 + end + until c2 ~= 0x5C -- leave if '"' is not preceded by '\' - local str = sub(json, pos, tmppos) - pos = newpos + local str = sub(json, pos, tmppos) + pos = newpos - if iskey then -- check key cache - tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val - if tmppos then - return tmppos - end - tmppos = str - end + if iskey then -- check key cache + tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val + if tmppos then + return tmppos + end + tmppos = str + end - if find(str, f_str_ctrl_pat) then - decode_error("unescaped control string") - end - if find(str, '\\', 1, true) then -- check whether a backslash exists - -- We need to grab 4 characters after the escape char, - -- for encoding unicode codepoint to UTF-8. - -- As we need to ensure that every first surrogate pair byte is - -- immediately followed by second one, we grab upto 5 characters and - -- check the last for this purpose. - str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst) - if f_str_surrogate_prev ~= 0 then - f_str_surrogate_prev = 0 - decode_error("1st surrogate pair byte not continued by 2nd") - end - end - if iskey then -- commit key cache - f_str_keycache[tmppos] = str - end - return str - end + if find(str, f_str_ctrl_pat) then + decode_error("unescaped control string") + end + if find(str, "\\", 1, true) then -- check whether a backslash exists + -- We need to grab 4 characters after the escape char, + -- for encoding unicode codepoint to UTF-8. + -- As we need to ensure that every first surrogate pair byte is + -- immediately followed by second one, we grab upto 5 characters and + -- check the last for this purpose. + str = gsub(str, "\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)", f_str_subst) + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + decode_error("1st surrogate pair byte not continued by 2nd") + end + end + if iskey then -- commit key cache + f_str_keycache[tmppos] = str + end + return str + end - --[[ + --[[ Arrays, Objects --]] - -- array - local function f_ary() - rec_depth = rec_depth + 1 - if rec_depth > 1000 then - decode_error('too deeply nested json (> 1000)') - end - local ary = {} + -- array + local function f_ary() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + decode_error("too deeply nested json (> 1000)") + end + local ary = {} - pos = match(json, '^[ \n\r\t]*()', pos) + pos = match(json, "^[ \n\r\t]*()", pos) - local i = 0 - if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty - pos = pos + 1 - else - local newpos = pos - repeat - i = i + 1 - f = dispatcher[byte(json, newpos)] -- parse value - pos = newpos + 1 - ary[i] = f() - newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma - until not newpos + local i = 0 + if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty + pos = pos + 1 + else + local newpos = pos + repeat + i = i + 1 + f = dispatcher[byte(json, newpos)] -- parse value + pos = newpos + 1 + ary[i] = f() + newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma + until not newpos - newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket - if not newpos then - decode_error("no closing bracket of an array") - end - pos = newpos - end + newpos = match(json, "^[ \n\r\t]*%]()", pos) -- check closing bracket + if not newpos then + decode_error("no closing bracket of an array") + end + pos = newpos + end - if arraylen then -- commit the length of the array if `arraylen` is set - ary[0] = i - end - rec_depth = rec_depth - 1 - return ary - end + if arraylen then -- commit the length of the array if `arraylen` is set + ary[0] = i + end + rec_depth = rec_depth - 1 + return ary + end - -- objects - local function f_obj() - rec_depth = rec_depth + 1 - if rec_depth > 1000 then - decode_error('too deeply nested json (> 1000)') - end - local obj = {} + -- objects + local function f_obj() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + decode_error("too deeply nested json (> 1000)") + end + local obj = {} - pos = match(json, '^[ \n\r\t]*()', pos) - if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty - pos = pos + 1 - else - local newpos = pos + pos = match(json, "^[ \n\r\t]*()", pos) + if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty + pos = pos + 1 + else + local newpos = pos - repeat - if byte(json, newpos) ~= 0x22 then -- check '"' - decode_error("not key") - end - pos = newpos + 1 - local key = f_str(true) -- parse key + repeat + if byte(json, newpos) ~= 0x22 then -- check '"' + decode_error("not key") + end + pos = newpos + 1 + local key = f_str(true) -- parse key - -- optimized for compact json - -- c1, c2 == ':', or - -- c1, c2, c3 == ':', ' ', - f = f_err - local c1, c2, c3 = byte(json, pos, pos + 3) - if c1 == 0x3A then - if c2 ~= 0x20 then - f = dispatcher[c2] - newpos = pos + 2 - else - f = dispatcher[c3] - newpos = pos + 3 - end - end - if f == f_err then -- read a colon and arbitrary number of spaces - newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) - if not newpos then - decode_error("no colon after a key") - end - f = dispatcher[byte(json, newpos)] - newpos = newpos + 1 - end - pos = newpos - obj[key] = f() -- parse value - newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) - until not newpos + -- optimized for compact json + -- c1, c2 == ':', or + -- c1, c2, c3 == ':', ' ', + f = f_err + local c1, c2, c3 = byte(json, pos, pos + 3) + if c1 == 0x3A then + if c2 ~= 0x20 then + f = dispatcher[c2] + newpos = pos + 2 + else + f = dispatcher[c3] + newpos = pos + 3 + end + end + if f == f_err then -- read a colon and arbitrary number of spaces + newpos = match(json, "^[ \n\r\t]*:[ \n\r\t]*()", pos) + if not newpos then + decode_error("no colon after a key") + end + f = dispatcher[byte(json, newpos)] + newpos = newpos + 1 + end + pos = newpos + obj[key] = f() -- parse value + newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) + until not newpos - newpos = match(json, '^[ \n\r\t]*}()', pos) - if not newpos then - decode_error("no closing bracket of an object") - end - pos = newpos - end + newpos = match(json, "^[ \n\r\t]*}()", pos) + if not newpos then + decode_error("no closing bracket of an object") + end + pos = newpos + end - rec_depth = rec_depth - 1 - return obj - end + rec_depth = rec_depth - 1 + return obj + end - --[[ + --[[ The jump table to dispatch a parser for a value, indexed by the code of the value's first char. Nil key means the end of json. --]] - dispatcher = { - [0] = - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_str, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_mns, - f_err, - f_err, - f_zro, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_ary, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_fls, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_nul, - f_err, - f_err, - f_err, - f_err, - f_err, - f_tru, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_obj, - f_err, - f_err, - f_err, - f_err, - __index = function() - decode_error("unexpected termination") - end - } - setmetatable(dispatcher, dispatcher) + dispatcher = { + [0] = f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_str, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_mns, + f_err, + f_err, + f_zro, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_ary, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_fls, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_nul, + f_err, + f_err, + f_err, + f_err, + f_err, + f_tru, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_obj, + f_err, + f_err, + f_err, + f_err, + __index = function() + decode_error("unexpected termination") + end, + } + setmetatable(dispatcher, dispatcher) - --[[ + --[[ run decoder --]] - local function decode(json_, pos_, nullv_, arraylen_) - json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_ - rec_depth = 0 + local function decode(json_, pos_, nullv_, arraylen_) + json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_ + rec_depth = 0 - pos = match(json, '^[ \n\r\t]*()', pos) + pos = match(json, "^[ \n\r\t]*()", pos) - f = dispatcher[byte(json, pos)] - pos = pos + 1 - local v = f() + f = dispatcher[byte(json, pos)] + pos = pos + 1 + local v = f() - if pos_ then - return v, pos - else - f, pos = find(json, '^[ \n\r\t]*', pos) - if pos ~= #json then - decode_error('json ended') - end - return v - end - end + if pos_ then + return v, pos + else + f, pos = find(json, "^[ \n\r\t]*", pos) + if pos ~= #json then + decode_error("json ended") + end + return v + end + end - return decode - end + return decode + end - return newdecoder - end + return newdecoder + end end do - local _ENV = _ENV - package.preload["lunajson.encoder"] = function(...) - local arg = _G.arg; - local error = error - local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match - local concat = table.concat - local tostring = tostring - local pairs, type = pairs, type - local setmetatable = setmetatable - local huge, tiny = 1 / 0, -1 / 0 + local _ENV = _ENV + package.preload["lunajson.encoder"] = function(...) + local arg = _G.arg + local error = error + local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match + local concat = table.concat + local tostring = tostring + local pairs, type = pairs, type + local setmetatable = setmetatable + local huge, tiny = 1 / 0, -1 / 0 - local f_string_esc_pat - if _VERSION == "Lua 5.1" then - -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly - f_string_esc_pat = '[^ -!#-[%]^-\255]' - else - f_string_esc_pat = '[\0-\31"\\]' - end + local f_string_esc_pat + if _VERSION == "Lua 5.1" then + -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly + f_string_esc_pat = "[^ -!#-[%]^-\255]" + else + f_string_esc_pat = '[\0-\31"\\]' + end - local _ENV = nil + local _ENV = nil + local function newencoder() + local v, nullv + local i, builder, visited - local function newencoder() - local v, nullv - local i, builder, visited + local function f_tostring(v) + builder[i] = tostring(v) + i = i + 1 + end - local function f_tostring(v) - builder[i] = tostring(v) - i = i + 1 - end + local radixmark = match(tostring(0.5), "[^0-9]") + local delimmark = match(tostring(12345.12345), "[^0-9" .. radixmark .. "]") + if radixmark == "." then + radixmark = nil + end - local radixmark = match(tostring(0.5), '[^0-9]') - local delimmark = match(tostring(12345.12345), '[^0-9' .. radixmark .. ']') - if radixmark == '.' then - radixmark = nil - end + local radixordelim + if radixmark or delimmark then + radixordelim = true + if radixmark and find(radixmark, "%W") then + radixmark = "%" .. radixmark + end + if delimmark and find(delimmark, "%W") then + delimmark = "%" .. delimmark + end + end - local radixordelim - if radixmark or delimmark then - radixordelim = true - if radixmark and find(radixmark, '%W') then - radixmark = '%' .. radixmark - end - if delimmark and find(delimmark, '%W') then - delimmark = '%' .. delimmark - end - end + local f_number = function(n) + if tiny < n and n < huge then + local s = format("%.17g", n) + if radixordelim then + if delimmark then + s = gsub(s, delimmark, "") + end + if radixmark then + s = gsub(s, radixmark, ".") + end + end + builder[i] = s + i = i + 1 + return + end + error("invalid number") + end - local f_number = function(n) - if tiny < n and n < huge then - local s = format("%.17g", n) - if radixordelim then - if delimmark then - s = gsub(s, delimmark, '') - end - if radixmark then - s = gsub(s, radixmark, '.') - end - end - builder[i] = s - i = i + 1 - return - end - error('invalid number') - end + local doencode - local doencode + local f_string_subst = { + ['"'] = '\\"', + ["\\"] = "\\\\", + ["\b"] = "\\b", + ["\f"] = "\\f", + ["\n"] = "\\n", + ["\r"] = "\\r", + ["\t"] = "\\t", + __index = function(_, c) + return format("\\u00%02X", byte(c)) + end, + } + setmetatable(f_string_subst, f_string_subst) - local f_string_subst = { - ['"'] = '\\"', - ['\\'] = '\\\\', - ['\b'] = '\\b', - ['\f'] = '\\f', - ['\n'] = '\\n', - ['\r'] = '\\r', - ['\t'] = '\\t', - __index = function(_, c) - return format('\\u00%02X', byte(c)) - end - } - setmetatable(f_string_subst, f_string_subst) + local function f_string(s) + builder[i] = '"' + if find(s, f_string_esc_pat) then + s = gsub(s, f_string_esc_pat, f_string_subst) + end + builder[i + 1] = s + builder[i + 2] = '"' + i = i + 3 + end - local function f_string(s) - builder[i] = '"' - if find(s, f_string_esc_pat) then - s = gsub(s, f_string_esc_pat, f_string_subst) - end - builder[i + 1] = s - builder[i + 2] = '"' - i = i + 3 - end + local function f_table(o) + if visited[o] then + error("loop detected") + end + visited[o] = true - local function f_table(o) - if visited[o] then - error("loop detected") - end - visited[o] = true + local tmp = o[0] + if type(tmp) == "number" then -- arraylen available + builder[i] = "[" + i = i + 1 + for j = 1, tmp do + doencode(o[j]) + builder[i] = "," + i = i + 1 + end + if tmp > 0 then + i = i - 1 + end + builder[i] = "]" + else + tmp = o[1] + if tmp ~= nil then -- detected as array + builder[i] = "[" + i = i + 1 + local j = 2 + repeat + doencode(tmp) + tmp = o[j] + if tmp == nil then + break + end + j = j + 1 + builder[i] = "," + i = i + 1 + until false + builder[i] = "]" + else -- detected as object + builder[i] = "{" + i = i + 1 + local tmp = i + for k, v in pairs(o) do + if type(k) ~= "string" then + error("non-string key") + end + f_string(k) + builder[i] = ":" + i = i + 1 + doencode(v) + builder[i] = "," + i = i + 1 + end + if i > tmp then + i = i - 1 + end + builder[i] = "}" + end + end - local tmp = o[0] - if type(tmp) == 'number' then -- arraylen available - builder[i] = '[' - i = i + 1 - for j = 1, tmp do - doencode(o[j]) - builder[i] = ',' - i = i + 1 - end - if tmp > 0 then - i = i - 1 - end - builder[i] = ']' - else - tmp = o[1] - if tmp ~= nil then -- detected as array - builder[i] = '[' - i = i + 1 - local j = 2 - repeat - doencode(tmp) - tmp = o[j] - if tmp == nil then - break - end - j = j + 1 - builder[i] = ',' - i = i + 1 - until false - builder[i] = ']' - else -- detected as object - builder[i] = '{' - i = i + 1 - local tmp = i - for k, v in pairs(o) do - if type(k) ~= 'string' then - error("non-string key") - end - f_string(k) - builder[i] = ':' - i = i + 1 - doencode(v) - builder[i] = ',' - i = i + 1 - end - if i > tmp then - i = i - 1 - end - builder[i] = '}' - end - end + i = i + 1 + visited[o] = nil + end - i = i + 1 - visited[o] = nil - end + local dispatcher = { + boolean = f_tostring, + number = f_number, + string = f_string, + table = f_table, + __index = function() + error("invalid type value") + end, + } + setmetatable(dispatcher, dispatcher) - local dispatcher = { - boolean = f_tostring, - number = f_number, - string = f_string, - table = f_table, - __index = function() - error("invalid type value") - end - } - setmetatable(dispatcher, dispatcher) + function doencode(v) + if v == nullv then + builder[i] = "null" + i = i + 1 + return + end + return dispatcher[type(v)](v) + end - function doencode(v) - if v == nullv then - builder[i] = 'null' - i = i + 1 - return - end - return dispatcher[type(v)](v) - end + local function encode(v_, nullv_) + v, nullv = v_, nullv_ + i, builder, visited = 1, {}, {} - local function encode(v_, nullv_) - v, nullv = v_, nullv_ - i, builder, visited = 1, {}, {} + doencode(v) + return concat(builder) + end - doencode(v) - return concat(builder) - end + return encode + end - return encode - end - - return newencoder - end + return newencoder + end end do - local _ENV = _ENV - package.preload["lunajson.sax"] = function(...) - local arg = _G.arg; - local setmetatable, tonumber, tostring = - setmetatable, tonumber, tostring - local floor, inf = - math.floor, math.huge - local mininteger, tointeger = - math.mininteger or nil, math.tointeger or nil - local byte, char, find, gsub, match, sub = - string.byte, string.char, string.find, string.gsub, string.match, string.sub + local _ENV = _ENV + package.preload["lunajson.sax"] = function(...) + local arg = _G.arg + local setmetatable, tonumber, tostring = setmetatable, tonumber, tostring + local floor, inf = math.floor, math.huge + local mininteger, tointeger = math.mininteger or nil, math.tointeger or nil + local byte, char, find, gsub, match, sub = + string.byte, string.char, string.find, string.gsub, string.match, string.sub - local function _parse_error(pos, errmsg) - error("parse error at " .. pos .. ": " .. errmsg, 2) - end + local function _parse_error(pos, errmsg) + error("parse error at " .. pos .. ": " .. errmsg, 2) + end - local f_str_ctrl_pat - if _VERSION == "Lua 5.1" then - -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly - f_str_ctrl_pat = '[^\32-\255]' - else - f_str_ctrl_pat = '[\0-\31]' - end + local f_str_ctrl_pat + if _VERSION == "Lua 5.1" then + -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly + f_str_ctrl_pat = "[^\32-\255]" + else + f_str_ctrl_pat = "[\0-\31]" + end - local type, unpack = type, table.unpack or unpack - local open = io.open + local type, unpack = type, table.unpack or unpack + local open = io.open - local _ENV = nil + local _ENV = nil + local function nop() end - local function nop() end + local function newparser(src, saxtbl) + local json, jsonnxt, rec_depth + local jsonlen, pos, acc = 0, 1, 0 - local function newparser(src, saxtbl) - local json, jsonnxt, rec_depth - local jsonlen, pos, acc = 0, 1, 0 + -- `f` is the temporary for dispatcher[c] and + -- the dummy for the first return value of `find` + local dispatcher, f - -- `f` is the temporary for dispatcher[c] and - -- the dummy for the first return value of `find` - local dispatcher, f + -- initialize + if type(src) == "string" then + json = src + jsonlen = #json + jsonnxt = function() + json = "" + jsonlen = 0 + jsonnxt = nop + end + else + jsonnxt = function() + acc = acc + jsonlen + pos = 1 + repeat + json = src() + if not json then + json = "" + jsonlen = 0 + jsonnxt = nop + return + end + jsonlen = #json + until jsonlen > 0 + end + jsonnxt() + end - -- initialize - if type(src) == 'string' then - json = src - jsonlen = #json - jsonnxt = function() - json = '' - jsonlen = 0 - jsonnxt = nop - end - else - jsonnxt = function() - acc = acc + jsonlen - pos = 1 - repeat - json = src() - if not json then - json = '' - jsonlen = 0 - jsonnxt = nop - return - end - jsonlen = #json - until jsonlen > 0 - end - jsonnxt() - end + local sax_startobject = saxtbl.startobject or nop + local sax_key = saxtbl.key or nop + local sax_endobject = saxtbl.endobject or nop + local sax_startarray = saxtbl.startarray or nop + local sax_endarray = saxtbl.endarray or nop + local sax_string = saxtbl.string or nop + local sax_number = saxtbl.number or nop + local sax_boolean = saxtbl.boolean or nop + local sax_null = saxtbl.null or nop - local sax_startobject = saxtbl.startobject or nop - local sax_key = saxtbl.key or nop - local sax_endobject = saxtbl.endobject or nop - local sax_startarray = saxtbl.startarray or nop - local sax_endarray = saxtbl.endarray or nop - local sax_string = saxtbl.string or nop - local sax_number = saxtbl.number or nop - local sax_boolean = saxtbl.boolean or nop - local sax_null = saxtbl.null or nop - - --[[ + --[[ Helper --]] - local function tryc() - local c = byte(json, pos) - if not c then - jsonnxt() - c = byte(json, pos) - end - return c - end + local function tryc() + local c = byte(json, pos) + if not c then + jsonnxt() + c = byte(json, pos) + end + return c + end - local function parse_error(errmsg) - return _parse_error(acc + pos, errmsg) - end + local function parse_error(errmsg) + return _parse_error(acc + pos, errmsg) + end - local function tellc() - return tryc() or parse_error("unexpected termination") - end + local function tellc() + return tryc() or parse_error("unexpected termination") + end - local function spaces() -- skip spaces and prepare the next char - while true do - pos = match(json, '^[ \n\r\t]*()', pos) - if pos <= jsonlen then - return - end - if jsonlen == 0 then - parse_error("unexpected termination") - end - jsonnxt() - end - end + local function spaces() -- skip spaces and prepare the next char + while true do + pos = match(json, "^[ \n\r\t]*()", pos) + if pos <= jsonlen then + return + end + if jsonlen == 0 then + parse_error("unexpected termination") + end + jsonnxt() + end + end - --[[ + --[[ Invalid --]] - local function f_err() - parse_error('invalid value') - end + local function f_err() + parse_error("invalid value") + end - --[[ + --[[ Constants --]] - -- fallback slow constants parser - local function generic_constant(target, targetlen, ret, sax_f) - for i = 1, targetlen do - local c = tellc() - if byte(target, i) ~= c then - parse_error("invalid char") - end - pos = pos + 1 - end - return sax_f(ret) - end + -- fallback slow constants parser + local function generic_constant(target, targetlen, ret, sax_f) + for i = 1, targetlen do + local c = tellc() + if byte(target, i) ~= c then + parse_error("invalid char") + end + pos = pos + 1 + end + return sax_f(ret) + end - -- null - local function f_nul() - if sub(json, pos, pos + 2) == 'ull' then - pos = pos + 3 - return sax_null(nil) - end - return generic_constant('ull', 3, nil, sax_null) - end + -- null + local function f_nul() + if sub(json, pos, pos + 2) == "ull" then + pos = pos + 3 + return sax_null(nil) + end + return generic_constant("ull", 3, nil, sax_null) + end - -- false - local function f_fls() - if sub(json, pos, pos + 3) == 'alse' then - pos = pos + 4 - return sax_boolean(false) - end - return generic_constant('alse', 4, false, sax_boolean) - end + -- false + local function f_fls() + if sub(json, pos, pos + 3) == "alse" then + pos = pos + 4 + return sax_boolean(false) + end + return generic_constant("alse", 4, false, sax_boolean) + end - -- true - local function f_tru() - if sub(json, pos, pos + 2) == 'rue' then - pos = pos + 3 - return sax_boolean(true) - end - return generic_constant('rue', 3, true, sax_boolean) - end + -- true + local function f_tru() + if sub(json, pos, pos + 2) == "rue" then + pos = pos + 3 + return sax_boolean(true) + end + return generic_constant("rue", 3, true, sax_boolean) + end - --[[ + --[[ Numbers Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp) is captured as a number and its conformance to the JSON spec is checked. --]] - -- deal with non-standard locales - local radixmark = match(tostring(0.5), '[^0-9]') - local fixedtonumber = tonumber - if radixmark ~= '.' then - if find(radixmark, '%W') then - radixmark = '%' .. radixmark - end - fixedtonumber = function(s) - return tonumber(gsub(s, '.', radixmark)) - end - end + -- deal with non-standard locales + local radixmark = match(tostring(0.5), "[^0-9]") + local fixedtonumber = tonumber + if radixmark ~= "." then + if find(radixmark, "%W") then + radixmark = "%" .. radixmark + end + fixedtonumber = function(s) + return tonumber(gsub(s, ".", radixmark)) + end + end - local function number_error() - return parse_error('invalid number') - end + local function number_error() + return parse_error("invalid number") + end - -- fallback slow parser - local function generic_number(mns) - local buf = {} - local i = 1 - local is_int = true + -- fallback slow parser + local function generic_number(mns) + local buf = {} + local i = 1 + local is_int = true - local c = byte(json, pos) - pos = pos + 1 + local c = byte(json, pos) + pos = pos + 1 - local function nxt() - buf[i] = c - i = i + 1 - c = tryc() - pos = pos + 1 - end + local function nxt() + buf[i] = c + i = i + 1 + c = tryc() + pos = pos + 1 + end - if c == 0x30 then - nxt() - if c and 0x30 <= c and c < 0x3A then - number_error() - end - else - repeat nxt() until not (c and 0x30 <= c and c < 0x3A) - end - if c == 0x2E then - is_int = false - nxt() - if not (c and 0x30 <= c and c < 0x3A) then - number_error() - end - repeat nxt() until not (c and 0x30 <= c and c < 0x3A) - end - if c == 0x45 or c == 0x65 then - is_int = false - nxt() - if c == 0x2B or c == 0x2D then - nxt() - end - if not (c and 0x30 <= c and c < 0x3A) then - number_error() - end - repeat nxt() until not (c and 0x30 <= c and c < 0x3A) - end - if c and (0x41 <= c and c <= 0x5B or - 0x61 <= c and c <= 0x7B or - c == 0x2B or c == 0x2D or c == 0x2E) then - number_error() - end - pos = pos - 1 + if c == 0x30 then + nxt() + if c and 0x30 <= c and c < 0x3A then + number_error() + end + else + repeat + nxt() + until not (c and 0x30 <= c and c < 0x3A) + end + if c == 0x2E then + is_int = false + nxt() + if not (c and 0x30 <= c and c < 0x3A) then + number_error() + end + repeat + nxt() + until not (c and 0x30 <= c and c < 0x3A) + end + if c == 0x45 or c == 0x65 then + is_int = false + nxt() + if c == 0x2B or c == 0x2D then + nxt() + end + if not (c and 0x30 <= c and c < 0x3A) then + number_error() + end + repeat + nxt() + until not (c and 0x30 <= c and c < 0x3A) + end + if + c + and (0x41 <= c and c <= 0x5B or 0x61 <= c and c <= 0x7B or c == 0x2B or c == 0x2D or c == 0x2E) + then + number_error() + end + pos = pos - 1 - local num = char(unpack(buf)) - num = fixedtonumber(num) - if mns then - num = -num - if num == mininteger and is_int then - num = mininteger - end - end - return sax_number(num) - end + local num = char(unpack(buf)) + num = fixedtonumber(num) + if mns then + num = -num + if num == mininteger and is_int then + num = mininteger + end + end + return sax_number(num) + end - -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` - local function f_zro(mns) - local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0 + -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_zro(mns) + local num, c = match(json, "^(%.?[0-9]*)([-+.A-Za-z]?)", pos) -- skipping 0 - if num == '' then - if pos > jsonlen then - pos = pos - 1 - return generic_number(mns) - end - if c == '' then - if mns then - return sax_number(-0.0) - end - return sax_number(0) - end + if num == "" then + if pos > jsonlen then + pos = pos - 1 + return generic_number(mns) + end + if c == "" then + if mns then + return sax_number(-0.0) + end + return sax_number(0) + end - if c == 'e' or c == 'E' then - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - if c == '' then - pos = pos + #num - if pos > jsonlen then - pos = pos - #num - 1 - return generic_number(mns) - end - if mns then - return sax_number(-0.0) - end - return sax_number(0.0) - end - end - pos = pos - 1 - return generic_number(mns) - end + if c == "e" or c == "E" then + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + if c == "" then + pos = pos + #num + if pos > jsonlen then + pos = pos - #num - 1 + return generic_number(mns) + end + if mns then + return sax_number(-0.0) + end + return sax_number(0.0) + end + end + pos = pos - 1 + return generic_number(mns) + end - if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then - pos = pos - 1 - return generic_number(mns) - end + if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then + pos = pos - 1 + return generic_number(mns) + end - if c ~= '' then - if c == 'e' or c == 'E' then - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - end - if c ~= '' then - pos = pos - 1 - return generic_number(mns) - end - end + if c ~= "" then + if c == "e" or c == "E" then + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + end + if c ~= "" then + pos = pos - 1 + return generic_number(mns) + end + end - pos = pos + #num - if pos > jsonlen then - pos = pos - #num - 1 - return generic_number(mns) - end - c = fixedtonumber(num) + pos = pos + #num + if pos > jsonlen then + pos = pos - #num - 1 + return generic_number(mns) + end + c = fixedtonumber(num) - if mns then - c = -c - end - return sax_number(c) - end + if mns then + c = -c + end + return sax_number(c) + end - -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` - local function f_num(mns) - pos = pos - 1 - local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos) - if byte(num, -1) == 0x2E then -- error if ended with period - return generic_number(mns) - end + -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_num(mns) + pos = pos - 1 + local num, c = match(json, "^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)", pos) + if byte(num, -1) == 0x2E then -- error if ended with period + return generic_number(mns) + end - if c ~= '' then - if c ~= 'e' and c ~= 'E' then - return generic_number(mns) - end - num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) - if not num or c ~= '' then - return generic_number(mns) - end - end + if c ~= "" then + if c ~= "e" and c ~= "E" then + return generic_number(mns) + end + num, c = match(json, "^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)", pos) + if not num or c ~= "" then + return generic_number(mns) + end + end - pos = pos + #num - if pos > jsonlen then - pos = pos - #num - return generic_number(mns) - end - c = fixedtonumber(num) + pos = pos + #num + if pos > jsonlen then + pos = pos - #num + return generic_number(mns) + end + c = fixedtonumber(num) - if mns then - c = -c - if c == mininteger and not find(num, '[^0-9]') then - c = mininteger - end - end - return sax_number(c) - end + if mns then + c = -c + if c == mininteger and not find(num, "[^0-9]") then + c = mininteger + end + end + return sax_number(c) + end - -- skip minus sign - local function f_mns() - local c = byte(json, pos) or tellc() - if c then - pos = pos + 1 - if c > 0x30 then - if c < 0x3A then - return f_num(true) - end - else - if c > 0x2F then - return f_zro(true) - end - end - end - parse_error("invalid number") - end + -- skip minus sign + local function f_mns() + local c = byte(json, pos) or tellc() + if c then + pos = pos + 1 + if c > 0x30 then + if c < 0x3A then + return f_num(true) + end + else + if c > 0x2F then + return f_zro(true) + end + end + end + parse_error("invalid number") + end - --[[ + --[[ Strings --]] - local f_str_hextbl = { - 0x0, - 0x1, - 0x2, - 0x3, - 0x4, - 0x5, - 0x6, - 0x7, - 0x8, - 0x9, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - 0xA, - 0xB, - 0xC, - 0xD, - 0xE, - 0xF, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - inf, - 0xA, - 0xB, - 0xC, - 0xD, - 0xE, - 0xF, - __index = function() - return inf - end - } - setmetatable(f_str_hextbl, f_str_hextbl) + local f_str_hextbl = { + 0x0, + 0x1, + 0x2, + 0x3, + 0x4, + 0x5, + 0x6, + 0x7, + 0x8, + 0x9, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + inf, + 0xA, + 0xB, + 0xC, + 0xD, + 0xE, + 0xF, + __index = function() + return inf + end, + } + setmetatable(f_str_hextbl, f_str_hextbl) - local f_str_escapetbl = { - ['"'] = '"', - ['\\'] = '\\', - ['/'] = '/', - ['b'] = '\b', - ['f'] = '\f', - ['n'] = '\n', - ['r'] = '\r', - ['t'] = '\t', - __index = function() - parse_error("invalid escape sequence") - end - } - setmetatable(f_str_escapetbl, f_str_escapetbl) + local f_str_escapetbl = { + ['"'] = '"', + ["\\"] = "\\", + ["/"] = "/", + ["b"] = "\b", + ["f"] = "\f", + ["n"] = "\n", + ["r"] = "\r", + ["t"] = "\t", + __index = function() + parse_error("invalid escape sequence") + end, + } + setmetatable(f_str_escapetbl, f_str_escapetbl) - local function surrogate_first_error() - return parse_error("1st surrogate pair byte not continued by 2nd") - end + local function surrogate_first_error() + return parse_error("1st surrogate pair byte not continued by 2nd") + end - local f_str_surrogate_prev = 0 - local function f_str_subst(ch, ucode) - if ch == 'u' then - local c1, c2, c3, c4, rest = byte(ucode, 1, 5) - ucode = f_str_hextbl[c1 - 47] * 0x1000 + - f_str_hextbl[c2 - 47] * 0x100 + - f_str_hextbl[c3 - 47] * 0x10 + - f_str_hextbl[c4 - 47] - if ucode ~= inf then - if ucode < 0x80 then -- 1byte - if rest then - return char(ucode, rest) - end - return char(ucode) - elseif ucode < 0x800 then -- 2bytes - c1 = floor(ucode / 0x40) - c2 = ucode - c1 * 0x40 - c1 = c1 + 0xC0 - c2 = c2 + 0x80 - if rest then - return char(c1, c2, rest) - end - return char(c1, c2) - elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes - c1 = floor(ucode / 0x1000) - ucode = ucode - c1 * 0x1000 - c2 = floor(ucode / 0x40) - c3 = ucode - c2 * 0x40 - c1 = c1 + 0xE0 - c2 = c2 + 0x80 - c3 = c3 + 0x80 - if rest then - return char(c1, c2, c3, rest) - end - return char(c1, c2, c3) - elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st - if f_str_surrogate_prev == 0 then - f_str_surrogate_prev = ucode - if not rest then - return '' - end - surrogate_first_error() - end - f_str_surrogate_prev = 0 - surrogate_first_error() - else -- surrogate pair 2nd - if f_str_surrogate_prev ~= 0 then - ucode = 0x10000 + - (f_str_surrogate_prev - 0xD800) * 0x400 + - (ucode - 0xDC00) - f_str_surrogate_prev = 0 - c1 = floor(ucode / 0x40000) - ucode = ucode - c1 * 0x40000 - c2 = floor(ucode / 0x1000) - ucode = ucode - c2 * 0x1000 - c3 = floor(ucode / 0x40) - c4 = ucode - c3 * 0x40 - c1 = c1 + 0xF0 - c2 = c2 + 0x80 - c3 = c3 + 0x80 - c4 = c4 + 0x80 - if rest then - return char(c1, c2, c3, c4, rest) - end - return char(c1, c2, c3, c4) - end - parse_error("2nd surrogate pair byte appeared without 1st") - end - end - parse_error("invalid unicode codepoint literal") - end - if f_str_surrogate_prev ~= 0 then - f_str_surrogate_prev = 0 - surrogate_first_error() - end - return f_str_escapetbl[ch] .. ucode - end + local f_str_surrogate_prev = 0 + local function f_str_subst(ch, ucode) + if ch == "u" then + local c1, c2, c3, c4, rest = byte(ucode, 1, 5) + ucode = f_str_hextbl[c1 - 47] * 0x1000 + + f_str_hextbl[c2 - 47] * 0x100 + + f_str_hextbl[c3 - 47] * 0x10 + + f_str_hextbl[c4 - 47] + if ucode ~= inf then + if ucode < 0x80 then -- 1byte + if rest then + return char(ucode, rest) + end + return char(ucode) + elseif ucode < 0x800 then -- 2bytes + c1 = floor(ucode / 0x40) + c2 = ucode - c1 * 0x40 + c1 = c1 + 0xC0 + c2 = c2 + 0x80 + if rest then + return char(c1, c2, rest) + end + return char(c1, c2) + elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes + c1 = floor(ucode / 0x1000) + ucode = ucode - c1 * 0x1000 + c2 = floor(ucode / 0x40) + c3 = ucode - c2 * 0x40 + c1 = c1 + 0xE0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + if rest then + return char(c1, c2, c3, rest) + end + return char(c1, c2, c3) + elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st + if f_str_surrogate_prev == 0 then + f_str_surrogate_prev = ucode + if not rest then + return "" + end + surrogate_first_error() + end + f_str_surrogate_prev = 0 + surrogate_first_error() + else -- surrogate pair 2nd + if f_str_surrogate_prev ~= 0 then + ucode = 0x10000 + (f_str_surrogate_prev - 0xD800) * 0x400 + (ucode - 0xDC00) + f_str_surrogate_prev = 0 + c1 = floor(ucode / 0x40000) + ucode = ucode - c1 * 0x40000 + c2 = floor(ucode / 0x1000) + ucode = ucode - c2 * 0x1000 + c3 = floor(ucode / 0x40) + c4 = ucode - c3 * 0x40 + c1 = c1 + 0xF0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + c4 = c4 + 0x80 + if rest then + return char(c1, c2, c3, c4, rest) + end + return char(c1, c2, c3, c4) + end + parse_error("2nd surrogate pair byte appeared without 1st") + end + end + parse_error("invalid unicode codepoint literal") + end + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + surrogate_first_error() + end + return f_str_escapetbl[ch] .. ucode + end - local function f_str(iskey) - local pos2 = pos - local newpos - local str = '' - local bs - while true do - while true do -- search '\' or '"' - newpos = find(json, '[\\"]', pos2) - if newpos then - break - end - str = str .. sub(json, pos, jsonlen) - if pos2 == jsonlen + 2 then - pos2 = 2 - else - pos2 = 1 - end - jsonnxt() - if jsonlen == 0 then - parse_error("unterminated string") - end - end - if byte(json, newpos) == 0x22 then -- break if '"' - break - end - pos2 = newpos + 2 -- skip '\' - bs = true -- mark the existence of a backslash - end - str = str .. sub(json, pos, newpos - 1) - pos = newpos + 1 + local function f_str(iskey) + local pos2 = pos + local newpos + local str = "" + local bs + while true do + while true do -- search '\' or '"' + newpos = find(json, '[\\"]', pos2) + if newpos then + break + end + str = str .. sub(json, pos, jsonlen) + if pos2 == jsonlen + 2 then + pos2 = 2 + else + pos2 = 1 + end + jsonnxt() + if jsonlen == 0 then + parse_error("unterminated string") + end + end + if byte(json, newpos) == 0x22 then -- break if '"' + break + end + pos2 = newpos + 2 -- skip '\' + bs = true -- mark the existence of a backslash + end + str = str .. sub(json, pos, newpos - 1) + pos = newpos + 1 - if find(str, f_str_ctrl_pat) then - parse_error("unescaped control string") - end - if bs then -- a backslash exists - -- We need to grab 4 characters after the escape char, - -- for encoding unicode codepoint to UTF-8. - -- As we need to ensure that every first surrogate pair byte is - -- immediately followed by second one, we grab upto 5 characters and - -- check the last for this purpose. - str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst) - if f_str_surrogate_prev ~= 0 then - f_str_surrogate_prev = 0 - parse_error("1st surrogate pair byte not continued by 2nd") - end - end + if find(str, f_str_ctrl_pat) then + parse_error("unescaped control string") + end + if bs then -- a backslash exists + -- We need to grab 4 characters after the escape char, + -- for encoding unicode codepoint to UTF-8. + -- As we need to ensure that every first surrogate pair byte is + -- immediately followed by second one, we grab upto 5 characters and + -- check the last for this purpose. + str = gsub(str, "\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)", f_str_subst) + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + parse_error("1st surrogate pair byte not continued by 2nd") + end + end - if iskey then - return sax_key(str) - end - return sax_string(str) - end + if iskey then + return sax_key(str) + end + return sax_string(str) + end - --[[ + --[[ Arrays, Objects --]] - -- arrays - local function f_ary() - rec_depth = rec_depth + 1 - if rec_depth > 1000 then - parse_error('too deeply nested json (> 1000)') - end - sax_startarray() + -- arrays + local function f_ary() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + parse_error("too deeply nested json (> 1000)") + end + sax_startarray() - spaces() - if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty - pos = pos + 1 - else - local newpos - while true do - f = dispatcher[byte(json, pos)] -- parse value - pos = pos + 1 - f() - newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma - if newpos then - pos = newpos - else - newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket - if newpos then - pos = newpos - break - end - spaces() -- since the current chunk can be ended, skip spaces toward following chunks - local c = byte(json, pos) - pos = pos + 1 - if c == 0x2C then -- check comma again - spaces() - elseif c == 0x5D then -- check closing bracket again - break - else - parse_error("no closing bracket of an array") - end - end - if pos > jsonlen then - spaces() - end - end - end + spaces() + if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty + pos = pos + 1 + else + local newpos + while true do + f = dispatcher[byte(json, pos)] -- parse value + pos = pos + 1 + f() + newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma + if newpos then + pos = newpos + else + newpos = match(json, "^[ \n\r\t]*%]()", pos) -- check closing bracket + if newpos then + pos = newpos + break + end + spaces() -- since the current chunk can be ended, skip spaces toward following chunks + local c = byte(json, pos) + pos = pos + 1 + if c == 0x2C then -- check comma again + spaces() + elseif c == 0x5D then -- check closing bracket again + break + else + parse_error("no closing bracket of an array") + end + end + if pos > jsonlen then + spaces() + end + end + end - rec_depth = rec_depth - 1 - return sax_endarray() - end + rec_depth = rec_depth - 1 + return sax_endarray() + end - -- objects - local function f_obj() - rec_depth = rec_depth + 1 - if rec_depth > 1000 then - parse_error('too deeply nested json (> 1000)') - end - sax_startobject() + -- objects + local function f_obj() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + parse_error("too deeply nested json (> 1000)") + end + sax_startobject() - spaces() - if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty - pos = pos + 1 - else - local newpos - while true do - if byte(json, pos) ~= 0x22 then - parse_error("not key") - end - pos = pos + 1 - f_str(true) -- parse key - newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) -- check colon - if newpos then - pos = newpos - else - spaces() -- read spaces through chunks - if byte(json, pos) ~= 0x3A then -- check colon again - parse_error("no colon after a key") - end - pos = pos + 1 - spaces() - end - if pos > jsonlen then - spaces() - end - f = dispatcher[byte(json, pos)] - pos = pos + 1 - f() -- parse value - newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma - if newpos then - pos = newpos - else - newpos = match(json, '^[ \n\r\t]*}()', pos) -- check closing bracket - if newpos then - pos = newpos - break - end - spaces() -- read spaces through chunks - local c = byte(json, pos) - pos = pos + 1 - if c == 0x2C then -- check comma again - spaces() - elseif c == 0x7D then -- check closing bracket again - break - else - parse_error("no closing bracket of an object") - end - end - if pos > jsonlen then - spaces() - end - end - end + spaces() + if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty + pos = pos + 1 + else + local newpos + while true do + if byte(json, pos) ~= 0x22 then + parse_error("not key") + end + pos = pos + 1 + f_str(true) -- parse key + newpos = match(json, "^[ \n\r\t]*:[ \n\r\t]*()", pos) -- check colon + if newpos then + pos = newpos + else + spaces() -- read spaces through chunks + if byte(json, pos) ~= 0x3A then -- check colon again + parse_error("no colon after a key") + end + pos = pos + 1 + spaces() + end + if pos > jsonlen then + spaces() + end + f = dispatcher[byte(json, pos)] + pos = pos + 1 + f() -- parse value + newpos = match(json, "^[ \n\r\t]*,[ \n\r\t]*()", pos) -- check comma + if newpos then + pos = newpos + else + newpos = match(json, "^[ \n\r\t]*}()", pos) -- check closing bracket + if newpos then + pos = newpos + break + end + spaces() -- read spaces through chunks + local c = byte(json, pos) + pos = pos + 1 + if c == 0x2C then -- check comma again + spaces() + elseif c == 0x7D then -- check closing bracket again + break + else + parse_error("no closing bracket of an object") + end + end + if pos > jsonlen then + spaces() + end + end + end - rec_depth = rec_depth - 1 - return sax_endobject() - end + rec_depth = rec_depth - 1 + return sax_endobject() + end - --[[ + --[[ The jump table to dispatch a parser for a value, indexed by the code of the value's first char. Key should be non-nil. --]] - dispatcher = { - [0] = - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_str, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_mns, - f_err, - f_err, - f_zro, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_num, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_ary, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_fls, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_nul, - f_err, - f_err, - f_err, - f_err, - f_err, - f_tru, - f_err, - f_err, - f_err, - f_err, - f_err, - f_err, - f_obj, - f_err, - f_err, - f_err, - f_err, - } + dispatcher = { + [0] = f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_str, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_mns, + f_err, + f_err, + f_zro, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_num, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_ary, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_fls, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_nul, + f_err, + f_err, + f_err, + f_err, + f_err, + f_tru, + f_err, + f_err, + f_err, + f_err, + f_err, + f_err, + f_obj, + f_err, + f_err, + f_err, + f_err, + } - --[[ + --[[ public funcitons --]] - local function run() - rec_depth = 0 - spaces() - f = dispatcher[byte(json, pos)] - pos = pos + 1 - f() - end + local function run() + rec_depth = 0 + spaces() + f = dispatcher[byte(json, pos)] + pos = pos + 1 + f() + end - local function read(n) - if n < 0 then - error("the argument must be non-negative") - end - local pos2 = (pos - 1) + n - local str = sub(json, pos, pos2) - while pos2 > jsonlen and jsonlen ~= 0 do - jsonnxt() - pos2 = pos2 - (jsonlen - (pos - 1)) - str = str .. sub(json, pos, pos2) - end - if jsonlen ~= 0 then - pos = pos2 + 1 - end - return str - end + local function read(n) + if n < 0 then + error("the argument must be non-negative") + end + local pos2 = (pos - 1) + n + local str = sub(json, pos, pos2) + while pos2 > jsonlen and jsonlen ~= 0 do + jsonnxt() + pos2 = pos2 - (jsonlen - (pos - 1)) + str = str .. sub(json, pos, pos2) + end + if jsonlen ~= 0 then + pos = pos2 + 1 + end + return str + end - local function tellpos() - return acc + pos - end + local function tellpos() + return acc + pos + end - return { - run = run, - tryc = tryc, - read = read, - tellpos = tellpos, - } - end + return { + run = run, + tryc = tryc, + read = read, + tellpos = tellpos, + } + end - local function newfileparser(fn, saxtbl) - local fp = open(fn) - local function gen() - local s - if fp then - s = fp:read(8192) - if not s then - fp:close() - fp = nil - end - end - return s - end - return newparser(gen, saxtbl) - end + local function newfileparser(fn, saxtbl) + local fp = open(fn) + local function gen() + local s + if fp then + s = fp:read(8192) + if not s then + fp:close() + fp = nil + end + end + return s + end + return newparser(gen, saxtbl) + end - return { - newparser = newparser, - newfileparser = newfileparser - } - end + return { + newparser = newparser, + newfileparser = newfileparser, + } + end end do - local _ENV = _ENV - package.preload["utils"] = function(...) - local arg = _G.arg; - local module = {} + local _ENV = _ENV + package.preload["utils"] = function(...) + local arg = _G.arg + local module = {} - function module.tablelength(T) - local count = 0 - for _ in pairs(T) do count = count + 1 end - return count - end + function module.tablelength(T) + local count = 0 + for _ in pairs(T) do + count = count + 1 + end + return count + end - module.id_number = 0 - function module.next_id(length) - module.id_number = module.id_number + 1 - return string.format(string.format('%%0%dd', length), module.id_number) - end + module.id_number = 0 + function module.next_id(length) + module.id_number = module.id_number + 1 + return string.format(string.format("%%0%dd", length), module.id_number) + end - local function url_encode_char(chr) - return string.format("%%%X", string.byte(chr)) - end + local function url_encode_char(chr) + return string.format("%%%X", string.byte(chr)) + end - function module.urlencode(str) - local output, t = string.gsub(str, "[^%w]", url_encode_char) - return output - end + function module.urlencode(str) + local output, t = string.gsub(str, "[^%w]", url_encode_char) + return output + end - function module.xmlescape(str) - return string.gsub(str, '["<>&]', { ['&'] = '&', ['<'] = '<', ['>'] = '>', ['"'] = '"' }) - end + function module.xmlescape(str) + return string.gsub(str, "[<>&]", { ["&"] = "&", ["<"] = "<", [">"] = ">" }) + end - function module.trim(s) - return (s:gsub("^%s*(.-)%s*$", "%1")) - end + function module.xmlattr(str) + return string.gsub(str, '["<>&]', { ["&"] = "&", ["<"] = "<", [">"] = ">", ['"'] = """ }) + end - function module.deepcopy(orig) - local orig_type = type(orig) - local copy - if orig_type == 'table' then - copy = {} - for orig_key, orig_value in next, orig, nil do - copy[module.deepcopy(orig_key)] = module.deepcopy(orig_value) - end - setmetatable(copy, module.deepcopy(getmetatable(orig))) - else -- number, string, boolean, etc - copy = orig - end - return copy - end + function module.trim(s) + return s:gsub("^%s*(.-)%s*$", "%1") + end - function module.trim(s) - if s == nil then - return s - end - return (s:gsub("^%s*(.-)%s*$", "%1")) - end + function module.deepcopy(orig) + local orig_type = type(orig) + local copy + if orig_type == "table" then + copy = {} + for orig_key, orig_value in next, orig, nil do + copy[module.deepcopy(orig_key)] = module.deepcopy(orig_value) + end + setmetatable(copy, module.deepcopy(getmetatable(orig))) + else -- number, string, boolean, etc + copy = orig + end + return copy + end - return module - end + function module.trim(s) + if s == nil then + return s + end + return (s:gsub("^%s*(.-)%s*$", "%1")) + end + + return module + end end do - local _ENV = _ENV - package.preload["zotero"] = function(...) - local arg = _G.arg; - local module = {} + local _ENV = _ENV + package.preload["zotero"] = function(...) + local arg = _G.arg + local module = {} - local utils = require('utils') - local json = require('lunajson') - -- local pl = require('pl.pretty') -- for pl.pretty.dump + local utils = require("utils") + local json = require("lunajson") + -- local pl = require('pl.pretty') -- for pl.pretty.dump - local state = { - reported = {}, - } + local state = { + reported = {}, + } - module.citekeys = {} + module.citekeys = {} - function module.authors(csl_or_item) - local authors = {} - local author + function module.authors(csl_or_item) + local authors = {} + local author - if csl_or_item.author ~= nil then - for _, author in ipairs(csl_or_item.author) do - if author.literal ~= nil then - table.insert(authors, author.literal) - elseif author.family ~= nil then - table.insert(authors, author.family) - end - end - elseif csl_or_item.creators ~= nil then - for _, author in ipairs(csl_or_item.creators) do - if author.name ~= nil then - table.insert(authors, author.name) - elseif author.lastName ~= nil then - table.insert(authors, author.lastName) - end - end - elseif csl_or_item.reporter ~= nil then - table.insert(authors, csl_or_item.reporter) - end + if csl_or_item.author ~= nil then + for _, author in ipairs(csl_or_item.author) do + if author.literal ~= nil then + table.insert(authors, author.literal) + elseif author.family ~= nil then + table.insert(authors, author.family) + end + end + elseif csl_or_item.creators ~= nil then + for _, author in ipairs(csl_or_item.creators) do + if author.name ~= nil then + table.insert(authors, author.name) + elseif author.lastName ~= nil then + table.insert(authors, author.lastName) + end + end + elseif csl_or_item.reporter ~= nil then + table.insert(authors, csl_or_item.reporter) + end - if utils.tablelength(authors) == 0 then - return nil - end + if utils.tablelength(authors) == 0 then + return nil + end - local last = table.remove(authors) - if utils.tablelength(authors) == 0 then - return last - end - authors = table.concat(authors, ', ') - return table.concat({ authors, last }, ' and ') - end + local last = table.remove(authors) + if utils.tablelength(authors) == 0 then + return last + end + authors = table.concat(authors, ", ") + return table.concat({ authors, last }, " and ") + end - local function load_items() - if state.fetched ~= nil then - return - end + local function load_items() + if state.fetched ~= nil then + return + end - state.fetched = { - items = {}, - errors = {}, - } + state.fetched = { + items = {}, + errors = {}, + } - local citekeys = {} - for k, _ in pairs(module.citekeys) do - table.insert(citekeys, k) - end + local citekeys = {} + for k, _ in pairs(module.citekeys) do + table.insert(citekeys, k) + end - if utils.tablelength(citekeys) == 0 then - return - end + if utils.tablelength(citekeys) == 0 then + return + end - citekeys = table.concat(citekeys, ',') - local url = module.url .. utils.urlencode(citekeys) - local mt, contents = pandoc.mediabag.fetch(url, '.') - local ok, fetched = pcall(json.decode, contents) - if not ok then - print('could not fetch Zotero items: ' .. contents) - return - end - state.fetched = fetched - end + citekeys = table.concat(citekeys, ",") + local url = module.url .. utils.urlencode(citekeys) + local mt, contents = pandoc.mediabag.fetch(url, ".") + local ok, fetched = pcall(json.decode, contents) + if not ok then + print("could not fetch Zotero items: " .. contents) + return + end + state.fetched = fetched + end - function module.get(citekey) - load_items() + function module.get(citekey) + load_items() - if state.reported[citekey] ~= nil then - return nil - end + if state.reported[citekey] ~= nil then + return nil + end - if state.fetched.errors[citekey] ~= nil then - state.reported[citekey] = true - print('@' .. citekey .. ': ' .. state.fetched.errors[citekey]) - return nil - end + if state.fetched.errors[citekey] ~= nil then + state.reported[citekey] = true + print("@" .. citekey .. ": " .. state.fetched.errors[citekey]) + return nil + end - if state.fetched.items[citekey] == nil then - state.reported[citekey] = true - print('@' .. citekey .. ' not in Zotero') - return nil - end + if state.fetched.items[citekey] == nil then + state.reported[citekey] = true + print("@" .. citekey .. " not in Zotero") + return nil + end - return state.fetched.items[citekey], state.fetched.zotero[citekey] - end + return state.fetched.items[citekey], state.fetched.zotero[citekey] + end - return module - end + return module + end end -- @@ -2054,394 +2090,424 @@ end -- if lpeg == nil then - print('upgrade pandoc to version 2.16.2 or later') - os.exit() + print("upgrade pandoc to version 2.16.2 or later") + os.exit() end -local json = require('lunajson') -local csl_locator = require('locator') -local utils = require('utils') -local zotero = require('zotero') +local json = require("lunajson") +local csl_locator = require("locator") +local utils = require("utils") +local zotero = require("zotero") -- -- global state -- -- local config = { - client = 'zotero', - scannable_cite = false, - csl_style = 'apa7', - format = nil, -- more to document than anything else -- Lua does not store nils in tables - transferable = false + client = "zotero", + scannable_cite = false, + csl_style = "apa7", + format = nil, -- more to document than anything else -- Lua does not store nils in tables + transferable = false, } -- -- -- bibliography marker generator -- -- -- function zotero_docpreferences_odt(csl_style) - return string.format( - '' - .. ' ' - .. '