dotfiles/multimedia/.config/mpv/scripts/uosc/lib/text.lua
2024-04-20 09:27:09 +02:00

515 lines
17 KiB
Lua
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- https://en.wikipedia.org/wiki/Unicode_block
---@alias CodePointRange {[1]: integer; [2]: integer}
---@type CodePointRange[]
local zero_width_blocks = {
{0x0000, 0x001F}, -- C0
{0x007F, 0x009F}, -- Delete + C1
{0x034F, 0x034F}, -- combining grapheme joiner
{0x061C, 0x061C}, -- Arabic Letter Strong
{0x200B, 0x200F}, -- {zero-width space, zero-width non-joiner, zero-width joiner, left-to-right mark, right-to-left mark}
{0x2028, 0x202E}, -- {line separator, paragraph separator, Left-to-Right Embedding, Right-to-Left Embedding, Pop Directional Format, Left-to-Right Override, Right-to-Left Override}
{0x2060, 0x2060}, -- word joiner
{0x2066, 0x2069}, -- {Left-to-Right Isolate, Right-to-Left Isolate, First Strong Isolate, Pop Directional Isolate}
{0xFEFF, 0xFEFF}, -- zero-width non-breaking space
-- Some other characters can also be combined https://en.wikipedia.org/wiki/Combining_character
{0x0300, 0x036F}, -- Combining Diacritical Marks 0 BMP Inherited
{0x1AB0, 0x1AFF}, -- Combining Diacritical Marks Extended 0 BMP Inherited
{0x1DC0, 0x1DFF}, -- Combining Diacritical Marks Supplement 0 BMP Inherited
{0x20D0, 0x20FF}, -- Combining Diacritical Marks for Symbols 0 BMP Inherited
{0xFE20, 0xFE2F}, -- Combining Half Marks 0 BMP Cyrillic (2 characters), Inherited (14 characters)
-- Egyptian Hieroglyph Format Controls and Shorthand format Controls
{0x13430, 0x1345F}, -- Egyptian Hieroglyph Format Controls 1 SMP Egyptian Hieroglyphs
{0x1BCA0, 0x1BCAF}, -- Shorthand Format Controls 1 SMP Common
-- not sure how to deal with those https://en.wikipedia.org/wiki/Spacing_Modifier_Letters
{0x02B0, 0x02FF}, -- Spacing Modifier Letters 0 BMP Bopomofo (2 characters), Latin (14 characters), Common (64 characters)
}
-- All characters have the same width as the first one
---@type CodePointRange[]
local same_width_blocks = {
{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A 0 BMP Han
{0x4E00, 0x9FFF}, -- CJK Unified Ideographs 0 BMP Han
{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B 2 SIP Han
{0x2A700, 0x2B73F}, -- CJK Unified Ideographs Extension C 2 SIP Han
{0x2B740, 0x2B81F}, -- CJK Unified Ideographs Extension D 2 SIP Han
{0x2B820, 0x2CEAF}, -- CJK Unified Ideographs Extension E 2 SIP Han
{0x2CEB0, 0x2EBEF}, -- CJK Unified Ideographs Extension F 2 SIP Han
{0x2F800, 0x2FA1F}, -- CJK Compatibility Ideographs Supplement 2 SIP Han
{0x30000, 0x3134F}, -- CJK Unified Ideographs Extension G 3 TIP Han
{0x31350, 0x323AF}, -- CJK Unified Ideographs Extension H 3 TIP Han
}
local width_length_ratio = 0.5
---@type integer, integer
local osd_width, osd_height = 100, 100
---Get byte count of utf-8 character at index i in str
---@param str string
---@param i integer?
---@return integer
local function utf8_char_bytes(str, i)
local char_byte = str:byte(i)
local max_bytes = #str - i + 1
if char_byte < 0xC0 then
return math.min(max_bytes, 1)
elseif char_byte < 0xE0 then
return math.min(max_bytes, 2)
elseif char_byte < 0xF0 then
return math.min(max_bytes, 3)
elseif char_byte < 0xF8 then
return math.min(max_bytes, 4)
else
return math.min(max_bytes, 1)
end
end
---Creates an iterator for an utf-8 encoded string
---Iterates over utf-8 characters instead of bytes
---@param str string
---@return fun(): integer?, string?
function utf8_iter(str)
local byte_start = 1
return function()
local start = byte_start
if #str < start then return nil end
local byte_count = utf8_char_bytes(str, start)
byte_start = start + byte_count
return start, str:sub(start, start + byte_count - 1)
end
end
---Estimating string length based on the number of characters
---@param char string
---@return number
function utf8_length(str)
local str_length = 0
for _, c in utf8_iter(str) do
str_length = str_length + 1
end
return str_length
end
---Extract Unicode code point from utf-8 character at index i in str
---@param str string
---@param i integer
---@return integer
local function utf8_to_unicode(str, i)
local byte_count = utf8_char_bytes(str, i)
local char_byte = str:byte(i)
local unicode = char_byte
if byte_count ~= 1 then
local shift = 2 ^ (8 - byte_count)
char_byte = char_byte - math.floor(0xFF / shift) * shift
unicode = char_byte * (2 ^ 6) ^ (byte_count - 1)
end
for j = 2, byte_count do
char_byte = str:byte(i + j - 1) - 0x80
unicode = unicode + char_byte * (2 ^ 6) ^ (byte_count - j)
end
return round(unicode)
end
---Convert Unicode code point to utf-8 string
---@param unicode integer
---@return string?
local function unicode_to_utf8(unicode)
if unicode < 0x80 then
return string.char(unicode)
else
local byte_count
if unicode < 0x800 then
byte_count = 2
elseif unicode < 0x10000 then
byte_count = 3
elseif unicode < 0x110000 then
byte_count = 4
else
return
end -- too big
local res = {}
local shift = 2 ^ 6
local after_shift = unicode
for _ = byte_count, 2, -1 do
local before_shift = after_shift
after_shift = math.floor(before_shift / shift)
table.insert(res, 1, before_shift - after_shift * shift + 0x80)
end
shift = 2 ^ (8 - byte_count)
table.insert(res, 1, after_shift + math.floor(0xFF / shift) * shift)
---@diagnostic disable-next-line: deprecated
return string.char(unpack(res))
end
end
---Update osd resolution if valid
---@param width integer
---@param height integer
local function update_osd_resolution(width, height)
if width > 0 and height > 0 then osd_width, osd_height = width, height end
end
mp.observe_property('osd-dimensions', 'native', function(_, dim)
if dim then update_osd_resolution(dim.w, dim.h) end
end)
local measure_bounds
do
local text_osd = mp.create_osd_overlay('ass-events')
text_osd.compute_bounds, text_osd.hidden = true, true
---@param ass_text string
---@return integer, integer, integer, integer
measure_bounds = function(ass_text)
update_osd_resolution(mp.get_osd_size())
text_osd.res_x, text_osd.res_y = osd_width, osd_height
text_osd.data = ass_text
local res = text_osd:update()
return res.x0, res.y0, res.x1, res.y1
end
end
local normalized_text_width
do
---@type {wrap: integer; bold: boolean; italic: boolean, rotate: number; size: number}
local bounds_opts = {wrap = 2, bold = false, italic = false, rotate = 0, size = 0}
---Measure text width and normalize to a font size of 1
---text has to be ass safe
---@param text string
---@param size number
---@param bold boolean
---@param italic boolean
---@param horizontal boolean
---@return number, integer
normalized_text_width = function(text, size, bold, italic, horizontal)
bounds_opts.bold, bounds_opts.italic, bounds_opts.rotate = bold, italic, horizontal and 0 or -90
local x1, y1 = nil, nil
size = size / 0.8
-- prevent endless loop
local repetitions_left = 5
repeat
size = size * 0.8
bounds_opts.size = size
local ass = assdraw.ass_new()
ass:txt(0, 0, horizontal and 7 or 1, text, bounds_opts)
_, _, x1, y1 = measure_bounds(ass.text)
repetitions_left = repetitions_left - 1
-- make sure nothing got clipped
until (x1 and x1 < osd_width and y1 < osd_height) or repetitions_left == 0
local width = (repetitions_left == 0 and not x1) and 0 or (horizontal and x1 or y1)
return width / size, horizontal and osd_width or osd_height
end
end
---Estimates character length based on utf8 byte count
---1 character length is roughly the size of a latin character
---@param char string
---@return number
local function char_length(char)
return #char > 2 and 2 or 1
end
---Estimates string length based on utf8 byte count
---Note: Making a string in the iterator with the character is a waste here,
---but as this function is only used when measuring whole string widths it's fine
---@param text string
---@return number
local function text_length(text)
if not text or text == '' then return 0 end
local text_length = 0
for _, char in utf8_iter(tostring(text)) do text_length = text_length + char_length(char) end
return text_length
end
---Finds the best orientation of text on screen and returns the estimated max size
---and if the text should be drawn horizontally
---@param text string
---@return number, boolean
local function fit_on_screen(text)
local estimated_width = text_length(text) * width_length_ratio
if osd_width >= osd_height then
-- Fill the screen as much as we can, bigger is more accurate.
return math.min(osd_width / estimated_width, osd_height), true
else
return math.min(osd_height / estimated_width, osd_width), false
end
end
---Gets next stage from cache
---@param cache {[any]: table}
---@param value any
local function get_cache_stage(cache, value)
local stage = cache[value]
if not stage then
stage = {}
cache[value] = stage
end
return stage
end
---Is measured resolution sufficient
---@param px integer
---@return boolean
local function no_remeasure_required(px)
return px >= 800 or (px * 1.1 >= osd_width and px * 1.1 >= osd_height)
end
local character_width
do
---@type {[boolean]: {[string]: {[1]: number, [2]: integer}}}
local char_width_cache = {}
---Get measured width of character
---@param char string
---@param bold boolean
---@return number, integer
character_width = function(char, bold)
---@type {[string]: {[1]: number, [2]: integer}}
local char_widths = get_cache_stage(char_width_cache, bold)
local width_px = char_widths[char]
if width_px and no_remeasure_required(width_px[2]) then return width_px[1], width_px[2] end
local unicode = utf8_to_unicode(char, 1)
for _, block in ipairs(zero_width_blocks) do
if unicode >= block[1] and unicode <= block[2] then
char_widths[char] = {0, math.huge}
return 0, math.huge
end
end
local measured_char = nil
for _, block in ipairs(same_width_blocks) do
if unicode >= block[1] and unicode <= block[2] then
measured_char = unicode_to_utf8(block[1])
width_px = char_widths[measured_char]
if width_px and no_remeasure_required(width_px[2]) then
char_widths[char] = width_px
return width_px[1], width_px[2]
end
break
end
end
if not measured_char then measured_char = char end
-- half as many repetitions for wide characters
local char_count = 10 / char_length(char)
local max_size, horizontal = fit_on_screen(measured_char:rep(char_count))
local size = math.min(max_size * 0.9, 50)
char_count = math.min(math.floor(char_count * max_size / size * 0.8), 100)
local enclosing_char, enclosing_width, next_char_count = '|', 0, char_count
if measured_char == enclosing_char then
enclosing_char = ''
else
enclosing_width = 2 * character_width(enclosing_char, bold)
end
local width_ratio, width, px = nil, nil, nil
repeat
char_count = next_char_count
local str = enclosing_char .. measured_char:rep(char_count) .. enclosing_char
width, px = normalized_text_width(str, size, bold, false, horizontal)
width = width - enclosing_width
width_ratio = width * size / (horizontal and osd_width or osd_height)
next_char_count = math.min(math.floor(char_count / width_ratio * 0.9), 100)
until width_ratio < 0.05 or width_ratio > 0.5 or char_count == next_char_count
width = width / char_count
width_px = {width, px}
if char ~= measured_char then char_widths[measured_char] = width_px end
char_widths[char] = width_px
return width, px
end
end
---Calculate text width from individual measured characters
---@param text string|number
---@param bold boolean
---@return number, integer
local function character_based_width(text, bold)
local max_width = 0
local min_px = math.huge
for line in tostring(text):gmatch('([^\n]*)\n?') do
local total_width = 0
for _, char in utf8_iter(line) do
local width, px = character_width(char, bold)
total_width = total_width + width
if px < min_px then min_px = px end
end
if total_width > max_width then max_width = total_width end
end
return max_width, min_px
end
---Measure width of whole text
---@param text string|number
---@param bold boolean
---@param italic boolean
---@return number, integer
local function whole_text_width(text, bold, italic)
text = tostring(text)
local size, horizontal = fit_on_screen(text)
return normalized_text_width(ass_escape(text), size * 0.9, bold, italic, horizontal)
end
---Scale normalized width to real width based on font size and italic
---@param opts {size: number; italic?: boolean}
---@return number, number
local function opts_factor_offset(opts)
return opts.size, opts.italic and opts.size * 0.2 or 0
end
---Scale normalized width to real width based on font size and italic
---@param opts {size: number; italic?: boolean}
---@return number
local function normalized_to_real(width, opts)
local factor, offset = opts_factor_offset(opts)
return factor * width + offset
end
do
---@type {[boolean]: {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}} | {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}
local width_cache = {}
---Calculate width of text with the given opts
---@param text string|number
---@return number
---@param opts {size: number; bold?: boolean; italic?: boolean}
function text_width(text, opts)
if not text or text == '' then return 0 end
---@type boolean, boolean
local bold, italic = opts.bold or options.font_bold, opts.italic or false
if config.refine.text_width then
---@type {[string|number]: {[1]: number, [2]: integer}}
local text_width = get_cache_stage(width_cache, bold)
local width_px = text_width[text]
if width_px and no_remeasure_required(width_px[2]) then return normalized_to_real(width_px[1], opts) end
local width, px = character_based_width(text, bold)
width_cache[bold][text] = {width, px}
return normalized_to_real(width, opts)
else
---@type {[string|number]: {[1]: number, [2]: integer}}
local text_width = get_cache_stage(get_cache_stage(width_cache, bold), italic)
local width_px = text_width[text]
if width_px and no_remeasure_required(width_px[2]) then return width_px[1] * opts.size end
local width, px = whole_text_width(text, bold, italic)
width_cache[bold][italic][text] = {width, px}
return width * opts.size
end
end
end
do
---@type {[string]: string}
local cache = {}
function timestamp_zero_rep_clear_cache()
cache = {}
end
---Replace all timestamp digits with 0
---@param timestamp string
function timestamp_zero_rep(timestamp)
local substitute = cache[#timestamp]
if not substitute then
substitute = timestamp:gsub('%d', '0')
cache[#timestamp] = substitute
end
return substitute
end
---Get width of formatted timestamp as if all the digits were replaced with 0
---@param timestamp string
---@param opts {size: number; bold?: boolean; italic?: boolean}
---@return number
function timestamp_width(timestamp, opts)
return text_width(timestamp_zero_rep(timestamp), opts)
end
end
do
local wrap_at_chars = {' ', ' ', '-', ''}
local remove_when_wrap = {' ', ' '}
---Wrap the text at the closest opportunity to target_line_length
---@param text string
---@param opts {size: number; bold?: boolean; italic?: boolean}
---@param target_line_length number
---@return string, integer
function wrap_text(text, opts, target_line_length)
local target_line_width = target_line_length * width_length_ratio * opts.size
local bold, scale_factor, scale_offset = opts.bold or false, opts_factor_offset(opts)
local wrap_at_chars, remove_when_wrap = wrap_at_chars, remove_when_wrap
local lines = {}
for _, text_line in ipairs(split(text, '\n')) do
local line_width = scale_offset
local line_start = 1
local before_end = nil
local before_width = scale_offset
local before_line_start = 0
local before_removed_width = 0
for char_start, char in utf8_iter(text_line) do
local char_end = char_start + #char - 1
local char_width = character_width(char, bold) * scale_factor
line_width = line_width + char_width
if (char_end == #text_line) or itable_has(wrap_at_chars, char) then
local remove = itable_has(remove_when_wrap, char)
local line_width_after_remove = line_width - (remove and char_width or 0)
if line_width_after_remove < target_line_width then
before_end = remove and char_start - 1 or char_end
before_width = line_width_after_remove
before_line_start = char_end + 1
before_removed_width = remove and char_width or 0
else
if (target_line_width - before_width) <
(line_width_after_remove - target_line_width) then
lines[#lines + 1] = text_line:sub(line_start, before_end)
line_start = before_line_start
line_width = line_width - before_width - before_removed_width + scale_offset
else
lines[#lines + 1] = text_line:sub(line_start, remove and char_start - 1 or char_end)
line_start = char_end + 1
line_width = scale_offset
end
before_end = line_start
before_width = scale_offset
end
end
end
if #text_line >= line_start then
lines[#lines + 1] = text_line:sub(line_start)
elseif text_line == '' then
lines[#lines + 1] = ''
end
end
return table.concat(lines, '\n'), #lines
end
end
do
local word_separators = create_set({
' ', ' ', '\t', '-', '', '_', ',', '.', '+', '&', '(', ')', '[', ']', '{', '}', '<', '>', '/', '\\',
'', '', '', '', '', '', '', '', '', '', '', '', '', '',
})
---Get the first character of each word
---@param str string
---@return string[]
function initials(str)
local initials, is_word_start, word_separators = {}, true, word_separators
for _, char in utf8_iter(str) do
if word_separators[char] then
is_word_start = true
elseif is_word_start then
initials[#initials + 1] = char
is_word_start = false
end
end
return initials
end
end