size: 30 KiB
| 1 | --- @module 'djot.ast' |
| 2 | --- Construct an AST for a djot document. |
| 3 | |
| 4 | --- @class Attributes |
| 5 | --- @field class? string |
| 6 | --- @field id? string |
| 7 | |
| 8 | --- @class AST |
| 9 | --- @field t string tag for the node |
| 10 | --- @field s? string text for the node |
| 11 | --- @field c? AST[] child nodes |
| 12 | --- @field alias string |
| 13 | --- @field level integer |
| 14 | --- @field startidx integer |
| 15 | --- @field startmarker string |
| 16 | --- @field styles table |
| 17 | --- @field style_marker string |
| 18 | --- @field attr Attributes |
| 19 | --- @field display boolean |
| 20 | --- @field references table |
| 21 | --- @field footnotes table |
| 22 | --- @field pos? string[] |
| 23 | --- @field destination? string |
| 24 | |
-- Compatibility shim for interpreters older than Lua 5.3 (detected by the
-- missing `utf8` library). The AST and attribute tables defined below rely
-- on the `__pairs` metamethod, so the global `pairs` is wrapped to honour it.
-- The approach is derived from the compat53 rock.
if not utf8 then
  local builtin_pairs = pairs
  pairs = function(t)
    local meta = getmetatable(t)
    local custom = type(meta) == "table" and meta.__pairs
    if type(custom) == "function" then
      return custom(t)
    end
    return builtin_pairs(t)
  end
end
| 39 | local unpack = unpack or table.unpack |
| 40 | |
| 41 | local find, lower, sub, rep, format = |
| 42 | string.find, string.lower, string.sub, string.rep, string.format |
| 43 | |
-- Builds lookup tables from byte positions in `input` to source coordinates.
-- The result holds three sparse arrays, each indexed by byte position:
--   map.line[bytepos], map.col[bytepos], map.charpos[bytepos]
-- Every byte of a multi-byte sequence gets the same column and character
-- position. One extra entry is stored for the position just past the last
-- byte consumed.
local function make_sourcepos_map(input)
  local lines, cols, charposes = {}, {}, {}
  local cur_line, cur_col, cur_char = 1, 0, 0
  local pos = 1

  local lead = string.byte(input, pos)
  while lead do
    cur_col = cur_col + 1
    cur_char = cur_char + 1
    -- length of the sequence, judged from its first byte only
    local width
    if lead < 0xC0 then
      width = 1
    elseif lead < 0xE0 then
      width = 2
    elseif lead < 0xF0 then
      width = 3
    else
      width = 4
    end
    for p = pos, pos + width - 1 do
      lines[p] = cur_line
      cols[p] = cur_col
      charposes[p] = cur_char
    end
    pos = pos + width
    if lead == 10 then -- newline: next character starts a new line
      cur_line = cur_line + 1
      cur_col = 0
    end
    lead = string.byte(input, pos)
  end

  -- entry for the position following the input
  lines[pos] = cur_line + 1
  cols[pos] = 1
  charposes[pos] = cur_char + 1

  return { line = lines, col = cols, charpos = charposes }
end
| 87 | |
-- Appends the plain-text content of `node` to the array `buffer`.
-- A node's own text (`s`) takes precedence; a "softbreak" contributes a
-- newline; otherwise the children are visited in order.
local function add_string_content(node, buffer)
  local text = node.s
  if text then
    buffer[#buffer + 1] = text
    return
  end
  if node.t == "softbreak" then
    buffer[#buffer + 1] = "\n"
    return
  end
  local children = node.c
  if children then
    for idx = 1, #children do
      add_string_content(children[idx], buffer)
    end
  end
end

-- Returns the concatenated plain-text content of `node` and its descendants.
local function get_string_content(node)
  local pieces = {}
  add_string_content(node, pieces)
  return table.concat(pieces)
end
| 105 | |
-- Value of each roman digit, keyed by its lowercase letter.
local roman_digits = { i = 1, v = 5, x = 10, l = 50, c = 100, d = 500, m = 1000 }

-- Converts a roman numeral (either case) to a number.
-- Raises an error if `s` contains a character that is not a roman digit.
local function roman_to_number(s)
  local total = 0
  local right = 0 -- value of the digit immediately to the right
  -- scan right to left: a digit smaller than its right neighbour is
  -- subtracted (e.g. the "i" in "ix"), any other digit is added
  for pos = #s, 1, -1 do
    local value = roman_digits[lower(sub(s, pos, pos))]
    assert(value ~= nil, "Encountered bad character in roman numeral " .. s)
    if value < right then
      total = total - value
    else
      total = total + value
    end
    right = value
  end
  return total
end
| 134 | |
-- Computes the start number of an ordered list.
-- `marker` is the first item's marker (e.g. "3." or "(c)") and `style` is
-- the list style (e.g. "1.", "(a)", "I)"); punctuation is stripped from
-- both before they are interpreted. Returns nil when the style has no
-- numbering component.
local function get_list_start(marker, style)
  local numtype = string.gsub(style, "%p", "")
  local label = string.gsub(marker, "%p", "")
  if numtype == "1" then
    return tonumber(label)
  end
  if numtype == "A" or numtype == "a" then
    -- position of the letter in the alphabet, matching the style's case
    return (string.byte(label) - string.byte(numtype) + 1)
  end
  if numtype == "I" or numtype == "i" then
    return roman_to_number(label)
  end
  if numtype == "" then
    return nil
  end
end
| 152 | |
-- Event tags that are recorded but never turned into AST nodes.
local ignorable = {
  blankline = true,
  escape = true,
  image_marker = true,
}
| 158 | |
--- Build a `pairs`-style function that visits a table's keys in sorted order.
--- @param compare_function (function) order function handed to `table.sort`
--- @param to_displaykey (function) maps a stored key to the key reported to
--- the loop (the value is still looked up under the stored key)
--- @return (function) function taking a table and returning an iterator,
--- the table and nil; usable as a `__pairs` metamethod
local function sortedpairs(compare_function, to_displaykey)
  return function(tbl)
    -- Snapshot the keys. Compare against nil explicitly so that a `false`
    -- key does not end the scan early.
    local keys = {}
    local k = next(tbl)
    while k ~= nil do
      keys[#keys + 1] = k
      k = next(tbl, k)
    end
    table.sort(keys, compare_function)
    local keyindex = 0
    local function ordered_next(tabl, _)
      keyindex = keyindex + 1
      local key = keys[keyindex]
      if key == nil then
        -- exhausted; do not hand nil to to_displaykey
        return nil
      end
      -- use canonical names
      return to_displaykey(key), tabl[key]
    end
    -- Return an iterator function, the table, starting point
    return ordered_next, tbl, nil
  end
end
| 185 | |
-- provide children, tag, and text as aliases of c, t, s,
-- which we use above for better performance:
local mt = {}
-- long alias -> stored key
local special = {
  children = 'c',
  text = 's',
  tag = 't' }
-- stored key -> name reported when iterating with pairs
local displaykeys = {
  c = 'children',
  s = 'text',
  t = 'tag' }
-- Reads and writes of an alias are redirected to the short stored key.
mt.__index = function(tbl, key)
  return rawget(tbl, special[key] or key)
end
mt.__newindex = function(tbl, key, val)
  rawset(tbl, special[key] or key, val)
end
-- Iteration order: t, s, then all other keys in ascending order, then c,
-- references, footnotes. Ranking the keys keeps the order function strict
-- (it never reports a key as less than itself), as table.sort requires.
local key_rank = { t = 1, s = 2, c = 4, references = 5, footnotes = 6 }
local ordinary_rank = 3
mt.__pairs = sortedpairs(function(a, b)
  local rank_a = key_rank[a] or ordinary_rank
  local rank_b = key_rank[b] or ordinary_rank
  if rank_a ~= rank_b then
    return rank_a < rank_b
  elseif rank_a == ordinary_rank then
    return (a < b)
  else
    return false
  end
end, function(k) return displaykeys[k] or k end)
| 232 | |
| 233 | |
--- Create a new AST node.
--- The node starts with only its tag set; it has no children yet.
--- @param tag (string) tag for the node
--- @return (AST) node (table)
local function new_node(tag)
  return setmetatable({ t = tag }, mt)
end
| 242 | |
--- Add `child` as a child of `node`.
--- The children array is created on first use.
--- @param node (AST) node parent node
--- @param child (AST) node child node
local function add_child(node, child)
  local kids = node.c
  if kids then
    kids[#kids + 1] = child
  else
    node.c = { child }
  end
end
| 253 | |
--- Returns true if `node` has children.
--- Always returns a real boolean: false both when the node has no
--- children array and when that array is empty.
--- @param node (AST) node to check
--- @return (boolean) true if node has children
local function has_children(node)
  local children = node.c
  return (children and #children > 0) or false
end
| 260 | |
--- Returns an attributes object.
--- When `tbl` is given, that same table is returned with the metatable
--- attached; otherwise a fresh empty table is used.
--- @param tbl (Attributes?) table of attributes and values
--- @return (Attributes) attributes object (table including special metatable for
--- deterministic order of iteration)
local function new_attributes(tbl)
  local function ascending(a, b)
    return a < b
  end
  local function unchanged(k)
    return k
  end
  -- iterate keys in ascending order, reported under their own names
  return setmetatable(tbl or {}, { __pairs = sortedpairs(ascending, unchanged) })
end
| 272 | |
--- Insert an attribute into an attributes object.
--- Runs of whitespace in `val` are collapsed to a single space. A "class"
--- value is appended (space-separated) to an existing class; any other key
--- is overwritten.
--- @param attr (Attributes)
--- @param key (string) key of new attribute
--- @param val (string) value of new attribute
local function insert_attribute(attr, key, val)
  local normalized = val:gsub("%s+", " ")
  if key == "class" and attr.class then
    attr.class = attr.class .. " " .. normalized
  else
    attr[key] = normalized
  end
end
| 289 | |
--- Copy attributes from `source` to `target`.
--- Each pair is added through `insert_attribute`; a nil `source` is a no-op.
--- @param target (Attributes)
--- @param source (table) associating keys and values
local function copy_attributes(target, source)
  if not source then
    return
  end
  for key, value in pairs(source) do
    insert_attribute(target, key, value)
  end
end
| 300 | |
--- Add the attributes described by a list of attribute nodes to `targetnode`,
--- creating its `attr` object if it has none.
--- "id" and "class" nodes carry their value directly; a "key" node takes its
--- value from the run of "value" nodes that follows it, joined by newlines.
--- @param targetnode (AST)
--- @param cs (AST[]) attribute nodes
local function insert_attributes_from_nodes(targetnode, cs)
  targetnode.attr = targetnode.attr or new_attributes()
  local attr = targetnode.attr
  local idx = 1
  while idx <= #cs do
    local item = cs[idx]
    local kind, text = item.t, item.s
    if kind == "id" or kind == "class" then
      insert_attribute(attr, kind, text)
    elseif kind == "key" then
      local parts = {}
      local following = cs[idx + 1]
      while following and following.t == "value" do
        -- resolve backslash escapes
        parts[#parts + 1] = following.s:gsub("\\(%p)", "%1")
        idx = idx + 1
        following = cs[idx + 1]
      end
      insert_attribute(attr, text, table.concat(parts, "\n"))
    end
    idx = idx + 1
  end
end
| 322 | |
--- Turn a list item into a definition list item, in place.
--- A leading "para" child becomes the "term" (an empty term is inserted if
--- there is none); all remaining children are moved into a single
--- "definition" child.
--- @param node (AST)
local function make_definition_list_item(node)
  node.t = "definition_list_item"
  if not has_children(node) then
    node.c = {}
  end
  local children = node.c
  local first = children[1]
  if first and first.t == "para" then
    first.t = "term"
  else
    table.insert(children, 1, new_node("term"))
  end
  if children[2] then
    local defn = new_node("definition")
    local body = {}
    for i = 2, #children do
      body[#body + 1] = children[i]
      children[i] = nil
    end
    defn.c = body
    children[2] = defn
  end
end
| 344 | |
-- Settles the final style of a list: of the candidate styles still recorded
-- in `list.styles`, the one with the lowest priority number is chosen. The
-- start number is then derived from the first item's marker, and the
-- temporary `styles` and `startmarker` fields are cleared.
local function resolve_style(list)
  local chosen = nil
  for name, priority in pairs(list.styles) do
    if not chosen or priority < chosen.priority then
      chosen = { name = name, priority = priority }
    end
  end
  local marker = list.startmarker
  list.style = chosen.name
  list.start = get_list_start(marker, chosen.name)
  list.styles = nil
  list.startmarker = nil
end
| 357 | |
-- Returns the text of a verbatim-like node. One space is dropped from the
-- start when the text begins with spaces followed by a backtick, and one
-- from the end when it ends with a backtick followed by spaces.
local function get_verbatim_content(node)
  local text = get_string_content(node)
  if find(text, "^ +`") then
    text = text:sub(2)
  end
  if find(text, "` +$") then
    text = text:sub(1, -2)
  end
  return text
end
| 369 | |
-- Wraps headings and the content that follows them in nested "section"
-- nodes. A heading closes every open section of the same or deeper level and
-- then opens a new section, which takes over the heading's id.
-- Returns a new "doc" node, or `ast` itself when it has no children.
local function add_sections(ast)
  if not has_children(ast) then
    return ast
  end
  local root = new_node("doc")
  -- stack of open sections; the document root sits at the bottom
  local open = { { sec = root, level = 0 } }

  -- pop the innermost section and attach it to its parent
  local function close_innermost()
    local finished = table.remove(open).sec
    add_child(open[#open].sec, finished)
  end

  for _, node in ipairs(ast.c) do
    if node.t == "heading" then
      local level = node.level
      while open[#open].level >= level do
        close_innermost()
      end
      -- the innermost open section is now shallower than this heading
      local section = new_node("section")
      section.attr = new_attributes{id = node.attr.id}
      node.attr.id = nil
      add_child(section, node)
      open[#open + 1] = { sec = section, level = level }
    else
      add_child(open[#open].sec, node)
    end
  end
  while #open > 1 do
    close_innermost()
  end
  assert(open[1].sec == root)
  return root
end
| 403 | |
| 404 | |
--- Create an abstract syntax tree based on an event
--- stream and references.
--- The parser must provide `subject` (the source text) and an `events()`
--- iterator yielding start position, end position and annotation; an
--- optional `warn` function receives tables with `message` and `pos`.
--- The returned "doc" node also carries `references` and `footnotes` tables.
--- @param parser (Parser) djot streaming parser
--- @param sourcepos (boolean) if true, include source positions
--- @return table representing the AST
local function to_ast(parser, sourcepos)
  local subject = parser.subject
  local warn = parser.warn
  if not warn then
    warn = function() end
  end
  local sourceposmap
  if sourcepos then
    sourceposmap = make_sourcepos_map(subject)
  end
  local references = {}
  local footnotes = {}
  local identifiers = {} -- identifiers used (to ensure uniqueness)

  -- generate auto identifier for heading
  local function get_identifier(s)
    local base = s:gsub("[][~!@#$%^&*(){}`,.<>\\|=+/?]","")
                  :gsub("^%s+",""):gsub("%s+$","")
                  :gsub("%s+","-")
    local i = 0
    local ident = base
    -- generate unique id
    while ident == "" or identifiers[ident] do
      i = i + 1
      if base == "" then
        base = "s"
      end
      ident = base .. "-" .. tostring(i)
    end
    identifiers[ident] = true
    return ident
  end

  -- "line:col:charpos" string for a byte position (nil for a nil position)
  local function format_sourcepos(bytepos)
    if bytepos then
      return string.format("%d:%d:%d", sourceposmap.line[bytepos],
              sourceposmap.col[bytepos], sourceposmap.charpos[bytepos])
    end
  end

  local function set_startpos(node, pos)
    if sourceposmap then
      local sp = format_sourcepos(pos)
      if node.pos then
        node.pos[1] = sp
      else
        node.pos = {sp, nil}
      end
    end
  end

  -- only nodes that already have a start position get an end position
  local function set_endpos(node, pos)
    if sourceposmap and node.pos then
      node.pos[2] = format_sourcepos(pos)
    end
  end

  local blocktag = {
    heading = true,
    div = true,
    list = true,
    list_item = true,
    code_block = true,
    para = true,
    blockquote = true,
    table = true,
    thematic_break = true,
    raw_block = true,
    reference_definition = true,
    footnote = true
  }

  -- block attributes seen since the last block; they are attached to the
  -- next block-level node that is created
  local block_attributes = nil
  local function add_block_attributes(node)
    if block_attributes and blocktag[node.t:gsub("%|.*","")] then
      for i=1,#block_attributes do
        insert_attributes_from_nodes(node, block_attributes[i])
      end
      -- add to identifiers table so we don't get duplicate auto-generated ids
      if node.attr and node.attr.id then
        identifiers[node.attr.id] = true
      end
      block_attributes = nil
    end
  end

  -- two variables used for tight/loose list determination:
  local tags = {} -- used to keep track of blank lines
  local matchidx = 0 -- keep track of the index of the match

  local function is_tight(startidx, endidx, is_last_item)
    -- see if there are any blank lines between blocks in a list item.
    local blanklines = 0
    -- we don't care about blank lines at very end of list
    if is_last_item then
      while tags[endidx] == "blankline" or tags[endidx] == "-list_item" do
        endidx = endidx - 1
      end
    end
    for i=startidx, endidx do
      local tag = tags[i]
      if tag == "blankline" then
        if not ((string.find(tags[i+1], "%+list_item") or
                (string.find(tags[i+1], "%-list_item") and
                 (is_last_item or
                   string.find(tags[i+2], "%-list_item"))))) then
          -- don't count blank lines before list starts
          -- don't count blank lines at end of nested lists or end of last item
          blanklines = blanklines + 1
        end
      end
    end
    return (blanklines == 0)
  end

  local function add_child_to_tip(containers, child)
    if containers[#containers].t == "list" and
        not (child.t == "list_item" or child.t == "definition_list_item") then
      -- close list
      local oldlist = table.remove(containers)
      add_child_to_tip(containers, oldlist)
    end
    if child.t == "list" then
      if child.pos then
        child.pos[2] = child.c[#child.c].pos[2]
      end
      -- calculate tightness (TODO not quite right)
      local tight = true
      for i=1,#child.c do
        tight = tight and is_tight(child.c[i].startidx,
                                     child.c[i].endidx, i == #child.c)
        child.c[i].startidx = nil
        child.c[i].endidx = nil
      end
      child.tight = tight

      -- resolve style if still ambiguous
      resolve_style(child)
    end
    add_child(containers[#containers], child)
  end


  -- process a match:
  -- containers is the stack of containers, with #container
  -- being the one that would receive a new node
  local function handle_match(containers, startpos, endpos, annot)
    matchidx = matchidx + 1
    local mod, tag = string.match(annot, "^([-+]?)(.+)")
    tags[matchidx] = annot
    if ignorable[tag] then
      return
    end
    if mod == "+" then
      -- process open match:
      -- * open a new node and put it at end of containers stack
      -- * depending on the tag name, do other things
      local node = new_node(tag)
      set_startpos(node, startpos)

      -- add block attributes if any have accumulated:
      add_block_attributes(node)

      if tag == "heading" then
        node.level = (endpos - startpos) + 1

      elseif find(tag, "^list_item") then
        node.t = "list_item"
        node.startidx = matchidx -- for tight/loose determination
        local _, _, style_marker = string.find(tag, "(%|.*)")
        local styles = {}
        if style_marker then
          local i=1
          for sty in string.gmatch(style_marker, "%|([^%|%]]*)") do
            styles[sty] = i
            i = i + 1
          end
        end
        node.style_marker = style_marker

        local marker = string.match(subject, "^%S+", startpos)

        -- adjust container stack so that the tip can accept this
        -- kind of list item, adding a list if needed and possibly
        -- closing an existing list

        local tip = containers[#containers]
        local needs_new_list = true
        if tip.t == "list" then
          -- it's a list, but is it the right kind?
          local matched_styles = {}
          local has_match = false
          for k,_ in pairs(styles) do
            if tip.styles[k] then
              has_match = true
              matched_styles[k] = styles[k]
            end
          end
          if has_match then
            -- yes, list can accept this item
            tip.styles = matched_styles
            needs_new_list = false
          else
            -- no, list can't accept this item ; close it
            local oldlist = table.remove(containers)
            add_child_to_tip(containers, oldlist)
          end
        end
        if needs_new_list then
          -- either the container was not a list, or the previous list was
          -- just closed: open a list with this item's candidate styles.
          -- Attributes collected for the item are moved to the list.
          local list = new_node("list")
          set_startpos(list, startpos)
          list.styles = styles
          list.attr = node.attr
          list.startmarker = marker
          node.attr = nil
          containers[#containers + 1] = list
        end

      end

      -- add to container stack
      containers[#containers + 1] = node

    elseif mod == "-" then
      -- process close match:
      -- * check end of containers stack; if tag matches, add
      --   end position, pop the item off the stack, and add
      --   it as a child of the next container on the stack
      -- * if it doesn't match, raise an error

      if containers[#containers].t == "list" then
        local listnode = table.remove(containers)
        add_child_to_tip(containers, listnode)
      end

      if tag == containers[#containers].t then
        local node = table.remove(containers)
        set_endpos(node, endpos)

        if node.t == "block_attributes" then
          if not block_attributes then
            block_attributes = {}
          end
          block_attributes[#block_attributes + 1] = node.c
          return -- we don't add this to parent; instead we store
          -- the block attributes and add them to the next block

        elseif node.t == "attributes" then
          -- parse attributes, add to last node
          local tip = containers[#containers]
          --- @type AST|false
          local prevnode = has_children(tip) and tip.c[#tip.c]
          if prevnode then
            local endswithspace = false
            if prevnode.t == "str" then
              -- split off last consecutive word of string
              -- to which to attach attributes
              local lastwordpos = string.find(prevnode.s, "[^%s]+$")
              if not lastwordpos then
                endswithspace = true
              elseif lastwordpos > 1 then
                local newnode = new_node("str")
                newnode.s = sub(prevnode.s, lastwordpos, -1)
                prevnode.s = sub(prevnode.s, 1, lastwordpos - 1)
                add_child_to_tip(containers, newnode)
                prevnode = newnode
              end
            end
            if has_children(node) and not endswithspace then
              insert_attributes_from_nodes(prevnode, node.c)
            else
              warn({message = "Ignoring unattached attribute", pos = startpos})
            end
          else
            warn({message = "Ignoring unattached attribute", pos = startpos})
          end
          return -- don't add the attribute node to the tree

        elseif tag == "reference_definition" then
          local dest = ""
          local key
          for i=1,#node.c do
            if node.c[i].t == "reference_key" then
              key = node.c[i].s
            end
            if node.c[i].t == "reference_value" then
              dest = dest .. node.c[i].s
            end
          end
          if key then
            references[key] = new_node("reference")
            references[key].destination = dest
            if node.attr then
              references[key].attr = node.attr
            end
          else
            -- a nil key cannot be used as a table index
            warn({message = "Ignoring reference definition without key",
                  pos = startpos})
          end
          return -- don't include in tree

        elseif tag == "footnote" then
          local label
          if has_children(node) and node.c[1].t == "note_label" then
            label = node.c[1].s
            table.remove(node.c, 1)
          end
          if label then
            footnotes[label] = node
          end
          return -- don't include in tree


        elseif tag == "table" then

          -- Children are the rows. Look for a separator line:
          -- if found, make the preceding rows headings
          -- and set attributes for column alignments on the table.

          local i=1
          local aligns = {}
          while i <= #node.c do
            local found, align, _
            if node.c[i].t == "row" then
              local row = node.c[i].c
              for j=1,#row do
                found, _, align = find(row[j].t, "^separator_(.*)")
                if not found then
                  break
                end
                aligns[j] = align
              end
              if found and #aligns > 0 then
                -- set previous row to head and adjust aligns
                local prevrow = node.c[i - 1]
                if prevrow and prevrow.t == "row" then
                  prevrow.head = true
                  for k=1,#prevrow.c do
                    -- set head on cells too
                    prevrow.c[k].head = true
                    if aligns[k] ~= "default" then
                      prevrow.c[k].align = aligns[k]
                    end
                  end
                end
                table.remove(node.c, i) -- remove sep line
                -- we don't need to increment i because we removed ith elt
              else
                if #aligns > 0 then
                  for l=1,#node.c[i].c do
                    if aligns[l] ~= "default" then
                      node.c[i].c[l].align = aligns[l]
                    end
                  end
                end
                i = i + 1
              end
            else
              -- not a row: skip it, otherwise the loop would never advance
              i = i + 1
            end
          end

        elseif tag == "code_block" then
          if has_children(node) then
            if node.c[1].t == "code_language" then
              node.lang = node.c[1].s
              table.remove(node.c, 1)
            elseif node.c[1].t == "raw_format" then
              local fmt = node.c[1].s:sub(2)
              table.remove(node.c, 1)
              node.t = "raw_block"
              node.format = fmt
            end
          end
          node.s = get_string_content(node)
          node.c = nil

        elseif find(tag, "^list_item") then
          node.t = "list_item"
          node.endidx = matchidx -- for tight/loose determination

          if node.style_marker == "|:" then
            make_definition_list_item(node)
          end

          if node.style_marker == "|X" and has_children(node) then
            if node.c[1].t == "checkbox_checked" then
              node.checkbox = "checked"
              table.remove(node.c, 1)
            elseif node.c[1].t == "checkbox_unchecked" then
              node.checkbox = "unchecked"
              table.remove(node.c, 1)
            end
          end

          node.style_marker = nil

        elseif tag == "inline_math" then
          node.t = "math"
          node.s = get_verbatim_content(node)
          node.c = nil
          node.display = false
          node.attr = new_attributes{class = "math inline"}

        elseif tag == "display_math" then
          node.t = "math"
          node.s = get_verbatim_content(node)
          node.c = nil
          node.display = true
          node.attr = new_attributes{class = "math display"}

        elseif tag == "imagetext" then
          node.t = "image"

        elseif tag == "linktext" then
          node.t = "link"

        elseif tag == "div" then
          node.c = node.c or {}
          if node.c[1] and node.c[1].t == "class" then
            node.attr = new_attributes(node.attr)
            insert_attribute(node.attr, "class", get_string_content(node.c[1]))
            table.remove(node.c, 1)
          end

        elseif tag == "verbatim" then
          node.s = get_verbatim_content(node)
          node.c = nil

        elseif tag == "url" then
          node.destination = get_string_content(node)

        elseif tag == "email" then
          node.destination = "mailto:" .. get_string_content(node)

        elseif tag == "caption" then
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          if prevnode and prevnode.t == "table" then
            -- move caption in table node
            table.insert(prevnode.c, 1, node)
          else
            warn({ message = "Ignoring caption without preceding table",
                   pos = startpos })
          end
          return

        elseif tag == "heading" then
          local heading_str =
                 get_string_content(node):gsub("^%s+",""):gsub("%s+$","")
          if not node.attr then
            node.attr = new_attributes{}
          end
          if not node.attr.id then  -- generate id attribute from heading
            insert_attribute(node.attr, "id", get_identifier(heading_str))
          end
          -- insert into references unless there's a same-named one already:
          if not references[heading_str] then
            references[heading_str] =
              new_node("reference")
            references[heading_str].destination = "#" .. node.attr.id
          end

        elseif tag == "destination" then
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          assert(prevnode and (prevnode.t == "image" or prevnode.t == "link"),
                   "destination with no preceding link or image")
          prevnode.destination = get_string_content(node):gsub("\r?\n", "")
          return  -- do not put on container stack

        elseif tag == "reference" then
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          assert(prevnode and (prevnode.t == "image" or prevnode.t == "link"),
                 "reference with no preceding link or image")
          if has_children(node) then
            prevnode.reference = get_string_content(node):gsub("\r?\n", " ")
          else
            prevnode.reference = get_string_content(prevnode):gsub("\r?\n", " ")
          end
          return  -- do not put on container stack
        end

        add_child_to_tip(containers, node)
      else
        assert(false, "unmatched " .. annot .. " encountered at byte " ..
                  startpos)
      end
    else
      -- process leaf node:
      -- * add position info
      -- * special handling depending on tag type
      -- * add node as child of container at end of containers stack
      local node = new_node(tag)
      add_block_attributes(node)
      set_startpos(node, startpos)
      set_endpos(node, endpos)

      -- special handling:
      if tag == "softbreak" then
        node.s = nil
      elseif tag == "reference_key" then
        node.s = sub(subject, startpos + 1, endpos - 1)
      elseif tag == "footnote_reference" then
        node.s = sub(subject, startpos + 2, endpos - 1)
      elseif tag == "symbol" then
        node.alias = sub(subject, startpos + 1, endpos - 1)
      elseif tag == "raw_format" then
        local tip = containers[#containers]
        local prevnode = has_children(tip) and tip.c[#tip.c]
        if prevnode and prevnode.t == "verbatim" then
          -- a raw format right after a verbatim node turns that node into
          -- a raw inline of the given format
          local s = get_string_content(prevnode)
          prevnode.t = "raw_inline"
          prevnode.s = s
          prevnode.c = nil
          prevnode.format = sub(subject, startpos + 2, endpos - 1)
          return  -- don't add this node to containers
        else
          node.s = sub(subject, startpos, endpos)
        end
      else
        node.s = sub(subject, startpos, endpos)
      end

      add_child_to_tip(containers, node)

    end
  end

  local doc = new_node("doc")
  local containers = {doc}
  for sp, ep, annot in parser:events() do
    handle_match(containers, sp, ep, annot)
  end
  -- close any open containers
  while #containers > 1 do
    local node = table.remove(containers)
    add_child_to_tip(containers, node)
    -- note: doc container doesn't have pos, so we check:
    if sourceposmap and containers[#containers].pos then
      containers[#containers].pos[2] = node.pos[2]
    end
  end
  doc = add_sections(doc)

  doc.references = references
  doc.footnotes = footnotes

  return doc
end
| 967 | |
-- Keys that render_node does not print as generic key=value fields: they are
-- either printed in a dedicated form or not printed by it at all.
local unrendered_keys = {
  children = true,
  tag = true,
  pos = true,
  attr = true,
  references = true,
  footnotes = true,
}

-- Writes one line for `node` (tag, optional position, remaining fields and
-- attributes) to `handle`, indented by `indent` spaces, then renders each
-- child two spaces deeper. Beyond an indent of 128 a placeholder line is
-- written instead. A node without a tag is reported on stderr and the
-- process exits with status 1.
local function render_node(node, handle, indent)
  indent = indent or 0
  handle:write(rep(" ", indent))
  if indent > 128 then
    handle:write("(((DEEPLY NESTED CONTENT OMITTED)))\n")
    return
  end

  if not node.t then
    io.stderr:write("Encountered node without tag:\n" ..
                    require'inspect'(node))
    os.exit(1)
  end

  handle:write(node.t)
  local pos = node.pos
  if pos then
    handle:write(format(" (%s-%s)", pos[1], pos[2]))
  end
  for key, value in pairs(node) do
    if type(key) == "string" and not unrendered_keys[key] then
      handle:write(format(" %s=%q", key, tostring(value)))
    end
  end
  local attr = node.attr
  if attr then
    for key, value in pairs(attr) do
      handle:write(format(" %s=%q", key, value))
    end
  end
  handle:write("\n")

  local children = node.c
  if children then
    for _, child in ipairs(children) do
      render_node(child, handle, indent + 2)
    end
  end
end
| 1005 | |
--- Render an AST in human-readable form, with indentation
--- showing the hierarchy.
--- After the tree itself, non-empty `references` and `footnotes` tables are
--- listed under their own headings, with labels in sorted order so that the
--- output is the same on every run.
--- @param doc (AST) djot AST
--- @param handle (StringHandle) handle to which to write content
local function render(doc, handle)
  render_node(doc, handle, 0)

  -- write one labelled table (references or footnotes), if it is non-empty
  local function render_labelled(title, entries)
    if next(entries) == nil then
      return
    end
    -- plain tables have no defined iteration order, so sort the labels
    local labels = {}
    for label in pairs(entries) do
      labels[#labels + 1] = label
    end
    table.sort(labels)
    handle:write(title .. "\n")
    for _, label in ipairs(labels) do
      handle:write(format(" [%q] =\n", label))
      render_node(entries[label], handle, 4)
    end
  end

  render_labelled("references", doc.references)
  render_labelled("footnotes", doc.footnotes)
end
| 1027 | |
--- @export
-- Public interface of the module.
local exports = {}
exports.to_ast = to_ast
exports.render = render
exports.insert_attribute = insert_attribute
exports.copy_attributes = copy_attributes
exports.new_attributes = new_attributes
exports.new_node = new_node
exports.add_child = add_child
exports.has_children = has_children
return exports