size: 30 KiB

1--- @module 'djot.ast'
2--- Construct an AST for a djot document.
3
4--- @class Attributes
5--- @field class? string
6--- @field id? string
7
8--- @class AST
9--- @field t string tag for the node
10--- @field s? string text for the node
11--- @field c AST[] child node
12--- @field alias string
13--- @field level integer
14--- @field startidx integer
15--- @field startmarker string
16--- @field styles table
17--- @field style_marker string
18--- @field attr Attributes
19--- @field display boolean
20--- @field references table
21--- @field footnotes table
22--- @field pos? string[]
23--- @field destination? string[]
24
-- Interpreters that lack the `utf8` library (i.e. older than Lua 5.3) may
-- not honour the __pairs metamethod, which the node and attribute tables
-- below rely on for ordered iteration.  Replace the global `pairs` with a
-- wrapper that dispatches to __pairs when present.
-- (Derived from the compat53 rock.)
if not utf8 then
  local builtin_pairs = pairs
  pairs = function(t)
    local meta = getmetatable(t)
    local custom = type(meta) == "table" and meta.__pairs
    if type(custom) == "function" then
      return custom(t)
    end
    return builtin_pairs(t)
  end
end
39local unpack = unpack or table.unpack
40
41local find, lower, sub, rep, format =
42 string.find, string.lower, string.sub, string.rep, string.format
43
-- Build lookup tables translating a byte offset of `input` into a source
-- position.  The result holds three sparse arrays indexed by byte position:
--   map.line[bytepos]     line number (starting at 1)
--   map.col[bytepos]      column within the line, counted per lead byte
--   map.charpos[bytepos]  running count of lead bytes since the start
-- Every byte of a multi-byte sequence receives the position of the lead
-- byte that introduced it.  One extra entry is stored for the byte position
-- just past the last one processed.
local function make_sourcepos_map(input)
  local lines, cols, charposes = {}, {}, {}
  local line, col, charpos = 1, 0, 0
  local bytepos = 1

  local lead = string.byte(input, bytepos)
  while lead do
    col = col + 1
    charpos = charpos + 1
    -- number of bytes in this sequence, judged from the lead byte alone
    local width = 4
    if lead < 0xC0 then
      width = 1
    elseif lead < 0xE0 then
      width = 2
    elseif lead < 0xF0 then
      width = 3
    end
    for pos = bytepos, bytepos + width - 1 do
      lines[pos] = line
      cols[pos] = col
      charposes[pos] = charpos
    end
    bytepos = bytepos + width
    if lead == 10 then -- newline: following bytes belong to the next line
      line = line + 1
      col = 0
    end
    lead = string.byte(input, bytepos)
  end

  -- entry for the position after the end of the input
  lines[bytepos] = line + 1
  cols[bytepos] = 1
  charposes[bytepos] = charpos + 1

  return {line = lines, col = cols, charpos = charposes}
end
87
-- Append the plain-text content of `node` to the array `buffer`.
-- A node with text (`s`) contributes that text; a "softbreak" node
-- contributes a newline; any other node contributes the content of its
-- children (`c`), visited in order.
local function add_string_content(node, buffer)
  local text = node.s
  if text then
    buffer[#buffer + 1] = text
    return
  end
  if node.t == "softbreak" then
    buffer[#buffer + 1] = "\n"
    return
  end
  local children = node.c
  if children then
    for idx = 1, #children do
      add_string_content(children[idx], buffer)
    end
  end
end
99
-- Return the plain-text content of `node` (see add_string_content) as a
-- single string.
local function get_string_content(node)
  local pieces = {}
  add_string_content(node, pieces)
  local text = table.concat(pieces)
  return text
end
105
-- Value of each (lowercase) roman digit.
local roman_digits = { i = 1, v = 5, x = 10, l = 50, c = 100, d = 500, m = 1000 }

-- Convert a roman numeral (either case) to a number.
-- Raises an error if `s` contains a character that is not a roman digit.
local function roman_to_number(s)
  local sum = 0
  local right = 0 -- value of the digit immediately to the right
  -- walk the digits from last to first
  for pos = #s, 1, -1 do
    local ch = lower(sub(s, pos, pos))
    local value = roman_digits[ch]
    assert(value ~= nil, "Encountered bad character in roman numeral " .. s)
    if value < right then
      -- a smaller digit before a larger one is subtracted, e.g. ix
      sum = sum - value
    else
      sum = sum + value
    end
    right = value
  end
  return sum
end
134
-- Compute the start number of an ordered list from its first marker.
-- Punctuation is stripped from both `marker` and `style`; what remains of
-- `style` selects the numbering scheme ("1", "A", "a", "I" or "i").
-- Returns nil when the style has no numbering component.
local function get_list_start(marker, style)
  local kind = string.gsub(style, "%p", "")
  local label = string.gsub(marker, "%p", "")
  if kind == "1" then
    return tonumber(label)
  end
  if kind == "A" or kind == "a" then
    -- alphabetic: offset from the first letter of the same case
    return (string.byte(label) - string.byte(kind) + 1)
  end
  if kind == "I" or kind == "i" then
    return roman_to_number(label)
  end
  if kind == "" then
    return nil
  end
end
152
-- Event tags that never become AST nodes: handle_match records the
-- annotation and then returns without creating anything for them.
local ignorable = {}
for _, name in ipairs({ "image_marker", "escape", "blankline" }) do
  ignorable[name] = true
end
158
--- Build a function suitable for use as a `__pairs` metamethod that
--- visits a table's entries in a deterministic order.
--- @param compare_function (function) ordering predicate handed to table.sort
--- @param to_displaykey (function) maps a stored key to the key reported
--- to the loop; it is only ever called with keys that exist in the table
--- @return (function) function(tbl) returning iterator, tbl, nil
local function sortedpairs(compare_function, to_displaykey)
  return function(tbl)
    -- snapshot the keys so they can be sorted
    local keys = {}
    local k = next(tbl)
    while k ~= nil do -- compare against nil: `false` is a legal key
      keys[#keys + 1] = k
      k = next(tbl, k)
    end
    table.sort(keys, compare_function)
    local keyindex = 0
    local function ordered_next(tabl, _)
      keyindex = keyindex + 1
      local key = keys[keyindex]
      if key == nil then
        -- exhausted; do not hand nil to to_displaykey
        return nil
      end
      -- use canonical names
      return to_displaykey(key), tabl[key]
    end
    -- Return an iterator function, the table, starting point
    return ordered_next, tbl, nil
  end
end
185
-- Metatable shared by all AST nodes.
-- It provides children, tag, and text as aliases of c, t, s
-- (the short names are what this module uses internally, for better
-- performance), and a __pairs that iterates in a fixed order while
-- reporting the long names.
local mt = {}
-- long (public) name -> short (stored) name
local special = {
  children = 'c',
  text = 's',
  tag = 't' }
-- short (stored) name -> long (public) name
local displaykeys = {
  c = 'children',
  s = 'text',
  t = 'tag' }
-- Position classes for iteration: t first, s second, then every other key
-- in alphabetical order (rank 3), then c, references, footnotes.
local key_rank = {
  t = 1,
  s = 2,
  c = 4,
  references = 5,
  footnotes = 6 }
-- __index only fires for keys missing from the raw table, so this simply
-- redirects the long alias to its short key.
mt.__index = function(tbl, key)
  return rawget(tbl, special[key] or key)
end
-- Writes through a long alias are stored under the short key.
mt.__newindex = function(tbl, key, val)
  rawset(tbl, special[key] or key, val)
end
-- The comparator is a strict ordering (never true for a == b), as
-- table.sort requires.
mt.__pairs = sortedpairs(function(a, b)
    local ra, rb = key_rank[a] or 3, key_rank[b] or 3
    if ra ~= rb then
      return ra < rb
    end
    -- same class: only the "other keys" class has more than one member
    return ra == 3 and a < b
  end, function(k) return displaykeys[k] or k end)
232
233
--- Create a new AST node.
--- The node starts with only its tag set and carries the shared node
--- metatable.
--- @param tag (string) tag for the node
--- @return (AST) node (table)
local function new_node(tag)
  return setmetatable({ t = tag }, mt)
end
242
--- Add `child` as a child of `node`.
--- The children array is created on first use.
--- @param node (AST) node parent node
--- @param child (AST) node child node
local function add_child(node, child)
  local kids = node.c
  if kids then
    kids[#kids + 1] = child
  else
    node.c = { child }
  end
end
253
--- Returns true if `node` has children.
--- Always returns a real boolean: false (not nil) when the node has no
--- children array or the array is empty.
--- @param node (AST) node to check
--- @return (boolean) true if node has children
local function has_children(node)
  local children = node.c
  if not children then
    return false
  end
  return #children > 0
end
260
--- Returns an attributes object.
--- When `tbl` is given it is used (and returned) as the object itself,
--- not copied.
--- @param tbl (Attributes?) table of attributes and values
--- @return (Attributes) attributes object (table including special metatable for
--- deterministic order of iteration)
local function new_attributes(tbl)
  local by_name = function(a, b) return a < b end
  local identity = function(k) return k end
  -- iterate keys in ascending order, reported unchanged
  return setmetatable(tbl or {}, { __pairs = sortedpairs(by_name, identity) })
end
272
--- Insert an attribute into an attributes object.
--- Runs of whitespace in `val` are collapsed to a single space.  A "class"
--- value is appended (space-separated) to any existing class; every other
--- key overwrites the previous value.
--- @param attr (Attributes)
--- @param key (string) key of new attribute
--- @param val (string) value of new attribute
local function insert_attribute(attr, key, val)
  local normalized = val:gsub("%s+", " ") -- normalize spaces
  if key == "class" and attr.class then
    attr.class = attr.class .. " " .. normalized
  else
    attr[key] = normalized
  end
end
289
--- Copy attributes from `source` to `target`.
--- Each pair goes through insert_attribute, so values are normalized and
--- classes accumulate.  A nil `source` is a no-op.
--- @param target (Attributes)
--- @param source (table) associating keys and values
local function copy_attributes(target, source)
  if not source then
    return
  end
  for key, value in pairs(source) do
    insert_attribute(target, key, value)
  end
end
300
--- Populate `targetnode.attr` from a sequence of attribute nodes.
--- "id" and "class" nodes are inserted directly.  A "key" node takes its
--- value from the run of "value" nodes that immediately follows it, joined
--- with newlines after backslash escapes of punctuation are resolved.
--- Nodes with any other tag are skipped.
--- @param targetnode (AST)
--- @param cs (AST[]) attribute nodes
local function insert_attributes_from_nodes(targetnode, cs)
  if not targetnode.attr then
    targetnode.attr = new_attributes()
  end
  local attr = targetnode.attr
  local idx = 1
  while idx <= #cs do
    local kind, text = cs[idx].t, cs[idx].s
    if kind == "id" or kind == "class" then
      insert_attribute(attr, kind, text)
    elseif kind == "key" then
      local pieces = {}
      local following = cs[idx + 1]
      while following and following.t == "value" do
        -- resolve backslash escapes
        pieces[#pieces + 1] = following.s:gsub("\\(%p)", "%1")
        idx = idx + 1
        following = cs[idx + 1]
      end
      insert_attribute(attr, text, table.concat(pieces, "\n"))
    end
    idx = idx + 1
  end
end
322
--- Turn a list item into a definition list item, in place.
--- The first child becomes the "term" (a leading "para" is retagged;
--- otherwise an empty term node is inserted in front).  All remaining
--- children are moved under a single new "definition" node.
--- @param node (AST)
local function make_definition_list_item(node)
  node.t = "definition_list_item"
  if not has_children(node) then
    node.c = {}
  end
  local children = node.c
  local first = children[1]
  if first and first.t == "para" then
    first.t = "term"
  else
    table.insert(children, 1, new_node("term"))
  end
  if children[2] then
    local defn = new_node("definition")
    local body = {}
    for idx = 2, #children do
      body[#body + 1] = children[idx]
      children[idx] = nil
    end
    defn.c = body
    children[2] = defn
  end
end
344
-- Settle the final style of a list, in place.
-- `list.styles` maps candidate style names to a priority; the name with the
-- smallest priority wins.  The start number is then computed from the
-- list's first marker, and the temporary `styles` / `startmarker` fields
-- are cleared.
local function resolve_style(list)
  local best = nil
  for name, priority in pairs(list.styles) do
    if best == nil or priority < best.priority then
      best = {name = name, priority = priority}
    end
  end
  local chosen = best.name
  list.style = chosen
  list.styles = nil
  list.start = get_list_start(list.startmarker, chosen)
  list.startmarker = nil
end
357
-- Return the text content of a verbatim-like node.
-- When the content starts with spaces followed by a backtick, one leading
-- space is dropped; when it ends with a backtick followed by spaces, one
-- trailing space is dropped.
local function get_verbatim_content(node)
  local text = get_string_content(node)
  -- trim space next to ` at beginning or end
  if find(text, "^ +`") then
    text = sub(text, 2)
  end
  if find(text, "` +$") then
    text = sub(text, 1, -2)
  end
  return text
end
369
-- Wrap the top-level children of `ast` in nested "section" nodes.
-- Each heading opens a section containing the heading and everything up to
-- the next heading of the same or a shallower level.  The heading's id
-- attribute is moved onto its section.  Returns a new "doc" node, or `ast`
-- itself when it has no children.
local function add_sections(ast)
  if not has_children(ast) then
    return ast
  end
  local root = new_node("doc")
  -- stack of open sections, innermost last; the root counts as level 0
  local stack = { {sec = root, level = 0} }

  -- pop the innermost section and attach it to the one below it
  local function close_innermost()
    local closed = table.remove(stack).sec
    add_child(stack[#stack].sec, closed)
  end

  for _, node in ipairs(ast.c) do
    if node.t ~= "heading" then
      add_child(stack[#stack].sec, node)
    else
      local level = node.level
      -- close every open section that is not shallower than this heading
      while stack[#stack].level >= level do
        close_innermost()
      end
      local section = new_node("section")
      section.attr = new_attributes{id = node.attr.id}
      node.attr.id = nil
      add_child(section, node)
      stack[#stack + 1] = {sec = section, level = level}
    end
  end
  while #stack > 1 do
    close_innermost()
  end
  assert(stack[1].sec == root)
  return root
end
403
404
--- Create an abstract syntax tree based on an event
--- stream and references.
---
--- The parser is read through three members: `parser.subject` (the source
--- string), `parser.warn` (optional; called with a table holding `message`
--- and `pos`), and `parser:events()` (iterated for start position, end
--- position and annotation).  Annotations prefixed with "+" open a
--- container, those prefixed with "-" close one, and anything else is a
--- leaf.  The finished tree is passed through add_sections and gets
--- `references` and `footnotes` tables attached.
--- @param parser (Parser) djot streaming parser
--- @param sourcepos (boolean) if true, include source positions
--- @return table representing the AST
local function to_ast(parser, sourcepos)
  local subject = parser.subject
  local warn = parser.warn
  if not warn then
    -- no warning callback supplied: discard warnings
    warn = function() end
  end
  local sourceposmap
  if sourcepos then
    sourceposmap = make_sourcepos_map(subject)
  end
  local references = {}
  local footnotes = {}
  local identifiers = {} -- identifiers used (to ensure uniqueness)

  -- generate auto identifier for heading:
  -- strip the listed punctuation, trim, turn whitespace runs into "-",
  -- then append "-N" until the result is non-empty and unused
  -- (an empty base becomes "s")
  local function get_identifier(s)
    local base = s:gsub("[][~!@#$%^&*(){}`,.<>\\|=+/?]","")
                  :gsub("^%s+",""):gsub("%s+$","")
                  :gsub("%s+","-")
    local i = 0
    local ident = base
    -- generate unique id
    while ident == "" or identifiers[ident] do
      i = i + 1
      if base == "" then
        base = "s"
      end
      ident = base .. "-" .. tostring(i)
    end
    identifiers[ident] = true
    return ident
  end

  -- format a byte position as "line:col:charpos"; returns nothing when
  -- bytepos is nil.  Only called when sourceposmap has been built.
  local function format_sourcepos(bytepos)
    if bytepos then
      return string.format("%d:%d:%d", sourceposmap.line[bytepos],
              sourceposmap.col[bytepos], sourceposmap.charpos[bytepos])
    end
  end

  -- record the start position of node (no-op without source positions)
  local function set_startpos(node, pos)
    if sourceposmap then
      local sp = format_sourcepos(pos)
      if node.pos then
        node.pos[1] = sp
      else
        node.pos = {sp, nil}
      end
    end
  end

  -- record the end position of node; only nodes that already have a
  -- start position are updated
  local function set_endpos(node, pos)
    if sourceposmap and node.pos then
      local ep = format_sourcepos(pos)
      -- NOTE(review): node.pos is already known to be set here, so the
      -- else branch below cannot be reached.
      if node.pos then
        node.pos[2] = ep
      else
        node.pos = {nil, ep}
      end
    end
  end

  -- tags of nodes that may receive pending block attributes
  local blocktag = {
    heading = true,
    div = true,
    list = true,
    list_item = true,
    code_block = true,
    para = true,
    blockquote = true,
    table = true,
    thematic_break = true,
    raw_block = true,
    reference_definition = true,
    footnote = true
  }

  -- block attribute node lists waiting for the next block-level node
  local block_attributes = nil
  local function add_block_attributes(node)
    -- the gsub drops a "|..." suffix from the tag before the lookup
    if block_attributes and blocktag[node.t:gsub("%|.*","")] then
      for i=1,#block_attributes do
        insert_attributes_from_nodes(node, block_attributes[i])
      end
      -- add to identifiers table so we don't get duplicate auto-generated ids
      if node.attr and node.attr.id then
        identifiers[node.attr.id] = true
      end
      block_attributes = nil
    end
  end

  -- two variables used for tight/loose list determination:
  local tags = {} -- used to keep track of blank lines
  local matchidx = 0 -- keep track of the index of the match

  -- startidx/endidx are indices into `tags` delimiting one list item
  local function is_tight(startidx, endidx, is_last_item)
    -- see if there are any blank lines between blocks in a list item.
    local blanklines = 0
    -- we don't care about blank lines at very end of list
    if is_last_item then
      while tags[endidx] == "blankline" or tags[endidx] == "-list_item" do
        endidx = endidx - 1
      end
    end
    for i=startidx, endidx do
      local tag = tags[i]
      if tag == "blankline" then
        if not ((string.find(tags[i+1], "%+list_item") or
                (string.find(tags[i+1], "%-list_item") and
                 (is_last_item or
                   string.find(tags[i+2], "%-list_item"))))) then
          -- don't count blank lines before list starts
          -- don't count blank lines at end of nested lists or end of last item
          blanklines = blanklines + 1
        end
      end
    end
    return (blanklines == 0)
  end

  -- add child to the container at the top of the stack.  An open list at
  -- the top is closed first unless child is a list item.  A list being
  -- added gets its end position, tightness and style finalized here.
  local function add_child_to_tip(containers, child)
    if containers[#containers].t == "list" and
        not (child.t == "list_item" or child.t == "definition_list_item") then
      -- close list
      local oldlist = table.remove(containers)
      add_child_to_tip(containers, oldlist)
    end
    if child.t == "list" then
      if child.pos then
        -- the list ends where its last item ends
        child.pos[2] = child.c[#child.c].pos[2]
      end
      -- calculate tightness (TODO not quite right)
      local tight = true
      for i=1,#child.c do
        tight = tight and is_tight(child.c[i].startidx,
                                     child.c[i].endidx, i == #child.c)
        -- the indices are bookkeeping only; drop them from the tree
        child.c[i].startidx = nil
        child.c[i].endidx = nil
      end
      child.tight = tight

      -- resolve style if still ambiguous
      resolve_style(child)
    end
    add_child(containers[#containers], child)
  end


  -- process a match:
  -- containers is the stack of containers, with #container
  -- being the one that would receive a new node
  local function handle_match(containers, startpos, endpos, annot)
    matchidx = matchidx + 1
    -- mod is "+", "-" or ""; tag is the rest of the annotation
    local mod, tag = string.match(annot, "^([-+]?)(.+)")
    -- every annotation is recorded (including ignorable ones) because
    -- is_tight inspects this sequence
    tags[matchidx] = annot
    if ignorable[tag] then
      return
    end
    if mod == "+" then
      -- process open match:
      -- * open a new node and put it at end of containers stack
      -- * depending on the tag name, do other things
      local node = new_node(tag)
      set_startpos(node, startpos)

      -- add block attributes if any have accumulated:
      add_block_attributes(node)

      if tag == "heading" then
        -- level is the length of the matched span
        -- (presumably the run of heading markers; confirm against parser)
        node.level = (endpos - startpos) + 1

      elseif find(tag, "^list_item") then
        node.t = "list_item"
        node.startidx = matchidx -- for tight/loose determination
        -- the tag carries the candidate styles as "|style" segments;
        -- styles maps each candidate to its position in that sequence
        local _, _, style_marker = string.find(tag, "(%|.*)")
        local styles = {}
        if style_marker then
          local i=1
          for sty in string.gmatch(style_marker, "%|([^%|%]]*)") do
            styles[sty] = i
            i = i + 1
          end
        end
        node.style_marker = style_marker

        -- the list marker is the run of non-space characters at startpos
        local marker = string.match(subject, "^%S+", startpos)

        -- adjust container stack so that the tip can accept this
        -- kind of list item, adding a list if needed and possibly
        -- closing an existing list

        local tip = containers[#containers]
        if tip.t ~= "list" then
          -- container is not a list ; add one
          local list = new_node("list")
          set_startpos(list, startpos)
          list.styles = styles
          -- attributes collected for the item belong to the new list
          list.attr = node.attr
          list.startmarker = marker
          node.attr = nil
          containers[#containers + 1] = list
        else
          -- it's a list, but is it the right kind?
          local matched_styles = {}
          local has_match = false
          for k,_ in pairs(styles) do
            if tip.styles[k] then
              has_match = true
              matched_styles[k] = styles[k]
            end
          end
          if has_match then
            -- yes, list can accept this item
            -- (narrow the list's candidates to the styles both share)
            tip.styles = matched_styles
          else
            -- no, list can't accept this item ; close it
            local oldlist = table.remove(containers)
            add_child_to_tip(containers, oldlist)
            -- add a new sibling list node with the right style
            local list = new_node("list")
            set_startpos(list, startpos)
            list.styles = styles
            list.attr = node.attr
            list.startmarker = marker
            node.attr = nil
            containers[#containers + 1] = list
          end
        end


      end

      -- add to container stack
      containers[#containers + 1] = node

    elseif mod == "-" then
      -- process close match:
      -- * check end of containers stack; if tag matches, add
      --   end position, pop the item off the stack, and add
      --   it as a child of the next container on the stack
      -- * if it doesn't match, raise an error (assertion failure)

      -- a list still open at the tip ends when its container closes
      if containers[#containers].t == "list" then
        local listnode = table.remove(containers)
        add_child_to_tip(containers, listnode)
      end

      if tag == containers[#containers].t then
        local node = table.remove(containers)
        set_endpos(node, endpos)

        if node.t == "block_attributes" then
          if not block_attributes then
            block_attributes = {}
          end
          block_attributes[#block_attributes + 1] = node.c
          return -- we don't add this to parent; instead we store
          -- the block attributes and add them to the next block

        elseif node.t == "attributes" then
          -- parse attributes, add to last node
          local tip = containers[#containers]
          --- @type AST|false
          local prevnode = has_children(tip) and tip.c[#tip.c]
          if prevnode then
            local endswithspace = false
            if prevnode.t == "str" then
              -- split off last consecutive word of string
              -- to which to attach attributes
              local lastwordpos = string.find(prevnode.s, "[^%s]+$")
              if not lastwordpos then
                endswithspace = true
              elseif lastwordpos > 1 then
                local newnode = new_node("str")
                newnode.s = sub(prevnode.s, lastwordpos, -1)
                prevnode.s = sub(prevnode.s, 1, lastwordpos - 1)
                add_child_to_tip(containers, newnode)
                prevnode = newnode
              end
            end
            if has_children(node) and not endswithspace then
              insert_attributes_from_nodes(prevnode, node.c)
            else
              warn({message = "Ignoring unattached attribute", pos = startpos})
            end
          else
            warn({message = "Ignoring unattached attribute", pos = startpos})
          end
          return -- don't add the attribute node to the tree

        elseif tag == "reference_definition" then
          -- the destination is the concatenation of all reference_value
          -- children
          -- NOTE(review): if no reference_key child is present, `key`
          -- stays nil and the assignment into `references` raises.
          local dest = ""
          local key
          for i=1,#node.c do
            if node.c[i].t == "reference_key" then
              key = node.c[i].s
            end
            if node.c[i].t == "reference_value" then
              dest = dest .. node.c[i].s
            end
          end
          references[key] = new_node("reference")
          references[key].destination = dest
          if node.attr then
            references[key].attr = node.attr
          end
          return -- don't include in tree

        elseif tag == "footnote" then
          -- a leading note_label child supplies the key; footnotes
          -- without a label are dropped
          local label
          if has_children(node) and node.c[1].t == "note_label" then
            label = node.c[1].s
            table.remove(node.c, 1)
          end
          if label then
            footnotes[label] = node
          end
          return -- don't include in tree


        elseif tag == "table" then

          -- Children are the rows. Look for a separator line:
          -- if found, make the preceding rows headings
          -- and set attributes for column alignments on the table.
          -- NOTE(review): `i` only advances inside the "row" branch, so a
          -- child that is not a row would make this loop spin forever.

          local i=1
          local aligns = {}
          while i <= #node.c do
            local found, align, _
            if node.c[i].t == "row" then
              local row = node.c[i].c
              for j=1,#row do
                found, _, align = find(row[j].t, "^separator_(.*)")
                if not found then
                  break
                end
                aligns[j] = align
              end
              if found and #aligns > 0 then
                -- set previous row to head and adjust aligns
                local prevrow = node.c[i - 1]
                if prevrow and prevrow.t == "row" then
                  prevrow.head = true
                  for k=1,#prevrow.c do
                    -- set head on cells too
                    prevrow.c[k].head = true
                    if aligns[k] ~= "default" then
                      prevrow.c[k].align = aligns[k]
                    end
                  end
                end
                table.remove(node.c, i) -- remove sep line
                -- we don't need to increment i because we removed ith elt
              else
                -- ordinary row: apply the most recent alignments, if any
                if #aligns > 0 then
                  for l=1,#node.c[i].c do
                    if aligns[l] ~= "default" then
                      node.c[i].c[l].align = aligns[l]
                    end
                  end
                end
                i = i + 1
              end
            end
          end

        elseif tag == "code_block" then
          -- a leading code_language child becomes `lang`; a leading
          -- raw_format child turns the node into a raw_block whose format
          -- is that child's text minus its first character
          if has_children(node) then
            if node.c[1].t == "code_language" then
              node.lang = node.c[1].s
              table.remove(node.c, 1)
            elseif node.c[1].t == "raw_format" then
              local fmt = node.c[1].s:sub(2)
              table.remove(node.c, 1)
              node.t = "raw_block"
              node.format = fmt
            end
          end
          -- flatten the remaining children into the node's text
          node.s = get_string_content(node)
          node.c = nil

        elseif find(tag, "^list_item") then
          node.t = "list_item"
          node.endidx = matchidx -- for tight/loose determination

          if node.style_marker == "|:" then
            make_definition_list_item(node)
          end

          if node.style_marker == "|X" and has_children(node) then
            if node.c[1].t == "checkbox_checked" then
              node.checkbox = "checked"
              table.remove(node.c, 1)
            elseif node.c[1].t == "checkbox_unchecked" then
              node.checkbox = "unchecked"
              table.remove(node.c, 1)
            end
          end

          node.style_marker = nil

        elseif tag == "inline_math" then
          node.t = "math"
          node.s = get_verbatim_content(node)
          node.c = nil
          node.display = false
          node.attr = new_attributes{class = "math inline"}

        elseif tag == "display_math" then
          node.t = "math"
          node.s = get_verbatim_content(node)
          node.c = nil
          node.display = true
          node.attr = new_attributes{class = "math display"}

        elseif tag == "imagetext" then
          node.t = "image"

        elseif tag == "linktext" then
          node.t = "link"

        elseif tag == "div" then
          node.c = node.c or {}
          -- a leading class child is folded into the div's attributes
          if node.c[1] and node.c[1].t == "class" then
            node.attr = new_attributes(node.attr)
            insert_attribute(node.attr, "class", get_string_content(node.c[1]))
            table.remove(node.c, 1)
          end

        elseif tag == "verbatim" then
          node.s = get_verbatim_content(node)
          node.c = nil

        elseif tag == "url" then
          node.destination = get_string_content(node)

        elseif tag == "email" then
          node.destination = "mailto:" .. get_string_content(node)

        elseif tag == "caption" then
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          if prevnode and prevnode.t == "table" then
            -- move caption in table node
            table.insert(prevnode.c, 1, node)
          else
            warn({ message = "Ignoring caption without preceding table",
                   pos = startpos })
          end
          return

        elseif tag == "heading" then
          local heading_str =
                 get_string_content(node):gsub("^%s+",""):gsub("%s+$","")
          if not node.attr then
            node.attr = new_attributes{}
          end
          if not node.attr.id then -- generate id attribute from heading
            insert_attribute(node.attr, "id", get_identifier(heading_str))
          end
          -- insert into references unless there's a same-named one already:
          if not references[heading_str] then
            references[heading_str] =
              new_node("reference")
            references[heading_str].destination = "#" .. node.attr.id
          end

        elseif tag == "destination" then
          -- attach the destination (line breaks removed) to the
          -- preceding link or image
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          assert(prevnode and (prevnode.t == "image" or prevnode.t == "link"),
                 "destination with no preceding link or image")
          prevnode.destination = get_string_content(node):gsub("\r?\n", "")
          return -- do not put on container stack

        elseif tag == "reference" then
          -- attach the reference label (line breaks turned into spaces)
          -- to the preceding link or image; an empty label falls back to
          -- the text of that link or image
          local tip = containers[#containers]
          local prevnode = has_children(tip) and tip.c[#tip.c]
          assert(prevnode and (prevnode.t == "image" or prevnode.t == "link"),
                 "reference with no preceding link or image")
          if has_children(node) then
            prevnode.reference = get_string_content(node):gsub("\r?\n", " ")
          else
            prevnode.reference = get_string_content(prevnode):gsub("\r?\n", " ")
          end
          return -- do not put on container stack
        end

        add_child_to_tip(containers, node)
      else
        assert(false, "unmatched " .. annot .. " encountered at byte " ..
                  startpos)
        return
      end
    else
      -- process leaf node:
      -- * add position info
      -- * special handling depending on tag type
      -- * add node as child of container at end of containers stack
      local node = new_node(tag)
      add_block_attributes(node)
      set_startpos(node, startpos)
      set_endpos(node, endpos)

      -- special handling:
      if tag == "softbreak" then
        node.s = nil
      elseif tag == "reference_key" then
        -- drop the first and last character of the matched span
        node.s = sub(subject, startpos + 1, endpos - 1)
      elseif tag == "footnote_reference" then
        -- drop the first two characters and the last one
        node.s = sub(subject, startpos + 2, endpos - 1)
      elseif tag == "symbol" then
        node.alias = sub(subject, startpos + 1, endpos - 1)
      elseif tag == "raw_format" then
        local tip = containers[#containers]
        local prevnode = has_children(tip) and tip.c[#tip.c]
        if prevnode and prevnode.t == "verbatim" then
          -- a raw_format directly after a verbatim node converts that
          -- node into raw_inline instead of producing a node of its own
          local s = get_string_content(prevnode)
          prevnode.t = "raw_inline"
          prevnode.s = s
          prevnode.c = nil
          prevnode.format = sub(subject, startpos + 2, endpos - 1)
          return -- don't add this node to containers
        else
          node.s = sub(subject, startpos, endpos)
        end
      else
        node.s = sub(subject, startpos, endpos)
      end

      add_child_to_tip(containers, node)

    end
  end

  local doc = new_node("doc")
  local containers = {doc}
  for sp, ep, annot in parser:events() do
    handle_match(containers, sp, ep, annot)
  end
  -- close any open containers
  while #containers > 1 do
    local node = table.remove(containers)
    add_child_to_tip(containers, node)
    -- note: doc container doesn't have pos, so we check:
    if sourceposmap and containers[#containers].pos then
      containers[#containers].pos[2] = node.pos[2]
    end
  end
  doc = add_sections(doc)

  doc.references = references
  doc.footnotes = footnotes

  return doc
end
967
-- Write one node, then its children, to `handle` in indented
-- human-readable form.  A line consists of the tag, the source position
-- (if any), the node's remaining string-keyed fields and finally its
-- attributes.  Children are written two columns deeper.  Beyond an
-- indentation of 128 a placeholder line is written instead.  A node
-- without a tag is reported on stderr and terminates the process.
local function render_node(node, handle, indent)
  local depth = indent or 0
  handle:write(rep(" ", depth))
  if depth > 128 then
    handle:write("(((DEEPLY NESTED CONTENT OMITTED)))\n")
    return
  end

  if not node.t then
    io.stderr:write("Encountered node without tag:\n" ..
                    require'inspect'(node))
    os.exit(1)
  end

  handle:write(node.t)
  local pos = node.pos
  if pos then
    handle:write(format(" (%s-%s)", pos[1], pos[2]))
  end
  -- fields that are rendered separately or not at all
  local hidden = {
    children = true,
    tag = true,
    pos = true,
    attr = true,
    references = true,
    footnotes = true,
  }
  for key, value in pairs(node) do
    if type(key) == "string" and not hidden[key] then
      handle:write(format(" %s=%q", key, tostring(value)))
    end
  end
  local attr = node.attr
  if attr then
    for key, value in pairs(attr) do
      handle:write(format(" %s=%q", key, value))
    end
  end
  handle:write("\n")

  local children = node.c
  if children then
    for _, child in ipairs(children) do
      render_node(child, handle, depth + 2)
    end
  end
end
1005
--- Render an AST in human-readable form, with indentation
--- showing the hierarchy.
--- The tree is followed by a "references" and a "footnotes" listing (each
--- omitted when empty).  Entries of both listings are written in ascending
--- order of their label so that the output is reproducible; plain table
--- traversal order is not specified by Lua.
--- @param doc (AST) djot AST
--- @param handle (StringHandle) handle to which to write content
local function render(doc, handle)
  render_node(doc, handle, 0)

  -- write one labelled listing, sorted by label
  local function render_listing(title, tbl)
    if next(tbl) == nil then
      return
    end
    local entries = {}
    for label, node in pairs(tbl) do
      entries[#entries + 1] = {label, node}
    end
    -- compare via tostring so that unusual (non-string) labels cannot
    -- make the comparison raise
    table.sort(entries, function(a, b)
      return tostring(a[1]) < tostring(b[1])
    end)
    handle:write(title .. "\n")
    for _, entry in ipairs(entries) do
      handle:write(format(" [%q] =\n", entry[1]))
      render_node(entry[2], handle, 4)
    end
  end

  render_listing("references", doc.references)
  render_listing("footnotes", doc.footnotes)
end
1027
--- @export
-- Public interface of the module.
local exports = {
  to_ast = to_ast,
  render = render,
  insert_attribute = insert_attribute,
  copy_attributes = copy_attributes,
  new_attributes = new_attributes,
  new_node = new_node,
  add_child = add_child,
  has_children = has_children,
}
return exports