size: 22 KiB
| 1 | -- this allows the code to work with both lua and luajit: |
| 2 | local unpack = unpack or table.unpack |
| 3 | local attributes = require("djot.attributes") |
| 4 | local find, byte = string.find, string.byte |
| 5 | |
-- Like string.find starting at `startpos`, but a hit only counts when it
-- ends at or before `endpos`. Forwards the match bounds plus at most three
-- captures; yields no values when there is no match inside the bound.
local function bounded_find(subj, patt, startpos, endpos)
  local first, last, cap1, cap2, cap3 = find(subj, patt, startpos)
  if not last or last > endpos then
    return
  end
  return first, last, cap1, cap2, cap3
end
| 13 | |
| 14 | -- General note on the parsing strategy: our objective is to |
| 15 | -- parse without backtracking. To that end, we keep a stack of |
| 16 | -- potential 'openers' for links, images, emphasis, and other |
| 17 | -- inline containers. When we parse a potential closer for |
| 18 | -- one of these constructions, we can scan the stack of openers |
| 19 | -- for a match, which will tell us the location of the potential |
| 20 | -- opener. We can then change the annotation of the match at |
| 21 | -- that location to '+emphasis' or whatever. |
| 22 | |
local InlineParser = {}

-- Create a parser for `subject`. `warn` is an optional callback used to
-- report warnings; when omitted, a do-nothing function is installed.
function InlineParser:new(subject, warn)
  local silent = function() end
  local state = {
    warn = warn or silent,   -- function to issue warnings
    subject = subject,       -- text to parse
    matches = {},            -- table pos : (endpos, annotation)
    openers = {},            -- map from closer_type to array of (pos, data) in reverse order
    verbatim = 0,            -- parsing verbatim span to be ended by n backticks
    verbatim_type = nil,     -- whether verbatim is math or regular
    destination = false,     -- parsing link destination in ()
    firstpos = 0,            -- position of first slice
    lastpos = 0,             -- position of last slice
    allow_attributes = true, -- allow parsing of attributes
    attribute_parser = nil,  -- attribute parser
    attribute_start = nil,   -- start of potential attribute
    attribute_slices = nil,  -- slices we've tried to parse as attributes
  }
  -- method lookups on the instance fall back to the receiver table
  self.__index = self
  return setmetatable(state, self)
end
| 45 | |
-- Record a match, keyed by its start position. A later match with the
-- same start position replaces the earlier one.
function InlineParser:add_match(startpos, endpos, annotation)
  local entry = {startpos, endpos, annotation}
  self.matches[startpos] = entry
end
| 49 | |
-- Push a potential opener onto the stack kept for `name`. The varargs
-- become the fields of the opener record:
--   1 = startpos, 2 = endpos, 3 = annotation, 4 = substartpos, 5 = endpos
--
--   [link text](url)
--   ^         ^^
--   1,2       4 5      3 = "explicit_link"
function InlineParser:add_opener(name, ...)
  local stack = self.openers[name]
  if not stack then
    stack = {}
    self.openers[name] = stack
  end
  stack[#stack + 1] = {...}
end
| 62 | |
-- Discard openers that lie between two matched delimiters.
-- Every opener stack is walked from the top down: an opener wholly inside
-- [startpos, endpos] is removed; an opener whose secondary span (fields 4
-- and 5) lies inside the range only loses fields 3-5; the walk of a stack
-- stops at the first opener that satisfies neither condition.
function InlineParser:clear_openers(startpos, endpos)
  for _, stack in pairs(self.openers) do
    local top = #stack
    local entry = stack[top]
    while entry do
      local sub_start, sub_end = entry[4], entry[5]
      if entry[1] >= startpos and entry[2] <= endpos then
        stack[top] = nil
      elseif sub_start and sub_start >= startpos
             and sub_end and sub_end <= endpos then
        entry[3], entry[4], entry[5] = nil, nil, nil
      else
        break
      end
      top = top - 1
      entry = stack[top]
    end
  end
end
| 82 | |
-- Downgrade every match that starts within [startpos, endpos] to a plain
-- "str", leaving matches already annotated "str" or "escape" untouched.
function InlineParser:str_matches(startpos, endpos)
  local matches = self.matches
  for position = startpos, endpos do
    local m = matches[position]
    if m then
      local kind = m[3]
      if not (kind == "str" or kind == "escape") then
        matches[position] = {m[1], m[2], "str"}
      end
    end
  end
end
| 94 | |
-- Test the annotation (third field) of `match` against the Lua pattern
-- `patt`. Returns string.find's results, or nothing when `match` is absent.
local function matches_pattern(match, patt)
  if not match then
    return
  end
  local annotation = match[3]
  return string.find(annotation, patt)
end
| 100 | |
| 101 | |
-- Build a matcher for an inline container delimited by the character `c`.
--
--   c            delimiter character (a one-character string)
--   annotation   container name; matches are recorded as "+name" / "-name"
--   defaultmatch annotation used when the delimiter neither closes nor is
--                paired (defaults to "str")
--   opentest     optional function(self, pos) giving an extra condition
--                that must hold for the delimiter to act as an opener
--
-- Returns function(self, pos, endpos) -> position at which to resume.
--
-- The fallback annotation is copied into a local on every call. Flipping
-- "right..." <-> "left..." for an explicitly marked delimiter must affect
-- only that delimiter: the matcher closures are shared by every parser, so
-- writing the flipped value back into the captured variable would change
-- the fallback for all later delimiters.
function InlineParser.between_matched(c, annotation, defaultmatch, opentest)
  local base_default = defaultmatch or "str"
  local marked_key = "{" .. c   -- stack name for explicitly marked delimiters
  return function(self, pos, endpos)
    local default_annot = base_default
    local subject = self.subject
    local can_open = find(subject, "^%S", pos + 1)
    local can_close = find(subject, "^%S", pos - 1)
    local has_open_marker = matches_pattern(self.matches[pos - 1], "^open%_marker")
    local has_close_marker = pos + 1 <= endpos and
                              byte(subject, pos + 1) == 125 -- }
    local endcloser = pos
    local startopener = pos

    if type(opentest) == "function" then
      can_open = can_open and opentest(self, pos)
    end

    -- allow explicit open/close markers to override:
    if has_open_marker then
      can_open = true
      can_close = false
      startopener = pos - 1
    elseif has_close_marker then
      can_close = true
      can_open = false
      endcloser = pos + 1
    end

    -- gsub also returns a replacement count; the parentheses keep only
    -- the resulting string
    if has_open_marker and default_annot:match("^right") then
      default_annot = (default_annot:gsub("^right", "left"))
    elseif has_close_marker and default_annot:match("^left") then
      default_annot = (default_annot:gsub("^left", "right"))
    end

    local openers = self.openers[has_close_marker and marked_key or c]
    if can_close and openers and #openers > 0 then
      -- check openers for a match
      local openpos, openposend = unpack(openers[#openers])
      if openposend ~= pos - 1 then -- exclude empty emph
        self:clear_openers(openpos, pos)
        self:add_match(openpos, openposend, "+" .. annotation)
        self:add_match(pos, endcloser, "-" .. annotation)
        return endcloser + 1
      end
    end

    -- if we get here, we didn't match an opener
    if can_open then
      self:add_opener(has_open_marker and marked_key or c, startopener, pos)
      self:add_match(startopener, pos, default_annot)
      return pos + 1
    end
    self:add_match(pos, endcloser, default_annot)
    return endcloser + 1
  end
end
| 170 | |
| 171 | InlineParser.matchers = { |
-- 96 = `
-- A run of backticks opens a verbatim span. If the run is directly
-- preceded by "$$" (itself not preceded by a backslash) the span is display
-- math; if preceded by "$" it is inline math. In the math cases the opener
-- match starts at the "$" and any match recorded there is dropped.
-- self.verbatim is set to the length of the backtick run.
[96] = function(self, pos, endpos)
  local subject = self.subject
  local _, run_end = bounded_find(subject, "^`*", pos, endpos)
  if not run_end then
    return nil
  end
  local kind, opener_start
  if find(subject, "^%$%$", pos - 2) and not find(subject, "^\\", pos - 3) then
    kind, opener_start = "display_math", pos - 2
    self.matches[pos - 2] = nil
    self.matches[pos - 1] = nil
  elseif find(subject, "^%$", pos - 1) then
    kind, opener_start = "inline_math", pos - 1
    self.matches[pos - 1] = nil
  else
    kind, opener_start = "verbatim", pos
  end
  self:add_match(opener_start, run_end, "+" .. kind)
  self.verbatim_type = kind
  self.verbatim = run_end - pos + 1
  return run_end + 1
end,
| 196 | |
-- 92 = \
-- Backslash handling:
--   * backslash + optional spaces/tabs + newline  -> "escape" + "hardbreak";
--     spaces and tabs at the end of the immediately preceding "str" match
--     are trimmed (the match is removed if nothing is left);
--   * backslash + space                           -> "escape" + "nbsp";
--   * backslash + other punctuation               -> "escape" + "str";
--   * anything else                               -> the backslash is "str".
[92] = function(self, pos, endpos)
  local subject = self.subject
  local _, endchar = bounded_find(subject, "^[ \t]*\r?\n", pos + 1, endpos)
  if endchar then
    self:add_match(pos, pos, "escape")
    -- self.matches is keyed by start position, so it is not a sequence and
    -- `#self.matches` does not identify the latest match. Walk back from
    -- the backslash to the nearest recorded match instead.
    local prevpos = pos - 1
    while prevpos >= self.firstpos and not self.matches[prevpos] do
      prevpos = prevpos - 1
    end
    local prev = prevpos >= self.firstpos and self.matches[prevpos] or nil
    -- only trim a str that ends right before the backslash
    if prev and prev[3] == "str" and prev[2] == pos - 1 then
      local sp, ep = prev[1], prev[2]
      while ep >= sp and
            (byte(subject, ep) == 32 or byte(subject, ep) == 9) do
        ep = ep - 1
      end
      if ep < sp then
        self.matches[prevpos] = nil
      else
        self:add_match(sp, ep, "str")
      end
    end
    self:add_match(pos + 1, endchar, "hardbreak")
    return endchar + 1
  end

  local _, ec = bounded_find(subject, "^[%p ]", pos + 1, endpos)
  if not ec then
    -- nothing escapable follows: the backslash is literal text
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  self:add_match(pos, pos, "escape")
  if find(subject, "^ ", pos + 1) then
    self:add_match(pos + 1, ec, "nbsp")
  else
    self:add_match(pos + 1, ec, "str")
  end
  return ec + 1
end,
| 236 | |
-- 60 = <
-- Autolinks: "<...>" with no whitespace or angle brackets inside. Content
-- matching "[^:]+@" is an email (checked first); content starting with
-- letters followed by ":" is a url. Returns nothing in all other cases.
[60] = function(self, pos, endpos)
  local subject = self.subject
  local open_pos, close_pos =
    bounded_find(subject, "^%<[^<>%s]+%>", pos, endpos)
  if not open_pos then
    return
  end
  local kind
  if bounded_find(subject, "^[^:]+%@", pos + 1, close_pos) then
    kind = "email"
  elseif bounded_find(subject, "^%a+:", pos + 1, close_pos) then
    kind = "url"
  else
    return
  end
  self:add_match(open_pos, open_pos, "+" .. kind)
  self:add_match(open_pos + 1, close_pos - 1, "str")
  self:add_match(close_pos, close_pos, "-" .. kind)
  return close_pos + 1
end,
| 258 | |
-- 126 = ~  (delimits "subscript" containers)
[126] = InlineParser.between_matched("~", "subscript"),

-- 94 = ^  (delimits "superscript" containers)
[94] = InlineParser.between_matched("^", "superscript"),
| 264 | |
-- 91 = [
-- "[^label]" is recorded as a footnote reference in one step; any other
-- "[" is recorded as a str and pushed as a potential opener.
[91] = function(self, pos, endpos)
  local found, ref_end =
    bounded_find(self.subject, "^%^([^]]+)%]", pos + 1, endpos)
  if found then -- footnote ref
    self:add_match(pos, ref_end, "footnote_reference")
    return ref_end + 1
  end
  self:add_opener("[", pos, pos)
  self:add_match(pos, pos, "str")
  return pos + 1
end,
| 277 | |
-- 93 = ]
-- Acts on the most recent "[" opener, if any:
--   * opener already marked "reference_link": this "]" ends the reference,
--     so the link (or image) text and reference matches are emitted;
--   * "]" followed by "[": mark the opener as a pending reference link;
--   * "]" followed by "(": mark it as a pending explicit link and start
--     destination parsing;
--   * "]" followed by "{": treat the bracketed text as a span.
-- Returns nothing when none of these apply.
[93] = function(self, pos, endpos)
  local stack = self.openers["["]
  if not stack or #stack == 0 then
    return
  end
  local subject = self.subject
  local opener = stack[#stack]

  if opener[3] == "reference_link" then
    -- found a reference link
    local open_start = opener[1]
    -- an image is "![" whose "!" is not preceded by a backslash
    local is_image = bounded_find(subject, "^!", open_start - 1, endpos)
      and not bounded_find(subject, "^[\\]", open_start - 2, endpos)
    local text_kind = "linktext"
    if is_image then
      self:add_match(open_start - 1, open_start - 1, "image_marker")
      text_kind = "imagetext"
    end
    self:add_match(open_start, opener[2], "+" .. text_kind)
    self:add_match(opener[4], opener[4], "-" .. text_kind)
    self:add_match(opener[5], opener[5], "+reference")
    self:add_match(pos, pos, "-reference")
    -- everything inside the reference label becomes plain text
    self:str_matches(opener[5] + 1, pos - 1)
    -- remove from openers
    self:clear_openers(open_start, pos)
    return pos + 1
  end

  if bounded_find(subject, "^%[", pos + 1, endpos) then
    opener[3] = "reference_link"
    opener[4] = pos      -- intermediate ]
    opener[5] = pos + 1  -- intermediate [
    self:add_match(pos, pos + 1, "str")
    -- remove any openers between [ and ]
    self:clear_openers(opener[1] + 1, pos - 1)
    return pos + 2
  elseif bounded_find(subject, "^%(", pos + 1, endpos) then
    self.openers["("] = {} -- clear ( openers
    opener[3] = "explicit_link"
    opener[4] = pos      -- intermediate ]
    opener[5] = pos + 1  -- intermediate (
    self.destination = true
    self:add_match(pos, pos + 1, "str")
    -- remove any openers between [ and ]
    self:clear_openers(opener[1] + 1, pos - 1)
    return pos + 2
  elseif bounded_find(subject, "^%{", pos + 1, endpos) then
    -- assume this is attributes, bracketed span
    self:add_match(opener[1], opener[2], "+span")
    self:add_match(pos, pos, "-span")
    -- remove any openers between [ and ]
    self:clear_openers(opener[1], pos)
    return pos + 1
  end
end,
| 332 | |
| 333 | |
-- 40 = (
-- Only meaningful while a link destination is being parsed: the paren is
-- recorded as a str and pushed so a later ")" can be balanced against it.
[40] = function(self, pos)
  if self.destination then
    self:add_opener("(", pos, pos)
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  return nil
end,
| 341 | |
-- 41 = )
-- Only meaningful while a link destination is being parsed. A ")" first
-- balances the most recent "(" opener, if any. Otherwise, if the most
-- recent "[" opener is an explicit link, it closes the destination and the
-- link (or image) matches are emitted.
[41] = function(self, pos, endpos)
  if not self.destination then
    return nil
  end
  local parens = self.openers["("]
  if parens and #parens > 0 and parens[#parens][1] then
    parens[#parens] = nil -- clear opener
    self:add_match(pos, pos, "str")
    return pos + 1
  end

  local stack = self.openers["["]
  local opener = stack and stack[#stack]
  if not opener or opener[3] ~= "explicit_link" then
    return
  end

  -- we have inline link
  local subject = self.subject
  local open_start = opener[1]
  -- an image is "![" whose "!" is not preceded by a backslash
  local is_image = bounded_find(subject, "^!", open_start - 1, endpos)
    and not bounded_find(subject, "^[\\]", open_start - 2, endpos)
  local text_kind = "linktext"
  if is_image then
    self:add_match(open_start - 1, open_start - 1, "image_marker")
    text_kind = "imagetext"
  end
  self:add_match(open_start, opener[2], "+" .. text_kind)
  self:add_match(opener[4], opener[4], "-" .. text_kind)
  self:add_match(opener[5], opener[5], "+destination")
  self:add_match(pos, pos, "-destination")
  self.destination = false
  -- everything inside the destination becomes plain text
  self:str_matches(opener[5] + 1, pos - 1)
  -- remove from openers
  self:clear_openers(open_start, pos)
  return pos + 1
end,
| 378 | |
-- 95 = _  (delimits "emph" containers)
[95] = InlineParser.between_matched("_", "emph"),

-- 42 = *  (delimits "strong" containers)
[42] = InlineParser.between_matched("*", "strong"),
| 384 | |
-- 123 = {
-- "{" directly before one of _ * ~ ^ + = ' " - is an explicit open marker.
-- Otherwise, when attributes are allowed, an attribute parser is started
-- and the same position is returned so that feed() hands the "{" to it.
-- Failing both, the brace is plain text.
[123] = function(self, pos, endpos)
  if bounded_find(self.subject, "^[_*~^+='\"-]", pos + 1, endpos) then
    self:add_match(pos, pos, "open_marker")
    return pos + 1
  end
  if not self.allow_attributes then
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  self.attribute_parser = attributes.AttributeParser:new(self.subject)
  self.attribute_start = pos
  self.attribute_slices = {}
  return pos
end,
| 400 | |
-- 58 = :
-- ":name:" (name made of alphanumerics, "_", "+" or "-") is a symbol;
-- any other colon is plain text.
[58] = function(self, pos, endpos)
  local sym_start, sym_end =
    bounded_find(self.subject, "^%:[%w_+-]+%:", pos, endpos)
  if not sym_start then
    self:add_match(pos, pos, "str")
    return pos + 1
  end
  self:add_match(sym_start, sym_end, "symbol")
  return sym_end + 1
end,
| 412 | |
-- 43 = +
-- "insert" containers; the open test requires a "{" just before the
-- delimiter or a "}" just after it.
[43] = InlineParser.between_matched("+", "insert", "str",
  function(parser, at)
    local subj = parser.subject
    return find(subj, "^%{", at - 1) or find(subj, "^%}", at + 1)
  end),

-- 61 = =
-- "mark" containers; same brace requirement as for "+".
[61] = InlineParser.between_matched("=", "mark", "str",
  function(parser, at)
    local subj = parser.subject
    return find(subj, "^%{", at - 1) or find(subj, "^%}", at + 1)
  end),
| 426 | |
-- 39 = '
-- Single quotes. A quote may open only at position 1 or directly after
-- whitespace, a double or single quote, a hyphen, "(" or "[". An unpaired
-- quote falls back to "right_single_quote".
[39] = InlineParser.between_matched("'", "single_quoted", "right_single_quote",
  function(self, pos) -- test to open
    -- The hyphen is escaped as %-: written bare between ' and ( it would
    -- form the range '..( and a literal "-" would never match.
    return pos == 1 or
      find(self.subject, "^[%s\"'%-([]", pos - 1)
  end),

-- 34 = "
-- Double quotes; an unpaired quote falls back to "left_double_quote".
[34] = InlineParser.between_matched('"', "double_quoted", "left_double_quote"),
| 436 | |
-- 45 = -
-- A hyphen next to a brace ("{-" or "-}") is handled as a "delete"
-- delimiter. Otherwise a run of hyphens is converted to dashes: all em
-- dashes if the count is divisible by 3, all en dashes if divisible by 2,
-- else a mix chosen by the rules in the loop below, with a lone remaining
-- hyphen kept as str.
[45] = function(self, pos, endpos)
  local subject = self.subject
  if byte(subject, pos - 1) == 123 or
     byte(subject, pos + 1) == 125 then -- (123 = { 125 = })
    local next_to_brace = function(slf, p)
      return find(slf.subject, "^%{", p - 1) or
             find(slf.subject, "^%}", p + 1)
    end
    local delete_matcher =
      InlineParser.between_matched("-", "delete", "str", next_to_brace)
    local nextpos = delete_matcher(self, pos, endpos)
    return nextpos
  end

  -- didn't match a del, try for smart hyphens:
  local _, run_end = find(subject, "^%-*", pos)
  if run_end > endpos then
    run_end = endpos
  end
  local remaining = run_end - pos + 1
  if byte(subject, run_end + 1) == 125 then -- 125 = }
    remaining = remaining - 1 -- last hyphen is close del
  end
  if remaining == 0 then -- this means we have '-}'
    self:add_match(pos, pos + 1, "str")
    return pos + 2
  end

  -- Try to construct a homogeneous sequence of dashes
  local only_em = remaining % 3 == 0
  local only_en = remaining % 2 == 0
  while remaining > 0 do
    local width, annot
    if only_em then
      width, annot = 3, "em_dash"
    elseif only_en then
      width, annot = 2, "en_dash"
    elseif remaining >= 3 and (remaining % 2 ~= 0 or remaining > 4) then
      width, annot = 3, "em_dash"
    elseif remaining >= 2 then
      width, annot = 2, "en_dash"
    else
      width, annot = 1, "str"
    end
    self:add_match(pos, pos + width - 1, annot)
    pos = pos + width
    remaining = remaining - width
  end
  return pos
end,
| 491 | |
-- 46 = .
-- Three consecutive periods form an "ellipses" match; otherwise nothing is
-- returned.
[46] = function(self, pos, endpos)
  if not bounded_find(self.subject, "^%.%.", pos + 1, endpos) then
    return
  end
  self:add_match(pos, pos + 2, "ellipses")
  return pos + 3
end
| 499 | } |
| 500 | |
-- Record the single character at `pos` as plain text and return the
-- position that follows it.
function InlineParser:single_char(pos)
  local nextpos = pos + 1
  self:add_match(pos, pos, "str")
  return nextpos
end
| 505 | |
-- Re-feed, as ordinary inline content, the slices that were tentatively
-- consumed by an attribute parse. Attribute parsing is disabled while the
-- slices are replayed and re-enabled afterwards. Does nothing when there
-- are no recorded slices.
function InlineParser:reparse_attributes()
  local pending = self.attribute_slices
  if not pending then
    return
  end
  self.allow_attributes = false
  self.attribute_parser = nil
  self.attribute_start = nil
  for i = 1, #pending do
    local slice = pending[i]
    self:feed(slice[1], slice[2])
  end
  self.allow_attributes = true
  self.attribute_slices = nil
end
| 523 | |
-- Feed a slice to the parser, updating state.
--
-- Scans subject[spos..endpos] and records matches in self.matches, widening
-- self.firstpos / self.lastpos to include the slice. While an attribute
-- parser is active, input is handed to it piece by piece; otherwise text
-- up to the next special character becomes a "str" and the special
-- character is dispatched to its matcher (or recorded as a single-char
-- str when no matcher claims it).
function InlineParser:feed(spos, endpos)
  local special = "[][\\`{}_*()!<>~^:=+$\r\n'\".-]"
  local subject = self.subject
  local matchers = self.matchers
  if self.firstpos == 0 or spos < self.firstpos then
    self.firstpos = spos
  end
  if self.lastpos == 0 or endpos > self.lastpos then
    self.lastpos = endpos
  end
  local pos = spos
  while pos <= endpos do
    if self.attribute_parser then
      local sp = pos
      -- hand the attribute parser everything up to the next special char
      local ep2 = bounded_find(subject, special, pos, endpos)
      if not ep2 or ep2 > endpos then
        ep2 = endpos
      end
      local status, ep = self.attribute_parser:feed(sp, ep2)
      if status == "done" then
        local attribute_start = self.attribute_start
        -- delimiters of the attribute block
        self:add_match(attribute_start, attribute_start, "+attributes")
        self:add_match(ep, ep, "-attributes")
        -- matches produced by the attribute parser itself
        local attr_matches = self.attribute_parser:get_matches()
        for i = 1, #attr_matches do
          self:add_match(unpack(attr_matches[i]))
        end
        -- restore state to prior to adding attribute parser:
        self.attribute_parser = nil
        self.attribute_start = nil
        self.attribute_slices = nil
        pos = ep + 1
      elseif status == "fail" then
        self:reparse_attributes()
        pos = sp  -- we'll want to go over the whole failed portion again,
                  -- as no slice was added for it
      elseif status == "continue" then
        if not self.attribute_slices then
          self.attribute_slices = {}
        end
        self.attribute_slices[#self.attribute_slices + 1] = {sp, ep}
        pos = ep + 1
      end
    else
      -- find next interesting character:
      local newpos = bounded_find(subject, special, pos, endpos) or endpos + 1
      if newpos > pos then
        self:add_match(pos, newpos - 1, "str")
        pos = newpos
        if pos > endpos then
          break -- otherwise, fall through:
        end
      end
      -- if we get here, then newpos = pos,
      -- i.e. we have something interesting at pos
      local c = byte(subject, pos)

      if c == 13 or c == 10 then -- cr or lf
        -- CR LF is one line ending. The pattern must contain a real
        -- newline: "%n" is not a character class in Lua patterns and
        -- would match the letter "n".
        if c == 13 and bounded_find(subject, "^\n", pos + 1, endpos) then
          self:add_match(pos, pos + 1, "softbreak")
          pos = pos + 2
        else
          self:add_match(pos, pos, "softbreak")
          pos = pos + 1
        end
      elseif self.verbatim > 0 then
        if c == 96 then
          local _, endchar = bounded_find(subject, "^`+", pos, endpos)
          if endchar and endchar - pos + 1 == self.verbatim then
            -- backtick run of the opening length: closes the span
            -- check for raw attribute
            local sp, ep =
              bounded_find(subject, "^%{%=[^%s{}`]+%}", endchar + 1, endpos)
            if sp and self.verbatim_type == "verbatim" then -- raw
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              self:add_match(sp, ep, "raw_format")
              pos = ep + 1
            else
              self:add_match(pos, endchar, "-" .. self.verbatim_type)
              pos = endchar + 1
            end
            self.verbatim = 0
            self.verbatim_type = nil
          else
            -- run of a different length is literal content
            endchar = endchar or endpos
            self:add_match(pos, endchar, "str")
            pos = endchar + 1
          end
        else
          self:add_match(pos, pos, "str")
          pos = pos + 1
        end
      else
        local matcher = matchers[c]
        pos = (matcher and matcher(self, pos, endpos)) or self:single_char(pos)
      end
    end
  end
end
| 626 | |
-- True while a verbatim span is open, i.e. while self.verbatim (set when a
-- verbatim span opens, reset to 0 when it closes) is greater than zero.
function InlineParser:in_verbatim()
  local open_ticks = self.verbatim
  return open_ticks > 0
end
| 631 | |
-- Return the recorded matches as an array ordered by start position.
-- An unfinished attribute parse is first replayed as ordinary content.
-- Adjacent "str" matches are merged, a final softbreak is dropped,
-- trailing spaces are trimmed from a final str, and an unclosed verbatim
-- span triggers a warning and gets a synthetic closing match.
function InlineParser:get_matches()
  if self.attribute_parser then -- we're still in an attribute parse
    self:reparse_attributes()
  end
  local subject = self.subject
  local matches = self.matches
  local sorted = {}
  local count = 0
  local prev_sp, prev_ep, prev_annot
  for i = self.firstpos, self.lastpos do
    local m = matches[i]
    if m then
      local sp, ep, annot = m[1], m[2], m[3]
      if annot == "str" and prev_annot == "str" and prev_ep + 1 == sp then
        -- consolidate adjacent strs
        sorted[count] = {prev_sp, ep, annot}
      else
        count = count + 1
        sorted[count] = m
        prev_sp = sp
      end
      prev_ep, prev_annot = ep, annot
    end
  end

  local last = sorted[count]
  if not last then
    return sorted
  end
  local startpos, endpos, annot = last[1], last[2], last[3]
  -- remove final softbreak
  if annot == "softbreak" then
    sorted[count] = nil
    count = count - 1
    last = sorted[count]
    if not last then
      return sorted
    end
    startpos, endpos, annot = last[1], last[2], last[3]
  end
  -- remove trailing spaces
  if annot == "str" and byte(subject, endpos) == 32 then
    while endpos > startpos and byte(subject, endpos) == 32 do
      endpos = endpos - 1
    end
    sorted[count] = {startpos, endpos, annot}
  end
  if self.verbatim > 0 then -- unclosed verbatim
    self.warn({ message = "Unclosed verbatim", pos = endpos })
    sorted[count + 1] = {endpos, endpos, "-" .. self.verbatim_type}
  end
  return sorted
end
| 678 | |
| 679 | return { InlineParser = InlineParser } |