size: 6 KiB

1local find, sub = string.find, string.sub
2
3-- Parser for attributes
4-- attributes { id = "foo", class = "bar baz",
5-- key1 = "val1", key2 = "val2" }
6-- syntax:
7--
8-- attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
9-- attribute <- identifier | class | keyval
10-- identifier <- '#' name
11-- class <- '.' name
12-- name <- (nonspace, nonpunctuation other than ':', '_', '-')+
13-- keyval <- key '=' val
14-- key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
15-- val <- bareval | quotedval
16-- bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
-- quotedval <- '"' ([^"] | '\"')* '"'
18
-- Parser states. Each value indexes the `handlers` table below; a handler
-- receives (parser, pos) and returns the state to enter next.
local SCANNING                           = 0  -- between attributes, deciding what comes next
local SCANNING_ID                        = 1  -- inside '#name'
local SCANNING_CLASS                     = 2  -- inside '.name'
local SCANNING_KEY                       = 3  -- inside a key, before '='
local SCANNING_VALUE                     = 4  -- right after '=', expecting a value to start
local SCANNING_BARE_VALUE                = 5  -- inside an unquoted value
local SCANNING_QUOTED_VALUE              = 6  -- inside a quoted value (line with the opening quote)
local SCANNING_QUOTED_VALUE_CONTINUATION = 7  -- inside a quoted value, after a newline
local SCANNING_ESCAPED                   = 8  -- character after '\' in a quoted value
local SCANNING_ESCAPED_IN_CONTINUATION   = 9  -- character after '\' in a continuation line
local SCANNING_COMMENT                   = 10 -- inside a '%' comment
local FAIL                               = 11 -- input rejected
local DONE                               = 12 -- closing '}' reached
local START                              = 13 -- nothing consumed yet; expecting '{'

-- Prototype table for parser objects (methods are attached further down).
local AttributeParser = {}

-- State number -> handler function.
local handlers = {}
38
-- START: the subject must have '{' at `pos`; anything else is rejected
-- straight away.
handlers[START] = function(self, pos)
  if not find(self.subject, "^{", pos) then
    return FAIL
  end
  return SCANNING
end
46
-- FAIL: ignores its arguments and always answers FAIL.
handlers[FAIL] = function(_self, _pos) return FAIL end
50
-- DONE: ignores its arguments and always answers DONE.
handlers[DONE] = function(_self, _pos) return DONE end
54
-- SCANNING: between attributes. Looks at the single character at `pos`
-- and picks the state for whatever construct starts there:
--   space / tab / LF / CR  -> stay in SCANNING
--   '}'                    -> DONE
--   '#'                    -> SCANNING_ID
--   '%'                    -> SCANNING_COMMENT
--   '.'                    -> SCANNING_CLASS
--   ASCII alnum, '_', ':', '-' -> SCANNING_KEY
-- For the last four, `self.begin` is set to `pos` before returning.
handlers[SCANNING] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == '}' then
    return DONE
  end
  if ch == ' ' or ch == '\t' or ch == '\n' or ch == '\r' then
    return SCANNING
  end

  local next_state
  if ch == '#' then
    next_state = SCANNING_ID
  elseif ch == '%' then
    next_state = SCANNING_COMMENT
  elseif ch == '.' then
    next_state = SCANNING_CLASS
  elseif find(ch, "^[%a%d_:-]") then
    next_state = SCANNING_KEY
  else
    -- TODO (carried over from the original code)
    return FAIL
  end

  self.begin = pos
  return next_state
end
77
-- SCANNING_COMMENT: skips characters until '%' (back to SCANNING) or
-- '}' (DONE); every other character keeps the parser in the comment.
handlers[SCANNING_COMMENT] = function(self, pos)
  local ch = sub(self.subject, pos, pos)
  if ch == "}" then
    return DONE
  end
  if ch == "%" then
    return SCANNING
  end
  return SCANNING_COMMENT
end
88
-- SCANNING_ID: consumes the name that follows '#'.
-- Name characters are anything that is neither whitespace nor punctuation,
-- plus '_', '-' and ':'. The name ends at '}' (-> DONE) or at whitespace
-- (-> SCANNING); any other character is a failure.
-- `self.begin` holds the position of the '#', so the recorded "id" match
-- starts at begin + 1, and is only recorded when at least one name
-- character was seen.
handlers[SCANNING_ID] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == "_" or ch == "-" or ch == ":" or find(ch, "^[^%s%p]") then
    return SCANNING_ID
  end

  local closing = (ch == '}')
  if not closing and not find(ch, "^%s") then
    return FAIL
  end

  if self.lastpos > self.begin then
    self:add_match(self.begin + 1, self.lastpos, "id")
  end
  self.begin = nil

  if closing then
    return DONE
  end
  return SCANNING
end
109
-- SCANNING_CLASS: consumes the name that follows '.'.
-- Accepts the same name characters as an identifier (non-space,
-- non-punctuation, or one of '_', '-', ':'). A '}' finishes the whole
-- attribute block, whitespace returns to SCANNING, anything else fails.
-- The "class" match excludes the leading '.' (stored in `self.begin`) and
-- is skipped when the name is empty.
handlers[SCANNING_CLASS] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  local is_name_char = ch == "_" or ch == "-" or ch == ":"
    or find(ch, "^[^%s%p]")
  if is_name_char then
    return SCANNING_CLASS
  end

  local next_state
  if ch == '}' then
    next_state = DONE
  elseif find(ch, "^%s") then
    next_state = SCANNING
  else
    return FAIL
  end

  if self.lastpos > self.begin then
    self:add_match(self.begin + 1, self.lastpos, "class")
  end
  self.begin = nil
  return next_state
end
130
-- SCANNING_KEY: consumes key characters (ASCII alnum, '_', ':', '-').
-- On '=' the key spanning begin..lastpos is recorded and the parser moves
-- on to SCANNING_VALUE; any other non-key character is a failure.
handlers[SCANNING_KEY] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch ~= "=" then
    if find(ch, "^[%a%d_:-]") then
      return SCANNING_KEY
    end
    return FAIL
  end

  self:add_match(self.begin, self.lastpos, "key")
  self.begin = nil
  return SCANNING_VALUE
end
143
-- SCANNING_VALUE: first character after '='.
-- A double quote opens a quoted value; an ASCII alnum, '_', ':' or '-'
-- opens a bare value. In both cases `self.begin` is set to `pos`.
-- Anything else is a failure.
handlers[SCANNING_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  local next_state
  if ch == '"' then
    next_state = SCANNING_QUOTED_VALUE
  elseif find(ch, "^[%a%d_:-]") then
    next_state = SCANNING_BARE_VALUE
  else
    return FAIL
  end

  self.begin = pos
  return next_state
end
156
-- SCANNING_BARE_VALUE: consumes an unquoted value made of ASCII alnum,
-- '_', ':' and '-'. The value (begin..lastpos) is recorded when it is
-- terminated by '}' (-> DONE) or by whitespace (-> SCANNING); any other
-- terminator is a failure.
handlers[SCANNING_BARE_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if find(ch, "^[%a%d_:-]") then
    return SCANNING_BARE_VALUE
  end

  local next_state
  if ch == '}' then
    next_state = DONE
  elseif find(ch, "^%s") then
    next_state = SCANNING
  else
    return FAIL
  end

  self:add_match(self.begin, self.lastpos, "value")
  self.begin = nil
  return next_state
end
173
-- SCANNING_ESCAPED: the character following a backslash is accepted
-- without inspection; scanning of the quoted value then resumes.
handlers[SCANNING_ESCAPED] = function(_self, _pos) return SCANNING_QUOTED_VALUE end
177
-- SCANNING_ESCAPED_IN_CONTINUATION: same as SCANNING_ESCAPED, but returns
-- to the continuation-line state of the quoted value.
handlers[SCANNING_ESCAPED_IN_CONTINUATION] = function(_self, _pos)
  local resume_state = SCANNING_QUOTED_VALUE_CONTINUATION
  return resume_state
end
181
-- SCANNING_QUOTED_VALUE: inside a quoted value, on the line that holds
-- the opening quote (`self.begin` is the position of that quote).
--   '"'   closes the value: record begin+1..lastpos, go to SCANNING
--   "\n"  ends this line's segment: record it, go to the continuation state
--   '\'   the next character is escaped
--   other stay in this state
handlers[SCANNING_QUOTED_VALUE] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if ch == "\\" then
    return SCANNING_ESCAPED
  end

  local closing = (ch == '"')
  if not closing and ch ~= "\n" then
    return SCANNING_QUOTED_VALUE
  end

  -- begin + 1 skips the opening quote itself.
  self:add_match(self.begin + 1, self.lastpos, "value")
  self.begin = nil

  if closing then
    return SCANNING
  end
  return SCANNING_QUOTED_VALUE_CONTINUATION
end
198
-- SCANNING_QUOTED_VALUE_CONTINUATION: inside a quoted value after a
-- newline. The first character handled in this state becomes the start
-- of a new segment (`self.begin` is nil on entry). Each segment is
-- recorded as its own "value" match, from begin to lastpos, when a
-- closing quote or another newline is reached.
handlers[SCANNING_QUOTED_VALUE_CONTINUATION] = function(self, pos)
  local ch = sub(self.subject, pos, pos)

  if self.begin == nil then
    self.begin = pos
  end

  if ch == "\\" then
    return SCANNING_ESCAPED_IN_CONTINUATION
  end

  local closing = (ch == '"')
  if not closing and ch ~= "\n" then
    return SCANNING_QUOTED_VALUE_CONTINUATION
  end

  self:add_match(self.begin, self.lastpos, "value")
  self.begin = nil

  if closing then
    return SCANNING
  end
  return SCANNING_QUOTED_VALUE_CONTINUATION
end
218
-- Create a parser for `subject` (the string whose positions are later
-- passed to feed). The new object starts in the START state with an empty
-- match list; `begin` and `lastpos` are left unset (nil) until the state
-- handlers / feed assign them. Method lookups fall through to the
-- receiver of `new` via __index.
function AttributeParser:new(subject)
  self.__index = self
  return setmetatable({
    subject = subject,
    state = START,
    matches = {},
  }, self)
end
231
-- Append a match triple {sp, ep, tag} to the parser's match list:
-- sp and ep are the start and end positions, tag is the annotation
-- string (the handlers use "id", "class", "key" and "value").
function AttributeParser:add_match(sp, ep, tag)
  local list = self.matches
  list[#list + 1] = { sp, ep, tag }
end
235
-- Return the list of matches recorded so far (the parser's own table,
-- not a copy).
function AttributeParser:get_matches()
  local recorded = self.matches
  return recorded
end
239
-- Run the state machine over subject positions startpos..endpos
-- (inclusive), one character at a time.
-- Returns status, position:
--   "done", pos        the closing '}' was reached at pos
--   "fail", pos        the character at pos could not be parsed
--   "continue", endpos the slice was consumed without finishing; call
--                      feed again with the following slice
-- `self.lastpos` is updated after every character except the one that
-- produces "done".
function AttributeParser:feed(startpos, endpos)
  for pos = startpos, endpos do
    local next_state = handlers[self.state](self, pos)
    self.state = next_state

    if next_state == DONE then
      return "done", pos
    end

    self.lastpos = pos
    if next_state == FAIL then
      return "fail", pos
    end
  end
  return "continue", endpos
end
262
263--[[
264local test = function()
265 local parser = AttributeParser:new("{a=b #ident\n.class\nkey=val1\n .class key2=\"val two \\\" ok\" x")
266 local x,y,z = parser:feed(1,56)
267 print(require'inspect'(parser:get_matches{}))
268end
269
270test()
271--]]
272
-- Module export table: the parser prototype is the only public name.
return {
  AttributeParser = AttributeParser,
}