Commit | Line | Data |
---|---|---|
9d42904e JM |
1 | local rex = require('rex_pcre') |
2 | local string = require('string') | |
3 | local table = require('table') | |
4 | local types = require('types') | |
5 | local throw, Nil, Symbol, List = types.throw, types.Nil, | |
6 | types.Symbol, types.List | |
7 | ||
8 | local M = {} | |
9 | ||
--- Cursor over a list of tokens.
-- NOTE: `Reader` is assigned as a global (no `local`), so it is visible
-- outside this module.
Reader = {}

--- Create a reader positioned at the first token.
-- @param tokens array of token strings
-- @return a new table whose metatable is the receiver
function Reader:new(tokens)
    -- The receiver serves as the prototype, so method lookups on the new
    -- object fall back to it.
    self.__index = self
    local instance = setmetatable({}, self)
    instance.tokens = tokens
    instance.position = 1
    return instance
end

--- Return the token at the cursor and advance the cursor by one.
-- @return the token, or nil once the cursor is past the last token
function Reader:next()
    local index = self.position
    self.position = index + 1
    return self.tokens[index]
end

--- Return the token at the cursor without advancing.
-- @return the token, or nil once the cursor is past the last token
function Reader:peek()
    local index = self.position
    return self.tokens[index]
end
23 | ||
--- Split source text into a list of token strings.
-- Each regex match skips leading whitespace/commas and captures one token:
-- `~@`, a single special character, a double-quoted string, a `;` comment,
-- or a run of ordinary characters. Comment tokens are dropped.
-- @param str source text
-- @return array of token strings (possibly empty)
function M.tokenize(str)
    local results = {}
    local re_pos = 1
    local re = rex.new("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)", rex.flags().EXTENDED)
    while true do
        -- s/e bound the whole match; t holds the capture's start/end offsets.
        local s, e, t = re:exec(str, re_pos)
        -- No match, or an empty match: nothing more to consume.
        if not s or s > e then break end
        re_pos = e + 1
        local val = string.sub(str, t[1], t[2])
        -- The last alternative can match the empty string after trailing
        -- whitespace has been consumed. That is not a token; emitting it
        -- would make whitespace-only input look non-empty to callers. The
        -- next match attempt would start at the same spot and match nothing,
        -- so stopping here loses no tokens.
        if val == "" then break end
        if string.sub(val, 1, 1) ~= ";" then
            table.insert(results, val)
        end
    end
    return results
end
39 | ||
--- Consume one token from the reader and convert it to a value.
-- Conversions, in order of precedence:
--   * integer / decimal literal -> result of tonumber
--   * double-quoted string      -> Lua string with escapes decoded
--   * `:name`                   -> "\177" followed by `name`
--   * `nil`, `true`, `false`    -> Nil, true, false
--   * anything else             -> Symbol
-- @param rdr reader providing `next()`
-- @return the converted value
-- Raises via `throw` when there is no token left to read.
function M.read_atom(rdr)
    local int_re = rex.new("^-?[0-9]+$")
    local float_re = rex.new("^-?[0-9][0-9.]*$")
    local token = rdr:next()
    -- Running out of tokens (e.g. input consisting of just "'") used to pass
    -- nil to the regex and fail with an unrelated argument error.
    if token == nil then throw("unexpected EOF") end
    if int_re:exec(token) then return tonumber(token)
    elseif float_re:exec(token) then return tonumber(token)
    elseif string.sub(token,1,1) == '"' then
        local sval = string.sub(token,2,string.len(token)-1)
        -- Decode escapes in a single left-to-right pass so that an escaped
        -- backslash followed by 'n' or '"' is not decoded a second time.
        -- Escapes not listed here are left untouched (gsub keeps the
        -- original text when the table lookup yields nil).
        local escapes = { ['\\'] = '\\', ['"'] = '"', n = '\n' }
        sval = string.gsub(sval, '\\(.)', escapes)
        return sval
    elseif string.sub(token,1,1) == ':' then
        -- Keyword: marked by a leading "\177" byte on a plain string.
        return "\177" .. string.sub(token,2)
    elseif token == "nil" then return Nil
    elseif token == "true" then return true
    elseif token == "false" then return false
    else return Symbol:new(token)
    end
end
59 | ||
--- Read a delimited run of forms into a plain array.
-- @param rdr reader providing `next()` and `peek()`
-- @param start opening delimiter token that must come first
-- @param last closing delimiter token that ends the sequence
-- @return array of the forms read between the delimiters
-- Raises via `throw` if the first token is not `start`, or if the tokens
-- run out before `last` is seen.
function M.read_sequence(rdr, start, last)
    local opener = rdr:next()
    if opener ~= start then throw("expected '"..start.."'") end

    local ast = {}
    while true do
        local upcoming = rdr:peek()
        if upcoming == last then break end
        if not upcoming then throw("expected '"..last.."', got EOF") end
        table.insert(ast, M.read_form(rdr))
    end

    -- Consume the closing delimiter.
    rdr:next()
    return ast
end
74 | ||
--- Read a parenthesised sequence and wrap it with types.List:new.
-- @param rdr reader positioned at '('
-- @return the value produced by types.List:new
function M.read_list(rdr)
    local items = M.read_sequence(rdr, '(', ')')
    return types.List:new(items)
end
78 | ||
--- Read a bracketed sequence and wrap it with types.Vector:new.
-- @param rdr reader positioned at '['
-- @return the value produced by types.Vector:new
function M.read_vector(rdr)
    local items = M.read_sequence(rdr, '[', ']')
    return types.Vector:new(items)
end
82 | ||
--- Read a brace-delimited sequence and build a hash map from it.
-- The forms read are passed as varargs to types._assoc_BANG together with
-- a fresh types.HashMap (presumably alternating key/value forms — confirm
-- against types._assoc_BANG).
-- @param rdr reader positioned at '{'
-- @return the value returned by types._assoc_BANG
function M.read_hash_map(rdr)
    -- The global `unpack` only exists in Lua 5.1 / LuaJIT; Lua 5.2+ moved it
    -- to `table.unpack`.
    local unpack_fn = table.unpack or unpack
    local seq = M.read_sequence(rdr, '{', '}')
    return types._assoc_BANG(types.HashMap:new(), unpack_fn(seq))
end
87 | ||
-- Reader-macro tokens that wrap the single following form in a list headed
-- by the named symbol.
local prefix_forms = {
    ["'"]  = 'quote',
    ['`']  = 'quasiquote',
    ['~']  = 'unquote',
    ['~@'] = 'splice-unquote',
    ['@']  = 'deref',
}

--- Read one complete form starting at the reader's current token.
-- Dispatches on the upcoming token: reader macros expand to lists, opening
-- delimiters start a list/vector/hash map, stray closing delimiters are
-- reported via `throw`, and everything else is read as an atom.
-- @param rdr reader providing `next()` and `peek()`
-- @return the form that was read
function M.read_form(rdr)
    local token = rdr:peek()

    local head = prefix_forms[token]
    if head then
        rdr:next()
        return List:new({Symbol:new(head), M.read_form(rdr)})
    end

    if token == '^' then
        rdr:next()
        -- The metadata form comes first in the source but is placed last in
        -- the resulting (with-meta <form> <meta>) list.
        local meta = M.read_form(rdr)
        return List:new({Symbol:new('with-meta'), M.read_form(rdr), meta})
    end

    if token == ')' or token == ']' or token == '}' then
        throw("unexpected '" .. token .. "'")
    elseif token == '(' then
        return M.read_list(rdr)
    elseif token == '[' then
        return M.read_vector(rdr)
    elseif token == '{' then
        return M.read_hash_map(rdr)
    else
        return M.read_atom(rdr)
    end
end
120 | ||
--- Tokenize a string and read the first form from it.
-- @param str source text
-- @return the form read by M.read_form
-- Raises an error whose value is nil when the text contains no tokens.
function M.read_str(str)
    local tokens = M.tokenize(str)
    if #tokens > 0 then
        return M.read_form(Reader:new(tokens))
    end
    -- Nothing to read: raise with a nil error value.
    error(nil)
end
126 | ||
127 | return M |