Commit | Line | Data |
---|---|---|
9d42904e JM |
1 | local rex = require('rex_pcre') |
2 | local string = require('string') | |
3 | local table = require('table') | |
4 | local types = require('types') | |
5 | local throw, Nil, Symbol, List = types.throw, types.Nil, | |
6 | types.Symbol, types.List | |
7 | ||
8 | local M = {} | |
9 | ||
-- Reader: a cursor over a list of tokens.
-- Fields: `tokens` (the list handed to `new`) and `position` (1-based index
-- of the token that `peek`/`next` will return).
-- NOTE(review): `Reader` is assigned without `local`, so it is a global.
-- Kept as-is so its visibility to other files does not change.
Reader = {}

-- Create a reader positioned on the first token of `tokens`.
function Reader:new(tokens)
    -- Instances resolve missing fields (the methods) through the receiver.
    self.__index = self
    local instance = setmetatable({}, self)
    instance.tokens = tokens
    instance.position = 1
    return instance
end

-- Return the current token and advance past it.
-- Returns nil once the position is beyond the last token.
function Reader:next()
    local current = self.tokens[self.position]
    self.position = self.position + 1
    return current
end

-- Return the current token without advancing (nil when exhausted).
function Reader:peek()
    local tokens, index = self.tokens, self.position
    return tokens[index]
end
23 | ||
--- Split source text into a flat list of token strings.
-- Leading whitespace and commas are skipped before each token. A token is
-- one of: `~@`, a single special character ([]{}()'`~^@), a double-quoted
-- string (with backslash escapes), a `;` comment running to end of line,
-- or a run of characters that are neither whitespace nor special.
-- Comments are recognised but not returned.
-- @param str the source text to tokenize
-- @return an array of token strings, in source order (possibly empty)
function M.tokenize(str)
    local results = {}
    local re_pos = 1
    local re = rex.new("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)", rex.flags().EXTENDED)
    while true do
        local s, e, t = re:exec(str, re_pos)
        -- No match, or an empty overall match: nothing more can be consumed.
        if not s or s > e then break end
        re_pos = e + 1
        local val = string.sub(str, t[1], t[2])
        -- The capture is empty when only whitespace/commas were consumed
        -- (e.g. trailing blanks); that is not a token. Tokens starting with
        -- ';' are comments and are dropped as well.
        if val ~= "" and string.sub(val, 1, 1) ~= ";" then
            results[#results + 1] = val
        end
    end
    return results
end
39 | ||
-- Escape sequences recognised inside string literals, keyed by the character
-- that follows the backslash. Any other backslash pair is left untouched.
local string_escapes = { ['"'] = '"', ['n'] = '\n', ['\\'] = '\\' }

--- Read a single atom from the reader and convert it to a value.
-- Conversions, tried in this order:
--   * numeric token (optional '-', a digit, then digits/dots) -> number
--   * token starting with '"'  -> string with its quotes removed and the
--     escapes \" \n \\ decoded
--   * token starting with ':'  -> string "\177" followed by the rest
--   * nil / true / false       -> Nil / true / false
--   * anything else            -> Symbol
-- @param rdr a Reader positioned on the atom; it is advanced by one token
-- @return the converted value (always a single value)
-- Throws (via `throw`) when the input is exhausted or a numeric-looking
-- token is not a valid number.
function M.read_atom(rdr)
    local token = rdr:next()
    if token == nil then throw("unexpected EOF") end

    -- One pattern covers both integers and floats.
    if string.match(token, "^-?%d[%d.]*$") then
        local num = tonumber(token)
        -- e.g. "1.2.3" looks numeric but is not a number.
        if num == nil then throw("invalid number '"..token.."'") end
        return num
    elseif string.sub(token,1,1) == '"' then
        local sval = string.sub(token,2,string.len(token)-1)
        -- Decode every escape in one left-to-right pass so that an escaped
        -- backslash followed by 'n' is not mistaken for a newline escape.
        -- The parentheses drop gsub's second result (the match count).
        return (string.gsub(sval, '\\(.)', string_escapes))
    elseif string.sub(token,1,1) == ':' then
        return "\177" .. string.sub(token,2)
    elseif token == "nil" then return Nil
    elseif token == "true" then return true
    elseif token == "false" then return false
    else return Symbol:new(token)
    end
end
60 | ||
--- Read a delimited run of forms into a plain array.
-- @param rdr   a Reader positioned on the opening delimiter
-- @param start the opening delimiter token expected first
-- @param last  the closing delimiter token that ends the sequence
-- @return an array of the forms read between the delimiters
-- Throws (via `throw`) if the first token is not `start`, or if the tokens
-- run out before `last` is seen.
function M.read_sequence(rdr, start, last)
    local opener = rdr:next()
    if opener ~= start then
        throw("expected '"..start.."'")
    end

    local items = {}
    local upcoming = rdr:peek()
    while upcoming ~= last do
        if not upcoming then
            throw("expected '"..last.."', got EOF")
        end
        table.insert(items, M.read_form(rdr))
        upcoming = rdr:peek()
    end

    -- Consume the closing delimiter.
    rdr:next()
    return items
end
75 | ||
--- Read a parenthesised sequence and wrap it with types.List:new.
-- @param rdr a Reader positioned on '('
function M.read_list(rdr)
    local elements = M.read_sequence(rdr, '(', ')')
    return types.List:new(elements)
end
79 | ||
--- Read a square-bracketed sequence and wrap it with types.Vector:new.
-- @param rdr a Reader positioned on '['
function M.read_vector(rdr)
    local elements = M.read_sequence(rdr, '[', ']')
    return types.Vector:new(elements)
end
83 | ||
--- Read a brace-delimited sequence and build a hash map from it.
-- The forms read between '{' and '}' are passed, in order, as extra
-- arguments to types._assoc_BANG together with a fresh types.HashMap.
-- @param rdr a Reader positioned on '{'
-- @return whatever types._assoc_BANG returns
function M.read_hash_map(rdr)
    local seq = M.read_sequence(rdr, '{', '}')
    -- The global `unpack` exists only in Lua 5.1/LuaJIT; later versions
    -- provide `table.unpack` instead. Use whichever is available.
    local unpack_fn = table.unpack or unpack
    return types._assoc_BANG(types.HashMap:new(), unpack_fn(seq))
end
88 | ||
-- Prefix tokens that wrap the following form as (symbol form).
local prefix_macros = {
    ["'"]  = 'quote',
    ['`']  = 'quasiquote',
    ['~']  = 'unquote',
    ['~@'] = 'splice-unquote',
    ['@']  = 'deref',
}

-- Closing delimiters that are errors when met at the start of a form,
-- mapped to the message that is thrown.
local stray_closers = {
    [')'] = "unexpected ')'",
    [']'] = "unexpected ']'",
    ['}'] = "unexpected '}'",
}

--- Read one complete form, dispatching on the next token.
--   * ' ` ~ ~@ @  -> list of (quote | quasiquote | unquote | splice-unquote
--                    | deref) followed by the next form
--   * ^           -> reads a metadata form, then a target form, and returns
--                    the list (with-meta target metadata)
--   * ( [ {       -> list / vector / hash-map readers
--   * ) ] }       -> throws "unexpected ..."
--   * otherwise   -> M.read_atom
-- @param rdr a Reader positioned at the start of the form
function M.read_form(rdr)
    local token = rdr:peek()
    local wrapper = prefix_macros[token]
    local complaint = stray_closers[token]

    if wrapper then
        rdr:next()
        return List:new({Symbol:new(wrapper), M.read_form(rdr)})
    elseif token == '^' then
        rdr:next()
        -- The metadata comes first in the source but last in the result.
        local meta = M.read_form(rdr)
        return List:new({Symbol:new('with-meta'), M.read_form(rdr), meta})
    elseif complaint then
        throw(complaint)
    elseif token == '(' then
        return M.read_list(rdr)
    elseif token == '[' then
        return M.read_vector(rdr)
    elseif token == '{' then
        return M.read_hash_map(rdr)
    else
        return M.read_atom(rdr)
    end
end
121 | ||
--- Tokenize `str` and read the first form from it.
-- @param str source text
-- @return the value produced by M.read_form for the first form
-- Raises an error whose value is nil when `str` contains no tokens.
-- NOTE(review): the nil error value looks deliberate (presumably callers
-- use it to tell "nothing to read" apart from real errors) -- confirm
-- against the callers before changing it.
function M.read_str(str)
    local tokens = M.tokenize(str)
    if #tokens > 0 then
        return M.read_form(Reader:new(tokens))
    end
    error(nil)
end
127 | ||
128 | return M |