Merge pull request #406 from chr15m/lib-alias-hacks
[jackhill/mal.git] / lua / reader.lua
1 local rex = require('rex_pcre')
2 local string = require('string')
3 local table = require('table')
4 local types = require('types')
5 local throw, Nil, Symbol, List = types.throw, types.Nil,
6 types.Symbol, types.List
7
-- Module table: the reader functions defined below are attached to it, and it
-- is what this file returns to `require`.
local M = {}
9
-- Reader: a cursor over a list of tokens.
-- Fields on each instance:
--   tokens   - the token list handed to Reader:new
--   position - 1-based index of the token that peek() will return
-- NOTE(review): Reader is assigned as a global (no `local`); left as-is in
-- case other files rely on it -- confirm before narrowing the scope.
Reader = {}

--- Create a reader positioned at the first token.
-- The receiver is used as the metatable, so methods are looked up on it.
-- @param tokens list of token strings
-- @return a new reader instance
function Reader:new(tokens)
    self.__index = self
    local instance = setmetatable({position = 1, tokens = tokens}, self)
    return instance
end

--- Return the token at the cursor and advance the cursor by one.
-- Returns nil once the cursor has moved past the last token (the cursor is
-- still advanced in that case).
function Reader:next()
    local current = self.position
    self.position = current + 1
    return self.tokens[current]
end

--- Return the token at the cursor without advancing (nil when exhausted).
function Reader:peek()
    local current = self.position
    return self.tokens[current]
end
23
--- Split source text into a flat list of token strings.
-- Each regex match skips leading whitespace/commas and then captures one of:
-- `~@`, a single special character ([]{}()'`~^@), a double-quoted string
-- (the closing quote is optional, so unterminated strings still become a
-- token), a `;` comment running to end of line, or a run of characters that
-- are not whitespace or special.
-- Comment tokens are dropped. Empty captures are dropped too: they occur when
-- a match consumes only trailing whitespace/commas, and would otherwise show
-- up as a bogus "" token (later read as a symbol with an empty name).
-- @param str source text
-- @return list (1-based) of non-empty, non-comment token strings
function M.tokenize(str)
    local results = {}
    local re_pos = 1
    local re = rex.new("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)", rex.flags().EXTENDED)
    while true do
        -- s, e: bounds of the whole match; t: offsets of the capture group
        local s, e, t = re:exec(str, re_pos)
        -- no match, or a zero-length match (nothing left to consume): done
        if not s or s > e then break end
        re_pos = e + 1
        local val = string.sub(str, t[1], t[2])
        if val ~= "" and string.sub(val, 1, 1) ~= ";" then
            results[#results + 1] = val
        end
    end
    return results
end
39
-- Token classifiers, compiled once when the module loads rather than on
-- every call to M.read_atom.
local INT_RE = rex.new("^-?[0-9]+$")
local FLOAT_RE = rex.new("^-?[0-9][0-9.]*$")
local STRING_RE = rex.new("^\"(?:\\\\.|[^\\\\\"])*\"$")

--- Consume one token from the reader and convert it to a value.
-- Conversions, tried in this order:
--   integer / float literal -> Lua number
--   "..." string literal    -> Lua string with \\, \" and \n unescaped
--   token starting with ":" -> string made of "\177" followed by the rest
--   nil / true / false      -> Nil / true / false
--   anything else           -> Symbol
-- @param rdr reader providing next()
-- @return the converted value
-- Throws (via `throw`) when the tokens are exhausted, when a number-like
-- token cannot be converted by tonumber (e.g. "1.2.3"), or when a string
-- literal is missing its closing quote.
function M.read_atom(rdr)
    local token = rdr:next()
    if token == nil then
        -- e.g. input that ends right after a prefix such as ' or @
        throw("unexpected EOF")
    end

    if INT_RE:exec(token) then
        return tonumber(token)
    elseif FLOAT_RE:exec(token) then
        -- FLOAT_RE is loose (any mix of digits and dots), so make sure the
        -- token really is a number instead of silently returning nil.
        local num = tonumber(token)
        if num == nil then throw("invalid number: " .. token) end
        return num
    elseif STRING_RE:exec(token) then
        -- strip the surrounding quotes
        local sval = string.sub(token, 2, string.len(token) - 1)
        -- Park escaped backslashes as \177 first so that the backslash they
        -- produce cannot be mistaken for the start of \" or \n below.
        sval = string.gsub(sval, '\\\\', '\177')
        sval = string.gsub(sval, '\\"', '"')
        sval = string.gsub(sval, '\\n', '\n')
        sval = string.gsub(sval, '\177', '\\')
        return sval
    elseif string.sub(token, 1, 1) == '"' then
        -- starts with a quote but did not match STRING_RE: unterminated
        throw("expected '\"', got EOF")
    elseif string.sub(token, 1, 1) == ':' then
        -- ":name" tokens are returned as strings tagged with a \177 prefix
        return "\177" .. string.sub(token, 2)
    elseif token == "nil" then
        return Nil
    elseif token == "true" then
        return true
    elseif token == "false" then
        return false
    else
        return Symbol:new(token)
    end
end
64
--- Read a delimited sequence of forms into a plain Lua list.
-- Consumes the opening token (which must equal `start`), reads forms with
-- M.read_form until the next token equals `last`, then consumes `last`.
-- @param rdr   reader providing next() and peek()
-- @param start expected opening token, e.g. '('
-- @param last  closing token, e.g. ')'
-- @return list of the forms read, in order
-- Throws when the first token is not `start` or when the tokens run out
-- before `last` is seen.
function M.read_sequence(rdr, start, last)
    local opener = rdr:next()
    if opener ~= start then
        throw("expected '"..start.."'")
    end

    local items = {}
    while true do
        local upcoming = rdr:peek()
        if upcoming == last then break end
        if not upcoming then
            throw("expected '"..last.."', got EOF")
        end
        table.insert(items, M.read_form(rdr))
    end

    -- discard the closing token
    rdr:next()
    return items
end
79
--- Read a parenthesised sequence and wrap it with types.List:new.
-- @param rdr reader positioned at '('
function M.read_list(rdr)
    local items = M.read_sequence(rdr, '(', ')')
    return types.List:new(items)
end
83
--- Read a square-bracketed sequence and wrap it with types.Vector:new.
-- @param rdr reader positioned at '['
function M.read_vector(rdr)
    local items = M.read_sequence(rdr, '[', ']')
    return types.Vector:new(items)
end
87
--- Read a brace-delimited sequence and build a hash map from it.
-- The forms read are passed, unpacked, to types._assoc_BANG together with a
-- fresh types.HashMap; presumably they are alternating keys and values
-- (verify against types._assoc_BANG).
-- @param rdr reader positioned at '{'
function M.read_hash_map(rdr)
    local entries = M.read_sequence(rdr, '{', '}')
    local hm = types.HashMap:new()
    return types._assoc_BANG(hm, table.unpack(entries))
end
92
-- Prefix tokens that expand to (<symbol> <next form>).
local PREFIX_MACROS = {
    ["'"]  = 'quote',
    ['`']  = 'quasiquote',
    ['~']  = 'unquote',
    ['~@'] = 'splice-unquote',
    ['@']  = 'deref',
}

-- Closing delimiters that are errors when met at the start of a form,
-- mapped to the message to throw.
local STRAY_CLOSERS = {
    [')'] = "unexpected ')'",
    [']'] = "unexpected ']'",
    ['}'] = "unexpected '}'",
}

--- Read one complete form starting at the reader's current token.
-- Dispatches on the upcoming token:
--   ' ` ~ ~@ @  -> List of (quote|quasiquote|unquote|splice-unquote|deref)
--                  and the following form
--   ^           -> List of with-meta, the SECOND form read, then the first
--                  (the metadata is read first but placed last)
--   ( [ {       -> read_list / read_vector / read_hash_map
--   ) ] }       -> throws "unexpected '<delim>'"
--   otherwise   -> read_atom
-- @param rdr reader providing next() and peek()
-- @return the form that was read
function M.read_form(rdr)
    local token = rdr:peek()

    local macro_name = PREFIX_MACROS[token]
    if macro_name then
        rdr:next()
        return List:new({Symbol:new(macro_name), M.read_form(rdr)})
    end

    if token == '^' then
        rdr:next()
        local meta = M.read_form(rdr)
        return List:new({Symbol:new('with-meta'), M.read_form(rdr), meta})
    end

    local stray_msg = STRAY_CLOSERS[token]
    if stray_msg then
        throw(stray_msg)
    elseif token == '(' then
        return M.read_list(rdr)
    elseif token == '[' then
        return M.read_vector(rdr)
    elseif token == '{' then
        return M.read_hash_map(rdr)
    else
        return M.read_atom(rdr)
    end
end
125
--- Tokenize a string and read the first form from it.
-- @param str source text
-- @return the form produced by M.read_form
-- Raises an error whose value is nil when the text yields no tokens;
-- NOTE(review): this looks like a deliberate "nothing to read" signal for the
-- caller -- confirm against the call sites before changing it.
function M.read_str(str)
    local toks = M.tokenize(str)
    if #toks < 1 then
        error(nil)
    end
    local rdr = Reader:new(toks)
    return M.read_form(rdr)
end
131
-- Export the module table (tokenize, read_atom, read_sequence, read_list,
-- read_vector, read_hash_map, read_form, read_str).
return M