vb: add seq and string?
[jackhill/mal.git] / julia / reader.jl
1 module reader
2
3 export read_str
4
5 import types
6
# Cursor over a sequence of tokens.
#   tokens   - the indexable token collection being read (left untyped)
#   position - 1-based index of the next token to hand out
type Reader
    tokens
    position::Int64
end
11
# Consume and return the token at the reader's current position.
# Returns `nothing` (without moving the cursor) once the position is past
# the last token.
function next(rdr::Reader)
    idx = rdr.position
    if idx > length(rdr.tokens)
        return nothing
    end
    # Advance the cursor first, then return the token it was pointing at.
    rdr.position = idx + 1
    rdr.tokens[idx]
end
19
# Return the token at the reader's current position without consuming it,
# or `nothing` when the position is past the last token.
function peek(rdr::Reader)
    idx = rdr.position
    idx <= length(rdr.tokens) ? rdr.tokens[idx] : nothing
end
26
27
# Split `str` into tokens.
# Each regex match skips leading whitespace/commas and captures one of:
# `~@`, a single special character, a double-quoted string, a `;` comment,
# or a run of ordinary (non-delimiter) characters.
# Empty captures and comment tokens are dropped from the result.
function tokenize(str)
    pattern = r"[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:\\.|[^\\\"])*\"|;.*|[^\s\[\]{}('\"`,;)]*)"
    captured = map(eachmatch(pattern, str)) do m
        m.captures[1]
    end
    filter(captured) do tok
        # The emptiness check short-circuits, so tok[1] is only read on
        # non-empty tokens.
        !(tok == "" || tok[1] == ';')
    end
end
33
# Consume one token from `rdr` and convert it to an atom:
#   integer literal        -> Int
#   other numeric literal  -> floating point value
#   double-quoted string   -> string with \" \n \\ escapes decoded
#   :keyword               -> string prefixed with the marker character U+029E
#   nil / true / false     -> nothing / true / false
#   anything else          -> symbol
# Raises an error if the token stream is already exhausted.
function read_atom(rdr)
    token = next(rdr)
    # `next` yields `nothing` at end of input (e.g. a lone "'"); report that
    # explicitly instead of failing inside the regex match below.
    if token === nothing
        error("unexpected EOF")
    end
    if ismatch(r"^-?[0-9]+$", token)
        parse(Int, token)
    elseif ismatch(r"^-?[0-9][0-9.]*$", token)
        float(token)
    elseif ismatch(r"^\"(?:\\.|[^\\\"])*\"$", token)
        # Same string sub-pattern the tokenizer uses, so string tokens that
        # contain literal newlines are still recognised as strings.
        # Decode every escape in a single left-to-right pass: chained
        # replace calls would mis-read an escaped backslash followed by `n`
        # as a newline escape. Unknown escapes are left untouched.
        escapes = Dict("\\n" => "\n", "\\\"" => "\"", "\\\\" => "\\")
        replace(token[2:end-1], r"\\.", (seq) -> get(escapes, seq, seq))
    elseif token[1] == ':'
        "\u029e$(token[2:end])"
    elseif token == "nil"
        nothing
    elseif token == "true"
        true
    elseif token == "false"
        false
    else
        symbol(token)
    end
end
59
# Read a delimited sequence of forms from `rdr`.
# The next token must equal `start`; forms are then read until a token equal
# to `last` is seen, which is consumed. Returns the forms as an Any array.
# Raises an error on a wrong opening token or if input ends before `last`.
function read_list(rdr, start="(", last=")")
    opener = next(rdr)
    if opener != start
        error("expected '$(start)'")
    end

    items = Any[]
    while true
        upcoming = peek(rdr)
        if upcoming == last
            break
        elseif upcoming === nothing
            # peek returns nothing once the tokens are exhausted
            error("expected '$(last)', got EOF")
        end
        push!(items, read_form(rdr))
    end

    # Discard the closing delimiter.
    next(rdr)
    items
end
75
# Read a `[ ... ]` sequence and return its elements as a tuple.
function read_vector(rdr)
    tuple(read_list(rdr, "[", "]")...)
end
80
# Read a `{ ... }` sequence and pass its elements, as separate arguments,
# to `types.hash_map`, returning whatever that constructor produces.
function read_hash_map(rdr)
    types.hash_map(read_list(rdr, "{", "}")...)
end
85
# Read one complete form from `rdr`, chosen by the upcoming token:
#   ' ` ~ ~@ @  -> two-element Any array: (quote|quasiquote|unquote|
#                  splice-unquote|deref) followed by the next form
#   ^           -> (with-meta <form> <meta>), where <meta> is read first
#   ( [ {       -> list, vector or hash-map via the matching reader
#   ) ] }       -> error: closing delimiter without an opener
#   otherwise   -> an atom
function read_form(rdr)
    token = peek(rdr)

    # with-meta takes two forms: the metadata comes first in the source but
    # second in the resulting list.
    if token == "^"
        next(rdr)
        meta = read_form(rdr)
        target = read_form(rdr)
        return Any[symbol("with-meta"), target, meta]
    end

    # Reader macros that wrap exactly one following form.
    head = nothing
    if token == "'"
        head = :quote
    elseif token == "`"
        head = :quasiquote
    elseif token == "~"
        head = :unquote
    elseif token == "~@"
        head = symbol("splice-unquote")
    elseif token == "@"
        head = symbol("deref")
    end
    if head !== nothing
        next(rdr)
        return Any[head, read_form(rdr)]
    end

    # Collections and stray closing delimiters.
    if token == "("
        return read_list(rdr)
    elseif token == "["
        return read_vector(rdr)
    elseif token == "{"
        return read_hash_map(rdr)
    elseif token == ")"
        error("unexpected ')'")
    elseif token == "]"
        error("unexpected ']'")
    elseif token == "}"
        error("unexpected '}'")
    end

    read_atom(rdr)
end
124
# Tokenize `str` and read the first form from it.
# Returns `nothing` when the input contains no tokens (for example when it
# is empty or holds only whitespace and comments).
function read_str(str)
    tokens = tokenize(str)
    isempty(tokens) ? nothing : read_form(Reader(tokens, 1))
end
132
133 end