Merge branch 'master' into chuck-implementation
[jackhill/mal.git] / crystal / reader.cr
1 require "./types"
2 require "./error"
3
# Recursive-descent reader that turns a flat list of tokens into Mal values.
#
# The reader keeps a cursor (`@pos`) into `@tokens`. Tokens that start with
# ';' are treated as comments and are skipped by `peek` and `next`.
class Reader
  def initialize(@tokens : Array(String))
    @pos = 0
  end

  # Decodes the escape sequences `\"`, `\n` and `\\` in *raw* (the text
  # between the quotes of a string token).
  #
  # The text is processed in a single left-to-right pass so that an escaped
  # backslash followed by `n` stays a backslash and an `n` instead of being
  # re-read as a newline escape. Any other backslash sequence is kept as is.
  def self.unescape(raw : String) : String
    raw.gsub(/\\(.)/) do |esc|
      case esc
      when "\\\"" then "\""
      when "\\n"  then "\n"
      when "\\\\" then "\\"
      else             esc
      end
    end
  end

  # Returns the token under the cursor, or nil when the cursor is past the
  # end of the token list. Does not skip comments.
  def current_token
    @tokens[@pos]?
  end

  # Returns the next non-comment token without consuming it, or nil at EOF.
  # Comment tokens in front of it are consumed as a side effect.
  def peek
    loop do
      token = current_token
      return token unless token && token.starts_with?(';')
      @pos += 1
    end
  end

  # Returns the next non-comment token (nil at EOF) and moves the cursor
  # past it. The cursor advances even at EOF.
  def next
    token = peek
    @pos += 1
    token
  end

  # Reads a delimited sequence of forms and appends each one to *init*.
  #
  # *open* and *close* are the delimiter characters. The opening delimiter
  # must be the next token. Reports a parse error when the opening delimiter
  # is missing or when EOF is reached before the closing delimiter.
  # Returns *init*.
  def read_sequence(init, open, close)
    token = self.next
    parse_error "expected '#{open}', got EOF" unless token
    parse_error "expected '#{open}', got #{token}" unless token[0] == open

    loop do
      token = peek
      parse_error "expected '#{close}', got EOF" unless token
      break if token[0] == close

      init << read_form
    end

    # Consume the closing delimiter.
    self.next
    init
  end

  # Reads a parenthesised list.
  def read_list
    Mal::Type.new read_sequence(Mal::List.new, '(', ')')
  end

  # Reads a square-bracketed vector.
  def read_vector
    Mal::Type.new read_sequence(Mal::Vector.new, '[', ']')
  end

  # Reads a brace-delimited hash-map from alternating keys and values.
  #
  # Reports a parse error when the number of elements is odd or when a key
  # does not unwrap to a String (keywords are read as specially prefixed
  # strings by `read_atom`, so they pass this check).
  def read_hashmap
    types = read_sequence([] of Mal::Type, '{', '}')

    parse_error "odd number of elements for hash-map: #{types.size}" if types.size.odd?
    map = Mal::HashMap.new

    types.each_slice(2) do |kv|
      k, v = kv[0].unwrap, kv[1]
      case k
      when String
        map[k] = v
      else
        parse_error("key of hash-map must be string or keyword")
      end
    end

    Mal::Type.new map
  end

  # Reads a single atom: an integer, `true`, `false`, `nil`, a string, a
  # keyword, or (as the fallback) a symbol.
  def read_atom
    token = self.next
    parse_error "expected Atom but got EOF" unless token

    Mal::Type.new case
    when token =~ /^-?\d+$/ then token.to_i64
    when token == "true"    then true
    when token == "false"   then false
    when token == "nil"     then nil
    when token[0] == '"'    then Reader.unescape(token[1..-2])
    when token[0] == ':'    then "\u029e#{token[1..-1]}" # keyword: prefixed string
    else                         Mal::Symbol.new token
    end
  end

  # Builds the list `(symname <next form>)`, reading the form from the
  # token stream.
  def list_of(symname)
    Mal::List.new << gen_type(Mal::Symbol, symname) << read_form
  end

  # Reads one complete form, dispatching on the next token.
  #
  # Reader macros (' ` ~ ~@ @ ^) are expanded into the corresponding
  # lists. Reports a parse error on EOF or on a stray closing delimiter.
  def read_form
    token = peek

    parse_error "unexpected EOF" unless token

    Mal::Type.new case token
    when "("  then read_list
    when ")"  then parse_error "unexpected ')'"
    when "["  then read_vector
    when "]"  then parse_error "unexpected ']'"
    when "{"  then read_hashmap
    when "}"  then parse_error "unexpected '}'"
    when "'"  then self.next; list_of("quote")
    when "`"  then self.next; list_of("quasiquote")
    when "~"  then self.next; list_of("unquote")
    when "~@" then self.next; list_of("splice-unquote")
    when "@"  then self.next; list_of("deref")
    when "^"
      self.next
      # The metadata form comes first in the source but last in the
      # resulting list: (with-meta <form> <meta>).
      meta = read_form
      list_of("with-meta") << meta
    else read_atom
    end
  end
end
123
# Splits *str* into an array of token strings.
#
# Whitespace and commas between tokens are discarded. Each token is one of:
# `~@`, a single special character, a double-quoted string (with closing
# quote), a `;` comment running to the end of the line, or a run of
# non-special characters. Empty matches are dropped.
def tokenize(str)
  pattern = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/
  tokens = [] of String

  str.scan(pattern) do |match|
    text = match[1]
    tokens << text unless text.empty?
  end

  tokens
end
128
# Parses *str* and returns the single form it contains.
#
# Raises Mal::ParseException when tokens remain after the first complete
# form. The leftover check runs only after the form has been read
# successfully, so an error raised while reading the form propagates
# unchanged instead of being replaced by an "expected EOF" message.
def read_str(str)
  reader = Reader.new(tokenize(str))
  form = reader.read_form

  leftover = reader.peek
  unless leftover.nil?
    raise Mal::ParseException.new "expected EOF, got #{leftover.to_s}"
  end

  form
end
139