crystal: add symbols and fix ignoring comment
[jackhill/mal.git] / crystal / reader.cr
1 require "./types"
2
# Cursor over a list of token strings.
#
# Tokens whose first character is ';' are treated as comments and are never
# returned by `peek` or `next`.
class Reader
  # tokens - the token strings to iterate over, in source order.
  def initialize(@tokens : Array(String))
    @pos = 0
  end

  # Returns the token at the current position (comments included), or nil
  # when the position is past the end of the token list.
  def current_token
    # `[]?` yields nil for an out-of-range index without rescuing (and thereby
    # hiding) unrelated exceptions.
    @tokens[@pos]?
  end

  # Returns the next non-comment token without consuming it, or nil at the
  # end of input. Any comment tokens in front of it are skipped permanently.
  def peek
    while (token = current_token) && token.starts_with?(';')
      @pos += 1
    end
    current_token
  end

  # Returns the next non-comment token (or nil at the end of input) and then
  # moves the position one step forward.
  def next
    peek
  ensure
    # Runs after the value of `peek` has been captured, so the caller still
    # receives the token that was current before the advance.
    @pos += 1
  end
end
29
# Splits `str` into token strings.
#
# Every match of the pattern contributes its first capture group; captures
# that are empty are dropped. Comment tokens (starting with ';') are kept in
# the result.
def tokenize(str)
  pattern = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/
  captures = str.scan(pattern).map { |match| match[1] }
  captures.reject { |token| token.empty? }
end
34
# Tokenizes `str` and reads exactly one form from it.
#
# Returns the value produced by `read_form`.
# Raises (with the message "expected EOF, got <token>") when non-comment
# tokens remain after the form. Errors raised by `read_form` propagate
# unchanged.
def read_str(str)
  r = Reader.new(tokenize(str))
  form = read_form r

  # Only look for leftover tokens after a successful read. Doing this check
  # inside an `ensure` would raise while a parse error is already in flight
  # and replace it with the less helpful "expected EOF" message.
  trailing = r.peek
  raise "expected EOF, got #{trailing}" unless trailing.nil?

  form
end
45
# Aborts parsing by raising a Mal::ParseException that carries `msg`.
# Never returns.
def parse_error(msg)
  error = Mal::ParseException.new(msg)
  raise error
end
49
# Reads a delimited sequence of forms into `init`.
#
# reader - token source responding to `next` and `peek`.
# init   - collection that receives each parsed form via `<<`.
# open   - Char expected as the first character of the opening token.
# close  - Char that terminates the sequence.
#
# Returns `init` after appending every form found between the delimiters.
# Calls `parse_error` when the opening token is missing or wrong, or when the
# input ends before the closing delimiter is seen.
def read_sequence(reader, init, open, close)
  token = reader.next
  parse_error "expected '#{open}', got EOF" unless token
  parse_error "expected '#{open}', got #{token}" unless token[0] == open

  loop do
    # Peek rather than consume: the token belongs to the nested form unless
    # it is the closing delimiter.
    token = reader.peek
    parse_error "expected '#{close}', got EOF" unless token
    break if token[0] == close

    init << read_form reader
  end

  # Consume the closing delimiter.
  reader.next
  init
end
67
# Reads a parenthesised sequence from `reader` into a fresh Mal::List and
# returns it.
def read_list(reader)
  list = Mal::List.new
  read_sequence(reader, list, '(', ')')
end
71
# Reads a square-bracketed sequence from `reader` into a fresh Mal::Vector
# and returns it.
def read_vector(reader)
  vector = Mal::Vector.new
  read_sequence(reader, vector, '[', ']')
end
75
# Consumes one token from `reader` and converts it to an atomic value.
#
# Conversions, tried in order:
#   optional '-' followed by digits -> integer (via `to_i`)
#   "true" / "false" / "nil"       -> true / false / nil
#   token starting with '"'        -> string with the surrounding quotes
#                                     removed and escapes translated
#   anything else                  -> Mal::Symbol
#
# Calls `parse_error` when there is no token left to read.
def read_atom(reader)
  token = reader.next
  parse_error "expected Atom but got EOF" unless token

  case
  when token =~ /^-?\d+$/ then token.to_i
  when token == "true"    then true
  when token == "false"   then false
  when token == "nil"     then nil
  when token[0] == '"'
    # Translate the escapes in a single pass so that an escaped backslash is
    # never re-read as the start of another escape. Sequences other than
    # \" \n and \\ are left exactly as written.
    token[1..-2].gsub(/\\./) do |escape|
      case escape
      when "\\\"" then "\""
      when "\\n"  then "\n"
      when "\\\\" then "\\"
      else             escape
      end
    end
  else
    Mal::Symbol.new token
  end
end
89
# Expands a one-argument reader macro.
#
# Discards the current token (the macro character), reads the form that
# follows it, and returns a Mal::List holding the symbol `symname` followed
# by that form.
def list_of(symname, reader)
  reader.next
  expansion = Mal::List.new
  expansion << Mal::Symbol.new(symname)
  expansion << read_form(reader)
end
94
# Reads one complete form from `reader`, dispatching on the next token.
#
# Handles lists, vectors and the reader macros ' ` ~ ~@ @ and ^; every other
# token is handed to `read_atom`.
#
# Calls `parse_error` at end of input, on a comment token, and on a closing
# delimiter that has no matching opener.
def read_form(reader) : Mal::Type
  token = reader.peek

  parse_error "unexpected EOF" unless token
  # Reader#peek already skips comment tokens; this guards against a reader
  # implementation that does not.
  parse_error "unexpected comment" if token[0] == ';'

  case token
  when "("  then read_list reader
  when ")"  then parse_error "unexpected ')'"
  when "["  then read_vector reader
  when "]"  then parse_error "unexpected ']'"
  when "'"  then list_of("quote", reader)
  when "`"  then list_of("quasiquote", reader)
  when "~"  then list_of("unquote", reader)
  when "~@" then list_of("splice-unquote", reader)
  when "^"
    # `^meta form` takes two forms and expands to (with-meta form meta):
    # the metadata is written first but ends up as the last element.
    reader.next
    meta = read_form reader
    Mal::List.new << Mal::Symbol.new("with-meta") << read_form(reader) << meta
  when "@"  then list_of("deref", reader)
  else           read_atom reader
  end
end
115
require "./printer"

# Smoke test executed whenever this file is loaded: parse a vector followed
# by a trailing comment and print the result via pr_str.
parsed = read_str("[+ 1 2] ; bar")
puts pr_str(parsed)