All: don't ignore */mal. Fixes #99
[jackhill/mal.git] / python / reader.py
1 import re
2 from mal_types import (_symbol, _keyword, _list, _vector, _hash_map)
3
class Blank(Exception):
    """Signals that the text handed to ``read_str`` produced no tokens."""
5
class Reader:
    """Cursor over a list of tokens.

    ``tokens`` is the sequence being read and ``position`` is the index of
    the token that the next ``peek``/``next`` call will look at.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Return the current token and move the cursor one step forward.

        The cursor is advanced before the token is fetched, so it still
        moves when the lookup fails with ``IndexError`` past the end.
        """
        current = self.position
        self.position = current + 1
        return self.tokens[current]

    def peek(self):
        """Return the current token without consuming it, or None at the end."""
        return self.tokens[self.position] if self.position < len(self.tokens) else None
20
def tokenize(str):
    """Split source text into a list of token strings.

    Leading whitespace and commas before each token are skipped. A token is
    one of: ``~@``, a single special character, a double-quoted string, a
    ``;`` comment running to the end of the line, or a run of other
    characters. Comment tokens are dropped from the result.
    """
    pattern = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"|;.*|[^\s\[\]{}()'"`@,;]+)""")
    # Every alternative in the pattern consumes at least one character, so a
    # matched token is never empty and startswith() is a safe comment test.
    return [tok for tok in pattern.findall(str) if not tok.startswith(';')]
24
def read_atom(reader):
    """Consume one token from ``reader`` and convert it to an atom.

    Conversion rules, tried in order:
      * all digits with optional leading ``-``  -> ``int``
      * digits and dots with optional leading ``-`` -> ``float``
      * starts with ``"``  -> the text between the quotes, with ``\\"``
        unescaped to ``"``
      * starts with ``:``  -> ``_keyword`` of the rest of the token
      * ``nil`` / ``true`` / ``false`` -> ``None`` / ``True`` / ``False``
      * anything else -> ``_symbol`` of the token

    Raises:
        ValueError: if the token looks numeric but is not a valid float
            (for example ``1.2.3``).
    """
    int_re = re.compile(r"-?[0-9]+$")
    float_re = re.compile(r"-?[0-9][0-9.]*$")
    token = reader.next()
    if re.match(int_re, token):
        return int(token)
    elif re.match(float_re, token):
        # Tokens reaching this branch contain a '.', which int() rejects;
        # they have to be parsed with float().
        return float(token)
    elif token[0] == '"':
        return token[1:-1].replace('\\"', '"')
    elif token[0] == ':':
        return _keyword(token[1:])
    elif token == "nil":
        return None
    elif token == "true":
        return True
    elif token == "false":
        return False
    else:
        return _symbol(token)
37
def read_sequence(reader, typ=list, start='(', end=')'):
    """Read a delimited run of forms from ``reader`` into a ``typ()`` container.

    The first token must equal ``start``. Forms are then read with
    ``read_form`` and appended until the ``end`` token is seen, which is
    consumed before returning.

    Raises:
        Exception: if the first token is not ``start``, or the tokens run
            out before ``end`` is found.
    """
    items = typ()
    if reader.next() != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        items.append(read_form(reader))

    # Step over the closing delimiter.
    reader.next()
    return items
50
def read_hash_map(reader):
    """Read a ``{ ... }`` form and pass its elements to ``_hash_map``."""
    entries = read_sequence(reader, typ=list, start='{', end='}')
    return _hash_map(*entries)
54
def read_list(reader):
    """Read a ``( ... )`` form into a ``_list`` container."""
    return read_sequence(reader, typ=_list, start='(', end=')')
57
def read_vector(reader):
    """Read a ``[ ... ]`` form into a ``_vector`` container."""
    return read_sequence(reader, typ=_vector, start='[', end=']')
60
def read_form(reader):
    """Read one complete form from ``reader`` and return its AST.

    The next token decides what is parsed:
      * a ``;`` comment token is consumed and ``None`` is returned
      * ``'``, `````, ``~``, ``~@`` and ``@`` wrap the following form in a
        list headed by ``quote``, ``quasiquote``, ``unquote``,
        ``splice-unquote`` or ``deref`` respectively
      * ``^`` reads a metadata form, then a target form, and returns
        ``(with-meta target meta)``
      * ``(``, ``[`` and ``{`` start a list, vector or hash-map
      * anything else is read as an atom

    Raises:
        Exception: if the input ends where a form is expected, or a closing
            ``)``, ``]`` or ``}`` appears with no matching opener.
    """
    token = reader.peek()
    if token is None:
        # The tokens ran out (e.g. the input was a lone "'"). Report it as a
        # reader error rather than failing on token[0] with a TypeError.
        raise Exception("unexpected EOF")

    # comment: consume it and produce nothing
    if token[0] == ';':
        reader.next()
        return None

    # reader macros that wrap exactly one following form
    wrappers = {
        '\'': 'quote',
        '`': 'quasiquote',
        '~': 'unquote',
        '~@': 'splice-unquote',
        '@': 'deref',
    }
    if token in wrappers:
        reader.next()
        return _list(_symbol(wrappers[token]), read_form(reader))

    if token == '^':
        reader.next()
        # The metadata comes first in the source but last in the result.
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    # list
    if token == ')':
        raise Exception("unexpected ')'")
    if token == '(':
        return read_list(reader)

    # vector
    if token == ']':
        raise Exception("unexpected ']'")
    if token == '[':
        return read_vector(reader)

    # hash-map
    if token == '}':
        raise Exception("unexpected '}'")
    if token == '{':
        return read_hash_map(reader)

    # atom
    return read_atom(reader)
101
def read_str(str):
    """Tokenize ``str`` and read a single form from the resulting tokens.

    Raises:
        Blank: if tokenizing yields no tokens.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    return read_form(Reader(tokens))