# mal (make-a-lisp) — python/reader.py
# Step 9: add 'try*' form and 'throw'.
1 import re
2 from mal_types import (_symbol, _keyword, _list, _vector, _hash_map, _s2u, _u)
3
class Blank(Exception):
    """Raised by read_str when the input contains no readable tokens."""
5
class Reader():
    """Stateful cursor over a flat list of tokens.

    next() consumes and returns the current token; peek() inspects it
    without consuming, returning None once the tokens are exhausted.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Return the token at the cursor and advance past it."""
        token = self.tokens[self.position]
        self.position += 1
        return token

    def peek(self):
        """Return the token at the cursor without consuming it, or None at EOF."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
20
def tokenize(str):
    """Split a source string into mal tokens, dropping comment tokens.

    The pattern skips whitespace and commas, then captures one token:
    ~@, a single special char, a (possibly unterminated) string literal,
    a ;-comment, or a run of plain symbol characters.
    """
    token_pat = re.compile(
        r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""")
    tokens = []
    for tok in token_pat.findall(str):
        # ;-comments are tokenized but never surface to the reader.
        if not tok.startswith(';'):
            tokens.append(tok)
    return tokens
24
def _unescape(s):
    """Decode the mal string escapes \\\\, \\" and \\n in s.

    A placeholder char (U+029E) stands in for literal backslashes so the
    later \\n and \\" passes cannot re-interpret them.
    """
    placeholder = _u('\u029e')
    s = s.replace('\\\\', placeholder)
    s = s.replace('\\"', '"')
    s = s.replace('\\n', '\n')
    return s.replace(placeholder, '\\')
27
def read_atom(reader):
    """Consume one token from reader and convert it to a scalar mal value.

    Order matters: numbers before strings/keywords, literal words
    (nil/true/false) before the symbol fallback.
    Raises Exception on an unterminated string token.
    """
    int_re = re.compile(r"-?[0-9]+$")
    float_re = re.compile(r"-?[0-9][0-9.]*$")
    string_re = re.compile(r'"(?:[\\].|[^\\"])*"')
    token = reader.next()
    if re.match(int_re, token):
        return int(token)
    elif re.match(float_re, token):
        # Bug fix: this branch previously returned int(token), which raises
        # ValueError for any token that actually contains a '.', e.g. "1.5".
        return float(token)
    elif re.match(string_re, token):
        # Strip the surrounding quotes before decoding escapes.
        return _s2u(_unescape(token[1:-1]))
    elif token[0] == '"':
        # Starts like a string but never matched the closed-string pattern.
        raise Exception("expected '\"', got EOF")
    elif token[0] == ':':
        return _keyword(token[1:])
    elif token == "nil":
        return None
    elif token == "true":
        return True
    elif token == "false":
        return False
    else:
        return _symbol(token)
42
def read_sequence(reader, typ=list, start='(', end=')'):
    """Read forms up to the matching end delimiter, collected into a typ().

    Consumes both delimiters. Raises Exception if the first token is not
    `start`, or if EOF arrives before `end`.
    """
    opener = reader.next()
    if opener != start:
        raise Exception("expected '" + start + "'")

    ast = typ()
    while True:
        token = reader.peek()
        if token == end:
            break
        if not token:
            raise Exception("expected '" + end + "', got EOF")
        ast.append(read_form(reader))
    reader.next()  # consume the closing delimiter
    return ast
55
def read_hash_map(reader):
    """Read a {...} form into a mal hash-map (alternating key/value forms)."""
    return _hash_map(*read_sequence(reader, list, '{', '}'))
59
def read_list(reader):
    """Read a (...) form into a mal list."""
    return read_sequence(reader, typ=_list, start='(', end=')')
62
def read_vector(reader):
    """Read a [...] form into a mal vector."""
    return read_sequence(reader, typ=_vector, start='[', end=']')
65
def read_form(reader):
    """Read one complete form, expanding reader macros into (symbol form) lists."""
    token = reader.peek()

    # Comment tokens (normally filtered by tokenize) read as nothing.
    if token[0] == ';':
        reader.next()
        return None

    # Single-token reader macros: each one wraps the form that follows it.
    macros = {
        "'": 'quote',
        '`': 'quasiquote',
        '~': 'unquote',
        '~@': 'splice-unquote',
        '@': 'deref',
    }
    if token in macros:
        reader.next()
        return _list(_symbol(macros[token]), read_form(reader))

    # ^meta form  =>  (with-meta form meta); note the meta is read first.
    if token == '^':
        reader.next()
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    # Collections — and stray closing delimiters, which are errors here.
    if token == ')':
        raise Exception("unexpected ')'")
    if token == '(':
        return read_list(reader)
    if token == ']':
        raise Exception("unexpected ']'")
    if token == '[':
        return read_vector(reader)
    if token == '}':
        raise Exception("unexpected '}'")
    if token == '{':
        return read_hash_map(reader)

    # Anything else is a scalar atom.
    return read_atom(reader)
106
def read_str(str):
    """Tokenize the input string and read a single form from it.

    Raises Blank when the input yields no tokens (blank line or only
    comments), so the REPL can skip it silently.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    return read_form(Reader(tokens))