Commit | Line | Data |
---|---|---|
31690700 | 1 | import re |
e91c55c2 | 2 | from mal_types import (_symbol, _keyword, _list, _vector, _hash_map, _s2u, _u) |
31690700 JM |
3 | |
class Blank(Exception):
    """Raised by read_str when the input produces no tokens at all."""
5 | ||
class Reader:
    """Cursor over a list of tokens.

    `position` is the index of the next token to be consumed.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Consume and return the token at the current position.

        The cursor is advanced before the token is fetched, so it moves
        forward even when the lookup fails with IndexError.
        """
        index = self.position
        self.position = index + 1
        return self.tokens[index]

    def peek(self):
        """Return the current token without consuming it, or None at the end."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
20 | ||
def tokenize(str):
    """Split source text into a list of token strings.

    Whitespace and commas in front of a token are skipped by the pattern.
    Tokens that begin with ';' (comments running to end of line) are
    dropped from the result.
    """
    token_pattern = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""")
    tokens = []
    for candidate in token_pattern.findall(str):
        if candidate[0] == ';':
            continue
        tokens.append(candidate)
    return tokens
24 | ||
def _unescape(s):
    """Expand the escape sequences of a string token body.

    Handles backslash-backslash, backslash-quote and backslash-n, in that
    order of precedence.
    """
    # Escaped backslashes are parked behind a stand-in character first so the
    # following replacements cannot reinterpret them as the start of another
    # escape sequence; they are restored as single backslashes at the end.
    stand_in = _u('\u029e')
    s = s.replace('\\\\', stand_in)
    s = s.replace('\\"', '"')
    s = s.replace('\\n', '\n')
    return s.replace(stand_in, '\\')
8d78bc26 | 27 | |
31690700 JM |
def read_atom(reader):
    """Consume one token from `reader` and convert it to a value.

    Conversion rules, tried in order:
      * integer literal                -> int
      * other numeric literal          -> float
      * double-quoted string           -> unescaped text passed through _s2u
      * token starting with a quote
        that is not a complete string  -> raises Exception (unterminated)
      * token starting with ':'        -> _keyword(rest of token)
      * "nil" / "true" / "false"       -> None / True / False
      * anything else                  -> _symbol(token)

    Raises:
        Exception: for an unterminated string token.
        ValueError: for a token that matches the loose float pattern but is
            not a valid number (for example "1.2.3").
    """
    int_re = re.compile(r"-?[0-9]+$")
    float_re = re.compile(r"-?[0-9][0-9.]*$")
    string_re = re.compile(r'"(?:[\\].|[^\\"])*"')

    token = reader.next()
    if int_re.match(token):
        return int(token)
    if float_re.match(token):
        # Must be float(): int() rejects any token containing a '.', so
        # every decimal literal would raise ValueError.
        return float(token)
    if string_re.match(token):
        # Strip the surrounding quotes before expanding escapes.
        return _s2u(_unescape(token[1:-1]))
    if token[0] == '"':
        raise Exception("expected '\"', got EOF")
    if token[0] == ':':
        return _keyword(token[1:])
    if token == "nil":
        return None
    if token == "true":
        return True
    if token == "false":
        return False
    return _symbol(token)
31690700 JM |
42 | |
def read_sequence(reader, typ=list, start='(', end=')'):
    """Read a delimited run of forms from `reader` into a new `typ()` container.

    The next token must equal `start`. Forms are then read with read_form
    and appended until the `end` token is seen, which is consumed as well.

    Raises:
        Exception: if the first token is not `start`, or if the tokens run
            out before `end` is found.
    """
    items = typ()
    if reader.next() != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        items.append(read_form(reader))

    # Step over the closing delimiter.
    reader.next()
    return items
55 | ||
def read_hash_map(reader):
    """Read a '{' ... '}' sequence and hand its elements to _hash_map.

    The elements are collected into a plain list first and then passed to
    _hash_map as positional arguments.
    """
    return _hash_map(*read_sequence(reader, list, '{', '}'))
31690700 JM |
59 | |
def read_list(reader):
    """Read a '(' ... ')' sequence from `reader` into a _list container."""
    return read_sequence(reader, typ=_list, start='(', end=')')
31690700 JM |
62 | |
def read_vector(reader):
    """Read a '[' ... ']' sequence from `reader` into a _vector container."""
    return read_sequence(reader, typ=_vector, start='[', end=']')
31690700 JM |
65 | |
def read_form(reader):
    """Read one complete form starting at the reader's current token.

    Dispatches on the upcoming token:
      * a token starting with ';' is consumed and None is returned;
      * reader macros (' ` ~ ~@ @) wrap the following form in a list headed
        by the corresponding symbol;
      * '^' reads a metadata form, then a target form, and produces
        (with-meta target meta);
      * '(' / '[' / '{' delegate to read_list / read_vector / read_hash_map;
      * anything else is handled by read_atom.

    Raises:
        Exception: if there is no token left to read (for example a reader
            macro at the very end of the input), or if a closing delimiter
            appears where a form is expected.
    """
    token = reader.peek()
    if not token:
        # Without this guard the token[0] lookup below fails with an opaque
        # TypeError when the input ends right after a reader macro.
        raise Exception("unexpected EOF")

    if token[0] == ';':
        reader.next()
        return None

    # reader macros/transforms that wrap exactly one following form
    wrappers = {
        '\'': 'quote',
        '`': 'quasiquote',
        '~': 'unquote',
        '~@': 'splice-unquote',
        '@': 'deref',
    }
    if token in wrappers:
        reader.next()
        return _list(_symbol(wrappers[token]), read_form(reader))

    if token == '^':
        reader.next()
        # The metadata form comes first in the source but last in the result.
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    # A closing delimiter can never start a form.
    if token in (')', ']', '}'):
        raise Exception("unexpected '" + token + "'")

    # list
    if token == '(':
        return read_list(reader)
    # vector
    if token == '[':
        return read_vector(reader)
    # hash-map
    if token == '{':
        return read_hash_map(reader)

    # atom
    return read_atom(reader)
106 | ||
def read_str(str):
    """Tokenize `str` and read the first form from the resulting tokens.

    Raises:
        Blank: if tokenizing yields no tokens (empty or comment-only input).
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    reader = Reader(tokens)
    return read_form(reader)