DISABLE FDs (REMOVE ME).
[jackhill/mal.git] / python / reader.py
CommitLineData
31690700 1import re
e91c55c2 2from mal_types import (_symbol, _keyword, _list, _vector, _hash_map, _s2u, _u)
31690700
JM
3
class Blank(Exception):
    """Raised by read_str when the input contains no tokens."""
5
class Reader:
    """Cursor over a list of tokens.

    Attributes:
        tokens: the token sequence being read.
        position: index of the next token to hand out.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Advance past the current token and return it.

        The cursor is moved before the lookup, so it advances even when
        the lookup raises IndexError at the end of the tokens.
        """
        current = self.position
        self.position = current + 1
        return self.tokens[current]

    def peek(self):
        """Return the current token without advancing, or None at the end."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
20
def tokenize(str):
    """Split source text into a list of token strings.

    Leading whitespace and commas are skipped by the pattern; tokens that
    start with ';' (comments) are dropped from the result.
    """
    pattern = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""")
    tokens = []
    for tok in pattern.findall(str):
        if tok.startswith(';'):
            continue  # comment token
        tokens.append(tok)
    return tokens
24
8d78bc26 25def _unescape(s):
e91c55c2 26 return s.replace('\\\\', _u('\u029e')).replace('\\"', '"').replace('\\n', '\n').replace(_u('\u029e'), '\\')
8d78bc26 27
31690700
JM
# Patterns are compiled once at import time rather than once per token.
_INT_RE = re.compile(r"-?[0-9]+$")
# Digits, one decimal point, optional digits. Requiring exactly one point
# keeps tokens such as "1.2.3" from being handed to float().
_FLOAT_RE = re.compile(r"-?[0-9]+\.[0-9]*$")
_STRING_RE = re.compile(r'"(?:[\\].|[^\\"])*"')


def read_atom(reader):
    """Consume one token from *reader* and convert it to a value.

    Conversions, tried in order:
      * integer literal         -> int
      * decimal literal         -> float
      * complete string literal -> _s2u(_unescape(body without the quotes))
      * token starting with ':' -> _keyword(rest of the token)
      * "nil" / "true" / "false" -> None / True / False
      * anything else           -> _symbol(token)

    Raises:
        Exception: if the token starts with a double quote but is not a
            complete string literal (unterminated string).
    """
    token = reader.next()
    if _INT_RE.match(token):
        return int(token)
    if _FLOAT_RE.match(token):
        # int() cannot parse a token containing '.', so it always raised
        # ValueError here; float() is the matching conversion.
        return float(token)
    if _STRING_RE.match(token):
        return _s2u(_unescape(token[1:-1]))
    if token[0] == '"':
        raise Exception("expected '\"', got EOF")
    if token[0] == ':':
        return _keyword(token[1:])
    if token == "nil":
        return None
    if token == "true":
        return True
    if token == "false":
        return False
    return _symbol(token)
31690700
JM
42
def read_sequence(reader, typ=list, start='(', end=')'):
    """Read a delimited run of forms from *reader* into a new typ().

    The next token must equal *start*. Forms are then read with read_form
    and appended until the *end* token is seen, which is consumed.

    Raises:
        Exception: if the first token is not *start*, or if the tokens
            run out before *end* is found.
    """
    items = typ()
    if reader.next() != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        items.append(read_form(reader))

    reader.next()  # consume the closing delimiter
    return items
55
def read_hash_map(reader):
    """Read a '{' ... '}' form and build a hash map from its elements.

    The elements are collected into a plain list and then passed to
    _hash_map as positional arguments.
    """
    entries = read_sequence(reader, typ=list, start='{', end='}')
    return _hash_map(*entries)
31690700
JM
59
def read_list(reader):
    """Read a '(' ... ')' form into the container created by _list."""
    return read_sequence(reader, typ=_list, start='(', end=')')
31690700
JM
62
def read_vector(reader):
    """Read a '[' ... ']' form into the container created by _vector."""
    return read_sequence(reader, typ=_vector, start='[', end=']')
31690700
JM
65
# Prefix tokens that expand to (<symbol> <next form>).
_READER_MACROS = {
    '\'': 'quote',
    '`': 'quasiquote',
    '~': 'unquote',
    '~@': 'splice-unquote',
    '@': 'deref',
}

# Closing delimiters are only valid inside read_sequence; seeing one here
# means it has no matching opener.
_CLOSING_DELIMITERS = (')', ']', '}')


def read_form(reader):
    """Read the next complete form from *reader* and return it.

    Dispatches on the upcoming token:
      * a token starting with ';' is consumed and None is returned
      * ' ` ~ ~@ @ expand to a two-element _list of the matching symbol
        and the following form
      * ^ reads a metadata form and then a target form, and returns
        the _list (with-meta <target> <metadata>)
      * ( [ { start a list, vector or hash map
      * anything else is read by read_atom

    Raises:
        Exception: if no tokens remain, or if the upcoming token is an
            unmatched closing delimiter.
    """
    token = reader.peek()
    if token is None:
        # peek() returns None when the tokens are exhausted (for example
        # after a trailing quote); indexing it raised TypeError before.
        raise Exception("unexpected EOF")

    # reader macros/transforms
    if token[0] == ';':
        reader.next()
        return None
    if token in _READER_MACROS:
        reader.next()
        return _list(_symbol(_READER_MACROS[token]), read_form(reader))
    if token == '^':
        reader.next()
        # The metadata comes first in the source but last in the result.
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    if token in _CLOSING_DELIMITERS:
        raise Exception("unexpected '" + token + "'")

    if token == '(':
        return read_list(reader)
    if token == '[':
        return read_vector(reader)
    if token == '{':
        return read_hash_map(reader)

    # atom
    return read_atom(reader)
106
def read_str(str):
    """Tokenize *str* and read the first form from the tokens.

    Raises:
        Blank: if tokenizing yields no tokens.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    return read_form(Reader(tokens))