Commit | Line | Data |
---|---|---|
80320efc JM |
1 | import sys |
2 | IS_RPYTHON = sys.argv[0].endswith('rpython') | |
3 | ||
4 | if IS_RPYTHON: | |
5 | from rpython.rlib.rsre import rsre_re as re | |
6 | else: | |
7 | import re | |
8 | ||
9 | import mal_types as types | |
8855a05a JM |
10 | from mal_types import (MalSym, MalInt, MalStr, _keywordu, |
11 | _list, _listl, _vectorl, _hash_mapl) | |
80320efc JM |
12 | |
class Blank(Exception):
    """Raised by read_str when the input produces no tokens at all."""
14 | ||
class Reader():
    """Cursor over a list of tokens.

    `tokens` is the token list and `position` is the index of the token
    that the next call to peek()/next() will look at.
    """

    def __init__(self, tokens, position=0):
        self.position = position
        self.tokens = tokens

    def next(self):
        """Advance the cursor by one and return the token it was on."""
        # The cursor is moved before the lookup, so it advances even when
        # the lookup itself fails.
        current = self.position
        self.position = current + 1
        return self.tokens[current]

    def peek(self):
        """Return the token under the cursor without consuming it.

        Returns None when the cursor is at or past the end of the tokens.
        """
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
29 | ||
def tokenize(str):
    """Split source text into a list of token strings.

    Each match skips leading whitespace/commas and captures one of: the
    two-character '~@', a single special character, a double-quoted string,
    a ';' comment running to end of line, or a run of ordinary characters.
    Comment tokens are dropped from the result.
    """
    re_str = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\s\[\]{}()'\"`@,;]+)"
    # Under RPython the pattern string is passed as-is; otherwise it is
    # compiled first.
    if not IS_RPYTHON:
        tok_re = re.compile(re_str)
    else:
        tok_re = re_str
    tokens = []
    for tok in re.findall(tok_re, str):
        if tok[0] == ';':
            continue
        tokens.append(tok)
    return tokens
37 | ||
def read_atom(reader):
    """Consume one token from `reader` and convert it to a MAL atom.

    The token is tried, in order, as: an integer literal, a double-quoted
    string, a keyword (leading ':'), one of the constants nil/true/false,
    and finally a symbol.
    """
    # Under RPython the pattern string is passed to re.match directly;
    # otherwise a compiled pattern is used.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token): return MalInt(int(token))
    elif token[0] == '"':
        end = len(token)-1
        # A token of just the two quote characters is the empty string.
        if end < 2:
            return MalStr(u"")
        else:
            s = unicode(token[1:end])
            # Escaped backslashes are parked on a placeholder character
            # first so that the following replacements cannot misread
            # them as the start of an escaped quote or newline.
            s = types._replace(u'\\\\', u"\u029e", s)
            s = types._replace(u'\\"', u'"', s)
            s = types._replace(u'\\n', u"\n", s)
            # Turn each placeholder back into a single backslash.
            s = types._replace(u"\u029e", u"\\", s)
            return MalStr(s)
    elif token[0] == ':': return _keywordu(unicode(token[1:]))
    elif token == "nil": return types.nil
    elif token == "true": return types.true
    elif token == "false": return types.false
    else: return MalSym(unicode(token))
80320efc | 64 | |
8855a05a JM |
def read_sequence(reader, start='(', end=')'):
    """Read forms delimited by `start` and `end` into a Python list.

    The first token must equal `start`; forms are then read until the
    `end` token is seen, and that closing token is consumed. Errors are
    reported through types.throw_str when the opening token is wrong or
    the tokens run out before `end`.
    """
    opener = reader.next()
    if opener != start: types.throw_str("expected '" + start + "'")

    items = []
    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming: types.throw_str("expected '" + end + "', got EOF")
        items.append(read_form(reader))
    # Step over the closing delimiter.
    reader.next()
    return items
77 | ||
def read_list(reader):
    """Read a '(' ... ')' sequence and hand its elements to _listl."""
    return _listl(read_sequence(reader, '(', ')'))
81 | ||
def read_vector(reader):
    """Read a '[' ... ']' sequence and hand its elements to _vectorl."""
    return _vectorl(read_sequence(reader, '[', ']'))
80320efc | 85 | |
8855a05a JM |
def read_hash_map(reader):
    """Read a '{' ... '}' sequence and hand its elements to _hash_mapl."""
    return _hash_mapl(read_sequence(reader, '{', '}'))
80320efc JM |
89 | |
def read_form(reader):
    """Read the next complete form from `reader`.

    Dispatches on the upcoming token: reader macros (' ` ~ ~@ ^ @) are
    expanded into the corresponding list form, opening delimiters start a
    list / vector / hash-map, stray closing delimiters are reported through
    types.throw_str, and anything else is read as an atom.

    Running out of tokens (for example a reader macro with nothing after
    it) is reported through types.throw_str as "unexpected EOF".
    """
    token = reader.peek()
    # peek() yields None once the tokens are exhausted; report that
    # instead of indexing into None below.
    if not token:
        types.throw_str("unexpected EOF")
    # reader macros/transforms
    elif token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        # The metadata form comes first in the source text but is placed
        # last in the resulting with-meta form.
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))

    # list
    elif token == ')': types.throw_str("unexpected ')'")
    elif token == '(': return read_list(reader)

    # vector
    elif token == ']': types.throw_str("unexpected ']'")
    elif token == '[': return read_vector(reader)

    # hash-map
    elif token == '}': types.throw_str("unexpected '}'")
    elif token == '{': return read_hash_map(reader)

    # atom
    else: return read_atom(reader)
130 | ||
def read_str(str):
    """Tokenize `str` and read a single form from the resulting tokens.

    Raises Blank when tokenizing yields no tokens.
    """
    toks = tokenize(str)
    if len(toks) > 0:
        return read_form(Reader(toks))
    raise Blank("Blank Line")