clojure, groovy, rpython, scala: fix macro result evaluation
[jackhill/mal.git] / rpython / reader.py
1 import sys
2 IS_RPYTHON = sys.argv[0].endswith('rpython')
3
4 if IS_RPYTHON:
5 from rpython.rlib.rsre import rsre_re as re
6 else:
7 import re
8
9 import mal_types as types
10 from mal_types import (MalSym, MalInt, MalStr, _keywordu,
11 _list, _listl, _vectorl, _hash_mapl)
12
class Blank(Exception):
    """Signals that the input text produced no tokens at all."""
    pass
14
class Reader():
    """Cursor over a list of tokens.

    `position` is the index of the next token to hand out.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Return the token at the cursor and advance past it."""
        current = self.tokens[self.position]
        self.position += 1
        return current

    def peek(self):
        """Return the token at the cursor without consuming it.

        Returns None once the cursor has moved past the last token.
        """
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
29
def tokenize(str):
    """Split source text into a list of token strings.

    Leading whitespace and commas are skipped by the pattern; tokens that
    begin with ';' (comments) are dropped from the result.
    """
    pattern = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\s\[\]{}()'\"`@,;]+)"
    # Under RPython the pattern string is handed to the regex module as-is;
    # otherwise it is compiled first.
    if not IS_RPYTHON:
        matcher = re.compile(pattern)
    else:
        matcher = pattern
    tokens = []
    for candidate in re.findall(matcher, str):
        if candidate[0] != ';':
            tokens.append(candidate)
    return tokens
37
def _unescape(s):
    """Decode the escapes \\", \\n and \\\\ in `s` in one left-to-right pass.

    A single pass is required so that an escaped backslash is consumed as a
    unit: with chained replacements the text backslash-backslash-n was
    wrongly decoded to backslash + newline instead of backslash + 'n'.
    Any other backslash sequence is left untouched.
    """
    out = []
    i = 0
    n = len(s)
    while i < n:
        ch = s[i]
        if ch == u'\\' and i + 1 < n:
            nxt = s[i + 1]
            if nxt == u'n':
                out.append(u'\n')
                i += 2
                continue
            elif nxt == u'"' or nxt == u'\\':
                out.append(nxt)
                i += 2
                continue
        out.append(ch)
        i += 1
    return u''.join(out)

def read_atom(reader):
    """Consume one token from `reader` and convert it to an atomic value.

    Conversion rules, checked in this order:
      * optional '-' followed only by digits -> MalInt
      * token starting with '"'              -> MalStr with escapes decoded
      * token starting with ':'              -> keyword (via _keywordu)
      * "nil" / "true" / "false"             -> the matching constants in types
      * anything else                        -> MalSym
    """
    # Under RPython the pattern is passed as a plain string; otherwise it is
    # compiled first.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token): return MalInt(int(token))
    elif token[0] == '"':
        # `end` is the index of the closing quote.
        end = len(token)-1
        if end < 2:
            return MalStr(u"")
        else:
            return MalStr(_unescape(unicode(token[1:end])))
    elif token[0] == ':': return _keywordu(unicode(token[1:]))
    elif token == "nil": return types.nil
    elif token == "true": return types.true
    elif token == "false": return types.false
    else: return MalSym(unicode(token))
63
def read_sequence(reader, start='(', end=')'):
    """Read forms between the `start` and `end` delimiter tokens.

    Consumes the opening token, reads forms until the closing token is the
    next one, consumes the closing token, and returns the forms as a plain
    Python list.  Reports an error through types.throw_str when the first
    token is not `start` or when the tokens run out before `end` is seen.
    """
    opener = reader.next()
    if opener != start:
        types.throw_str("expected '" + start + "'")

    forms = []
    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            types.throw_str("expected '" + end + "', got EOF")
        forms.append(read_form(reader))
    # Step over the closing delimiter.
    reader.next()
    return forms
76
def read_list(reader):
    """Read a '(' ... ')' sequence and wrap its forms with _listl."""
    return _listl(read_sequence(reader, '(', ')'))
80
def read_vector(reader):
    """Read a '[' ... ']' sequence and wrap its forms with _vectorl."""
    return _vectorl(read_sequence(reader, '[', ']'))
84
def read_hash_map(reader):
    """Read a '{' ... '}' sequence and wrap its forms with _hash_mapl."""
    return _hash_mapl(read_sequence(reader, '{', '}'))
88
def read_form(reader):
    """Read the next complete form from `reader`.

    Dispatches on the upcoming token:
      * a token starting with ';' is consumed and None is returned
      * the reader macros ' ` ~ ~@ @ expand to a two-element list whose head
        is quote / quasiquote / unquote / splice-unquote / deref and whose
        second element is the following form
      * ^ reads a metadata form, then the target form, and yields
        (with-meta target meta)
      * ( [ { delegate to read_list / read_vector / read_hash_map
      * a stray ) ] } is reported through types.throw_str
      * anything else is handed to read_atom

    Running out of tokens (for example a reader macro that is the last token
    of the input) is reported through types.throw_str as "unexpected EOF".
    """
    token = reader.peek()
    # peek() yields None once the tokens are exhausted; without this guard the
    # token[0] test below would fail on None instead of raising a reader error.
    if token is None:
        types.throw_str("unexpected EOF")
    # reader macros/transforms
    elif token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        # The metadata appears first in the text but last in the result.
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))

    # list
    elif token == ')': types.throw_str("unexpected ')'")
    elif token == '(': return read_list(reader)

    # vector
    elif token == ']': types.throw_str("unexpected ']'")
    elif token == '[': return read_vector(reader)

    # hash-map
    elif token == '}': types.throw_str("unexpected '}'")
    elif token == '{': return read_hash_map(reader)

    # atom
    else: return read_atom(reader)
129
def read_str(str):
    """Tokenize `str` and read the first form from it.

    Raises Blank when tokenizing yields no tokens.
    """
    token_list = tokenize(str)
    if not token_list:
        raise Blank("Blank Line")
    form_reader = Reader(token_list)
    return read_form(form_reader)