c, rpython, vhdl: add number?, fn?, and macro?
[jackhill/mal.git] / rpython / reader.py
import sys
IS_RPYTHON = sys.argv[0].endswith('rpython')

if IS_RPYTHON:
    from rpython.rlib.rsre import rsre_re as re
else:
    import re

import mal_types as types
from mal_types import (MalSym, MalInt, MalStr, _keywordu,
                       _list, _listl, _vectorl, _hash_mapl)

class Blank(Exception): pass

class Reader():
    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        self.position += 1
        return self.tokens[self.position-1]

    def peek(self):
        if len(self.tokens) > self.position:
            return self.tokens[self.position]
        else:
            return None

def tokenize(str):
    re_str = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\s\[\]{}()'\"`@,;]+)"
    if IS_RPYTHON:
        tok_re = re_str
    else:
        tok_re = re.compile(re_str)
    return [t for t in re.findall(tok_re, str) if t[0] != ';']

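# Note on the token regex above: its alternatives capture, in order, the
# splice-unquote token "~@", single special characters, double-quoted strings
# (with backslash escapes), comments starting with ";" (filtered out by the
# list comprehension), and plain symbol/number/keyword tokens.  For example,
# tokenize("(+ 1 2)") is expected to yield ['(', '+', '1', '2', ')'].
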
def read_atom(reader):
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
        float_re = '-?[0-9][0-9.]*$'
    else:
        int_re = re.compile('-?[0-9]+$')
        float_re = re.compile('-?[0-9][0-9.]*$')
    token = reader.next()
    if re.match(int_re, token): return MalInt(int(token))
##    elif re.match(float_re, token): return int(token)
    elif token[0] == '"':
        end = len(token)-1
        if end < 2:
            return MalStr(u"")
        else:
            s = unicode(token[1:end])
            # Unescape the string body; U+029E is a temporary placeholder so
            # that escaped backslashes are not re-processed by the later
            # replacements.
            s = types._replace(u'\\\\', u"\u029e", s)
            s = types._replace(u'\\"', u'"', s)
            s = types._replace(u'\\n', u"\n", s)
            s = types._replace(u"\u029e", u"\\", s)
            return MalStr(s)
    elif token[0] == ':': return _keywordu(unicode(token[1:]))
    elif token == "nil": return types.nil
    elif token == "true": return types.true
    elif token == "false": return types.false
    else: return MalSym(unicode(token))

def read_sequence(reader, start='(', end=')'):
    ast = []
    token = reader.next()
    if token != start: types.throw_str("expected '" + start + "'")

    token = reader.peek()
    while token != end:
        if not token: types.throw_str("expected '" + end + "', got EOF")
        ast.append(read_form(reader))
        token = reader.peek()
    reader.next()  # consume the closing delimiter
    return ast

def read_list(reader):
    lst = read_sequence(reader, '(', ')')
    return _listl(lst)

def read_vector(reader):
    lst = read_sequence(reader, '[', ']')
    return _vectorl(lst)

def read_hash_map(reader):
    lst = read_sequence(reader, '{', '}')
    return _hash_mapl(lst)

def read_form(reader):
    token = reader.peek()
    # reader macros/transforms
    if token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))

    # list
    elif token == ')': types.throw_str("unexpected ')'")
    elif token == '(': return read_list(reader)

    # vector
    elif token == ']': types.throw_str("unexpected ']'")
    elif token == '[': return read_vector(reader)

    # hash-map
    elif token == '}': types.throw_str("unexpected '}'")
    elif token == '{': return read_hash_map(reader)

    # atom
    else: return read_atom(reader)

def read_str(str):
    tokens = tokenize(str)
    if len(tokens) == 0: raise Blank("Blank Line")
    return read_form(Reader(tokens))
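
# Hypothetical smoke test, not taken from the upstream sources: a minimal
# sketch assuming mal_types is importable and that re and rsre_re treat the
# token regex identically.
if __name__ == '__main__':
    toks = tokenize("(+ 1 2)")
    print(toks)                 # expected: ['(', '+', '1', '2', ')']
    ast = read_str("(+ 1 2)")
    print(ast)                  # AST root object; its repr depends on mal_types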