DISABLE FDs (REMOVE ME).
[jackhill/mal.git] / rpython / reader.py
1 import sys
2 IS_RPYTHON = sys.argv[0].endswith('rpython')
3
4 if IS_RPYTHON:
5 from rpython.rlib.rsre import rsre_re as re
6 else:
7 import re
8
9 import mal_types as types
10 from mal_types import (MalSym, MalInt, MalStr, _keywordu,
11 _list, _listl, _vectorl, _hash_mapl)
12
class Blank(Exception):
    """Raised by read_str() when the input produces no tokens at all."""
14
class Reader():
    """Cursor over a list of tokens.

    `tokens` is the token list and `position` is the index of the token
    that the next call to next() or peek() will look at.
    """

    def __init__(self, tokens, position=0):
        self.position = position
        self.tokens = tokens

    def next(self):
        """Consume one token: advance the cursor and return the token passed."""
        consumed = self.position
        # The cursor is advanced before the lookup happens.
        self.position = consumed + 1
        return self.tokens[consumed]

    def peek(self):
        """Return the token under the cursor without consuming it.

        Returns None once the cursor has moved past the last token.
        """
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
29
def tokenize(str):
    """Split source text into a list of token strings, dropping comments.

    Whitespace and commas in front of a token are skipped.  A token is one of:
      * the two-character sequence ~@
      * a single special character: [ ] { } ( ) ' ` ~ ^ @
      * a double-quoted string with backslash escapes (the closing quote
        is optional, so an unterminated string still yields a token)
      * a ';' comment running to the end of the line
      * a run of any other characters that are not whitespace, brackets,
        braces, parentheses, quotes, backtick, '@', ',' or ';'

    Tokens whose first character is ';' are filtered out of the result.
    """
    pattern = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\s\[\]{}()'\"`@,;]+)"
    # Under RPython the pattern text is handed to re.findall as-is;
    # otherwise it is compiled first.
    if not IS_RPYTHON:
        matcher = re.compile(pattern)
    else:
        matcher = pattern
    tokens = []
    for tok in re.findall(matcher, str):
        if tok[0] == ';':
            continue
        tokens.append(tok)
    return tokens
37
def read_atom(reader):
    """Consume one token from `reader` and convert it to a mal atom.

    The token is classified in this order:
      * optional '-' followed only by digits  -> MalInt
      * a complete double-quoted string       -> MalStr, with the
        backslash-backslash, backslash-quote and backslash-n escapes decoded
      * any other token starting with '"'     -> error via types.throw_str
        (the string was never closed)
      * a token starting with ':'             -> keyword built by _keywordu
      * nil / true / false                    -> types.nil / types.true /
        types.false
      * anything else                         -> MalSym
    """
    # Under RPython the pattern text is passed to re.match directly;
    # otherwise the patterns are compiled first.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
        str_re = '"(?:[\\\\].|[^\\\\"])*"'
    else:
        int_re = re.compile('-?[0-9]+$')
        str_re = re.compile('"(?:[\\\\].|[^\\\\"])*"')
    token = reader.next()
    if re.match(int_re, token):
        return MalInt(int(token))
    elif re.match(str_re, token):
        end = len(token) - 1
        if end <= 1:
            # The token is just the two quotes: an empty string literal.
            return MalStr(u"")
        else:
            s = unicode(token[1:end])
            # Escaped backslashes are parked on the placeholder U+029E first,
            # so their backslash is not re-read as the start of another
            # escape; the placeholder is turned into a real backslash last.
            # NOTE: a literal U+029E character inside the string is also
            # converted to a backslash by that last step.
            s = types._replace(u'\\\\', u"\u029e", s)
            s = types._replace(u'\\"', u'"', s)
            s = types._replace(u'\\n', u"\n", s)
            s = types._replace(u"\u029e", u"\\", s)
            return MalStr(s)
    elif token[0] == '"':
        types.throw_str("expected '\"', got EOF")
    elif token[0] == ':':
        return _keywordu(unicode(token[1:]))
    elif token == "nil":
        return types.nil
    elif token == "true":
        return types.true
    elif token == "false":
        return types.false
    else:
        return MalSym(unicode(token))
68
def read_sequence(reader, start='(', end=')'):
    """Read the forms between a `start` and an `end` token into a list.

    The next token must equal `start`; otherwise an error is reported
    through types.throw_str.  Forms are then read with read_form() until
    the upcoming token equals `end`, which is consumed as well.  Running
    out of tokens before `end` is reported through types.throw_str.

    Returns a plain Python list of the forms read.
    """
    opener = reader.next()
    if opener != start:
        types.throw_str("expected '" + start + "'")

    forms = []
    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            types.throw_str("expected '" + end + "', got EOF")
        forms.append(read_form(reader))
    # Step over the closing delimiter.
    reader.next()
    return forms
81
def read_list(reader):
    """Read a '(' ... ')' sequence from `reader` and wrap it with _listl."""
    return _listl(read_sequence(reader, '(', ')'))
85
def read_vector(reader):
    """Read a '[' ... ']' sequence from `reader` and wrap it with _vectorl."""
    return _vectorl(read_sequence(reader, '[', ']'))
89
def read_hash_map(reader):
    """Read a '{' ... '}' sequence from `reader` and wrap it with _hash_mapl."""
    return _hash_mapl(read_sequence(reader, '{', '}'))
93
def read_form(reader):
    """Read one complete form starting at the reader's current token.

    Dispatches on the upcoming token:
      * a token starting with ';' is consumed and None is returned
      * the reader macros ' ` ~ ~@ @ are consumed and expand to a list
        (quote / quasiquote / unquote / splice-unquote / deref) wrapping
        the form that follows
      * ^ reads a metadata form, then the target form, and expands to
        (with-meta target meta)
      * ( [ { open a list, vector or hash-map
      * a stray ) ] } is an error
      * anything else is handed to read_atom()

    Errors are reported through types.throw_str, including the case where
    no token is left (for example input that ends right after a reader
    macro).
    """
    token = reader.peek()
    # peek() yields None once the tokens are exhausted; indexing it below
    # would fail, so end of input is reported explicitly.
    if not token:
        types.throw_str("unexpected EOF")

    # reader macros/transforms
    elif token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        # The metadata comes first in the source but last in the expansion.
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))

    # list
    elif token == ')':
        types.throw_str("unexpected ')'")
    elif token == '(':
        return read_list(reader)

    # vector
    elif token == ']':
        types.throw_str("unexpected ']'")
    elif token == '[':
        return read_vector(reader)

    # hash-map
    elif token == '}':
        types.throw_str("unexpected '}'")
    elif token == '{':
        return read_hash_map(reader)

    # atom
    else:
        return read_atom(reader)
134
def read_str(str):
    """Tokenize `str` and read the first form from it.

    Raises Blank when tokenizing yields no tokens.  Otherwise returns
    whatever read_form() produces for a Reader positioned at the first
    token.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    reader = Reader(tokens)
    return read_form(reader)