RPython: step7
[jackhill/mal.git] / rpython / reader.py
1 import sys
2 IS_RPYTHON = sys.argv[0].endswith('rpython')
3
4 if IS_RPYTHON:
5 from rpython.rlib.rsre import rsre_re as re
6 else:
7 import re
8
9 import mal_types as types
10 from mal_types import (MalSym, MalInt, MalStr, _keywordu, _list)
11
class Blank(Exception):
    """Signals that the input contained no tokens (raised by read_str)."""
13
class Reader():
    """Cursor over a sequence of tokens.

    `tokens` is the token sequence and `position` is the index of the
    token that peek/next will look at.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Advance past the current token and return it.

        No bounds check is done here; callers are expected to peek first.
        """
        current = self.position
        self.position = current + 1
        return self.tokens[current]

    def peek(self):
        """Return the current token without advancing, or None at the end."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
28
def tokenize(str):
    """Split mal source text into a list of token strings.

    Each token is the captured group of one regex match: ``~@``, a single
    special character, a double-quoted string, a ``;`` comment, or a run
    of characters that are not whitespace or delimiters. Leading
    whitespace and commas are skipped by the pattern itself, and tokens
    that start with ``;`` (comments) are dropped from the result.
    """
    # Raw literal: the regex escapes (\s, \[, \]) are written exactly as
    # the regex engine sees them instead of relying on Python keeping
    # unrecognised string escapes intact.
    re_str = r'''[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"|;.*|[^\s\[\]{}()'"`@,;]+)'''
    # Under RPython the pattern string is handed to findall directly;
    # otherwise it is compiled first.
    if IS_RPYTHON:
        tok_re = re_str
    else:
        tok_re = re.compile(re_str)
    return [t for t in re.findall(tok_re, str) if t[0] != ';']
36
def read_atom(reader):
    """Consume one token from `reader` and convert it to a mal atom.

    Conversion rules, checked in order:
      * an optionally negative run of digits -> MalInt
      * a token starting with a double quote -> MalStr of the text
        between the quotes, passed through types._replace
        (presumably turning each escaped quote into a plain quote;
        confirm against mal_types)
      * a token starting with ':'            -> keyword via _keywordu
      * "nil" / "true" / "false"             -> types.nil / true / false
      * anything else                        -> MalSym
    """
    # Under RPython the pattern is passed as a plain string; otherwise it
    # is compiled first.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token):
        return MalInt(int(token))
    elif token[0] == '"':
        # Index of the closing quote; below 2 means nothing lies between
        # the quotes.
        end = len(token)-1
        if end < 2:
            return MalStr(u"")
        else:
            return MalStr(types._replace(u'\\"', u'"', unicode(token[1:end])))
    elif token[0] == ':':
        return _keywordu(unicode(token[1:]))
    elif token == "nil":
        return types.nil
    elif token == "true":
        return types.true
    elif token == "false":
        return types.false
    else:
        return MalSym(unicode(token))
58
def read_sequence(reader, typ, start='(', end=')'):
    """Read a delimited sequence of forms from `reader`.

    The container is created by calling `typ()` and each form returned
    by read_form is appended to it. The first token must equal `start`;
    forms are read until the next token equals `end`, which is consumed.

    Raises Exception if the first token is not `start`, or if the tokens
    run out before `end` is seen.
    """
    seq = typ()
    if reader.next() != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        seq.append(read_form(reader))

    # Step over the closing delimiter.
    reader.next()
    return seq
71
72 ##def read_hash_map(reader):
73 ## lst = read_sequence(reader, list, '{', '}')
74 ## return _hash_map(*lst)
75
def read_list(reader):
    """Read a parenthesised sequence from `reader` into a container made by _list."""
    return read_sequence(reader, _list, start='(', end=')')
78
79 ##def read_vector(reader):
80 ## return read_sequence(reader, _vector, '[', ']')
81
def read_form(reader):
    """Read one complete form from `reader`, dispatching on the next token.

    Reader-macro tokens are expanded into lists: ``'``, `````, ``~``,
    ``~@`` and ``@`` wrap the following form in quote, quasiquote,
    unquote, splice-unquote and deref respectively. ``^`` reads a
    metadata form and then a target form, producing
    (with-meta target meta). ``(`` starts a list; any other token is
    handed to read_atom.

    Returns the parsed form, or None when the next token is a ``;``
    comment (the comment token is consumed).

    Raises Exception on an unexpected ``)`` or when the tokens run out
    where a form is required.
    """
    token = reader.peek()
    # peek() yields None at end of input (e.g. the source "'" or "^a");
    # report it as a reader error instead of failing on token[0].
    if token is None:
        raise Exception("unexpected EOF")

    # reader macros/transforms
    if token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        # Metadata comes first in the source but last in the result.
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))

    # list
    elif token == ')':
        raise Exception("unexpected ')'")
    elif token == '(':
        return read_list(reader)

    # atom
    else:
        return read_atom(reader)
122
def read_str(str):
    """Tokenize the source text `str` and read the first form from it.

    Raises Blank when tokenizing yields no tokens.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    reader = Reader(tokens)
    return read_form(reader)