6a52dd269190192ff0c55efbf24f17d0280827a0
[jackhill/mal.git] / rust / src / reader.rs
1 //#![feature(phase)]
2 //#[phase(plugin)]
3 //extern crate regex_macros;
4 //extern crate regex;
5
6 extern crate pcre;
7
8 use types::{MalVal,MalRet,
9 _nil,_true,_false,_int,symbol,string,list,vector,hash_mapv};
10 use self::pcre::Pcre;
11 use super::printer::unescape_str;
12
13 #[deriving(Show, Clone)]
14 struct Reader {
15 tokens : Vec<String>,
16 position : uint,
17 }
18
19 impl Reader {
20 fn next(&mut self) -> Option<String> {
21 if self.position < self.tokens.len() {
22 self.position += 1;
23 Some(self.tokens[self.position-1].to_string())
24 } else {
25 None
26 }
27 }
28 fn peek(&self) -> Option<String> {
29 if self.position < self.tokens.len() {
30 Some(self.tokens[self.position].to_string())
31 } else {
32 None
33 }
34 }
35 }
36
37 fn tokenize(str :String) -> Vec<String> {
38 let mut results = vec![];
39
40 let re = match Pcre::compile(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)"###) {
41 Err(_) => { fail!("failed to compile regex") },
42 Ok(re) => re
43 };
44
45 let mut it = re.matches(str.as_slice());
46 loop {
47 let opt_m = it.next();
48 if opt_m.is_none() { break; }
49 let m = opt_m.unwrap();
50 if m.group(1) == "" { break; }
51 if m.group(1).starts_with(";") { continue; }
52
53 results.push((*m.group(1)).to_string());
54 }
55 results
56 }
57
58 fn read_atom(rdr : &mut Reader) -> MalRet {
59 let otoken = rdr.next();
60 //println!("read_atom: {}", otoken);
61 if otoken.is_none() { return Err("read_atom underflow".to_string()); }
62 let stoken = otoken.unwrap();
63 let token = stoken.as_slice();
64 if regex!(r"^-?[0-9]+$").is_match(token) {
65 let num : Option<int> = from_str(token);
66 Ok(_int(num.unwrap()))
67 } else if regex!(r#"^".*"$"#).is_match(token) {
68 let new_str = token.slice(1,token.len()-1);
69 Ok(string(unescape_str(new_str)))
70 } else if token == "nil" {
71 Ok(_nil())
72 } else if token == "true" {
73 Ok(_true())
74 } else if token == "false" {
75 Ok(_false())
76 } else {
77 Ok(symbol(token))
78 }
79 }
80
81 fn read_seq(rdr : &mut Reader, start: &str, end: &str) -> Result<Vec<MalVal>,String> {
82 let otoken = rdr.next();
83 if otoken.is_none() {
84 return Err("read_atom underflow".to_string());
85 }
86 let stoken = otoken.unwrap();
87 let token = stoken.as_slice();
88 if token != start {
89 return Err("expected '".to_string() + start.to_string() + "'".to_string());
90 }
91
92 let mut ast_vec : Vec<MalVal> = vec![];
93 loop {
94 let otoken = rdr.peek();
95 if otoken.is_none() {
96 return Err("expected '".to_string() + end.to_string() + "', got EOF".to_string());
97 }
98 let stoken = otoken.unwrap();
99 let token = stoken.as_slice();
100 if token == end { break; }
101
102 match read_form(rdr) {
103 Ok(mv) => ast_vec.push(mv),
104 Err(e) => return Err(e),
105 }
106 }
107 rdr.next();
108
109 Ok(ast_vec)
110 }
111
112 fn read_list(rdr : &mut Reader) -> MalRet {
113 match read_seq(rdr, "(", ")") {
114 Ok(seq) => Ok(list(seq)),
115 Err(e) => Err(e),
116 }
117 }
118
119 fn read_vector(rdr : &mut Reader) -> MalRet {
120 match read_seq(rdr, "[", "]") {
121 Ok(seq) => Ok(vector(seq)),
122 Err(e) => Err(e),
123 }
124 }
125
126 fn read_hash_map(rdr : &mut Reader) -> MalRet {
127 match read_seq(rdr, "{", "}") {
128 Ok(seq) => hash_mapv(seq),
129 Err(e) => Err(e),
130 }
131 }
132
133 fn read_form(rdr : &mut Reader) -> MalRet {
134 let otoken = rdr.peek();
135 //println!("read_form: {}", otoken);
136 let stoken = otoken.unwrap();
137 let token = stoken.as_slice();
138 match token {
139 "'" => {
140 let _ = rdr.next();
141 match read_form(rdr) {
142 Ok(f) => Ok(list(vec![symbol("quote"), f])),
143 Err(e) => Err(e),
144 }
145 },
146 "`" => {
147 let _ = rdr.next();
148 match read_form(rdr) {
149 Ok(f) => Ok(list(vec![symbol("quasiquote"), f])),
150 Err(e) => Err(e),
151 }
152 },
153 "~" => {
154 let _ = rdr.next();
155 match read_form(rdr) {
156 Ok(f) => Ok(list(vec![symbol("unquote"), f])),
157 Err(e) => Err(e),
158 }
159 },
160 "~@" => {
161 let _ = rdr.next();
162 match read_form(rdr) {
163 Ok(f) => Ok(list(vec![symbol("splice-unquote"), f])),
164 Err(e) => Err(e),
165 }
166 },
167
168 ")" => Err("unexected ')'".to_string()),
169 "(" => read_list(rdr),
170
171 "]" => Err("unexected ']'".to_string()),
172 "[" => read_vector(rdr),
173
174 "}" => Err("unexected '}'".to_string()),
175 "{" => read_hash_map(rdr),
176
177 _ => read_atom(rdr)
178 }
179 }
180
181 pub fn read_str(str :String) -> MalRet {
182 let tokens = tokenize(str);
183 if tokens.len() == 0 {
184 return Err("<empty line>".to_string());
185 }
186 //println!("tokens: {}", tokens);
187 let rdr = &mut Reader{tokens: tokens, position: 0};
188 read_form(rdr)
189 }