Commit | Line | Data |
---|---|---|
55a253cb | 1 | use std::borrow::ToOwned; |
bbeb1b87 AC |
2 | use types::MalError::{ErrString, ErrMalVal}; |
3 | use types::{MalVal, MalRet, | |
4 | _nil, _true, _false, _int, symbol, string, list, vector, hash_mapv, | |
5 | err_str, err_string, err_val}; | |
abdd56eb JM |
6 | use super::printer::unescape_str; |
7 | ||
/// Cursor over a list of tokens.
///
/// `position` is the index of the token that the next call to `peek` or
/// `next` will yield.
#[derive(Debug, Clone)]
struct Reader {
    tokens: Vec<String>,
    position: usize,
}

impl Reader {
    /// Returns a copy of the current token and advances past it, or `None`
    /// (leaving the cursor untouched) when every token has been consumed.
    fn next(&mut self) -> Option<String> {
        let current = self.peek();
        if current.is_some() {
            self.position += 1;
        }
        current
    }

    /// Returns a copy of the current token without advancing, or `None`
    /// when every token has been consumed.
    fn peek(&self) -> Option<String> {
        self.tokens.get(self.position).cloned()
    }
}
31 | ||
bbeb1b87 | 32 | fn tokenize(str: String) -> Vec<String> { |
abdd56eb | 33 | let mut results = vec![]; |
55a253cb JM |
34 | let re = regex!(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)"###); |
35 | for cap in re.captures_iter(&str) { | |
36 | let group = cap.at(1).unwrap_or(""); | |
37 | if group == "" { break; } | |
38 | if group.starts_with(";") { continue; } | |
39 | results.push(group.to_owned()); | |
abdd56eb JM |
40 | } |
41 | results | |
42 | } | |
43 | ||
0ab374bc | 44 | fn read_atom(rdr : &mut Reader) -> MalRet { |
abdd56eb JM |
45 | let otoken = rdr.next(); |
46 | //println!("read_atom: {}", otoken); | |
3744d566 | 47 | if otoken.is_none() { return err_str("read_atom underflow"); } |
abdd56eb | 48 | let stoken = otoken.unwrap(); |
bbeb1b87 | 49 | let token = &stoken[..]; |
abdd56eb | 50 | if regex!(r"^-?[0-9]+$").is_match(token) { |
bbeb1b87 | 51 | let num : Option<isize> = token.parse().ok(); |
4ee7c0f2 | 52 | Ok(_int(num.unwrap())) |
abdd56eb | 53 | } else if regex!(r#"^".*"$"#).is_match(token) { |
bbeb1b87 | 54 | let new_str = &token[1..token.len()-1]; |
4ee7c0f2 | 55 | Ok(string(unescape_str(new_str))) |
b8ee29b2 | 56 | } else if regex!(r#"^:"#).is_match(token) { |
bbeb1b87 | 57 | Ok(string(format!("\u{29e}{}", &token[1..]))) |
abdd56eb | 58 | } else if token == "nil" { |
4ee7c0f2 | 59 | Ok(_nil()) |
abdd56eb | 60 | } else if token == "true" { |
4ee7c0f2 | 61 | Ok(_true()) |
abdd56eb | 62 | } else if token == "false" { |
4ee7c0f2 | 63 | Ok(_false()) |
abdd56eb | 64 | } else { |
4ee7c0f2 | 65 | Ok(symbol(token)) |
abdd56eb JM |
66 | } |
67 | } | |
68 | ||
5939404b | 69 | fn read_seq(rdr : &mut Reader, start: &str, end: &str) -> Result<Vec<MalVal>,String> { |
abdd56eb | 70 | let otoken = rdr.next(); |
5939404b JM |
71 | if otoken.is_none() { |
72 | return Err("read_atom underflow".to_string()); | |
73 | } | |
abdd56eb | 74 | let stoken = otoken.unwrap(); |
bbeb1b87 | 75 | let token = &stoken[..]; |
5939404b | 76 | if token != start { |
bbeb1b87 | 77 | return Err(format!("expected '{}'", start)) |
5939404b | 78 | } |
abdd56eb JM |
79 | |
80 | let mut ast_vec : Vec<MalVal> = vec![]; | |
81 | loop { | |
82 | let otoken = rdr.peek(); | |
5939404b | 83 | if otoken.is_none() { |
bbeb1b87 | 84 | return Err(format!("expected '{}', got EOF", end)); |
5939404b | 85 | } |
abdd56eb | 86 | let stoken = otoken.unwrap(); |
bbeb1b87 | 87 | let token = &stoken[..]; |
5939404b | 88 | if token == end { break; } |
abdd56eb JM |
89 | |
90 | match read_form(rdr) { | |
91 | Ok(mv) => ast_vec.push(mv), | |
3744d566 JM |
92 | Err(ErrString(es)) => return Err(es), |
93 | Err(ErrMalVal(_)) => return Err("read_seq exception".to_string()), | |
abdd56eb JM |
94 | } |
95 | } | |
96 | rdr.next(); | |
97 | ||
5939404b JM |
98 | Ok(ast_vec) |
99 | } | |
100 | ||
101 | fn read_list(rdr : &mut Reader) -> MalRet { | |
102 | match read_seq(rdr, "(", ")") { | |
103 | Ok(seq) => Ok(list(seq)), | |
3744d566 | 104 | Err(es) => err_string(es), |
5939404b JM |
105 | } |
106 | } | |
107 | ||
108 | fn read_vector(rdr : &mut Reader) -> MalRet { | |
109 | match read_seq(rdr, "[", "]") { | |
110 | Ok(seq) => Ok(vector(seq)), | |
3744d566 | 111 | Err(es) => err_string(es), |
5939404b JM |
112 | } |
113 | } | |
114 | ||
115 | fn read_hash_map(rdr : &mut Reader) -> MalRet { | |
116 | match read_seq(rdr, "{", "}") { | |
117 | Ok(seq) => hash_mapv(seq), | |
3744d566 | 118 | Err(es) => err_string(es), |
5939404b | 119 | } |
abdd56eb JM |
120 | } |
121 | ||
0ab374bc | 122 | fn read_form(rdr : &mut Reader) -> MalRet { |
abdd56eb JM |
123 | let otoken = rdr.peek(); |
124 | //println!("read_form: {}", otoken); | |
125 | let stoken = otoken.unwrap(); | |
bbeb1b87 | 126 | let token = &stoken[..]; |
abdd56eb | 127 | match token { |
4ee7c0f2 JM |
128 | "'" => { |
129 | let _ = rdr.next(); | |
130 | match read_form(rdr) { | |
131 | Ok(f) => Ok(list(vec![symbol("quote"), f])), | |
132 | Err(e) => Err(e), | |
133 | } | |
134 | }, | |
135 | "`" => { | |
136 | let _ = rdr.next(); | |
137 | match read_form(rdr) { | |
138 | Ok(f) => Ok(list(vec![symbol("quasiquote"), f])), | |
139 | Err(e) => Err(e), | |
140 | } | |
141 | }, | |
142 | "~" => { | |
143 | let _ = rdr.next(); | |
144 | match read_form(rdr) { | |
145 | Ok(f) => Ok(list(vec![symbol("unquote"), f])), | |
146 | Err(e) => Err(e), | |
147 | } | |
148 | }, | |
149 | "~@" => { | |
150 | let _ = rdr.next(); | |
151 | match read_form(rdr) { | |
152 | Ok(f) => Ok(list(vec![symbol("splice-unquote"), f])), | |
153 | Err(e) => Err(e), | |
154 | } | |
155 | }, | |
bd306723 JM |
156 | "^" => { |
157 | let _ = rdr.next(); | |
158 | match read_form(rdr) { | |
159 | Ok(meta) => { | |
160 | match read_form(rdr) { | |
161 | Ok(f) => Ok(list(vec![symbol("with-meta"), f, meta])), | |
162 | Err(e) => Err(e), | |
163 | } | |
164 | }, | |
165 | Err(e) => Err(e), | |
166 | } | |
167 | }, | |
06fef9b5 JM |
168 | "@" => { |
169 | let _ = rdr.next(); | |
170 | match read_form(rdr) { | |
171 | Ok(f) => Ok(list(vec![symbol("deref"), f])), | |
172 | Err(e) => Err(e), | |
173 | } | |
174 | }, | |
4ee7c0f2 | 175 | |
3744d566 | 176 | ")" => err_str("unexected ')'"), |
abdd56eb | 177 | "(" => read_list(rdr), |
5939404b | 178 | |
3744d566 | 179 | "]" => err_str("unexected ']'"), |
5939404b JM |
180 | "[" => read_vector(rdr), |
181 | ||
3744d566 | 182 | "}" => err_str("unexected '}'"), |
5939404b JM |
183 | "{" => read_hash_map(rdr), |
184 | ||
abdd56eb JM |
185 | _ => read_atom(rdr) |
186 | } | |
187 | } | |
188 | ||
0ab374bc | 189 | pub fn read_str(str :String) -> MalRet { |
abdd56eb JM |
190 | let tokens = tokenize(str); |
191 | if tokens.len() == 0 { | |
3744d566 JM |
192 | // any malval as the error slot means empty line |
193 | return err_val(_nil()) | |
abdd56eb JM |
194 | } |
195 | //println!("tokens: {}", tokens); | |
196 | let rdr = &mut Reader{tokens: tokens, position: 0}; | |
197 | read_form(rdr) | |
198 | } |