Commit | Line | Data |
---|---|---|
4ef4b17c JM |
1 | use std::rc::Rc; |
2 | use regex::{Regex,Captures}; | |
3 | ||
4 | use types::{MalVal,MalRet,MalErr,error,hash_map}; | |
5 | use types::MalVal::{Nil,Bool,Int,Str,Sym,List,Vector}; | |
6 | use types::MalErr::ErrString; | |
7 | ||
8 | #[derive(Debug, Clone)] | |
9 | struct Reader { | |
10 | tokens: Vec<String>, | |
11 | pos: usize, | |
12 | } | |
13 | ||
14 | impl Reader { | |
15 | fn next(&mut self) -> Result<String,MalErr> { | |
16 | self.pos = self.pos + 1; | |
17 | Ok(self.tokens.get(self.pos-1) | |
18 | .ok_or(ErrString("underflow".to_string()))?.to_string()) | |
19 | } | |
20 | fn peek(&self) -> Result<String,MalErr> { | |
21 | Ok(self.tokens.get(self.pos) | |
22 | .ok_or(ErrString("underflow".to_string()))?.to_string()) | |
23 | } | |
24 | } | |
25 | ||
26 | fn tokenize(str: &str) -> Vec<String> { | |
27 | lazy_static! { | |
28 | static ref RE: Regex = Regex::new(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]+)"###).unwrap(); | |
29 | } | |
30 | ||
31 | let mut res = vec![]; | |
32 | for cap in RE.captures_iter(str) { | |
33 | if cap[1].starts_with(";") { continue } | |
34 | res.push(String::from(&cap[1])); | |
35 | } | |
36 | res | |
37 | } | |
38 | ||
39 | fn unescape_str(s: &str) -> String { | |
40 | lazy_static! { | |
41 | static ref RE: Regex = Regex::new(r#"\\(.)"#).unwrap(); | |
42 | } | |
43 | RE.replace_all(&s, |caps: &Captures| { | |
44 | format!("{}", if &caps[1] == "n" { "\n" } else { &caps[1] }) | |
45 | }).to_string() | |
46 | } | |
47 | ||
48 | fn read_atom(rdr: &mut Reader) -> MalRet { | |
49 | lazy_static! { | |
50 | static ref INT_RE: Regex = Regex::new(r"^-?[0-9]+$").unwrap(); | |
4b66bbdf | 51 | static ref STR_RE: Regex = Regex::new(r#""(?:\\.|[^\\"])*""#).unwrap(); |
4ef4b17c JM |
52 | } |
53 | let token = rdr.next()?; | |
54 | match &token[..] { | |
55 | "nil" => Ok(Nil), | |
56 | "false" => Ok(Bool(false)), | |
57 | "true" => Ok(Bool(true)), | |
58 | _ => { | |
59 | if INT_RE.is_match(&token) { | |
60 | Ok(Int(token.parse().unwrap())) | |
4b66bbdf BH |
61 | } else if STR_RE.is_match(&token) { |
62 | Ok(Str(unescape_str(&token[1..token.len()-1]))) | |
4ef4b17c | 63 | } else if token.starts_with("\"") { |
4b66bbdf | 64 | error("expected '\"', got EOF") |
4ef4b17c JM |
65 | } else if token.starts_with(":") { |
66 | Ok(Str(format!("\u{29e}{}", &token[1..]))) | |
67 | } else { | |
68 | Ok(Sym(token.to_string())) | |
69 | } | |
70 | } | |
71 | } | |
72 | } | |
73 | ||
74 | fn read_seq(rdr: &mut Reader, end: &str) -> MalRet { | |
75 | let mut seq : Vec<MalVal> = vec![]; | |
76 | rdr.next()?; | |
77 | loop { | |
78 | let token = match rdr.peek() { | |
79 | Ok(t) => t, | |
80 | Err(_) => return error(&format!("expected '{}', got EOF", end)) | |
81 | }; | |
82 | if token == end { break } | |
83 | seq.push(read_form(rdr)?) | |
84 | } | |
85 | let _ = rdr.next(); | |
86 | match end { | |
87 | ")" => Ok(list!(seq)), | |
88 | "]" => Ok(vector!(seq)), | |
89 | "}" => hash_map(seq), | |
90 | _ => error("read_seq unknown end value"), | |
91 | } | |
92 | } | |
93 | ||
94 | fn read_form(rdr: &mut Reader) -> MalRet { | |
95 | let token = rdr.peek()?; | |
96 | match &token[..] { | |
97 | "'" => { | |
98 | let _ = rdr.next(); | |
99 | Ok(list![Sym("quote".to_string()), read_form(rdr)?]) | |
100 | }, | |
101 | "`" => { | |
102 | let _ = rdr.next(); | |
103 | Ok(list![Sym("quasiquote".to_string()), read_form(rdr)?]) | |
104 | }, | |
105 | "~" => { | |
106 | let _ = rdr.next(); | |
107 | Ok(list![Sym("unquote".to_string()), read_form(rdr)?]) | |
108 | }, | |
109 | "~@" => { | |
110 | let _ = rdr.next(); | |
111 | Ok(list![Sym("splice-unquote".to_string()), read_form(rdr)?]) | |
112 | }, | |
113 | "^" => { | |
114 | let _ = rdr.next(); | |
115 | let meta = read_form(rdr)?; | |
116 | Ok(list![Sym("with-meta".to_string()), read_form(rdr)?, meta]) | |
117 | }, | |
118 | "@" => { | |
119 | let _ = rdr.next(); | |
120 | Ok(list![Sym("deref".to_string()), read_form(rdr)?]) | |
121 | }, | |
122 | ")" => error("unexpected ')'"), | |
123 | "(" => read_seq(rdr, ")"), | |
124 | "]" => error("unexpected ']'"), | |
125 | "[" => read_seq(rdr, "]"), | |
126 | "}" => error("unexpected '}'"), | |
127 | "{" => read_seq(rdr, "}"), | |
128 | _ => read_atom(rdr), | |
129 | } | |
130 | } | |
131 | ||
132 | pub fn read_str(str: String) -> MalRet { | |
133 | let tokens = tokenize(&str); | |
134 | //println!("tokens: {:?}", tokens); | |
135 | if tokens.len() == 0 { | |
136 | return error("no input"); | |
137 | } | |
138 | read_form(&mut Reader { pos: 0, tokens: tokens }) | |
139 | } | |
140 | ||
141 | // vim: ts=2:sw=2:expandtab |