Commit | Line | Data |
---|---|---|
2539e6af KR |
1 | //****************************************************************************** |
2 | // MAL - reader | |
3 | //****************************************************************************** | |
4 | ||
5 | import Foundation | |
6 | ||
425305df KR |
// Symbols created once at load time. read_form places them at the head of the
// lists it builds for the `^` (with-meta) and `@` (deref) reader macros.
private let kSymbolWithMeta = make_symbol("with-meta")
private let kSymbolDeref = make_symbol("deref")
2539e6af | 9 | |
// Regular expression used by the tokenizer. Each match skips leading
// whitespace/commas and then captures exactly one token in group 1. The
// alternatives are tried in the order listed, so "~@" wins over a lone "~".
// Note that the closing quote of a string is optional here: unterminated
// strings are still emitted as a token.
private let token_pattern =
    "[[:space:],]*" +                       // Skip whitespace: a sequence of zero or more commas or [:space:]'s
    "(" +
        "~@" +                              // Literal "~@"
        "|" +
        "[\\[\\]{}()`'~^@]" +               // Punctuation: Any one of []{}()`'~^@
        "|" +
        "\"(?:\\\\.|[^\\\\\"])*\"?" +       // Quoted string: characters other than \ or ", or any escaped characters
        "|" +
        ";.*" +                             // Comment: semicolon followed by anything
        "|" +
        "[^[:space:]\\[\\]{}()`'\",;]*" +   // Symbol, keyword, number, nil, true, false: any sequence of chars but [:space:] or []{}()`'",;
    ")"
23 | ||
// Regular expression used to classify a single token. Every alternative is its
// own capture group and read_atom dispatches on the group NUMBER, so the order
// and count of capturing groups below must not change (inner groups are
// non-capturing for that reason).
//
// The String rule mirrors the string rule of the token pattern: the body may
// only contain escaped characters or characters other than \ and ". This
// rejects tokens such as "abc\" (whose final quote is escaped, i.e. the string
// is unterminated) and, because a negated class matches newlines, accepts
// strings that span several lines. Anything else that starts with a quote
// falls through to the Invalid/unclosed rule, which uses [\s\S] so it also
// covers multi-line input. The Keyword rule is anchored like every other rule
// so it can only match at the start of the token.
private let atom_pattern =
    "(^;.*$)" +                             //  1: Comment
    "|" +
    "(^-?[0-9]+$)" +                        //  2: Integer
    "|" +
    "(^-?[0-9][0-9.]*$)" +                  //  3: Float
    "|" +
    "(^nil$)" +                             //  4: nil
    "|" +
    "(^true$)" +                            //  5: true
    "|" +
    "(^false$)" +                           //  6: false
    "|" +
    "(^\"(?:\\\\.|[^\\\\\"])*\"$)" +        //  7: String (properly terminated)
    "|" +
    "(^\"[\\s\\S]*$)" +                     //  8: Invalid/unclosed string
    "|" +
    "(^:.*$)" +                             //  9: Keyword
    "|" +
    "(^[^\"]*$)"                            // 10: Symbol
44 | ||
425305df KR |
// Compiled forms of the two patterns above, built once when first used.
// `try!` is deliberate: both patterns are compile-time constants, so a failure
// to compile them is a programming error rather than a runtime condition.
// They are constants (`let`) because nothing in this file ever reassigns them.
private let token_regex = try! NSRegularExpression(pattern: token_pattern, options: NSRegularExpressionOptions())
private let atom_regex = try! NSRegularExpression(pattern: atom_pattern, options: NSRegularExpressionOptions())
2539e6af | 47 | |
/// Forward-only cursor over the token list produced by the tokenizer.
private final class Reader {

    /// The complete token stream; never modified after construction.
    private let tokens: [String]

    /// Position of the token that `peek()` reports. It may move past the end
    /// of `tokens`, in which case `peek()` and `next()` return nil.
    private var index: Int

    /// Creates a reader positioned at the first token.
    init(_ tokens: [String]) {
        self.tokens = tokens
        self.index = 0
    }

    /// Returns the current token without consuming it, or nil when exhausted.
    func peek() -> String? {
        guard index < tokens.count else {
            return nil
        }
        return tokens[index]
    }

    /// Returns the current token (nil when exhausted) and advances the cursor.
    /// The cursor advances even when there was nothing left to return.
    func next() -> String? {
        defer { increment() }
        return peek()
    }

    /// Moves the cursor one token forward.
    private func increment() {
        index += 1
    }
}
75 | ||
/// Splits source text into the tokens recognised by `token_regex`.
///
/// - Parameter s: The text to tokenize.
/// - Returns: The text captured by group 1 of each match, in order of
///   appearance. Matches whose capture is empty (e.g. trailing whitespace at
///   the end of the input) contribute no token.
private func tokenizer(s: String) -> [String] {
    // NSRegularExpression works on NSString, whose ranges are measured in
    // UTF-16 code units. Taking the length from the NSString view (rather than
    // the grapheme count of the Swift string) keeps the search range covering
    // the whole input when it contains emoji or combining characters.
    let source = s as NSString
    let whole = NSMakeRange(0, source.length)

    var tokens = [String]()
    for match in token_regex.matchesInString(s, options: NSMatchingOptions(), range: whole) {
        let captured = match.rangeAtIndex(1)
        // A match can consist solely of skipped whitespace/commas; its capture
        // group is then empty and must not be reported as a token.
        if captured.location != NSNotFound && captured.length > 0 {
            tokens.append(source.substringWithRange(captured))
        }
    }
    return tokens
}
88 | ||
/// Reports whether capture group `index` took part in `match`.
///
/// - Parameters:
///   - match: A result produced by NSRegularExpression.
///   - index: The capture-group number to inspect.
/// - Returns: true when the group's range has a real location.
private func have_match(match: NSTextCheckingResult, at_index index: Int) -> Bool {
    // A group that did not participate is reported with location NSNotFound.
    // Comparing against that constant directly is correct on every platform;
    // a comparison with LLONG_MAX only coincides with it where Int is 64 bits.
    return match.rangeAtIndex(index).location != NSNotFound
}
92 | ||
425305df KR |
/// Converts a single non-punctuation token into a value.
///
/// The token is classified with `atom_regex`; the number of the capture group
/// that matched selects the kind of value to build.
///
/// - Parameter token: The token text as produced by the tokenizer.
/// - Returns: A comment marker, integer, float, nil, true, false, string,
///   keyword or symbol value.
/// - Throws: Via `throw_error` when a numeric token cannot be converted, when
///   a string token is unterminated, or when no rule matches the token.
private func read_atom(token: String) throws -> MalVal {
    // The range handed to NSRegularExpression is measured in UTF-16 code
    // units. Using the grapheme count instead would cut the range short for
    // tokens containing emoji or combining characters -- e.g. the closing
    // quote of "😀" would fall outside the range and the token would be
    // misreported as an unclosed string.
    let range = NSMakeRange(0, (token as NSString).length)
    let matches = atom_regex.matchesInString(token, options: NSMatchingOptions(), range: range)
    for match in matches {
        if have_match(match, at_index: 1) {                // Comment
            return make_comment()
        } else if have_match(match, at_index: 2) {         // Integer
            guard let value = NSNumberFormatter().numberFromString(token)?.longLongValue else {
                try throw_error("invalid integer: \(token)")
            }
            return make_integer(value)
        } else if have_match(match, at_index: 3) {         // Float
            // The Float rule also admits text such as "1.2.3"; the formatter
            // is what rejects it.
            guard let value = NSNumberFormatter().numberFromString(token)?.doubleValue else {
                try throw_error("invalid float: \(token)")
            }
            return make_float(value)
        } else if have_match(match, at_index: 4) {         // nil
            return make_nil()
        } else if have_match(match, at_index: 5) {         // true
            return make_true()
        } else if have_match(match, at_index: 6) {         // false
            return make_false()
        } else if have_match(match, at_index: 7) {         // String
            return make_string(unescape(token))
        } else if have_match(match, at_index: 8) {         // Invalid/unclosed string
            try throw_error("expected '\"', got EOF")
        } else if have_match(match, at_index: 9) {         // Keyword
            // Drop the leading ":" before building the keyword.
            return make_keyword(token[token.startIndex.successor() ..< token.endIndex])
        } else if have_match(match, at_index: 10) {        // Symbol
            return make_symbol(token)
        }
    }
    try throw_error("Unknown token=\(token)")
}
127 | ||
/// Reads forms until the closing delimiter is reached.
///
/// The opening delimiter has already been consumed by the caller.
///
/// - Parameters:
///   - r: The token reader, positioned just after the opening delimiter.
///   - open: The opening delimiter; used only to word the error message.
///   - close: The token that terminates the sequence.
/// - Returns: The forms read, in order, with comments left out.
/// - Throws: Via `throw_error` when the tokens run out before `close` is
///   seen, and rethrows anything `read_form` throws.
private func read_elements(r: Reader, _ open: String, _ close: String) throws -> [MalVal] {
    var elements = [MalVal]()
    while let token = r.peek() {
        guard token != close else {
            r.increment()       // Consume the closing delimiter
            return elements
        }
        let item = try read_form(r)
        if !is_comment(item) {
            elements.append(item)
        }
    }
    // Name the delimiters that were actually being matched, so that an
    // unterminated vector or hash-map is not reported as unbalanced parens.
    // For lists the text is the same as before.
    try throw_error("ran out of tokens -- possibly unbalanced \(open)\(close)'s")
}
143 | ||
425305df KR |
/// Reads the remainder of a list whose "(" has already been consumed and
/// wraps the collected forms with `make_list`.
private func read_list(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "(", ")")
    return make_list(elements)
}
147 | ||
425305df KR |
/// Reads the remainder of a vector whose "[" has already been consumed and
/// wraps the collected forms with `make_vector`.
private func read_vector(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "[", "]")
    return make_vector(elements)
}
151 | ||
425305df KR |
/// Reads the remainder of a hash-map whose "{" has already been consumed and
/// hands the collected forms, in source order, to `make_hashmap`.
private func read_hashmap(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "{", "}")
    return make_hashmap(elements)
}
155 | ||
425305df KR |
/// Expands a quote-style reader macro: reads the form that follows and
/// returns the two-element list `(symbol form)`.
///
/// - Parameters:
///   - r: The token reader, positioned just after the macro character(s).
///   - symbol: Name of the symbol to place at the head of the list.
private func common_quote(r: Reader, _ symbol: String) throws -> MalVal {
    let head = make_symbol(symbol)
    let quoted = try read_form(r)
    return make_list_from(head, quoted)
}
160 | ||
/// Reads one complete form from the reader.
///
/// Consumes the next token and dispatches on it: opening delimiters start a
/// list, vector or hash-map; reader-macro characters expand into the
/// corresponding list form; anything else is handed to `read_atom`.
///
/// - Parameter r: The token reader.
/// - Returns: The form that was read.
/// - Throws: Via `throw_error` when there are no tokens left or when a closing
///   delimiter appears where a form was expected; rethrows errors from the
///   nested readers.
private func read_form(r: Reader) throws -> MalVal {
    guard let token = r.next() else {
        try throw_error("ran out of tokens -- possibly unbalanced ()'s")
    }

    // Reader macros that simply wrap the following form as (symbol form).
    let quote_macros = [
        "`": "quasiquote",
        "'": "quote",
        "~": "unquote",
        "~@": "splice-unquote",
    ]
    if let symbol = quote_macros[token] {
        return try common_quote(r, symbol)
    }

    switch token {
    case "(":
        return try read_list(r)
    case "[":
        return try read_vector(r)
    case "{":
        return try read_hashmap(r)
    case ")":
        try throw_error("unexpected \")\"")
    case "]":
        try throw_error("unexpected \"]\"")
    case "}":
        try throw_error("unexpected \"}\"")
    case "^":
        // The metadata is written first in the source but comes last in the
        // resulting (with-meta form meta) list.
        let meta = try read_form(r)
        let target = try read_form(r)
        return make_list_from(kSymbolWithMeta, target, meta)
    case "@":
        let target = try read_form(r)
        return make_list_from(kSymbolDeref, target)
    default:
        return try read_atom(token)
    }
}
197 | ||
/// Entry point of the reader: tokenizes `s` and reads the first form from it.
///
/// - Parameter s: The source text to read.
/// - Returns: The first form found in the text. Tokens after that form are
///   not examined.
/// - Throws: Whatever `read_form` throws, e.g. when the text holds no tokens.
func read_str(s: String) throws -> MalVal {
    let reader = Reader(tokenizer(s))
    return try read_form(reader)
}