Commit | Line | Data |
---|---|---|
2539e6af KR |
1 | //****************************************************************************** |
2 | // MAL - reader | |
3 | //****************************************************************************** | |
4 | ||
5 | import Foundation | |
6 | ||
425305df KR |
// Symbols created once up front; read_form places them at the head of the
// lists it builds for the "^" (with-meta) and "@" (deref) reader macros.
private let kSymbolWithMeta = make_symbol("with-meta")
private let kSymbolDeref = make_symbol("deref")
2539e6af | 9 | |
// Regular expression source that recognises one token at a time. Any leading
// whitespace/commas are consumed outside the capture group, so capture
// group 1 contains just the token text.
private let token_pattern: String = {
    // Skipped prefix: zero or more commas or [:space:] characters.
    let skipped = "[[:space:],]*"
    // The two-character token "~@" (must be tried before the single "~").
    let spliceUnquote = "~@"
    // Single punctuation character: any one of []{}()`'~^@
    let punctuation = "[\\[\\]{}()`'~^@]"
    // Double-quoted string: escaped characters, or anything other than \ and ".
    let quotedString = "\"(?:\\\\.|[^\\\\\"])*\""
    // Comment: a semicolon followed by the rest of the line.
    let comment = ";.*"
    // Symbol, keyword, number, nil, true, false: any run of characters other
    // than [:space:] or []{}()`'",;
    let bareWord = "[^[:space:]\\[\\]{}()`'\",;]*"

    var pattern = skipped
    pattern += "("
    pattern += spliceUnquote
    pattern += "|"
    pattern += punctuation
    pattern += "|"
    pattern += quotedString
    pattern += "|"
    pattern += comment
    pattern += "|"
    pattern += bareWord
    pattern += ")"
    return pattern
}()
23 | ||
// Regular expression source used to classify a single token. Each
// alternative is its own capture group and read_atom dispatches on the group
// index (1 = comment ... 9 = symbol), so the ORDER of the alternatives below
// must not change.
//
// Every alternative is anchored to the whole token. The keyword alternative
// used to be unanchored, which let it match from a ':' in the middle of a
// token that no other alternative accepted. The string alternative uses
// (?s:...) so that '.' also matches line separators; without it a string
// literal spanning several lines was not recognised as a string.
private let atom_pattern =
    "(^;.*$)" +                 // 1: Comment
    "|" +
    "(^-?[0-9]+$)" +            // 2: Integer
    "|" +
    "(^-?[0-9][0-9.]*$)" +      // 3: Float
    "|" +
    "(^nil$)" +                 // 4: nil
    "|" +
    "(^true$)" +                // 5: true
    "|" +
    "(^false$)" +               // 6: false
    "|" +
    "(^\"(?s:.*)\"$)" +         // 7: String (may contain newlines)
    "|" +
    "(^:.*$)" +                 // 8: Keyword
    "|" +
    "(^[^\"]*$)"                // 9: Symbol
42 | ||
425305df KR |
// Compiled forms of token_pattern and atom_pattern. Both patterns are
// compile-time constants, so a failure to compile them is a programming
// error; try! turns that into an immediate crash. They are never reassigned,
// hence `let`.
private let token_regex: NSRegularExpression = try! NSRegularExpression(pattern: token_pattern, options: NSRegularExpressionOptions())
private let atom_regex: NSRegularExpression = try! NSRegularExpression(pattern: atom_pattern, options: NSRegularExpressionOptions())
2539e6af | 45 | |
// Forward-only cursor over a fixed list of tokens.
private final class Reader {

    // Starts the cursor at the first token.
    init(_ tokens: [String]) {
        self.tokens = tokens
        self.index = 0
    }

    // Returns the token under the cursor (nil once the list is exhausted)
    // and moves the cursor forward by one.
    func next() -> String? {
        let token = peek()
        increment()
        return token
    }

    // Returns the token under the cursor without consuming it, or nil when
    // the cursor is past the last token.
    func peek() -> String? {
        return index < tokens.count ? tokens[index] : nil
    }

    // Moves the cursor forward by one. Also called directly by read_elements
    // to consume a closing delimiter it has already peeked at.
    private func increment() {
        // `+= 1` rather than the deprecated `++` operator.
        index += 1
    }

    private let tokens: [String]
    private var index: Int
}
73 | ||
// Splits the source text `s` into tokens using token_regex and returns them
// in order of appearance. Each token is the text of capture group 1 of one
// match; the whitespace/commas skipped in front of it are not included.
private func tokenizer(s: String) -> [String] {
    let text = s as NSString
    // NSRange is expressed in UTF-16 code units, so the search range must be
    // the NSString length. Using the Character count would cut the range
    // short for input containing characters that need more than one unit.
    let whole = NSMakeRange(0, text.length)

    var tokens = [String]()
    for match in token_regex.matchesInString(s, options: NSMatchingOptions(), range: whole) {
        // Test the captured token, not the whole match: a match can consist
        // of skipped whitespace only, and that must not yield an empty token.
        let tokenRange = match.rangeAtIndex(1)
        if tokenRange.length > 0 {
            tokens.append(text.substringWithRange(tokenRange))
        }
    }
    return tokens
}
86 | ||
// Returns true when capture group `index` took part in `match`.
// NSRegularExpression reports a group that did not participate with a range
// whose location is NSNotFound. Comparing against NSNotFound itself (instead
// of LLONG_MAX) keeps the test correct on platforms where Int is 32 bits.
private func have_match(match: NSTextCheckingResult, at_index index: Int) -> Bool {
    return match.rangeAtIndex(index).location != NSNotFound
}
90 | ||
425305df KR |
// Converts one non-punctuation token into a MalVal.
//
// The token is matched against atom_regex and classified by the capture
// group that took part in the match:
//   1 comment, 2 integer, 3 float, 4 nil, 5 true, 6 false,
//   7 string, 8 keyword, 9 symbol.
//
// Throws (through throw_error) when a numeric token cannot be converted or
// when the token matches none of the atom patterns.
private func read_atom(token: String) throws -> MalVal {
    // NSRange is expressed in UTF-16 code units; use the NSString length so
    // the whole token is searched even when it contains multi-unit characters.
    let range = NSMakeRange(0, (token as NSString).length)
    let matches = atom_regex.matchesInString(token, options: NSMatchingOptions(), range: range)
    for match in matches {
        if have_match(match, at_index: 1) {                 // Comment
            return make_comment()
        } else if have_match(match, at_index: 2) {          // Integer
            // Int64(_:) is locale-independent and returns nil for values
            // that do not fit, so out-of-range literals are reported.
            guard let value = Int64(token) else {
                try throw_error("invalid integer: \(token)")
            }
            return make_integer(value)
        } else if have_match(match, at_index: 3) {          // Float
            // Double(_:) is locale-independent; malformed input such as
            // "1.2.3" yields nil and is reported.
            guard let value = Double(token) else {
                try throw_error("invalid float: \(token)")
            }
            return make_float(value)
        } else if have_match(match, at_index: 4) {          // nil
            return make_nil()
        } else if have_match(match, at_index: 5) {          // true
            return make_true()
        } else if have_match(match, at_index: 6) {          // false
            return make_false()
        } else if have_match(match, at_index: 7) {          // String
            return make_string(unescape(token))
        } else if have_match(match, at_index: 8) {          // Keyword
            // Drop the leading ':' from the keyword name.
            return make_keyword(token[token.startIndex.successor() ..< token.endIndex])
        } else if have_match(match, at_index: 9) {          // Symbol
            return make_symbol(token)
        }
    }
    try throw_error("Unknown token=\(token)")
}
123 | ||
// Reads forms from `r` until the `close` token is found, consumes that token
// and returns the forms collected so far. Forms for which is_comment is true
// are left out. `open` is accepted for symmetry with the callers but is not
// consulted. Throws (through throw_error) if the tokens run out before
// `close` is seen.
private func read_elements(r: Reader, _ open: String, _ close: String) throws -> [MalVal] {
    var elements = [MalVal]()
    while true {
        guard let upcoming = r.peek() else {
            break
        }
        if upcoming == close {
            // Step over the closing delimiter before handing back the result.
            r.increment()
            return elements
        }
        let form = try read_form(r)
        if is_comment(form) {
            continue
        }
        elements.append(form)
    }
    try throw_error("ran out of tokens -- possibly unbalanced ()'s")
}
139 | ||
425305df KR |
// Reads the elements up to the matching ")" and wraps them with make_list.
// The opening "(" has already been consumed by the caller.
private func read_list(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "(", ")")
    return make_list(elements)
}
143 | ||
425305df KR |
// Reads the elements up to the matching "]" and wraps them with make_vector.
// The opening "[" has already been consumed by the caller.
private func read_vector(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "[", "]")
    return make_vector(elements)
}
147 | ||
425305df KR |
// Reads the elements up to the matching "}" and passes them to make_hashmap.
// The opening "{" has already been consumed by the caller.
private func read_hashmap(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "{", "}")
    return make_hashmap(elements)
}
151 | ||
425305df KR |
// Shared implementation of the quoting reader macros: reads the next form
// and returns the two-element list (symbol form), where the head is the
// symbol named by `symbol`.
private func common_quote(r: Reader, _ symbol: String) throws -> MalVal {
    let quoted = try read_form(r)
    let head = make_symbol(symbol)
    return make_list_from(head, quoted)
}
156 | ||
// Reads one complete form from `r` and returns it.
//
// The next token decides what is read: an opening delimiter starts a list,
// vector or hash-map; a reader-macro character wraps the following form(s)
// in a list headed by the corresponding symbol; anything else is handed to
// read_atom. Throws (through throw_error) on a stray closing delimiter or
// when no tokens are left.
private func read_form(r: Reader) throws -> MalVal {
    guard let token = r.next() else {
        try throw_error("ran out of tokens -- possibly unbalanced ()'s")
    }

    switch token {
    // Collections
    case "(":
        return try read_list(r)
    case "[":
        return try read_vector(r)
    case "{":
        return try read_hashmap(r)

    // Closing delimiters with no matching opener
    case ")":
        try throw_error("unexpected \")\"")
    case "]":
        try throw_error("unexpected \"]\"")
    case "}":
        try throw_error("unexpected \"}\"")

    // Quoting reader macros
    case "'":
        return try common_quote(r, "quote")
    case "`":
        return try common_quote(r, "quasiquote")
    case "~":
        return try common_quote(r, "unquote")
    case "~@":
        return try common_quote(r, "splice-unquote")

    // ^meta form  ->  (with-meta form meta); the metadata is read first
    case "^":
        let metadata = try read_form(r)
        let target = try read_form(r)
        return make_list_from(kSymbolWithMeta, target, metadata)

    // @form  ->  (deref form)
    case "@":
        let target = try read_form(r)
        return make_list_from(kSymbolDeref, target)

    default:
        return try read_atom(token)
    }
}
193 | ||
// Entry point of the reader: tokenizes `s` and returns the first form read
// from the resulting tokens. Any error thrown while reading is propagated.
func read_str(s: String) throws -> MalVal {
    let reader = Reader(tokenizer(s))
    return try read_form(reader)
}