Merge pull request #406 from chr15m/lib-alias-hacks
[jackhill/mal.git] / swift / reader.swift
CommitLineData
2539e6af
KR
1//******************************************************************************
2// MAL - reader
3//******************************************************************************
4
5import Foundation
6
425305df
KR
// Symbols placed at the head of the lists that read_form builds for the
// `^` (with-meta) and `@` (deref) reader macros.
private let kSymbolWithMeta = make_symbol("with-meta")
private let kSymbolDeref = make_symbol("deref")
2539e6af 9
// Regular expression used by the tokenizer. Leading whitespace and commas are
// skipped; the single capture group then holds the token text.
private let token_pattern: String = {
    let skipped = "[[:space:],]*"                       // Zero or more commas or [:space:]'s
    let spliceUnquote = "~@"                            // Literal "~@"
    let punctuation = "[\\[\\]{}()`'~^@]"               // Any one of []{}()`'~^@
    let quotedString = "\"(?:\\\\.|[^\\\\\"])*\"?"      // Characters other than \ or ", or any escaped character; closing quote optional
    let comment = ";.*"                                 // Semicolon followed by anything
    let bareAtom = "[^[:space:]\\[\\]{}()`'\",;]*"      // Symbol, keyword, number, nil, true, false
    return skipped + "(" + spliceUnquote + "|" + punctuation + "|" + quotedString + "|" + comment + "|" + bareAtom + ")"
}()
23
// Regular expression that classifies a single token. read_atom dispatches on
// the number of the capture group that matched, so the order of the
// alternatives below must not change.
//
// Every alternative is anchored at the start of the token. The keyword
// alternative used to be the unanchored "(:.*)", which let the regex engine
// skip ahead and report a "keyword" starting at any later ':' whenever no
// other alternative matched at offset 0.
private let atom_pattern =
    "(^;.*$)|" +                // 1: Comment
    "(^-?[0-9]+$)|" +           // 2: Integer
    "(^-?[0-9][0-9.]*$)|" +     // 3: Float
    "(^nil$)|" +                // 4: nil
    "(^true$)|" +               // 5: true
    "(^false$)|" +              // 6: false
    "(^\".*\"$)|" +             // 7: String
    "(^\".*$)|" +               // 8: Invalid/unclosed string
    "(^:.*$)|" +                // 9: Keyword
    "(^[^\"]*$)"                // 10: Symbol
44
425305df
KR
// Compiled forms of the two patterns above. They are never reassigned, so they
// are constants. `try!` is acceptable here: both patterns are fixed literals,
// so a compilation failure can only be a programming error.
private let token_regex: NSRegularExpression = try! NSRegularExpression(pattern: token_pattern, options: NSRegularExpressionOptions())

// String tokens may contain raw newlines (the tokenizer's "[^\\\"]" class accepts
// them), so "." must be allowed to match line separators when classifying atoms;
// otherwise a multi-line string literal matches none of the string alternatives.
private let atom_regex: NSRegularExpression = try! NSRegularExpression(pattern: atom_pattern, options: NSRegularExpressionOptions.DotMatchesLineSeparators)
2539e6af 47
// Forward-only cursor over a list of tokens.
private final class Reader {

    private let tokens: [String]
    private var index: Int

    init(_ tokens: [String]) {
        self.tokens = tokens
        self.index = 0
    }

    // Returns the token under the cursor (nil once past the end) and then
    // advances the cursor unconditionally.
    func next() -> String? {
        defer { increment() }
        return peek()
    }

    // Returns the token under the cursor without advancing, or nil when the
    // cursor is past the last token.
    func peek() -> String? {
        return index < tokens.count ? tokens[index] : nil
    }

    // Moves the cursor forward by one token.
    private func increment() {
        index += 1
    }
}
75
// Splits `s` into tokens by repeatedly applying token_regex and collecting the
// text of capture group 1 from every match.
//
// - s: the source text to tokenize.
// - returns: the non-empty tokens, in order of appearance.
private func tokenizer(s: String) -> [String] {
    // NSRange is expressed in UTF-16 code units. Using the Character count here
    // would cut the range short for text containing surrogate pairs or
    // combining sequences, silently dropping the end of the input.
    let source = s as NSString
    let whole = NSMakeRange(0, source.length)

    var tokens = [String]()
    for match in token_regex.matchesInString(s, options: NSMatchingOptions(), range: whole) {
        // Test the captured token rather than the whole match: a match that
        // consumed only whitespace/commas has a non-empty overall range but an
        // empty capture, and must not produce an empty token.
        let captured = match.rangeAtIndex(1)
        if captured.location != NSNotFound && captured.length > 0 {
            tokens.append(source.substringWithRange(captured))
        }
    }
    return tokens
}
88
// Reports whether capture group `index` participated in `match`.
//
// NSRegularExpression reports a group that did not participate with a range
// whose location is NSNotFound. Compare against that constant directly: the
// previous comparison with LLONG_MAX only coincided with NSNotFound where
// NSInteger is 64 bits wide.
private func have_match(match: NSTextCheckingResult, at_index index: Int) -> Bool {
    return match.rangeAtIndex(index).location != NSNotFound
}
92
425305df
KR
// Converts a single non-punctuation token into a MalVal.
//
// The token is classified with atom_regex; the number of the capture group
// that matched selects the kind of value to build.
//
// - token: the token text as produced by the tokenizer.
// - returns: the value built by the matching make_* constructor.
// - throws: via throw_error for an out-of-range or malformed number, an
//   unclosed string, or a token that matches no alternative.
private func read_atom(token: String) throws -> MalVal {
    // NSRange is expressed in UTF-16 code units, so the length has to come from
    // the NSString view, not from the Character count.
    let range = NSMakeRange(0, (token as NSString).length)

    // Every branch below returns or throws, so only the first match can ever
    // be consulted.
    if let match = atom_regex.firstMatchInString(token, options: NSMatchingOptions(), range: range) {
        if have_match(match, at_index: 1) {             // Comment
            return make_comment()
        } else if have_match(match, at_index: 2) {      // Integer
            // Int64(_:) is locale-independent and returns nil on overflow
            // instead of handing back a coerced value.
            guard let value = Int64(token) else {
                try throw_error("invalid integer: \(token)")
            }
            return make_integer(value)
        } else if have_match(match, at_index: 3) {      // Float
            // Double(_:) always expects "." as the decimal separator,
            // regardless of the user's locale.
            guard let value = Double(token) else {
                try throw_error("invalid float: \(token)")
            }
            return make_float(value)
        } else if have_match(match, at_index: 4) {      // nil
            return make_nil()
        } else if have_match(match, at_index: 5) {      // true
            return make_true()
        } else if have_match(match, at_index: 6) {      // false
            return make_false()
        } else if have_match(match, at_index: 7) {      // String
            return make_string(unescape(token))
        } else if have_match(match, at_index: 8) {      // Invalid/unclosed string
            try throw_error("expected '\"', got EOF")
        } else if have_match(match, at_index: 9) {      // Keyword
            // Drop the leading ':' before building the keyword.
            return make_keyword(token[token.startIndex.successor() ..< token.endIndex])
        } else if have_match(match, at_index: 10) {     // Symbol
            return make_symbol(token)
        }
    }
    try throw_error("Unknown token=\(token)")
}
127
// Reads forms from `r` until the `close` token is reached and returns them.
//
// The opening delimiter has already been consumed by the caller. Forms for
// which is_comment returns true are dropped. The closing delimiter is consumed
// before returning.
//
// - r: the token reader, positioned just after the opening delimiter.
// - open: the opening delimiter; used only in the error message.
// - close: the token that terminates the sequence.
// - throws: via throw_error when the tokens run out before `close` is seen,
//   plus anything thrown by read_form.
private func read_elements(r: Reader, _ open: String, _ close: String) throws -> [MalVal] {
    var elements = [MalVal]()
    while let token = r.peek() {
        if token == close {
            r.increment()       // Consume the closing delimiter
            return elements
        }
        let item = try read_form(r)
        if !is_comment(item) {
            elements.append(item)
        }
    }
    // Name the delimiters actually being read instead of always blaming ()'s.
    try throw_error("ran out of tokens -- possibly unbalanced \(open)\(close)'s")
}
143
425305df
KR
// Reads the remainder of a "(" ... ")" form and wraps the elements in a list.
private func read_list(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "(", ")")
    return make_list(elements)
}
147
425305df
KR
// Reads the remainder of a "[" ... "]" form and wraps the elements in a vector.
private func read_vector(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "[", "]")
    return make_vector(elements)
}
151
425305df
KR
// Reads the remainder of a "{" ... "}" form and passes the elements to
// make_hashmap.
private func read_hashmap(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "{", "}")
    return make_hashmap(elements)
}
155
425305df
KR
// Shared implementation of the one-argument reader macros: reads the next
// form and returns the two-element list (symbol form).
private func common_quote(r: Reader, _ symbol: String) throws -> MalVal {
    let quoted = try read_form(r)
    let head = make_symbol(symbol)
    return make_list_from(head, quoted)
}
160
// Reads one complete form from `r`.
//
// The next token is consumed and dispatched on: opening delimiters start a
// collection, reader-macro characters expand into the corresponding list, a
// stray closing delimiter is an error, and anything else is handed to
// read_atom.
//
// - throws: via throw_error when no tokens remain or a closing delimiter
//   appears where a form was expected, plus anything thrown further down.
private func read_form(r: Reader) throws -> MalVal {
    guard let token = r.next() else {
        try throw_error("ran out of tokens -- possibly unbalanced ()'s")
    }

    switch token {
    case "(":
        return try read_list(r)
    case "[":
        return try read_vector(r)
    case "{":
        return try read_hashmap(r)
    case ")", "]", "}":
        try throw_error("unexpected \"\(token)\"")
    case "`":
        return try common_quote(r, "quasiquote")
    case "'":
        return try common_quote(r, "quote")
    case "~":
        return try common_quote(r, "unquote")
    case "~@":
        return try common_quote(r, "splice-unquote")
    case "^":
        // The metadata is written first, but with-meta takes it last.
        let meta = try read_form(r)
        let target = try read_form(r)
        return make_list_from(kSymbolWithMeta, target, meta)
    case "@":
        let target = try read_form(r)
        return make_list_from(kSymbolDeref, target)
    default:
        return try read_atom(token)
    }
}
197
// Entry point of the reader: tokenizes `s` and reads the first form from the
// resulting tokens.
//
// - s: the source text.
// - returns: the value produced by read_form for the first form.
// - throws: whatever read_form throws.
func read_str(s: String) throws -> MalVal {
    let reader = Reader(tokenizer(s))
    return try read_form(reader)
}