Swift*: add seq/string?. swift: gensym/or macro fix
[jackhill/mal.git] / swift / reader.swift
CommitLineData
2539e6af
KR
//******************************************************************************
// MAL - reader
//******************************************************************************
4
5import Foundation
6
425305df
KR
// Symbols that the reader inserts at the head of the list it builds when it
// expands the `@` (deref) and `^` (with-meta) reader macros.
private let kSymbolDeref = make_symbol("deref")
private let kSymbolWithMeta = make_symbol("with-meta")
2539e6af 9
// Regular expression used to split source text into tokens. Each match first
// skips leading whitespace/commas; capture group 1 then holds the token text.
private let token_pattern =
    // Skip whitespace: a sequence of zero or more commas or [:space:]'s,
    // then open the capture group for the token itself.
    "[[:space:],]*(" +
    // Literal "~@"
    "~@|" +
    // Punctuation: any one of []{}()`'~^@
    "[\\[\\]{}()`'~^@]|" +
    // Quoted string: characters other than \ or ", or any escaped characters
    "\"(?:\\\\.|[^\\\\\"])*\"|" +
    // Comment: semicolon followed by anything
    ";.*|" +
    // Symbol, keyword, number, nil, true, false: any sequence of chars
    // but [:space:] or []{}()`'",;
    "[^[:space:]\\[\\]{}()`'\",;]*)"
23
// Regular expression used to classify a single token. Exactly one of the nine
// capture groups participates in a match; the group number tells read_atom
// which kind of atom the token is. Every alternative is anchored to the whole
// token so that a token can never be classified by a fragment in its middle.
private let atom_pattern =
    "(^;.*$)" +                 // 1: Comment
    "|" +
    "(^-?[0-9]+$)" +            // 2: Integer
    "|" +
    "(^-?[0-9][0-9.]*$)" +      // 3: Float
    "|" +
    "(^nil$)" +                 // 4: nil
    "|" +
    "(^true$)" +                // 5: true
    "|" +
    "(^false$)" +               // 6: false
    "|" +
    // 7: String. The (?s:...) flag group lets "." cross line terminators, so
    // a quoted string containing a literal newline is still seen as a string.
    "(^\"(?s:.*)\"$)" +
    "|" +
    // 8: Keyword. Anchored so that only tokens that begin with ":" qualify.
    "(^:.*$)" +
    "|" +
    "(^[^\"]*$)"                // 9: Symbol
42
425305df
KR
// Compiled forms of the two patterns above. They are never reassigned, so they
// are constants. `try!` is deliberate: both patterns are fixed string literals
// in this file, so a compilation failure can only be a programming error.
private let token_regex: NSRegularExpression = try! NSRegularExpression(pattern: token_pattern, options: NSRegularExpressionOptions())
private let atom_regex: NSRegularExpression = try! NSRegularExpression(pattern: atom_pattern, options: NSRegularExpressionOptions())
2539e6af 45
// Forward-only cursor over an array of tokens.
private final class Reader {

    // The tokens being read, in source order.
    private let tokens: [String]
    // Position of the token that peek() would return next.
    private var index: Int

    // Creates a reader positioned at the first token.
    init(_ tokens: [String]) {
        self.tokens = tokens
        self.index = 0
    }

    // Returns the current token (or nil when the tokens are exhausted) and
    // advances past it.
    func next() -> String? {
        let token = peek()
        increment()
        return token
    }

    // Returns the current token without consuming it, or nil when the tokens
    // are exhausted.
    func peek() -> String? {
        if index < tokens.count {
            return tokens[index]
        }
        return nil
    }

    // Advances the cursor by one token.
    private func increment() {
        // Compound assignment rather than the `++` operator, which is
        // deprecated and no longer available in later Swift versions.
        index += 1
    }
}
73
// Splits `s` into tokens by repeatedly applying token_regex.
//
// Returns the text captured by group 1 of every match whose overall range is
// non-empty, in source order.
private func tokenizer(s: String) -> [String] {
    // NSRegularExpression measures ranges in UTF-16 code units, so the search
    // range must come from the NSString length. Using the Character count
    // would cut the input short whenever it contains characters that occupy
    // more than one UTF-16 unit (emoji, combining sequences, ...).
    let source = s as NSString
    let range = NSMakeRange(0, source.length)

    var tokens = [String]()
    for match in token_regex.matchesInString(s, options: NSMatchingOptions(), range: range) {
        // Skip matches that consumed nothing at all.
        if match.range.length > 0 {
            tokens.append(source.substringWithRange(match.rangeAtIndex(1)))
        }
    }
    return tokens
}
86
// Returns true when capture group `index` took part in `match`.
//
// A group that did not participate is reported with a location of NSNotFound.
// Comparing against NSNotFound itself (rather than LLONG_MAX) keeps the check
// correct on platforms where NSNotFound is not the 64-bit maximum.
private func have_match(match: NSTextCheckingResult, at_index index: Int) -> Bool {
    return match.rangeAtIndex(index).location != NSNotFound
}
90
425305df
KR
// Formatter shared by all numeric tokens. It is pinned to a fixed locale so
// that "1.5" is parsed the same way regardless of the user's regional
// settings, and created once instead of once per token.
private let atom_number_formatter: NSNumberFormatter = {
    let formatter = NSNumberFormatter()
    formatter.locale = NSLocale(localeIdentifier: "en_US_POSIX")
    return formatter
}()

// Converts a single non-punctuation token into a value.
//
// The token is classified with atom_regex; the capture group that matched
// selects the kind of value to build (comment, integer, float, nil, true,
// false, string, keyword or symbol).
//
// Throws (via throw_error) when a numeric token cannot be converted or when
// the token matches none of the known atom forms.
private func read_atom(token: String) throws -> MalVal {
    // The range is expressed in UTF-16 units, as NSRegularExpression expects;
    // the Character count can be shorter and would truncate the token.
    let range = NSMakeRange(0, (token as NSString).length)
    let matches = atom_regex.matchesInString(token, options: NSMatchingOptions(), range: range)
    for match in matches {
        if have_match(match, at_index: 1) {                 // Comment
            return make_comment()
        } else if have_match(match, at_index: 2) {          // Integer
            guard let value = atom_number_formatter.numberFromString(token)?.longLongValue else {
                try throw_error("invalid integer: \(token)")
            }
            return make_integer(value)
        } else if have_match(match, at_index: 3) {          // Float
            guard let value = atom_number_formatter.numberFromString(token)?.doubleValue else {
                try throw_error("invalid float: \(token)")
            }
            return make_float(value)
        } else if have_match(match, at_index: 4) {          // nil
            return make_nil()
        } else if have_match(match, at_index: 5) {          // true
            return make_true()
        } else if have_match(match, at_index: 6) {          // false
            return make_false()
        } else if have_match(match, at_index: 7) {          // String
            return make_string(unescape(token))
        } else if have_match(match, at_index: 8) {          // Keyword
            // Drop the leading ":" before building the keyword.
            return make_keyword(token[token.startIndex.successor() ..< token.endIndex])
        } else if have_match(match, at_index: 9) {          // Symbol
            return make_symbol(token)
        }
    }
    try throw_error("Unknown token=\(token)")
}
123
// Reads forms from `r` until the `close` token is reached and returns them.
//
// The caller has already consumed the opening token; `open` is accepted for
// symmetry but is not consulted here. Forms that read as comments are left
// out of the result. Throws (via throw_error) if the tokens run out before
// `close` is seen.
private func read_elements(r: Reader, _ open: String, _ close: String) throws -> [MalVal] {
    var elements = [MalVal]()
    while let upcoming = r.peek() {
        guard upcoming != close else {
            // Consume the closing delimiter and hand back what was collected.
            r.increment()
            return elements
        }
        let form = try read_form(r)
        if is_comment(form) {
            continue
        }
        elements.append(form)
    }
    try throw_error("ran out of tokens -- possibly unbalanced ()'s")
}
139
425305df
KR
// Reads the elements up to the matching ")" and wraps them in a list value.
private func read_list(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "(", ")")
    return make_list(elements)
}
143
425305df
KR
// Reads the elements up to the matching "]" and wraps them in a vector value.
private func read_vector(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "[", "]")
    return make_vector(elements)
}
147
425305df
KR
// Reads the elements up to the matching "}" and passes them to make_hashmap.
private func read_hashmap(r: Reader) throws -> MalVal {
    let elements = try read_elements(r, "{", "}")
    return make_hashmap(elements)
}
151
425305df
KR
// Expands a one-argument reader macro: reads the following form and returns
// the two-element list (symbol form), where `symbol` names the operation.
private func common_quote(r: Reader, _ symbol: String) throws -> MalVal {
    let quoted = try read_form(r)
    let head = make_symbol(symbol)
    return make_list_from(head, quoted)
}
156
// Reads one complete form from `r`.
//
// The next token decides what is read: an opening delimiter starts a list,
// vector or hash-map; a reader-macro character is expanded into the
// corresponding list form; anything else is handed to read_atom. Throws (via
// throw_error) on a stray closing delimiter or when no tokens remain.
private func read_form(r: Reader) throws -> MalVal {
    guard let token = r.next() else {
        try throw_error("ran out of tokens -- possibly unbalanced ()'s")
    }

    switch token {
    // Collections, and closing delimiters with no matching opener.
    case "(":
        return try read_list(r)
    case ")":
        try throw_error("unexpected \")\"")
    case "[":
        return try read_vector(r)
    case "]":
        try throw_error("unexpected \"]\"")
    case "{":
        return try read_hashmap(r)
    case "}":
        try throw_error("unexpected \"}\"")

    // Quoting reader macros.
    case "`":
        return try common_quote(r, "quasiquote")
    case "'":
        return try common_quote(r, "quote")
    case "~":
        return try common_quote(r, "unquote")
    case "~@":
        return try common_quote(r, "splice-unquote")

    // ^meta form: the metadata is read first, but the resulting list puts the
    // form before the metadata.
    case "^":
        let metadata = try read_form(r)
        let target = try read_form(r)
        return make_list_from(kSymbolWithMeta, target, metadata)

    // @form
    case "@":
        let target = try read_form(r)
        return make_list_from(kSymbolDeref, target)

    default:
        return try read_atom(token)
    }
}
193
// Entry point of the reader: tokenizes `s` and returns the first form found
// in it. Errors raised while reading are propagated to the caller.
func read_str(s: String) throws -> MalVal {
    let reader = Reader(tokenizer(s))
    return try read_form(reader)
}