Commit | Line | Data |
---|---|---|
57350ed7 JM |
1 | #import <Foundation/Foundation.h> |
2 | ||
3 | #import "types.h" | |
4 | ||
5 | // Only used here, so define interface locally | |
// Reader: a forward-only cursor over an array of token strings.
// Each instance owns its own token list and position.
@interface Reader : NSObject

// Creates a reader positioned at the first element of toks.
- (instancetype)initWithTokens:(NSArray *)toks;
// Creates a reader over an empty token list.
- (instancetype)init;

// Returns the token at the cursor and advances the cursor by one.
// Indexing past the last token raises the NSArray range exception.
- (NSString *) next;
// Returns the token at the cursor without advancing, or nil when the
// cursor is at or past the end of the token list.
- (NSString *) peek;

@end


// The state is declared inside the braces so that it is per-instance.
// Variables declared between @implementation and the first method without
// braces are ordinary file-scope globals shared by every Reader.
@implementation Reader {
    NSArray *_tokens;
    NSUInteger _position;
}

- (instancetype)initWithTokens:(NSArray *)toks {
    self = [super init];
    if (self) {
        // Copy so later mutation of a caller-owned NSMutableArray cannot
        // change the sequence underneath the cursor.
        _tokens = [toks copy];
        _position = 0;
    }
    return self;
}

- (instancetype)init {
    return [self initWithTokens:@[]];
}

- (NSString *)next {
    _position++;
    return _tokens[_position - 1];
}

- (NSString *)peek {
    if (_position < [_tokens count]) {
        return _tokens[_position];
    }
    return nil;
}

@end
49 | ||
50 | ||
// Splits source text into reader tokens.
//
// The pattern skips leading whitespace/commas and captures one token in
// group 1: "~@", a single special character, a (possibly unterminated)
// double-quoted string, a ';' comment running to end of line, or a run of
// ordinary symbol characters. Comment tokens are dropped from the result.
//
// Returns the tokens in order of appearance (empty when nothing matches).
NSArray * tokenize(NSString *str) {
    NSString * const pattern =
        @"[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}()'\"`@,;]+)";
    NSRegularExpression *lexer =
        [NSRegularExpression regularExpressionWithPattern:pattern
                                                  options:0
                                                    error:NULL];

    NSRange whole = NSMakeRange(0, [str length]);
    NSArray *hits = [lexer matchesInString:str options:0 range:whole];

    NSMutableArray *result = [NSMutableArray array];
    for (NSTextCheckingResult *hit in hits) {
        NSString *lexeme = [str substringWithRange:[hit rangeAtIndex:1]];
        // Anything starting with ';' is a comment and is not a token.
        if ([lexeme characterAtIndex:0] != ';') {
            [result addObject:lexeme];
        }
    }
    return result;
}
70 | ||
// Returns YES when capture group idx took part in the match.
// NSTextCheckingResult reports {NSNotFound, 0} for groups that did not.
static BOOL read_atom_group_matched(NSTextCheckingResult *match, NSUInteger idx) {
    return [match rangeAtIndex:idx].location != NSNotFound;
}

// Consumes one token from rdr and converts it to an atom.
//
// Capture groups of the classification pattern, in order:
//   1 integer, 2 float, 3 nil, 4 true, 5 false,
//   6 body of a terminated string, 7 body of an unterminated string,
//   8 keyword name (text after ':'), 9 symbol.
//
// Returns an NSNumber, NSNull, MalTrue, MalFalse, NSString (strings, and
// keywords prefixed with U+029E) or MalSymbol.
// Throws an NSString describing the problem when the token is an
// unterminated string, a number that cannot be parsed, or matches nothing.
NSObject * read_atom(Reader * rdr) {
    NSRegularExpression *regex = [NSRegularExpression
        regularExpressionWithPattern:@"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"((?:[\\\\].|[^\\\\\"])*)\"$|^\"(.*)$|:(.*)|(^[^\"]*$)"
        options:0
        error:NULL];
    NSNumberFormatter *numf = [[NSNumberFormatter alloc] init];
    numf.numberStyle = NSNumberFormatterDecimalStyle;
    // Source text always uses '.' as the decimal separator, so parsing must
    // not depend on the user's current locale.
    numf.locale = [[NSLocale alloc] initWithLocaleIdentifier:@"en_US_POSIX"];

    NSString *token = [rdr next];

    NSArray *matches = [regex
        matchesInString:token
        options:0
        range:NSMakeRange(0, [token length])];

    if ([matches count] > 0) {
        NSTextCheckingResult *match = matches[0];
        if (read_atom_group_matched(match, 1) ||
            read_atom_group_matched(match, 2)) { // integer or float
            NSNumber *number = [numf numberFromString:token];
            if (number) {
                return number;
            }
            // Number-shaped but unparsable (e.g. "1.2.3"): fall out of the
            // chain to the invalid-token throw instead of returning nil.
        } else if (read_atom_group_matched(match, 3)) { // nil
            return [NSNull null];
        } else if (read_atom_group_matched(match, 4)) { // true
            return [[MalTrue alloc] init]; // TODO: intern
        } else if (read_atom_group_matched(match, 5)) { // false
            return [[MalFalse alloc] init]; // TODO: intern
        } else if (read_atom_group_matched(match, 6)) { // string
            NSString * str = [token substringWithRange:[match rangeAtIndex:6]];
            // Escaped backslashes are parked as U+029E first so the \" and
            // \n replacements cannot misread them, then restored last.
            return [[[[str
                stringByReplacingOccurrencesOfString:@"\\\\" withString:@"\u029e"]
                stringByReplacingOccurrencesOfString:@"\\\"" withString:@"\""]
                stringByReplacingOccurrencesOfString:@"\\n" withString:@"\n"]
                stringByReplacingOccurrencesOfString:@"\u029e" withString:@"\\"];
        } else if (read_atom_group_matched(match, 7)) { // unterminated string
            @throw @"read_atom: expected '\"', got EOF";
        } else if (read_atom_group_matched(match, 8)) { // keyword
            return [NSString stringWithFormat:@"\u029e%@",
                    [token substringWithRange:[match rangeAtIndex:8]]];
        } else if (read_atom_group_matched(match, 9)) { // symbol
            return [MalSymbol stringWithString:token];
        }
    }

    @throw @"read_atom: invalid token";
}
118 | ||
119 | // Only used locally, so declare here | |
120 | NSObject * read_form(Reader * rdr); | |
121 | ||
// Reads a delimited sequence of forms from rdr.
//
// The first token consumed must begin with `start`; forms are then read
// with read_form until a token beginning with `end` is seen, and that
// closing token is consumed as well.
//
// Returns the forms in order.
// Throws an NSString if the opening token does not begin with `start`, or
// if the tokens run out before the closing delimiter.
NSArray * read_list(Reader * rdr, char start, char end) {
    NSString * opener = [rdr next];
    NSMutableArray * forms = [NSMutableArray array];

    if ([opener characterAtIndex:0] != start) {
        @throw [NSString stringWithFormat:@"expected '%c'", start];
    }

    for (;;) {
        NSString * upcoming = [rdr peek];
        if (upcoming == nil) {
            @throw [NSString stringWithFormat:@"expected '%c', got EOF", end];
        }
        if ([upcoming characterAtIndex:0] == end) {
            break;
        }
        [forms addObject:read_form(rdr)];
    }

    // Step over the closing delimiter.
    [rdr next];
    return forms;
}
138 | ||
// Reads one complete form from rdr, dispatching on the first character of
// the next token.
//
// Reader macros expand to two- or three-element arrays headed by a symbol:
//   'x  -> (quote x)            `x -> (quasiquote x)
//   ~x  -> (unquote x)          ~@x -> (splice-unquote x)
//   @x  -> (deref x)            ^m x -> (with-meta x m)
// '(' starts a list, '[' a vector (MalVector) and '{' a hash map (built by
// hash_map); any other token is handed to read_atom.
//
// Throws an NSString when no token is left to read or when a closing
// delimiter appears where a form is expected.
NSObject * read_form(Reader * rdr) {
    NSString *token = [rdr peek];
    if (token == nil) {
        // Happens when a reader macro is the last token, e.g. a lone "'".
        // Without this check characterAtIndex: on nil yields 0 and the
        // failure surfaces later as an unrelated range exception.
        @throw @"read_form: unexpected EOF";
    }

    switch ([token characterAtIndex:0]) {
    case '\'': [rdr next];
               return @[[MalSymbol stringWithString:@"quote"],
                        read_form(rdr)];
    case '`':  [rdr next];
               return @[[MalSymbol stringWithString:@"quasiquote"],
                        read_form(rdr)];
    case '~':  [rdr next];
               if ([token isEqualToString:@"~@"]) {
                   return @[[MalSymbol stringWithString:@"splice-unquote"],
                            read_form(rdr)];
               } else {
                   return @[[MalSymbol stringWithString:@"unquote"],
                            read_form(rdr)];
               }
    case '^': {
               // Braces keep `meta` out of scope for the later case labels.
               [rdr next];
               // The metadata form comes first in the source but last in
               // the expansion.
               NSObject * meta = read_form(rdr);
               return @[[MalSymbol stringWithString:@"with-meta"],
                        read_form(rdr),
                        meta];
    }
    case '@':  [rdr next];
               return @[[MalSymbol stringWithString:@"deref"],
                        read_form(rdr)];

    // lists
    case ')':
        @throw @"unexpected ')'";
    case '(':
        return read_list(rdr, '(', ')');

    // vectors
    case ']':
        @throw @"unexpected ']'";
    case '[':
        return [MalVector fromArray:read_list(rdr, '[', ']')];

    // hash maps
    case '}':
        @throw @"unexpected '}'";
    case '{':
        return hash_map(read_list(rdr, '{', '}'));
    default:
        return read_atom(rdr);
    }
}
186 | ||
// Entry point of the reader: tokenizes str and parses the first form.
//
// Throws an NSException named "ReaderContinue" when str yields no tokens
// (blank or comment-only input); otherwise returns whatever read_form
// produces for the token stream.
NSObject * read_str(NSString *str) {
    NSArray * tokens = tokenize(str);
    if ([tokens count] > 0) {
        Reader * rdr = [[Reader alloc] initWithTokens:tokens];
        return read_form(rdr);
    }
    @throw [NSException exceptionWithName:@"ReaderContinue"
                                   reason:@"empty token"
                                 userInfo:nil];
}