objc: Detect more unterminated strings.
[jackhill/mal.git] / objc / reader.m
CommitLineData
57350ed7
JM
1#import <Foundation/Foundation.h>
2
3#import "types.h"
4
5// Only used here, so define interface locally
// Reader: a forward-only cursor over an array of token strings.
@interface Reader : NSObject

// Creates a reader positioned at the first element of `toks`.
- (instancetype)initWithTokens:(NSArray *)toks;
// Creates a reader over an empty token array.
- (instancetype)init;

// Returns the current token and advances the cursor.
- (NSString *) next;
// Returns the current token without advancing, or nil when exhausted.
- (NSString *) peek;

@end


@implementation Reader {
    // These must live in the ivar block. Variables declared inside
    // @implementation but outside braces are file-scope globals, which
    // would make every Reader share one token array and one cursor.
    NSArray *_tokens;
    NSUInteger _position;
}

- (instancetype)initWithTokens:(NSArray *)toks {
    self = [super init];
    if (self) {
        _tokens = toks;
        _position = 0;
    }
    return self;
}

- (instancetype)init {
    return [self initWithTokens:@[]];
}

// Advances first, then returns the token that was current. Calling this
// with no tokens left indexes past the end of the array, so the array
// subscript raises; use -peek to test for exhaustion beforehand.
- (NSString *)next {
    _position++;
    return _tokens[_position - 1];
}

- (NSString *)peek {
    if (_position < [_tokens count]) {
        return _tokens[_position];
    }
    return nil;
}

@end
49
50
// Splits `str` into an array of token strings.
//
// Each regex match skips leading whitespace/commas and captures exactly one
// token in group 1: "~@", a single special character, a (possibly
// unterminated) double-quoted string, a ";" comment running to end of line,
// or a run of ordinary symbol characters. Comment tokens are dropped.
NSArray * tokenize(NSString *str) {
    NSRegularExpression *tokenPattern = [NSRegularExpression
        regularExpressionWithPattern:@"[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}()'\"`@,;]+)"
                             options:0
                               error:NULL];

    NSRange whole = NSMakeRange(0, [str length]);
    NSArray *found = [tokenPattern matchesInString:str options:0 range:whole];

    NSMutableArray *result = [NSMutableArray array];
    NSUInteger total = [found count];
    for (NSUInteger idx = 0; idx < total; idx++) {
        NSTextCheckingResult *hit = found[idx];
        NSString *piece = [str substringWithRange:[hit rangeAtIndex:1]];
        // Keep everything except comments, which start with ';'.
        if ([piece characterAtIndex:0] != ';') {
            [result addObject:piece];
        }
    }
    return result;
}
70
// Returns YES when capture group `idx` of `match` took part in the match.
// A group that did not participate reports a not-found location; the
// threshold comparison is kept exactly as the original code had it
// (presumably to tolerate runtimes whose sentinel differs -- TODO confirm).
static BOOL atom_group_matched(NSTextCheckingResult *match, NSUInteger idx) {
    return [match rangeAtIndex:idx].location < -1ULL/2;
}

// Decodes the escapes \\ , \" and \n in the body of a string literal.
// Any other backslash sequence is copied through unchanged. Done in one
// left-to-right pass so no placeholder character is needed; the input's
// own characters (including U+029E) are never reinterpreted.
static NSString * atom_unescape_string(NSString *raw) {
    NSUInteger len = [raw length];
    if (len == 0) {
        return @"";
    }
    unichar *buf = malloc(len * sizeof(unichar));
    if (buf == NULL) {
        @throw @"read_atom: out of memory";
    }
    [raw getCharacters:buf range:NSMakeRange(0, len)];

    // Compact in place: dst never overtakes src.
    NSUInteger dst = 0;
    for (NSUInteger src = 0; src < len; src++) {
        unichar c = buf[src];
        if (c == '\\' && src + 1 < len) {
            unichar esc = buf[src + 1];
            if (esc == '\\' || esc == '"') {
                c = esc;
                src++;
            } else if (esc == 'n') {
                c = '\n';
                src++;
            }
        }
        buf[dst++] = c;
    }
    NSString *decoded = [NSString stringWithCharacters:buf length:dst];
    free(buf);
    return decoded;
}

// Consumes one token from `rdr` and converts it to a value:
//   integer / float  -> NSNumber
//   nil              -> NSNull
//   true / false     -> MalTrue / MalFalse
//   "..."            -> NSString with escapes decoded
//   :name            -> NSString prefixed with U+029E (keyword marker)
//   anything else    -> MalSymbol
// Throws an NSString for an unterminated string literal, for a numeric
// token the number formatter cannot parse, and for any unclassifiable token.
NSObject * read_atom(Reader * rdr) {
    NSRegularExpression *regex = [NSRegularExpression
        regularExpressionWithPattern:@"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"((?:[\\\\].|[^\\\\\"])*)\"$|^\"(.*)$|:(.*)|(^[^\"]*$)"
                             options:0
                               error:NULL];
    // NOTE(review): the formatter is left on its default locale; confirm
    // that "." is accepted as the decimal separator wherever this runs.
    NSNumberFormatter *numf = [[NSNumberFormatter alloc] init];
    numf.numberStyle = NSNumberFormatterDecimalStyle;

    NSString *token = [rdr next];

    NSArray *matches = [regex
        matchesInString:token
                options:0
                  range:NSMakeRange(0, [token length])];

    if ([matches count] > 0) {
        NSTextCheckingResult *match = matches[0];

        if (atom_group_matched(match, 1) || atom_group_matched(match, 2)) {
            // integer or float
            NSNumber *number = [numf numberFromString:token];
            if (number == nil) {
                // Looks numeric (e.g. "1.2.3") but does not parse; never
                // hand nil back to callers that insert it into arrays.
                @throw @"read_atom: invalid token";
            }
            return number;
        } else if (atom_group_matched(match, 3)) { // nil
            return [NSNull null];
        } else if (atom_group_matched(match, 4)) { // true
            return [MalTrue alloc]; // TODO: intern
        } else if (atom_group_matched(match, 5)) { // false
            return [MalFalse alloc]; // TODO: intern
        } else if (atom_group_matched(match, 6)) { // terminated string
            NSString *body = [token substringWithRange:[match rangeAtIndex:6]];
            return atom_unescape_string(body);
        } else if (atom_group_matched(match, 7)) { // unterminated string
            @throw @"read_atom: expected '\"', got EOF";
        } else if (atom_group_matched(match, 8)) { // keyword
            return [NSString stringWithFormat:@"\u029e%@",
                    [token substringWithRange:[match rangeAtIndex:8]]];
        } else if (atom_group_matched(match, 9)) { // symbol
            return [MalSymbol stringWithString:token];
        }
    }

    @throw @"read_atom: invalid token";
}
118
// Only used locally, so declare here
NSObject * read_form(Reader * rdr);

// Reads a delimited sequence of forms from `rdr`.
//
// The next token must begin with `start`; forms are then read until a token
// beginning with `end` is seen, and that closing token is consumed.
// Returns the forms in order. Throws an NSString if the opening token is
// wrong or the tokens run out before `end` appears.
NSArray * read_list(Reader * rdr, char start, char end) {
    NSString *opener = [rdr next];
    NSMutableArray *items = [NSMutableArray array];

    if ([opener characterAtIndex:0] != start) {
        @throw [NSString stringWithFormat:@"expected '%c'", start];
    }

    for (;;) {
        NSString *lookahead = [rdr peek];
        if (!lookahead) {
            @throw [NSString stringWithFormat:@"expected '%c', got EOF", end];
        }
        if ([lookahead characterAtIndex:0] == end) {
            break;
        }
        [items addObject:read_form(rdr)];
    }

    // Step past the closing delimiter.
    [rdr next];
    return items;
}
138
// Reads one complete form from `rdr`, dispatching on the first character of
// the next token:
//   ' ` ~ ~@ @   -> two-element list (quote / quasiquote / unquote /
//                   splice-unquote / deref) wrapping the following form
//   ^            -> (with-meta <second form> <first form>)
//   ( [ {        -> list, MalVector, or hash map via read_list
//   ) ] }        -> throws: unbalanced closing delimiter
//   otherwise    -> read_atom
// Throws an NSString when the tokens are exhausted where a form is required
// (for example, a reader macro at the very end of the input).
NSObject * read_form(Reader * rdr) {
    NSString *token = [rdr peek];
    if (token == nil) {
        // Without this guard, messaging nil yields character 0, control
        // falls through to read_atom, and [rdr next] indexes past the end
        // of the token array.
        @throw @"expected form, got EOF";
    }

    // Each case body is braced so locals such as `meta` are scoped to their
    // own case and no later label jumps over an initialization.
    switch ([token characterAtIndex:0]) {
    case '\'': {
        [rdr next];
        return @[[MalSymbol stringWithString:@"quote"],
                 read_form(rdr)];
    }
    case '`': {
        [rdr next];
        return @[[MalSymbol stringWithString:@"quasiquote"],
                 read_form(rdr)];
    }
    case '~': {
        [rdr next];
        if ([token isEqualToString:@"~@"]) {
            return @[[MalSymbol stringWithString:@"splice-unquote"],
                     read_form(rdr)];
        } else {
            return @[[MalSymbol stringWithString:@"unquote"],
                     read_form(rdr)];
        }
    }
    case '^': {
        [rdr next];
        // The metadata form comes first in the input but last in the result.
        NSObject *meta = read_form(rdr);
        return @[[MalSymbol stringWithString:@"with-meta"],
                 read_form(rdr),
                 meta];
    }
    case '@': {
        [rdr next];
        return @[[MalSymbol stringWithString:@"deref"],
                 read_form(rdr)];
    }

    // lists
    case ')':
        @throw @"unexpected ')'";
    case '(':
        return read_list(rdr, '(', ')');

    // vectors
    case ']':
        @throw @"unexpected ']'";
    case '[':
        return [MalVector fromArray:read_list(rdr, '[', ']')];

    // hash maps
    case '}':
        @throw @"unexpected '}'";
    case '{':
        return hash_map(read_list(rdr, '{', '}'));

    default:
        return read_atom(rdr);
    }
}
186
// Entry point of the reader: tokenizes `str` and parses the first form.
//
// If tokenizing yields no tokens at all, an NSException named
// "ReaderContinue" is thrown instead of parsing.
NSObject * read_str(NSString *str) {
    NSArray *toks = tokenize(str);
    if ([toks count] != 0) {
        Reader *cursor = [[Reader alloc] initWithTokens:toks];
        return read_form(cursor);
    }

    //if ([tokens count] == 0) { @throw [[MalContinue alloc] init]; }
    @throw [NSException exceptionWithName:@"ReaderContinue"
                                   reason:@"empty token"
                                 userInfo:nil];
}