Merge pull request #156 from omarrayward/explain-regexp-tokenizer
[jackhill/mal.git] / ps / reader.ps
1 % requires types.ps to be included first
2
% Characters that end a symbol or keyword token:
%   ; , " ` space newline { } ( ) [ ]
% Balanced parentheses need no backslash escape inside a PostScript string.
/token_delim (;,"` \n{}()[]) def

% The decimal digits plus '-'.
/token_number (0123456789-) def
5
% read_number: read a single integer literal from string/idx
% string idx -> read_number -> number string new_idx
%
% Consumes decimal digits starting at idx.  A '-' is accepted only as the
% first character (the sign position); anywhere else it ends the literal,
% so "12-3" yields 12 and leaves new_idx on the '-'.
% The consumed text is converted with cvi, so the caller must ensure at
% least one digit is present at idx (or right after a leading '-').
/read_number { 4 dict begin
    /idx exch def
    /str exch def
    /start idx def                      % index of the literal's first char
    { % loop
        idx str length ge { exit } if   % EOF, break loop
        /ch str idx get def             % current character code
        ch 48 ge ch 57 le and           % '0'..'9'
        ch 45 eq idx start eq and       % '-' only in the sign position
        or not { exit } if
        /idx idx 1 add def              % consume the character
    } loop

    str start idx start sub getinterval cvi   % the matched number
    str idx                                   % return: number string new_idx
end } def
28
29
% read_symbol: scan one symbol token and convert it to a name
% string idx -> read_symbol -> name string new_idx
%
% The token runs from idx up to (not including) the first character found
% in token_delim, or to the end of the string.  new_idx is the index of
% that delimiter (or the string length at EOF).
/read_symbol { 3 dict begin
    /pos exch def
    /src exch def
    /first pos def                          % where the token begins
    { % loop
        pos src length lt not { exit } if   % ran off the end
        token_delim src pos 1 getinterval search {
            pop pop pop exit                % current char is a delimiter
        } if
        pop                                 % discard the unsearched string
        /pos pos 1 add def
    } loop

    src first pos first sub getinterval cvn % token text -> name
    src pos                                 % return: name string new_idx
end } def
53
54
% read_keyword: read a single keyword from string/idx
% string idx -> read_keyword -> keyword_string string new_idx
%
% The token runs from idx (expected to be the ':' character) up to the
% first character found in token_delim, or to the end of the string.
% The result is a NEW string whose first byte is replaced by 127, which
% marks it as a keyword.  The input string is left untouched, so the same
% source text can be read again and still be recognised as a keyword.
/read_keyword { 4 dict begin
    /idx exch def
    /str exch def
    /start idx def
    { % loop
        idx str length ge { exit } if       % EOF, break loop
        token_delim str idx 1 getinterval search { % if token delimiter
            pop pop pop exit
        } if
        pop                                 % not a delimiter: drop the string
        /idx idx 1 add def                  % increment idx
    } loop

    str start idx start sub getinterval     % view onto the input (shared storage)
    dup length string copy                  % private copy, so the put below
                                            % cannot modify the caller's string
    dup 0 127 put  % TODO: something like (\x029e) would be better
    str idx                                 % return: keyword string new_idx
end } def
79
80
% read_string: read a single double-quoted string from string/idx
% string idx -> read_string -> new_string string new_idx
%
% idx must point at the opening '"'.  The text is decoded in one pass:
%   \"  -> "        \n  -> newline        \\  -> \
% Any other backslash sequence is kept verbatim (both characters).
% Decoding while scanning means an escaped backslash can never be mistaken
% for the start of another escape (e.g. the source text  \\n  is a
% backslash followed by 'n', and  "a\\"  ends at its final quote).
% new_idx is the index just past the closing '"'.
% Throws (unexpected EOF reading string) if the input ends before the
% closing quote, including directly after a backslash.
/read_string { 6 dict begin
    /idx exch 1 add def             % step over the opening '"'
    /str exch def
    /buf str length string def      % decoded text is never longer than input
    /cnt 0 def                      % number of bytes written to buf
    { % loop
        idx str length ge { %if EOF
            (unexpected EOF reading string) _throw
        } if
        /ch str idx get def         % current character
        /idx idx 1 add def
        ch 34 eq { exit } if        % unescaped '"' is end of string
        ch 92 eq { %if backslash
            idx str length ge { %if EOF right after the backslash
                (unexpected EOF reading string) _throw
            } if
            /esc str idx get def    % the escaped character
            /idx idx 1 add def
            esc 110 eq { %if \n
                /ch 10 def
            }{ esc 34 eq esc 92 eq or { %elseif \" or \\
                /ch esc def
            }{ %else unknown escape: keep the backslash and the character
                buf cnt 92 put
                /cnt cnt 1 add def
                /ch esc def
            } ifelse } ifelse
        } if
        buf cnt ch put
        /cnt cnt 1 add def
    } loop
    buf 0 cnt getinterval           % the decoded string
    str idx                         % return: new_string string new_idx
end } def
110
111
% read_atom: read a single atom from string/idx
% string idx -> read_atom -> atom string new_idx
%
% Dispatches on the character at idx:
%   digit, or '-' immediately followed by a digit -> read_number
%   '"'                                          -> read_string
%   ':'                                          -> read_keyword
%   anything else                                -> read_symbol, where the
%       names nil / true / false become null / true / false
% A lone '-' (or '-' followed by a non-digit) is still read as a symbol.
% At EOF (idx >= length) the result is  null string idx , the same shape
% read_form produces at EOF, and the local dictionary is always popped.
/read_atom { 3 dict begin
    %(in read_atom\n) print
    /idx exch def
    /str exch def
    str length idx le { %if EOF
        null str idx
    }{
        /ch str idx get def                 % current character
        ch 48 ge ch 57 le and               % '0'..'9'
        ch 45 eq idx 1 add str length lt and {
            % '-' with a following character: negative number only if
            % that character is a digit
            str idx 1 add get dup 48 ge exch 57 le and
        }{
            false
        } ifelse
        or { %if number
            str idx read_number
        }{ ch 34 eq { %elseif double-quote (string)
            str idx read_string
        }{ ch 58 eq { %elseif colon (keyword)
            str idx read_keyword
        }{ %else symbol
            str idx read_symbol
            /idx exch def pop               % keep new_idx, drop the string
            dup /nil eq { %if nil
                pop null
            }{ dup /true eq { %elseif true
                pop true
            }{ dup /false eq { %elseif false
                pop false
            } if } ifelse } ifelse          % otherwise keep the name itself
            str idx
        } ifelse } ifelse } ifelse
    } ifelse

    % return: atom string new_idx
end } def
146
% read_until: collect forms into an array until the closing character
% string idx stopchar -> read_until -> array string new_idx
%   idx       index of the opening bracket (skipped without being examined)
%   stopchar  character code that terminates the sequence
%   new_idx   index just past the stopchar
% Throws (unexpected EOF reading list) when the input ends first.
/read_until { 3 dict begin
    /stopchar exch def
    /idx exch 1 add def
    /str exch def
    [   % mark: every form read in the loop accumulates above it
    { % loop
        str idx read_spaces
        /idx exch def pop               % keep new_idx, drop the string
        idx str length ge { %if EOF
            (unexpected EOF reading list) _throw
        } if
        str idx get stopchar eq { exit } if   % closing character reached
        str idx read_form               % leaves the form under str/new_idx
        /idx exch def pop
    } loop
    ]
    str idx 1 add                       % step over the stopchar
end } def
167
% read_spaces: advance idx to the first character that is neither
% whitespace nor part of a comment
% string idx -> read_spaces -> string new_idx
%
% Whitespace is space, newline, tab, carriage return and comma.
% A ';' starts a comment that runs up to the next newline (or EOF).
% Every position is re-examined by the same loop, so any mix of
% consecutive comment lines and whitespace is skipped.
% new_idx equals the string length when nothing but whitespace/comments
% remains.
/read_spaces { 3 dict begin
    %(in read_spaces\n) print
    /idx exch def
    /str exch def
    { % loop
        idx str length ge { exit } if       % EOF, break loop
        /ch str idx get def                 % current character
        ch 59 eq { %if ';' (comment)
            { % loop: advance to the terminating newline or EOF
                /idx idx 1 add def
                idx str length ge { exit } if
                str idx get 10 eq { exit } if
            } loop
            % idx is now on the newline (or at EOF); the outer loop
            % handles it like any other position
        }{ %else
            % if not whitespace then we are done
            ch 32 ne ch 10 ne and ch 44 ne and
            ch 9 ne and ch 13 ne and { exit } if
            /idx idx 1 add def              % skip one whitespace character
        } ifelse
    } loop

    str idx                                 % return: string new_idx
end } def
200
% read_form: read the next form from string starting at idx
% string idx -> read_form -> ast string new_idx
%
% Leading whitespace/comments are skipped with read_spaces first.
% At EOF the result is  null string idx .
% Reader macros wrap the following form in a 2- or 3-element list:
%   'x  -> (quote x)           `x  -> (quasiquote x)
%   ~x  -> (unquote x)         ~@x -> (splice-unquote x)
%   @x  -> (deref x)           ^m x -> (with-meta x m)
% '(' '[' '{' read a sequence with read_until and convert the resulting
% array; a stray ')' ']' '}' throws.  Anything else goes to read_atom.
% A '~' that is the last character of the input is treated as a plain
% unquote (the next character is only inspected when it exists).
/read_form { 3 dict begin
    %(in read_form\n) print
    read_spaces
    /idx exch def
    /str exch def

    idx str length ge { %if EOF
        null str idx
    }{
        /ch str idx get def                 % current character
        ch 39 eq { %if '\''
            /idx idx 1 add def
            str idx read_form
            % stack: form str idx -> (quote form) str idx
            3 -1 roll /quote exch 2 _list 3 1 roll
        }{ ch 96 eq { %elseif '`'
            /idx idx 1 add def
            str idx read_form
            3 -1 roll /quasiquote exch 2 _list 3 1 roll
        }{ ch 126 eq { %elseif '~'
            /idx idx 1 add def
            % peek at the next character only when there is one
            idx str length lt { str idx get 64 eq }{ false } ifelse { %if '~@'
                /idx idx 1 add def
                str idx read_form
                3 -1 roll /splice-unquote exch 2 _list 3 1 roll
            }{ %else just '~'
                str idx read_form
                3 -1 roll /unquote exch 2 _list 3 1 roll
            } ifelse
        }{ ch 94 eq { %elseif '^'
            /idx idx 1 add def
            str idx read_form read_form     % stack: meta form str idx
            % rearrange to (with-meta form meta) str idx
            4 2 roll exch /with-meta 3 1 roll 3 _list 3 1 roll
        }{ ch 64 eq { %elseif '@'
            /idx idx 1 add def
            str idx read_form
            3 -1 roll /deref exch 2 _list 3 1 roll
        }{ ch 40 eq { %elseif '('
            str idx 41 read_until dup /idx exch def
            3 -1 roll _list_from_array 3 1 roll
        }{ ch 41 eq { %elseif ')'
            (unexpected '\)') _throw
        }{ ch 91 eq { %elseif '['
            str idx 93 read_until dup /idx exch def
            3 -1 roll _vector_from_array 3 1 roll
        }{ ch 93 eq { %elseif ']'
            (unexpected ']') _throw
        }{ ch 123 eq { %elseif '{'
            str idx 125 read_until dup /idx exch def
            3 -1 roll _hash_map_from_array 3 1 roll
        }{ ch 125 eq { %elseif '}'
            (unexpected '}') _throw
        }{ % else
            str idx read_atom
        } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse } ifelse

    } ifelse % not EOF

    % return: ast string new_idx
end } def
267
% read_str: parse the first form found in a string
% string -> read_str -> ast
/read_str {
    %(in read_str\n) print
    0 read_form         % begin at index 0; leaves: ast string new_idx
    2 { pop } repeat    % discard new_idx and the string, keep the ast
} def