Commit | Line | Data |
---|---|---|
49c172c0 | 1 | REM > reader library for mal in BBC BASIC |
9d27e709 | 2 | |
ba8629dd BH |
3 | REM ** Reader ** |
4 | ||
5 | REM The Reader object is implemented as an array and a mutable pointer. | |
6 | ||
7 | DEF FNreader_peek(tokens$(), RETURN tokptr%) | |
8 | =tokens$(tokptr%) | |
9 | ||
10 | DEF FNreader_next(token$(), RETURN tokptr%) | |
11 | tokptr% += 1 | |
12 | =tokens$(tokptr% - 1) | |
13 | ||
0f97a00d | 14 | DEF FNread_str(src%) |
ba8629dd | 15 | LOCAL ntokens%, tokptr%, tokens$() |
6cbd11b9 | 16 | DIM tokens$(2048) |
0f97a00d | 17 | ntokens% = FNtokenize(src%, tokens$()) |
ba8629dd BH |
18 | tokptr% = 0 |
19 | =FNread_form(tokens$(), tokptr%) | |
9d27e709 BH |
20 | |
21 | REM ** Tokenizer ** | |
22 | ||
0f97a00d | 23 | DEF FNtokenize(src%, tokens$()) |
9d27e709 BH |
24 | REM The tokenizer is implemented explicitly as a deterministic |
25 | REM finite automaton. | |
0f97a00d | 26 | LOCAL p%, state%, tok$, tokptr%, c$, rc$, match$, action% |
9d27e709 BH |
27 | LOCAL DATA |
28 | ||
2ef194a2 | 29 | state% = 1 |
9d27e709 | 30 | tokptr% = 0 |
0f97a00d BH |
31 | tok$ = "" |
32 | FOR p% = 1 TO FNstring_len(src%) | |
33 | c$ = FNstring_chr(src%, p%) | |
34 | rc$ = c$ | |
9d27e709 BH |
35 | REM Convert some characters to ones that are easier to put into |
36 | REM DATA statements. These substitutions are only used for | |
37 | REM matching: the token still contains the original character. | |
38 | CASE ASC(c$) OF | |
64566e19 BH |
39 | REM Fold some upper-case letters together so that we can re-use |
40 | REM them to represent more awkward characters. | |
41 | WHEN 78, 81: c$ = "A" | |
42 | REM Now convert newlines into "N" | |
43 | WHEN 10: c$ = "N" | |
44 | REM These are the other characters that Perl's "\s" escape matches. | |
45 | WHEN 9, 11, 12, 13: c$ = " " | |
46 | REM Brandy has a bug whereby it doesn't correctly parse strings | |
47 | REM in DATA statements that begin with quotation marks, so convert | |
48 | REM quotation marks to "Q". | |
9d27e709 BH |
49 | WHEN 34: c$ = "Q" |
50 | ENDCASE | |
51 | REM The state table consists of a DATA statement for each current | |
52 | REM state, which triples representing transitions. Each triple | |
53 | REM consists of a string of characters to match, an action, and a | |
54 | REM next state. A matching string of "" matches any character, | |
55 | REM and hence marks the end of a state. | |
56 | ||
2ef194a2 BH |
57 | REM Actions are: |
58 | REM 0: Add this character to the current token | |
59 | REM 1: Emit token; start a new token with this character | |
60 | REM 5: Emit token; skip this character | |
9d27e709 | 61 | |
9d27e709 | 62 | RESTORE +state% |
2ef194a2 | 63 | REM state 1: Initial state, or inside a bare word |
64566e19 | 64 | DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",0,1 |
2ef194a2 | 65 | REM state 3: Just seen the end of a token |
64566e19 | 66 | DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1 |
2ef194a2 | 67 | REM state 5: Just seen a "~" |
64566e19 | 68 | DATA " N,",5,1, "@",0,3, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1 |
2ef194a2 | 69 | REM state 7: Inside a quoted string |
35848fbb | 70 | DATA "\",0,9, Q,0,3, "",0,7 |
2ef194a2 BH |
71 | REM state 9: After a backslash in a string |
72 | DATA "",0,7 | |
73 | REM state 11: Inside a comment | |
64566e19 | 74 | DATA N,5,3, "",5,11 |
9d27e709 BH |
75 | |
76 | REM Find a matching transition from the current state. | |
2ef194a2 | 77 | REM PRINT ;state%;"-->"; |
9d27e709 BH |
78 | REPEAT |
79 | READ match$, action%, state% | |
2ef194a2 | 80 | REM PRINT "[";match$;"](";action%;",";state%;")"; |
9d27e709 | 81 | UNTIL match$ = "" OR INSTR(match$, c$) > 0 |
2ef194a2 | 82 | REM PRINT ;"-->";state% |
9d27e709 BH |
83 | |
84 | REM Execute any actions. | |
0f97a00d BH |
85 | IF action% AND 1 AND tokens$(tokptr%) <> "" THEN tokptr% += 1 |
86 | IF (action% AND 4) = 0 THEN tokens$(tokptr%) += rc$ | |
9d27e709 | 87 | NEXT p% |
0f97a00d | 88 | IF tokens$(tokptr%) <> "" THEN tokptr% += 1 |
9d27e709 | 89 | =tokptr% |
ca23e632 BH |
90 | |
91 | REM ** More Reader ** | |
92 | ||
93 | DEF FNread_form(tokens$(), RETURN tokptr%) | |
0f97a00d | 94 | LOCAL tok$, x% |
ca23e632 | 95 | tok$ = FNreader_peek(tokens$(), tokptr%) |
5cdb7670 BH |
96 | CASE tok$ OF |
97 | WHEN "" : ERROR &40E80930, "Unexpected end of input" | |
98 | WHEN "(": =FNread_list(tokens$(), tokptr%) | |
07f3522f | 99 | WHEN "[": =FNread_vector(tokens$(), tokptr%) |
b6e4898c BH |
100 | WHEN "{": =FNread_hashmap(tokens$(), tokptr%) |
101 | WHEN ")", "]", "}": ERROR &40E80931, "Unexpected '"+tok$ +"'" | |
f4d5bbf1 BH |
102 | WHEN "'": =FNreader_macro("quote", tokens$(), tokptr%) |
103 | WHEN "`": =FNreader_macro("quasiquote", tokens$(), tokptr%) | |
104 | WHEN "~": =FNreader_macro("unquote", tokens$(), tokptr%) | |
105 | WHEN "~@":=FNreader_macro("splice-unquote", tokens$(), tokptr%) | |
106 | WHEN "@": =FNreader_macro("deref", tokens$(), tokptr%) | |
f354f898 | 107 | WHEN "^": =FNread_with_meta(tokens$(), tokptr%) |
5cdb7670 | 108 | ENDCASE |
ca23e632 BH |
109 | =FNread_atom(tokens$(), tokptr%) |
110 | ||
111 | DEF FNread_list(tokens$(), RETURN tokptr%) | |
112 | LOCAL tok$ | |
113 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "(" | |
07f3522f | 114 | =FNread_list_tail(tokens$(), tokptr%, ")") |
ca23e632 | 115 | |
07f3522f BH |
116 | DEF FNread_vector(tokens$(), RETURN tokptr%) |
117 | LOCAL tok$ | |
118 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "[" | |
119 | =FNas_vector(FNread_list_tail(tokens$(), tokptr%, "]")) | |
120 | ||
121 | DEF FNread_list_tail(tokens$(), RETURN tokptr%, term$) | |
ca23e632 | 122 | LOCAL tok$, car%, cdr% |
07f3522f | 123 | IF FNreader_peek(tokens$(), tokptr%) = term$ THEN |
ca23e632 BH |
124 | tok$ = FNreader_next(tokens$(), tokptr%) |
125 | =FNempty | |
126 | ENDIF | |
127 | car% = FNread_form(tokens$(), tokptr%) | |
07f3522f | 128 | cdr% = FNread_list_tail(tokens$(), tokptr%, term$) |
ca23e632 BH |
129 | =FNalloc_pair(car%, cdr%) |
130 | ||
b6e4898c | 131 | DEF FNread_hashmap(tokens$(), RETURN tokptr%) |
90f6b7a2 | 132 | LOCAL tok$, map%, key%, val% |
b6e4898c | 133 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "{" |
90f6b7a2 BH |
134 | map% = FNempty_hashmap |
135 | WHILE FNreader_peek(tokens$(), tokptr%) <> "}" | |
136 | key% = FNread_form(tokens$(), tokptr%) | |
137 | IF NOT FNis_string(key%) ERROR &40E80932, "Hash-map key must be a string" | |
138 | val% = FNread_form(tokens$(), tokptr%) | |
139 | map% = FNhashmap_set(map%, FNunbox_string(key%), val%) | |
140 | ENDWHILE | |
141 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "}" | |
142 | =map% | |
143 | ||
f4d5bbf1 | 144 | DEF FNreader_macro(quote$, token$(), RETURN tokptr%) |
983a0695 | 145 | LOCAL tok$ |
f4d5bbf1 | 146 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip quoting token |
983a0695 | 147 | =FNalloc_list2(FNalloc_symbol(quote$), FNread_form(tokens$(), tokptr%)) |
f4d5bbf1 | 148 | |
f354f898 BH |
149 | DEF FNread_with_meta(token$(), RETURN tokptr%) |
150 | LOCAL tok$, wm%, base%, meta% | |
151 | tok$ = FNreader_next(tokens$(), tokptr%) : REM skip '^' token | |
152 | wm% = FNalloc_symbol("with-meta") | |
153 | meta% = FNread_form(tokens$(), tokptr%) | |
154 | base% = FNread_form(tokens$(), tokptr%) | |
155 | =FNalloc_list3(wm%, base%, meta%) | |
156 | ||
ca23e632 BH |
157 | DEF FNis_token_numeric(tok$) |
158 | LOCAL i%, c% | |
159 | IF LEFT$(tok$, 1) = "-" THEN tok$ = MID$(tok$, 2) | |
160 | IF LEN(tok$) = 0 THEN =FALSE | |
161 | FOR i% = 1 TO LEN(tok$) | |
162 | c% = ASC(MID$(tok$, i%, 1)) | |
163 | IF c% < &30 OR c% > &39 THEN =FALSE | |
164 | NEXT i% | |
165 | =TRUE | |
166 | ||
167 | DEF FNread_atom(tokens$(), RETURN tokptr%) | |
168 | LOCAL strval$ | |
169 | strval$ = FNreader_next(tokens$(), tokptr%) | |
c32c31b3 | 170 | IF strval$ = "nil" THEN =FNnil |
3ab6b58b BH |
171 | IF strval$ = "true" THEN =FNalloc_boolean(TRUE) |
172 | IF strval$ = "false" THEN =FNalloc_boolean(FALSE) | |
68f0184e | 173 | IF LEFT$(strval$, 1) = """" THEN =FNalloc_string(FNunquote_string(strval$)) |
bc27a6ad | 174 | IF LEFT$(strval$, 1) = ":" THEN =FNalloc_string(CHR$(127) + MID$(strval$, 2)) |
ca23e632 BH |
175 | IF FNis_token_numeric(strval$) THEN =FNalloc_int(VAL(strval$)) |
176 | =FNalloc_symbol(strval$) | |
dd3d5ad7 | 177 | |
68f0184e BH |
178 | DEF FNunquote_string(strval$) |
179 | LOCAL inptr%, bs%, out$, c$ | |
180 | IF RIGHT$(strval$, 1) <> """" THEN ERROR &40E80930, "Unexpected end of input" | |
181 | inptr% = 2 | |
182 | REPEAT | |
183 | bs% = INSTR(strval$, "\", inptr%) | |
184 | IF bs% > 0 THEN | |
185 | out$ += MID$(strval$, inptr%, bs% - inptr%) | |
186 | c$ = MID$(strval$, bs% + 1, 1) | |
187 | IF c$ = "n" THEN c$ = CHR$(10) | |
188 | out$ += c$ | |
189 | inptr% = bs% + 2 | |
190 | ENDIF | |
191 | UNTIL bs% = 0 | |
192 | IF inptr% = LEN(strval$) + 1 THEN ERROR &40E80930, "Unexpected end of input" | |
193 | out$ += MID$(strval$, inptr%, LEN(strval$) - inptr%) | |
194 | =out$ | |
195 | ||
dd3d5ad7 BH |
196 | REM Local Variables: |
197 | REM indent-tabs-mode: nil | |
198 | REM End: |