DISABLE FDs (REMOVE ME).
[jackhill/mal.git] / bbc-basic / reader.bas
CommitLineData
49c172c0 1REM > reader library for mal in BBC BASIC
9d27e709 2
ba8629dd
BH
3REM ** Reader **
4
5REM The Reader object is implemented as an array and a mutable pointer.
6
7DEF FNreader_peek(tokens$(), RETURN tokptr%)
8=tokens$(tokptr%)
9
10DEF FNreader_next(token$(), RETURN tokptr%)
11 tokptr% += 1
12=tokens$(tokptr% - 1)
13
0f97a00d 14DEF FNread_str(src%)
ba8629dd 15 LOCAL ntokens%, tokptr%, tokens$()
6cbd11b9 16 DIM tokens$(2048)
0f97a00d 17 ntokens% = FNtokenize(src%, tokens$())
ba8629dd
BH
18 tokptr% = 0
19=FNread_form(tokens$(), tokptr%)
9d27e709
BH
20
21REM ** Tokenizer **
22
0f97a00d 23DEF FNtokenize(src%, tokens$())
9d27e709
BH
24 REM The tokenizer is implemented explicitly as a deterministic
25 REM finite automaton.
0f97a00d 26 LOCAL p%, state%, tok$, tokptr%, c$, rc$, match$, action%
9d27e709
BH
27 LOCAL DATA
28
2ef194a2 29 state% = 1
9d27e709 30 tokptr% = 0
0f97a00d
BH
31 tok$ = ""
32 FOR p% = 1 TO FNstring_len(src%)
33 c$ = FNstring_chr(src%, p%)
34 rc$ = c$
9d27e709
BH
35 REM Convert some characters to ones that are easier to put into
36 REM DATA statements. These substitutions are only used for
37 REM matching: the token still contains the original character.
38 CASE ASC(c$) OF
64566e19
BH
39 REM Fold some upper-case letters together so that we can re-use
40 REM them to represent more awkward characters.
41 WHEN 78, 81: c$ = "A"
42 REM Now convert newlines into "N"
43 WHEN 10: c$ = "N"
44 REM These are the other characters that Perl's "\s" escape matches.
45 WHEN 9, 11, 12, 13: c$ = " "
46 REM Brandy has a bug whereby it doesn't correctly parse strings
47 REM in DATA statements that begin with quotation marks, so convert
48 REM quotation marks to "Q".
9d27e709
BH
49 WHEN 34: c$ = "Q"
50 ENDCASE
51 REM The state table consists of a DATA statement for each current
52 REM state, which triples representing transitions. Each triple
53 REM consists of a string of characters to match, an action, and a
54 REM next state. A matching string of "" matches any character,
55 REM and hence marks the end of a state.
56
2ef194a2
BH
57 REM Actions are:
58 REM 0: Add this character to the current token
59 REM 1: Emit token; start a new token with this character
60 REM 5: Emit token; skip this character
9d27e709 61
9d27e709 62 RESTORE +state%
2ef194a2 63 REM state 1: Initial state, or inside a bare word
64566e19 64 DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",0,1
2ef194a2 65 REM state 3: Just seen the end of a token
64566e19 66 DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
2ef194a2 67 REM state 5: Just seen a "~"
64566e19 68 DATA " N,",5,1, "@",0,3, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
2ef194a2 69 REM state 7: Inside a quoted string
35848fbb 70 DATA "\",0,9, Q,0,3, "",0,7
2ef194a2
BH
71 REM state 9: After a backslash in a string
72 DATA "",0,7
73 REM state 11: Inside a comment
64566e19 74 DATA N,5,3, "",5,11
9d27e709
BH
75
76 REM Find a matching transition from the current state.
2ef194a2 77 REM PRINT ;state%;"-->";
9d27e709
BH
78 REPEAT
79 READ match$, action%, state%
2ef194a2 80 REM PRINT "[";match$;"](";action%;",";state%;")";
9d27e709 81 UNTIL match$ = "" OR INSTR(match$, c$) > 0
2ef194a2 82 REM PRINT ;"-->";state%
9d27e709
BH
83
84 REM Execute any actions.
0f97a00d
BH
85 IF action% AND 1 AND tokens$(tokptr%) <> "" THEN tokptr% += 1
86 IF (action% AND 4) = 0 THEN tokens$(tokptr%) += rc$
9d27e709 87 NEXT p%
0f97a00d 88 IF tokens$(tokptr%) <> "" THEN tokptr% += 1
9d27e709 89=tokptr%
ca23e632
BH
90
91REM ** More Reader **
92
93DEF FNread_form(tokens$(), RETURN tokptr%)
0f97a00d 94 LOCAL tok$, x%
ca23e632 95 tok$ = FNreader_peek(tokens$(), tokptr%)
5cdb7670
BH
96 CASE tok$ OF
97 WHEN "" : ERROR &40E80930, "Unexpected end of input"
98 WHEN "(": =FNread_list(tokens$(), tokptr%)
07f3522f 99 WHEN "[": =FNread_vector(tokens$(), tokptr%)
b6e4898c
BH
100 WHEN "{": =FNread_hashmap(tokens$(), tokptr%)
101 WHEN ")", "]", "}": ERROR &40E80931, "Unexpected '"+tok$ +"'"
f4d5bbf1
BH
102 WHEN "'": =FNreader_macro("quote", tokens$(), tokptr%)
103 WHEN "`": =FNreader_macro("quasiquote", tokens$(), tokptr%)
104 WHEN "~": =FNreader_macro("unquote", tokens$(), tokptr%)
105 WHEN "~@":=FNreader_macro("splice-unquote", tokens$(), tokptr%)
106 WHEN "@": =FNreader_macro("deref", tokens$(), tokptr%)
f354f898 107 WHEN "^": =FNread_with_meta(tokens$(), tokptr%)
5cdb7670 108 ENDCASE
ca23e632
BH
109=FNread_atom(tokens$(), tokptr%)
110
111DEF FNread_list(tokens$(), RETURN tokptr%)
112 LOCAL tok$
113 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "("
07f3522f 114=FNread_list_tail(tokens$(), tokptr%, ")")
ca23e632 115
07f3522f
BH
116DEF FNread_vector(tokens$(), RETURN tokptr%)
117 LOCAL tok$
118 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "["
119=FNas_vector(FNread_list_tail(tokens$(), tokptr%, "]"))
120
121DEF FNread_list_tail(tokens$(), RETURN tokptr%, term$)
ca23e632 122 LOCAL tok$, car%, cdr%
07f3522f 123 IF FNreader_peek(tokens$(), tokptr%) = term$ THEN
ca23e632
BH
124 tok$ = FNreader_next(tokens$(), tokptr%)
125 =FNempty
126 ENDIF
127 car% = FNread_form(tokens$(), tokptr%)
07f3522f 128 cdr% = FNread_list_tail(tokens$(), tokptr%, term$)
ca23e632
BH
129=FNalloc_pair(car%, cdr%)
130
b6e4898c 131DEF FNread_hashmap(tokens$(), RETURN tokptr%)
90f6b7a2 132 LOCAL tok$, map%, key%, val%
b6e4898c 133 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "{"
90f6b7a2
BH
134 map% = FNempty_hashmap
135 WHILE FNreader_peek(tokens$(), tokptr%) <> "}"
136 key% = FNread_form(tokens$(), tokptr%)
137 IF NOT FNis_string(key%) ERROR &40E80932, "Hash-map key must be a string"
138 val% = FNread_form(tokens$(), tokptr%)
139 map% = FNhashmap_set(map%, FNunbox_string(key%), val%)
140 ENDWHILE
141 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "}"
142=map%
143
f4d5bbf1 144DEF FNreader_macro(quote$, token$(), RETURN tokptr%)
983a0695 145 LOCAL tok$
f4d5bbf1 146 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip quoting token
983a0695 147=FNalloc_list2(FNalloc_symbol(quote$), FNread_form(tokens$(), tokptr%))
f4d5bbf1 148
f354f898
BH
149DEF FNread_with_meta(token$(), RETURN tokptr%)
150 LOCAL tok$, wm%, base%, meta%
151 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip '^' token
152 wm% = FNalloc_symbol("with-meta")
153 meta% = FNread_form(tokens$(), tokptr%)
154 base% = FNread_form(tokens$(), tokptr%)
155=FNalloc_list3(wm%, base%, meta%)
156
ca23e632
BH
157DEF FNis_token_numeric(tok$)
158 LOCAL i%, c%
159 IF LEFT$(tok$, 1) = "-" THEN tok$ = MID$(tok$, 2)
160 IF LEN(tok$) = 0 THEN =FALSE
161 FOR i% = 1 TO LEN(tok$)
162 c% = ASC(MID$(tok$, i%, 1))
163 IF c% < &30 OR c% > &39 THEN =FALSE
164 NEXT i%
165=TRUE
166
167DEF FNread_atom(tokens$(), RETURN tokptr%)
168 LOCAL strval$
169 strval$ = FNreader_next(tokens$(), tokptr%)
c32c31b3 170 IF strval$ = "nil" THEN =FNnil
3ab6b58b
BH
171 IF strval$ = "true" THEN =FNalloc_boolean(TRUE)
172 IF strval$ = "false" THEN =FNalloc_boolean(FALSE)
68f0184e 173 IF LEFT$(strval$, 1) = """" THEN =FNalloc_string(FNunquote_string(strval$))
bc27a6ad 174 IF LEFT$(strval$, 1) = ":" THEN =FNalloc_string(CHR$(127) + MID$(strval$, 2))
ca23e632
BH
175 IF FNis_token_numeric(strval$) THEN =FNalloc_int(VAL(strval$))
176=FNalloc_symbol(strval$)
dd3d5ad7 177
68f0184e
BH
178DEF FNunquote_string(strval$)
179 LOCAL inptr%, bs%, out$, c$
180 IF RIGHT$(strval$, 1) <> """" THEN ERROR &40E80930, "Unexpected end of input"
181 inptr% = 2
182 REPEAT
183 bs% = INSTR(strval$, "\", inptr%)
184 IF bs% > 0 THEN
185 out$ += MID$(strval$, inptr%, bs% - inptr%)
186 c$ = MID$(strval$, bs% + 1, 1)
187 IF c$ = "n" THEN c$ = CHR$(10)
188 out$ += c$
189 inptr% = bs% + 2
190 ENDIF
191 UNTIL bs% = 0
192 IF inptr% = LEN(strval$) + 1 THEN ERROR &40E80930, "Unexpected end of input"
193 out$ += MID$(strval$, inptr%, LEN(strval$) - inptr%)
194=out$
195
dd3d5ad7
BH
196REM Local Variables:
197REM indent-tabs-mode: nil
198REM End: