DISABLE FDs (REMOVE ME).
[jackhill/mal.git] / bbc-basic / reader.bas
1 REM > reader library for mal in BBC BASIC
2
3 REM ** Reader **
4
5 REM The Reader object is implemented as an array and a mutable pointer.
6
7 DEF FNreader_peek(tokens$(), RETURN tokptr%)
8 =tokens$(tokptr%)
9
10 DEF FNreader_next(token$(), RETURN tokptr%)
11 tokptr% += 1
12 =tokens$(tokptr% - 1)
13
14 DEF FNread_str(src%)
15 LOCAL ntokens%, tokptr%, tokens$()
16 DIM tokens$(2048)
17 ntokens% = FNtokenize(src%, tokens$())
18 tokptr% = 0
19 =FNread_form(tokens$(), tokptr%)
20
21 REM ** Tokenizer **
22
23 DEF FNtokenize(src%, tokens$())
24 REM The tokenizer is implemented explicitly as a deterministic
25 REM finite automaton.
26 LOCAL p%, state%, tok$, tokptr%, c$, rc$, match$, action%
27 LOCAL DATA
28
29 state% = 1
30 tokptr% = 0
31 tok$ = ""
32 FOR p% = 1 TO FNstring_len(src%)
33 c$ = FNstring_chr(src%, p%)
34 rc$ = c$
35 REM Convert some characters to ones that are easier to put into
36 REM DATA statements. These substitutions are only used for
37 REM matching: the token still contains the original character.
38 CASE ASC(c$) OF
39 REM Fold some upper-case letters together so that we can re-use
40 REM them to represent more awkward characters.
41 WHEN 78, 81: c$ = "A"
42 REM Now convert newlines into "N"
43 WHEN 10: c$ = "N"
44 REM These are the other characters that Perl's "\s" escape matches.
45 WHEN 9, 11, 12, 13: c$ = " "
46 REM Brandy has a bug whereby it doesn't correctly parse strings
47 REM in DATA statements that begin with quotation marks, so convert
48 REM quotation marks to "Q".
49 WHEN 34: c$ = "Q"
50 ENDCASE
51 REM The state table consists of a DATA statement for each current
52 REM state, which triples representing transitions. Each triple
53 REM consists of a string of characters to match, an action, and a
54 REM next state. A matching string of "" matches any character,
55 REM and hence marks the end of a state.
56
57 REM Actions are:
58 REM 0: Add this character to the current token
59 REM 1: Emit token; start a new token with this character
60 REM 5: Emit token; skip this character
61
62 RESTORE +state%
63 REM state 1: Initial state, or inside a bare word
64 DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",0,1
65 REM state 3: Just seen the end of a token
66 DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
67 REM state 5: Just seen a "~"
68 DATA " N,",5,1, "@",0,3, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
69 REM state 7: Inside a quoted string
70 DATA "\",0,9, Q,0,3, "",0,7
71 REM state 9: After a backslash in a string
72 DATA "",0,7
73 REM state 11: Inside a comment
74 DATA N,5,3, "",5,11
75
76 REM Find a matching transition from the current state.
77 REM PRINT ;state%;"-->";
78 REPEAT
79 READ match$, action%, state%
80 REM PRINT "[";match$;"](";action%;",";state%;")";
81 UNTIL match$ = "" OR INSTR(match$, c$) > 0
82 REM PRINT ;"-->";state%
83
84 REM Execute any actions.
85 IF action% AND 1 AND tokens$(tokptr%) <> "" THEN tokptr% += 1
86 IF (action% AND 4) = 0 THEN tokens$(tokptr%) += rc$
87 NEXT p%
88 IF tokens$(tokptr%) <> "" THEN tokptr% += 1
89 =tokptr%
90
91 REM ** More Reader **
92
93 DEF FNread_form(tokens$(), RETURN tokptr%)
94 LOCAL tok$, x%
95 tok$ = FNreader_peek(tokens$(), tokptr%)
96 CASE tok$ OF
97 WHEN "" : ERROR &40E80930, "Unexpected end of input"
98 WHEN "(": =FNread_list(tokens$(), tokptr%)
99 WHEN "[": =FNread_vector(tokens$(), tokptr%)
100 WHEN "{": =FNread_hashmap(tokens$(), tokptr%)
101 WHEN ")", "]", "}": ERROR &40E80931, "Unexpected '"+tok$ +"'"
102 WHEN "'": =FNreader_macro("quote", tokens$(), tokptr%)
103 WHEN "`": =FNreader_macro("quasiquote", tokens$(), tokptr%)
104 WHEN "~": =FNreader_macro("unquote", tokens$(), tokptr%)
105 WHEN "~@":=FNreader_macro("splice-unquote", tokens$(), tokptr%)
106 WHEN "@": =FNreader_macro("deref", tokens$(), tokptr%)
107 WHEN "^": =FNread_with_meta(tokens$(), tokptr%)
108 ENDCASE
109 =FNread_atom(tokens$(), tokptr%)
110
111 DEF FNread_list(tokens$(), RETURN tokptr%)
112 LOCAL tok$
113 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "("
114 =FNread_list_tail(tokens$(), tokptr%, ")")
115
116 DEF FNread_vector(tokens$(), RETURN tokptr%)
117 LOCAL tok$
118 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "["
119 =FNas_vector(FNread_list_tail(tokens$(), tokptr%, "]"))
120
121 DEF FNread_list_tail(tokens$(), RETURN tokptr%, term$)
122 LOCAL tok$, car%, cdr%
123 IF FNreader_peek(tokens$(), tokptr%) = term$ THEN
124 tok$ = FNreader_next(tokens$(), tokptr%)
125 =FNempty
126 ENDIF
127 car% = FNread_form(tokens$(), tokptr%)
128 cdr% = FNread_list_tail(tokens$(), tokptr%, term$)
129 =FNalloc_pair(car%, cdr%)
130
131 DEF FNread_hashmap(tokens$(), RETURN tokptr%)
132 LOCAL tok$, map%, key%, val%
133 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "{"
134 map% = FNempty_hashmap
135 WHILE FNreader_peek(tokens$(), tokptr%) <> "}"
136 key% = FNread_form(tokens$(), tokptr%)
137 IF NOT FNis_string(key%) ERROR &40E80932, "Hash-map key must be a string"
138 val% = FNread_form(tokens$(), tokptr%)
139 map% = FNhashmap_set(map%, FNunbox_string(key%), val%)
140 ENDWHILE
141 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "}"
142 =map%
143
144 DEF FNreader_macro(quote$, token$(), RETURN tokptr%)
145 LOCAL tok$
146 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip quoting token
147 =FNalloc_list2(FNalloc_symbol(quote$), FNread_form(tokens$(), tokptr%))
148
149 DEF FNread_with_meta(token$(), RETURN tokptr%)
150 LOCAL tok$, wm%, base%, meta%
151 tok$ = FNreader_next(tokens$(), tokptr%) : REM skip '^' token
152 wm% = FNalloc_symbol("with-meta")
153 meta% = FNread_form(tokens$(), tokptr%)
154 base% = FNread_form(tokens$(), tokptr%)
155 =FNalloc_list3(wm%, base%, meta%)
156
157 DEF FNis_token_numeric(tok$)
158 LOCAL i%, c%
159 IF LEFT$(tok$, 1) = "-" THEN tok$ = MID$(tok$, 2)
160 IF LEN(tok$) = 0 THEN =FALSE
161 FOR i% = 1 TO LEN(tok$)
162 c% = ASC(MID$(tok$, i%, 1))
163 IF c% < &30 OR c% > &39 THEN =FALSE
164 NEXT i%
165 =TRUE
166
167 DEF FNread_atom(tokens$(), RETURN tokptr%)
168 LOCAL strval$
169 strval$ = FNreader_next(tokens$(), tokptr%)
170 IF strval$ = "nil" THEN =FNnil
171 IF strval$ = "true" THEN =FNalloc_boolean(TRUE)
172 IF strval$ = "false" THEN =FNalloc_boolean(FALSE)
173 IF LEFT$(strval$, 1) = """" THEN =FNalloc_string(FNunquote_string(strval$))
174 IF LEFT$(strval$, 1) = ":" THEN =FNalloc_string(CHR$(127) + MID$(strval$, 2))
175 IF FNis_token_numeric(strval$) THEN =FNalloc_int(VAL(strval$))
176 =FNalloc_symbol(strval$)
177
178 DEF FNunquote_string(strval$)
179 LOCAL inptr%, bs%, out$, c$
180 IF RIGHT$(strval$, 1) <> """" THEN ERROR &40E80930, "Unexpected end of input"
181 inptr% = 2
182 REPEAT
183 bs% = INSTR(strval$, "\", inptr%)
184 IF bs% > 0 THEN
185 out$ += MID$(strval$, inptr%, bs% - inptr%)
186 c$ = MID$(strval$, bs% + 1, 1)
187 IF c$ = "n" THEN c$ = CHR$(10)
188 out$ += c$
189 inptr% = bs% + 2
190 ENDIF
191 UNTIL bs% = 0
192 IF inptr% = LEN(strval$) + 1 THEN ERROR &40E80930, "Unexpected end of input"
193 out$ += MID$(strval$, inptr%, LEN(strval$) - inptr%)
194 =out$
195
196 REM Local Variables:
197 REM indent-tabs-mode: nil
198 REM End: