make, swift3: fix parsing empty literal sequences.
[jackhill/mal.git] / basic / reader.in.bas
CommitLineData
e0bcd3fb
JM
1REM READ_TOKEN(RF=0, A$, RI) -> T$
2REM READ_TOKEN(RF=1) -> T$
11f94d2e 3READ_TOKEN:
e0bcd3fb
JM
4 IF RF=1 THEN RF=2:T$="(":RETURN
5 IF RF=2 THEN RF=3:T$="do":RETURN
9d59cdb3 6 GOSUB SKIP_SPACES
e0bcd3fb
JM
7 REM PRINT "READ_TOKEN: "+STR$(RI)+", "+MID$(A$,RI,1)
8 GOSUB READ_CHAR
9 IF C$=";" THEN GOSUB SKIP_TO_EOL:GOTO READ_TOKEN
10 T$=C$
7895453b 11 IF T$="(" OR T$=")" OR T$="[" OR T$="]" OR T$="{" OR T$="}" OR T$="'" OR T$="`" OR T$="@" THEN RETURN
e0bcd3fb
JM
12 GOSUB PEEK_CHAR: REM peek at next character
13 IF T$="~" AND C$<>"@" THEN RETURN
60ef223c 14 S1=0:S2=0: REM S1: INSTRING?, S2: ESCAPED?
bf8d1f7d 15 IF T$=CHR$(34) THEN S1=1
11f94d2e 16 READ_TOKEN_LOOP:
e0bcd3fb
JM
17 GOSUB PEEK_CHAR: REM peek at next character
18 IF C$="" THEN RETURN
11f94d2e 19 IF S1 THEN GOTO READ_TOKEN_CONT
4fab6aa5
JM
20 IF C$=" " OR C$="," OR C$=CHR$(13) OR C$=CHR$(10) THEN RETURN
21 IF C$="(" OR C$=")" OR C$="[" OR C$="]" OR C$="{" OR C$="}" THEN RETURN
11f94d2e 22 READ_TOKEN_CONT:
e0bcd3fb 23 GOSUB READ_CHAR
4fab6aa5 24 T$=T$+C$
9e8f5211 25 IF T$="~@" THEN RETURN
e0bcd3fb
JM
26 IF S1=0 OR S2=1 THEN S2=0:GOTO READ_TOKEN_LOOP
27 REM S1=1 (INSTRING?) and S2=0 (not ESCAPED?)
28 IF C$=CHR$(92) THEN S2=1
29 IF C$=CHR$(34) THEN RETURN
11f94d2e
JM
30 GOTO READ_TOKEN_LOOP
31
e0bcd3fb
JM
32
33REM READ_CHAR(A$, RI) -> C$
34READ_CHAR:
35 RJ=1:GOSUB DO_READ_CHAR
36 RETURN
37
38REM PEEK_CHAR(A$, RI) -> C$
39PEEK_CHAR:
40 RJ=0:GOSUB DO_READ_CHAR
41 RETURN
42
43REM DO_READ_CHAR(RJ, A$, RI):
44REM - RI is position in A$
45REM - RJ=1 is read, RJ=0 is peek
46DO_READ_CHAR:
47 C$=""
48 IF RF>0 THEN GOTO READ_FILE_CHAR
49 IF RI<=LEN(A$) THEN C$=MID$(A$,RI,1):RI=RI+RJ
50 RETURN
51
52REM READ_FILE_CHAR(RJ) -> C$
53REM - RJ=1 is read, RJ=0 is peek
54REM - D$ is global used for already read pending character
55REM - EZ is global used for end of file state
56READ_FILE_CHAR:
57 IF D$<>"" THEN C$=D$:IF RJ=0 THEN RETURN
58 IF D$<>"" AND RJ=1 THEN D$="":RETURN
59 D$=""
60 IF EZ>2 THEN C$=""
61 IF EZ=2 THEN C$=")"
62 IF EZ=1 THEN C$=CHR$(10)
63 IF EZ>0 THEN EZ=EZ+RJ:RETURN
64 #cbm GET#2,C$
65 #qbasic C$=INPUT$(1,2)
66 #qbasic IF EOF(2) THEN EZ=1:RETURN
67 IF RJ=0 THEN D$=C$
68 #cbm IF (ST AND 64) THEN EZ=1:RETURN
69 #cbm IF (ST AND 255) THEN EZ=1:ER=-1:E$="File read error"+STR$(ST)
70 RETURN
0e508fa5 71
11f94d2e 72SKIP_SPACES:
e0bcd3fb
JM
73 GOSUB PEEK_CHAR: REM peek at next character
74 IF C$=" " OR C$="," OR C$=CHR$(13) OR C$=CHR$(10) THEN GOSUB READ_CHAR:GOTO SKIP_SPACES
75 RETURN
11f94d2e 76
0e508fa5 77SKIP_TO_EOL:
e0bcd3fb 78 GOSUB READ_CHAR
4fab6aa5 79 IF C$="" OR C$=CHR$(13) OR C$=CHR$(10) THEN RETURN
0e508fa5
JM
80 GOTO SKIP_TO_EOL
81
11f94d2e 82
0e508fa5 83REM READ_FORM(A$, RI, RF) -> R
9d59cdb3
JM
84SUB READ_FORM
85 Q=T:GOSUB PUSH_Q: REM save current value of T
86 READ_FORM_RECUR:
87 IF ER<>-2 THEN GOTO READ_FORM_RETURN
11f94d2e 88 GOSUB READ_TOKEN
bf8d1f7d 89 REM PRINT "READ_FORM T$: ["+T$+"]"
4202ef7b 90 IF T$="" THEN R=0:GOSUB INC_REF_R:GOTO READ_FORM_RETURN
cc9dbd92
JM
91 IF T$="nil" THEN T=0:GOTO READ_NIL_BOOL
92 IF T$="false" THEN T=1:GOTO READ_NIL_BOOL
93 IF T$="true" THEN T=2:GOTO READ_NIL_BOOL
c756af81
JM
94 IF T$="'" THEN B$="quote":GOTO READ_MACRO
95 IF T$="`" THEN B$="quasiquote":GOTO READ_MACRO
96 IF T$="~" THEN B$="unquote":GOTO READ_MACRO
97 IF T$="~@" THEN B$="splice-unquote":GOTO READ_MACRO
98 IF T$="^" THEN B$="with-meta":GOTO READ_MACRO
99 IF T$="@" THEN B$="deref":GOTO READ_MACRO
4fab6aa5
JM
100 C$=MID$(T$,1,1)
101 REM PRINT "C$: ["+C$+"]("+STR$(ASC(C$))+")"
4fab6aa5
JM
102 IF C$>="0" AND C$<="9" THEN GOTO READ_NUMBER
103 IF C$="-" THEN GOTO READ_SYMBOL_MAYBE
104
105 IF C$=CHR$(34) THEN GOTO READ_STRING
106 IF C$=":" THEN GOTO READ_KEYWORD
9d59cdb3 107 REM set end character in Q and read the sequence
e0bcd3fb
JM
108 IF C$="(" THEN T=6:Q=41:GOTO READ_SEQ_START: REM ")"
109 IF C$="[" THEN T=7:Q=93:GOTO READ_SEQ_START: REM "]"
110 IF C$="{" THEN T=8:Q=125:GOTO READ_SEQ_START: REM "}"
9d59cdb3 111 IF C$=")" OR C$="]" OR C$="}" THEN R=-1:ER=-1:E$="unexpected "+C$:GOTO READ_FORM_RETURN
11f94d2e
JM
112 GOTO READ_SYMBOL
113
4b84a23b
JM
114 READ_NIL_BOOL:
115 REM PRINT "READ_NIL_BOOL"
d7a6c2d6 116 R=T*2
4202ef7b 117 GOSUB INC_REF_R
9d59cdb3 118 GOTO READ_FORM_RETURN
11f94d2e
JM
119 READ_NUMBER:
120 REM PRINT "READ_NUMBER"
a742287e 121 T=2:L=VAL(T$):GOSUB ALLOC
9d59cdb3 122 GOTO READ_FORM_RETURN
9e8f5211 123 READ_MACRO:
9d59cdb3 124 REM push macro type
93593012 125 Q=-1*(T$="^"):GOSUB PUSH_Q
9e8f5211 126
c756af81 127 REM B$ is set above
93593012 128 T=5:GOSUB STRING
9d59cdb3 129 REM push string
93593012 130 GOSUB PUSH_R
9e8f5211 131
9d59cdb3
JM
132 CALL READ_FORM
133 REM push first form
93593012 134 GOSUB PUSH_R
9d59cdb3 135 IF ER>-2 THEN GOTO READ_MACRO_DONE
bbab5c5d 136
9d59cdb3 137 GOSUB PEEK_Q_2
93593012 138 IF Q THEN GOTO READ_MACRO_3
bbab5c5d
JM
139
140 READ_MACRO_2:
93593012
JM
141 GOSUB PEEK_Q_1:B=Q
142 GOSUB PEEK_Q:A=Q
143 GOSUB LIST2
bbab5c5d
JM
144 GOTO READ_MACRO_DONE
145
146 READ_MACRO_3:
9d59cdb3 147 CALL READ_FORM
93593012
JM
148 GOSUB PEEK_Q_1:C=Q
149 B=R
150 GOSUB PEEK_Q:A=Q
151 GOSUB LIST3
c756af81 152 AY=C:GOSUB RELEASE
bbab5c5d
JM
153
154 READ_MACRO_DONE:
155 REM release values, list has ownership
c756af81
JM
156 AY=B:GOSUB RELEASE
157 AY=A:GOSUB RELEASE
bbab5c5d 158
9d59cdb3
JM
159 REM pop the stack
160 GOSUB POP_Q: REM pop first form
161 GOSUB POP_Q: REM pop string
162 GOSUB POP_Q: REM pop macro type
bbab5c5d 163 T$="": REM necessary to prevent unexpected EOF errors
9d59cdb3
JM
164 GOTO READ_FORM_RETURN
165
11f94d2e 166 READ_STRING:
85d70fb7 167 REM PRINT "READ_STRING"
037815e0 168 C=ASC(MID$(T$,LEN(T$),1))
4aa0ebdf 169 IF C<>34 THEN R=-1:ER=-1:E$="expected '"+CHR$(34)+"', got EOF":GOTO READ_FORM_RETURN
85d70fb7 170 R$=MID$(T$,2,LEN(T$)-2)
ea02f464 171 S1$=CHR$(92)+CHR$(92):S2$=CHR$(127):GOSUB REPLACE: REM protect backslashes
60ef223c 172 S1$=CHR$(92)+CHR$(34):S2$=CHR$(34):GOSUB REPLACE: REM unescape quotes
115e430d
JM
173 #cbm S1$=CHR$(92)+"n":S2$=CHR$(13):GOSUB REPLACE: REM unescape newlines
174 #qbasic S1$=CHR$(92)+"n":S2$=CHR$(10):GOSUB REPLACE: REM unescape newlines
ea02f464 175 S1$=CHR$(127):S2$=CHR$(92):GOSUB REPLACE: REM unescape backslashes
241d5d57 176 REM intern string value
c756af81 177 B$=R$:T=4:GOSUB STRING
9d59cdb3 178 GOTO READ_FORM_RETURN
a742287e
JM
179 READ_KEYWORD:
180 R$=CHR$(127)+MID$(T$,2,LEN(T$)-1)
c756af81 181 B$=R$:T=4:GOSUB STRING
9d59cdb3 182 GOTO READ_FORM_RETURN
b7b1787f 183 READ_SYMBOL_MAYBE:
4fab6aa5
JM
184 C$=MID$(T$,2,1)
185 IF C$>="0" AND C$<="9" THEN GOTO READ_NUMBER
11f94d2e
JM
186 READ_SYMBOL:
187 REM PRINT "READ_SYMBOL"
c756af81 188 B$=T$:T=5:GOSUB STRING
9d59cdb3 189 GOTO READ_FORM_RETURN
11f94d2e 190
9d59cdb3 191 READ_SEQ_START:
9d59cdb3 192 SD=SD+1
4b84a23b 193
9d59cdb3 194 GOSUB PUSH_Q: REM push return character
4b84a23b 195
e0bcd3fb 196 REM setup the stack for the loop, T has type
9d59cdb3 197 GOSUB MAP_LOOP_START
4b84a23b 198
9d59cdb3 199 READ_SEQ_LOOP:
e0bcd3fb
JM
200
201 REM TODO: reduce redundancy with READ_TOKEN
202 GOSUB SKIP_SPACES
203 GOSUB PEEK_CHAR: REM peek at next character
204 IF C$="" THEN ER=-1:E$="unexpected EOF":GOTO READ_SEQ_DONE
205 IF C$=";" THEN GOSUB SKIP_TO_EOL:GOTO READ_SEQ_LOOP
9d59cdb3 206 Q=3:GOSUB PEEK_Q_Q
e0bcd3fb 207 IF C$=CHR$(Q) THEN GOSUB READ_CHAR:GOTO READ_SEQ_DONE
9d59cdb3
JM
208
209 CALL READ_FORM
d7a6c2d6 210 M=R: REM value (or key for hash-maps)
9d59cdb3
JM
211
212 REM if error, release the unattached element
213 IF ER<>-2 THEN AY=R:GOSUB RELEASE:GOTO READ_SEQ_DONE
214
215 REM if this is a hash-map, READ_FORM again
216 IF T=8 THEN GOSUB PUSH_R:CALL READ_FORM
d7a6c2d6 217 IF T=8 THEN N=R:GOSUB POP_Q:M=Q: REM set key and value
9d59cdb3
JM
218
219 REM update the return sequence structure
220 REM release N since list takes full ownership
221 C=1:GOSUB MAP_LOOP_UPDATE
222
223 GOTO READ_SEQ_LOOP
11f94d2e 224
9d59cdb3
JM
225 READ_SEQ_DONE:
226 SD=SD-1
227 REM cleanup stack and get return value
228 GOSUB MAP_LOOP_DONE
11f94d2e 229
9d59cdb3 230 GOSUB POP_Q: REM pop end character ptr
9d59cdb3 231 GOTO READ_FORM_RETURN
11f94d2e 232
9d59cdb3 233 READ_FORM_RETURN:
9d59cdb3 234 GOSUB POP_Q:T=Q: REM restore current value of T
4b84a23b 235
9d59cdb3 236END SUB
4b84a23b
JM
237
238
cc9dbd92 239REM READ_STR(A$) -> R
11f94d2e 240READ_STR:
0e508fa5
JM
241 RI=1: REM index into A$
242 RF=0: REM not reading from file
cc9dbd92 243 SD=0: REM sequence read depth
9d59cdb3 244 CALL READ_FORM
11f94d2e 245 RETURN
0e508fa5
JM
246
247REM READ_FILE(A$) -> R
248READ_FILE:
0e508fa5 249 RF=1: REM reading from file
037815e0 250 EZ=0: REM file read state (1: EOF)
0e508fa5 251 SD=0: REM sequence read depth
e0bcd3fb 252 D$="": REM pending read/peek character
01975886 253 #cbm OPEN 2,8,0,A$
c756af81 254 #qbasic IF NOT _FILEEXISTS(A$) THEN ER=-1:E$="File not found":RETURN
01975886 255 #qbasic OPEN A$ FOR INPUT AS #2
e0bcd3fb 256 REM READ_TOKEN adds "(do ... )"
9d59cdb3 257 CALL READ_FORM
0e508fa5 258 CLOSE 2
037815e0 259 EZ=0
0e508fa5 260 RETURN