bbc-basic: Slight tweak to heap size.
[jackhill/mal.git] / nasm / reader.asm
1 %include "macros.mac"
2
3 section .data
4
5 ;; Reader macro strings
6
7 static quote_symbol_string, db "quote"
8 static quasiquote_symbol_string, db "quasiquote"
9 static unquote_symbol_string, db "unquote"
10 static splice_unquote_symbol_string, db "splice-unquote"
11 static deref_symbol_string, db "deref"
12 static with_meta_symbol_string, db "with-meta"
13
14 ;; Error message strings
15
16 static error_string_unexpected_end, db "Error: Unexpected end of input (EOF). Could be a missing ) or ]", 10
17 static error_string_bracket_not_brace, db "Error: Expecting '}' but got ')'"
18
19 ;; Symbols for comparison
20
21 static_symbol nil_symbol, 'nil'
22 static_symbol true_symbol, 'true'
23 static_symbol false_symbol, 'false'
24
25 section .text
26
27 ;; Read a string into memory as a form (nested lists and atoms)
28 ;; Note: In this implementation the tokenizer is not done separately
29 ;;
30 ;; Input: Address of string (char array) in RSI
31 ;;
32 ;; Output: Address of object in RAX
33 ;;
34 ;; Uses registers:
35 ;; R12 Address of the start of the current list (starts 0)
36 ;; R13 Address of the current list tail
37 ;; R14 Stack pointer at start. Used for unwinding on error
38 ;; R15 Address of first list. Used for unwinding on error
39 ;;
40 ;; In addition, the tokenizer uses
41 ;;
42 ;; RAX (object return)
43 ;; RBX
44 ;; RCX (character return in CL)
45 ;; RDX
46 ;; R8 ** State must be preserved
47 ;; R9 **
48 ;; R10 **
49 ;; R12
50 ;; R13
51 ;; R14 Original stack pointer on call
52 ;; R15 Top-level list, so all can be released on error
53 ;;
54 read_str:
55 ; Initialise tokenizer
56 call tokenizer_init
57
58 ; Set current list to zero
59 mov r12, 0
60
61 ; Set first list to zero
62 mov r15, 0
63
64 ; Save stack pointer for unwinding
65 mov r14, rsp
66
67 .read_loop:
68
69 call tokenizer_next
70 cmp cl, 0
71 jne .got_token
72
73 ; Unexpected end of tokens
74 mov rdx, error_string_unexpected_end.len
75 mov rsi, error_string_unexpected_end
76 jmp .error
77
78 .got_token:
79
80 cmp cl, 'i' ; An integer. Cons object in RAX
81 je .finished
82 cmp cl, '"' ; A string. Array object in RAX
83 je .finished
84 cmp cl, 's' ; A symbol
85 je .symbol
86
87 cmp cl, '('
88 je .list_start
89
90 cmp cl, ')'
91 je .return_nil ; Note: if reading a list, cl will be tested in the list reader
92
93 cmp cl, '{'
94 je .map_start
95
96 cmp cl, '}' ; cl tested in map reader
97 je .return_nil
98
99 cmp cl, '['
100 je .vector_start
101
102 cmp cl, ']' ; cl tested in vector reader
103 je .return_nil
104
105 cmp cl, 39 ; quote '
106 je .handle_quote
107 cmp cl, '`'
108 je .handle_quasiquote
109 cmp cl, '~'
110 je .handle_unquote
111 cmp cl, 1
112 je .handle_splice_unquote
113 cmp cl, '@'
114 je .handle_deref
115
116 cmp cl, '^'
117 je .handle_with_meta
118
119 ; Unknown
120 jmp .return_nil
121
122 ; --------------------------------
123
124 .list_start:
125
126 ; Get the first value
127 ; Note that we call rather than jmp because the first
128 ; value needs to be treated differently. There's nothing
129 ; to append to yet...
130 call .read_loop
131
132 ; rax now contains the first object
133 cmp cl, ')' ; Check if it was end of list
134 jne .list_has_contents
135 mov cl, 0 ; so ')' doesn't propagate to nested lists
136 ; Set list to empty
137 mov [rax], BYTE maltype_empty_list
138 ret ; Returns 'nil' given "()"
139 .list_has_contents:
140 ; If this is a Cons then use it
141 ; If not, then need to allocate a Cons
142 mov cl, BYTE [rax]
143 mov ch, cl
144 and ch, (block_mask + container_mask) ; Tests block and container type
145 jz .list_is_value
146
147 ; If here then not a simple value, so need to allocate
148 ; a Cons object
149
150 ; Start new list
151 push rax
152 call alloc_cons ; Address in rax
153 pop rbx
154 mov [rax], BYTE (block_cons + container_list + content_pointer)
155 mov [rax + Cons.car], rbx
156 ; Now have Cons in RAX, containing pointer to object as car
157
158 .list_is_value:
159 ; Cons in RAX
160 ; Make sure it's marked as a list
161 mov cl, BYTE [rax]
162 or cl, container_list
163 mov [rax], BYTE cl
164
165 mov r12, rax ; Start of current list
166 mov r13, rax ; Set current list
167 cmp r15, 0 ; Test if first list
168 jne .list_read_loop
169 mov r15, rax ; Save the first, for unwinding
170
171 .list_read_loop:
172 ; Repeatedly get the next value in the list
173 ; (which may be other lists)
174 ; until we get a ')' token
175
176 push r12
177 push r13
178 call .read_loop ; object in rax
179 pop r13
180 pop r12
181
182 cmp cl, ')' ; Check if it was end of list
183 je .list_done ; Have nil object in rax
184
185 ; Test if this is a Cons value
186 mov cl, BYTE [rax]
187 mov ch, cl
188 and ch, (block_mask + container_mask) ; Tests block and container type
189 jz .list_loop_is_value
190
191 ; If here then not a simple value, so need to allocate
192 ; a Cons object
193
194 ; Start new list
195 push rax
196 call alloc_cons ; Address in rax
197 pop rbx
198 mov [rax], BYTE (block_cons + container_list + content_pointer)
199 mov [rax + Cons.car], rbx
200 ; Now have Cons in RAX, containing pointer to object as car
201
202 .list_loop_is_value:
203 ; Cons in RAX
204
205 ; Make sure it's marked as a list
206 mov cl, BYTE [rax]
207 or cl, container_list
208 mov [rax], BYTE cl
209
210 ; Append to r13
211 mov [r13 + Cons.typecdr], BYTE content_pointer
212 mov [r13 + Cons.cdr], rax
213 mov r13, rax ; Set current list
214
215 jmp .list_read_loop
216
217 .list_done:
218 ; Release nil object in rax
219 mov rsi, rax
220 call release_cons
221
222 ; Terminate the list
223 mov [r13 + Cons.typecdr], BYTE content_nil
224 mov QWORD [r13 + Cons.cdr], QWORD 0
225 mov rax, r12 ; Start of current list
226
227 ret
228
229 ; --------------------------------
230
231 .map_start:
232
233 ; Get the first value
234 ; Note that we call rather than jmp because the first
235 ; value needs to be treated differently. There's nothing
236 ; to append to yet...
237 call .read_loop
238
239 ; rax now contains the first object
240 cmp cl, '}' ; Check if it was end of map
241 jne .map_has_contents
242 mov cl, 0 ; so '}' doesn't propagate to nested maps
243 ; Set map to empty
244 mov [rax], BYTE maltype_empty_map
245 ret ; Returns 'nil' given "()"
246 .map_has_contents:
247 ; If this is a Cons then use it
248 ; If not, then need to allocate a Cons
249 mov cl, BYTE [rax]
250 mov ch, cl
251 and ch, (block_mask + container_mask) ; Tests block and container type
252 jz .map_is_value
253
254 ; If here then not a simple value, so need to allocate
255 ; a Cons object
256
257 ; Start new map
258 push rax
259 call alloc_cons ; Address in rax
260 pop rbx
261 mov [rax], BYTE (block_cons + container_map + content_pointer)
262 mov [rax + Cons.car], rbx
263 ; Now have Cons in RAX, containing pointer to object as car
264
265 .map_is_value:
266 ; Cons in RAX
267 ; Make sure it's marked as a map
268 mov cl, BYTE [rax]
269 or cl, container_map
270 mov [rax], BYTE cl
271
272 mov r12, rax ; Start of current map
273 mov r13, rax ; Set current map
274 cmp r15, 0 ; Test if first map
275 jne .map_read_loop
276 mov r15, rax ; Save the first, for unwinding
277
278 .map_read_loop:
279 ; Repeatedly get the next value in the map
280 ; (which may be other maps)
281 ; until we get a '}' token
282
283 push r12
284 push r13
285 call .read_loop ; object in rax
286 pop r13
287 pop r12
288
289 cmp cl, '}' ; Check if it was end of map
290 je .map_done ; Have nil object in rax
291
292 ; Test if this is a Cons value
293 mov cl, BYTE [rax]
294 mov ch, cl
295 and ch, (block_mask + container_mask) ; Tests block and container type
296 jz .map_loop_is_value
297
298 ; If here then not a simple value, so need to allocate
299 ; a Cons object
300
301 ; Start new map
302 push rax
303 call alloc_cons ; Address in rax
304 pop rbx
305 mov [rax], BYTE (block_cons + container_map + content_pointer)
306 mov [rax + Cons.car], rbx
307 ; Now have Cons in RAX, containing pointer to object as car
308
309 .map_loop_is_value:
310 ; Cons in RAX
311
312 ; Make sure it's marked as a map
313 mov cl, BYTE [rax]
314 or cl, container_map
315 mov [rax], BYTE cl
316
317 ; Append to r13
318 mov [r13 + Cons.typecdr], BYTE content_pointer
319 mov [r13 + Cons.cdr], rax
320 mov r13, rax ; Set current map
321
322 jmp .map_read_loop
323
324 .map_done:
325 ; Release nil object in rax
326 mov rsi, rax
327 call release_cons
328
329 ; Terminate the map
330 mov [r13 + Cons.typecdr], BYTE content_nil
331 mov QWORD [r13 + Cons.cdr], QWORD 0
332 mov rax, r12 ; Start of current map
333
334 ret
335
336 ; --------------------------------
337
338 .vector_start:
339
340 ; Get the first value
341 ; Note that we call rather than jmp because the first
342 ; value needs to be treated differently. There's nothing
343 ; to append to yet...
344 call .read_loop
345
346 ; rax now contains the first object
347 cmp cl, ']' ; Check if it was end of vector
348 jne .vector_has_contents
349 mov cl, 0 ; so ']' doesn't propagate to nested vectors
350 ; Set vector to empty
351 mov [rax], BYTE maltype_empty_vector
352 ret ; Returns 'nil' given "()"
353 .vector_has_contents:
354 ; If this is a Cons then use it
355 ; If not, then need to allocate a Cons
356 mov cl, BYTE [rax]
357 mov ch, cl
358 and ch, (block_mask + container_mask) ; Tests block and container type
359 jz .vector_is_value
360
361 ; If here then not a simple value, so need to allocate
362 ; a Cons object
363
364 ; Start new vector
365 push rax
366 call alloc_cons ; Address in rax
367 pop rbx
368 mov [rax], BYTE (block_cons + container_vector + content_pointer)
369 mov [rax + Cons.car], rbx
370 ; Now have Cons in RAX, containing pointer to object as car
371
372 .vector_is_value:
373 ; Cons in RAX
374 ; Make sure it's marked as a vector
375 mov cl, BYTE [rax]
376 or cl, container_vector
377 mov [rax], BYTE cl
378
379 mov r12, rax ; Start of current vector
380 mov r13, rax ; Set current vector
381 cmp r15, 0 ; Test if first vector
382 jne .vector_read_loop
383 mov r15, rax ; Save the first, for unwinding
384
385 .vector_read_loop:
386 ; Repeatedly get the next value in the vector
387 ; (which may be other vectors)
388 ; until we get a ']' token
389
390 push r12
391 push r13
392 call .read_loop ; object in rax
393 pop r13
394 pop r12
395
396 cmp cl, ']' ; Check if it was end of vector
397 je .vector_done ; Have nil object in rax
398
399 ; Test if this is a Cons value
400 mov cl, BYTE [rax]
401 mov ch, cl
402 and ch, (block_mask + container_mask) ; Tests block and container type
403 jz .vector_loop_is_value
404
405 ; If here then not a simple value, so need to allocate
406 ; a Cons object
407
408 ; Start new vector
409 push rax
410 call alloc_cons ; Address in rax
411 pop rbx
412 mov [rax], BYTE (block_cons + container_vector + content_pointer)
413 mov [rax + Cons.car], rbx
414 ; Now have Cons in RAX, containing pointer to object as car
415
416 .vector_loop_is_value:
417 ; Cons in RAX
418
419 ; Make sure it's marked as a vector
420 mov cl, BYTE [rax]
421 or cl, container_vector
422 mov [rax], BYTE cl
423
424 ; Append to r13
425 mov [r13 + Cons.typecdr], BYTE content_pointer
426 mov [r13 + Cons.cdr], rax
427 mov r13, rax ; Set current vector
428
429 jmp .vector_read_loop
430
431 .vector_done:
432 ; Release nil object in rax
433 mov rsi, rax
434 call release_cons
435
436 ; Terminate the vector
437 mov [r13 + Cons.typecdr], BYTE content_nil
438 mov QWORD [r13 + Cons.cdr], QWORD 0
439 mov rax, r12 ; Start of current vector
440
441 ret
442
443 ; --------------------------------
444 .handle_quote:
445 ; Turn 'a into (quote a)
446 call alloc_cons ; Address in rax
447 mov r12, rax
448
449 ; Get a symbol "quote"
450 push r8
451 push r9
452 mov rsi, quote_symbol_string
453 mov edx, quote_symbol_string.len
454 call raw_to_string ; Address in rax
455 pop r9
456 pop r8
457
458 .wrap_next_object:
459 mov [rax], BYTE maltype_symbol
460 mov [r12], BYTE (block_cons + container_list + content_pointer)
461 mov [r12 + Cons.car], rax
462
463 ; Get the next object
464 push r12
465 call .read_loop ; object in rax
466 pop r12
467
468 mov r13, rax ; Put object to be quoted in r13
469
470 call alloc_cons ; Address in rax
471 mov [rax], BYTE (block_cons + container_list + content_pointer)
472 mov [rax + Cons.car], r13
473 mov [rax + Cons.typecdr], BYTE content_nil
474
475 ; Cons object in rax. Append to object in r12
476 mov [r12 + Cons.typecdr], BYTE content_pointer
477 mov [r12 + Cons.cdr], rax
478
479 mov rax, r12
480 ret
481
482 ; --------------------------------
483 .handle_quasiquote:
484 ; Turn `a into (quasiquote a)
485 call alloc_cons ; Address in rax
486 mov r12, rax
487
488 ; Get a symbol "quasiquote"
489 push r8
490 push r9
491 mov rsi, quasiquote_symbol_string
492 mov edx, quasiquote_symbol_string.len
493 call raw_to_string ; Address in rax
494 pop r9
495 pop r8
496 jmp .wrap_next_object ; From there the same as handle_quote
497
498 ; --------------------------------
499 .handle_unquote:
500 ; Turn ~a into (unquote a)
501 call alloc_cons ; Address in rax
502 mov r12, rax
503
504 ; Get a symbol "unquote"
505 push r8
506 push r9
507 mov rsi, unquote_symbol_string
508 mov edx, unquote_symbol_string.len
509 call raw_to_string ; Address in rax
510 pop r9
511 pop r8
512 jmp .wrap_next_object ; From there the same as handle_quote
513
514 ; --------------------------------
515 .handle_splice_unquote:
516 ; Turn ~@a into (unquote a)
517 call alloc_cons ; Address in rax
518 mov r12, rax
519
520 ; Get a symbol "unquote"
521 push r8
522 push r9
523 mov rsi, splice_unquote_symbol_string
524 mov edx, splice_unquote_symbol_string.len
525 call raw_to_string ; Address in rax
526 pop r9
527 pop r8
528 jmp .wrap_next_object ; From there the same as handle_quote
529
530 ; --------------------------------
531
532 .handle_deref:
533 ; Turn @a into (deref a)
534
535 call alloc_cons ; Address in rax
536 mov r12, rax
537
538 ; Get a symbol "deref"
539 push r8
540 push r9
541 mov rsi, deref_symbol_string
542 mov edx, deref_symbol_string.len
543 call raw_to_string ; Address in rax
544 pop r9
545 pop r8
546 jmp .wrap_next_object ; From there the same as handle_quote
547
548 ; --------------------------------
549
550 .handle_with_meta:
551 ; Turn ^ a b into (with-meta b a)
552
553 call alloc_cons ; Address in rax
554 mov r12, rax
555
556 ; Get a symbol "with-meta"
557 push r8
558 push r9
559 mov rsi, with_meta_symbol_string
560 mov edx, with_meta_symbol_string.len
561 call raw_to_string ; Address in rax
562 pop r9
563 pop r8
564
565 mov [rax], BYTE maltype_symbol
566 mov [r12], BYTE (block_cons + container_list + content_pointer)
567 mov [r12 + Cons.car], rax
568
569 ; Get the next two objects
570 push r12
571 call .read_loop ; object in rax
572 pop r12
573 push rax
574 push r12
575 call .read_loop ; in RAX
576 pop r12
577
578 mov r13, rax
579
580 call alloc_cons ; Address in rax
581 mov [rax], BYTE (block_cons + container_list + content_pointer)
582 mov [rax + Cons.car], r13
583
584 ; Cons object in rax. Append to object in r12
585 mov [r12 + Cons.typecdr], BYTE content_pointer
586 mov [r12 + Cons.cdr], rax
587
588 mov r13, rax
589
590 call alloc_cons ; Address in rax
591 mov [rax], BYTE (block_cons + container_list + content_pointer)
592
593 pop rdi ; First object
594 mov [rax + Cons.car], rdi
595
596 ; Append to object in R13
597 mov [r13 + Cons.typecdr], BYTE content_pointer
598 mov [r13 + Cons.cdr], rax
599
600 mov rax, r12
601 ret
602
603 ; --------------------------------
604 .symbol:
605 ; symbol is in RAX
606 ; Some symbols are have their own type
607 ; - nil, true, false
608 ;
609
610 mov rsi, rax
611 mov rdi, nil_symbol
612 push rsi
613 call compare_char_array
614 pop rsi
615 cmp rax, 0
616 je .symbol_nil
617
618 mov rdi, true_symbol
619 push rsi
620 call compare_char_array
621 pop rsi
622 cmp rax, 0
623 je .symbol_true
624
625 mov rdi, false_symbol
626 push rsi
627 call compare_char_array
628 pop rsi
629 cmp rax, 0
630 je .symbol_false
631
632 ; not a special symbol, so return
633 mov rax, rsi
634 ret
635
636 .symbol_nil:
637 ; symbol in rsi not needed
638 call release_array
639
640 call alloc_cons
641 mov [rax], BYTE maltype_nil ; a nil type
642 ret
643
644 .symbol_true:
645 call release_array
646
647 call alloc_cons
648 mov [rax], BYTE maltype_true
649 ret
650
651 .symbol_false:
652 call release_array
653
654 call alloc_cons
655 mov [rax], BYTE maltype_false
656 ret
657
658 ; --------------------------------
659 .finished:
660 ret
661
662 .error:
663 ; Jump here on error with raw string in RSI
664 ; and string length in rdx
665 push r14
666 push r15
667 call print_rawstring
668 pop r15
669 pop r14
670
671 ; fall through to unwind
672 .unwind:
673 ; Jump to here cleans up
674
675 mov rsp, r14 ; Rewind stack pointer
676 cmp r15, 0 ; Check if there is a list
677 je .return_nil
678 mov rsi, r15
679 call release_cons ; releases everything recursively
680 ; fall through to return_nil
681 .return_nil:
682 ; Allocates a new Cons object with nil and returns
683 ; Cleanup should happen before jumping here
684 push rcx
685 call alloc_cons
686 pop rcx
687 mov [rax], BYTE maltype_nil
688 mov [rax + Cons.typecdr], BYTE content_nil
689 ret
690
691
692
693 ;; Initialise the tokenizer
694 ;;
695 ;; Input: Address of string in RSI
696 ;;
697 ;; NOTE: This uses RSI, RAX and RBX, and expects these to be preserved
698 ;; between calls to tokenizer_next_char
699 ;;
700 ;; R9 Address of string
701 ;; R10 Position in data array
702 ;; R11 End of data array
703 ;;
704 tokenizer_init:
705 ; Save string to r9
706 mov r9, rsi
707 ; Put start of data array into r10
708 mov r10, rsi
709 add r10, Array.data
710 ; Put end of data array into r11
711 mov r11d, [rsi + Array.length] ; Length of array, zero-extended
712 add r11, r10
713
714 ret
715
716 ;; Move onto the next chunk of the array
717 ;; This is needed because strings are not stored in one
718 ;; contiguous block of memory, but may use multiple Array
719 ;; objects in a linked list
720 ;;
721 ;; If no chunks are left, then R10 = R11
722 tokenizer_next_chunk:
723 mov r10, [r9 + Array.next]
724 cmp r10, 0
725 je .no_more
726 ; More chunks left
727 push rsi ; Because symbol reading uses RSI (tokenizer_next.handle_symbol)
728 mov rsi, r10
729 call tokenizer_init
730 pop rsi
731 ret
732 .no_more:
733 ; No more chunks left. R10 is zero
734 mov r11, r10
735 ret
736
737 ;; Moves the next char into CL
738 ;; If no more, puts 0 into CL
739 tokenizer_next_char:
740 ; Check if we have reached the end of this chunk
741 cmp r10, r11
742 jne .chars_remain
743
744 ; Hit the end. See if there is another chunk
745 call tokenizer_next_chunk
746 cmp r10, r11
747 jne .chars_remain ; Success, got another
748
749 ; No more chunks
750 mov cl, 0 ; Null char signals end
751 ret
752
753 .chars_remain:
754 mov cl, BYTE [r10]
755 inc r10 ; point to next byte
756 ret
757
758 ;; Get the next token
759 ;; Token code is in CL register. Could be:
760 ;; - 0 : Nil, finished
761 ;; - Characters ()[]()'`~^@
762 ;; - Pair '~@', represented by code 1
763 ;; - A string: " in CL, and address in RAX
764 ;; - An integer: 'i' in CL
765 ;; - A symbol: 's' in CL, address in RAX
766 ;;
767 ;; Address of object in RAX
768 ;;
769 ;; May use registers:
770 ;; RBX
771 ;; RCX
772 ;; RDX
773 ;;
774 tokenizer_next:
775
776 .next_char:
777 ; Fetch the next char into CL
778 call tokenizer_next_char
779
780 cmp cl, 0
781 je .found ; End, no more tokens
782
783 ; Here expect to have:
784 ; - The current character in CL
785 ; - Address of next data in r10
786 ; - Address of data end in r11
787
788 ; Skip whitespace or commas
789 cmp cl, ' ' ; Space
790 je .next_char
791 cmp cl, ',' ; Comma
792 je .next_char
793 cmp cl, 9 ; Tab
794 je .next_char
795 cmp cl, 10 ; Line Feed
796 je .next_char
797 cmp cl, 13 ; Carriage Return
798 je .next_char
799
800 ; Special characters. These are returned in CL as-is
801 cmp cl, '('
802 je .found
803 cmp cl, ')'
804 je .found
805 cmp cl, '['
806 je .found
807 cmp cl, ']'
808 je .found
809 cmp cl, '{'
810 je .found
811 cmp cl, '}'
812 je .found
813 cmp cl, 39 ; character '
814 je .found
815 cmp cl, 96 ; character `
816 je .found
817 cmp cl, '^'
818 je .found
819 cmp cl, '@'
820 je .found
821 cmp cl, '~' ; Could be followed by '@'
822 je .handle_tilde
823
824 cmp cl, ';' ; Start of a comment
825 je .comment
826
827 cmp cl, 34 ; Opening string quotes
828 je .handle_string
829
830 ; Could be number or symbol
831
832 cmp cl, '-' ; Minus sign
833 je .handle_minus
834 mov ch, 0
835
836 ; Check for a character 0-9
837 cmp cl, '0'
838 jl .handle_symbol
839 cmp cl, '9'
840 jg .handle_symbol
841
842 ; Here an integer
843 jmp .handle_integer
844
845 .comment:
846 ; Start of a comment. Keep reading until a new line or end
847
848 ; Fetch the next char into CL
849 call tokenizer_next_char
850
851 cmp cl, 0
852 je .found ; End, no more tokens
853
854 cmp cl, 10
855 je .next_char ; Next line, start reading again
856
857 jmp .comment
858
859 .handle_minus:
860
861 ; Push current state of the tokenizer
862 push r9
863 push r10
864 push r11
865
866 ; Get the next character
867 call tokenizer_next_char
868
869 ; Check if it is a number
870 cmp cl, '0'
871 jl .minus_not_number
872 cmp cl, '9'
873 jg .minus_not_number
874
875 ; Here is a number
876 mov ch, '-' ; Put '-' in ch for later
877
878 ; Discard old state by moving stack pointer
879 add rsp, 24 ; 3 * 8 bytes
880
881 jmp .handle_integer
882
883 .minus_not_number:
884
885 ; Restore state
886 pop r11
887 pop r10
888 pop r9
889
890 mov cl, '-' ; Put back
891
892 jmp .handle_symbol
893
894 .handle_integer:
895 ; Start integer
896 ; accumulate in EDX
897 xor edx, edx
898
899 .integer_loop:
900 ; Here have a char 0-9 in CL
901 sub cl, '0' ; Convert to number between 0 and 9
902 movzx ebx, cl
903 add edx, ebx
904
905 ; Push current state of the tokenizer
906 push r9
907 push r10
908 push r11
909
910 ; Peek at next character
911 call tokenizer_next_char ; Next char in CL
912
913 cmp cl, '0'
914 jl .integer_finished
915 cmp cl, '9'
916 jg .integer_finished
917
918 ; Discard old state by moving stack pointer
919 add rsp, 24 ; 3 * 8 bytes
920
921 imul edx, 10
922
923 jmp .integer_loop
924
925 .integer_finished:
926 ; Next char not an int
927
928 ; Restore state of the tokenizer
929 pop r11
930 pop r10
931 pop r9
932
933 push rdx ; Save the integer
934 ; Get a Cons object to put the result into
935 call alloc_cons
936
937 pop rdx ; Restore integer
938
939 ; Check if the number should be negative
940 cmp ch, '-'
941 jne .integer_store
942 neg rdx
943
944 .integer_store:
945 ; Address of Cons now in RAX
946 mov [rax], BYTE maltype_integer
947
948 mov [rax + Cons.car], rdx
949
950 mov cl, 'i' ; Mark as an integer
951 ret
952
953 ; -------------------------------------------
954 .handle_symbol:
955 ; Read characters until reaching whitespace, special character or end
956
957 call string_new
958 mov rsi, rax ; Output string in rsi
959
960 .symbol_loop:
961 ; Put the current character into the array
962 call string_append_char
963
964 ; Push current state of the tokenizer
965 push r9
966 push r10
967 push r11
968
969 call tokenizer_next_char
970 cmp cl, 0 ; End of characters
971 je .symbol_finished
972
973 cmp cl, ' ' ; Space
974 je .symbol_finished
975 cmp cl, ',' ; Comma
976 je .symbol_finished
977 cmp cl, 9 ; Tab
978 je .symbol_finished
979 cmp cl, 10 ; Line Feed
980 je .symbol_finished
981 cmp cl, 13 ; Carriage Return
982 je .symbol_finished
983
984 cmp cl, '('
985 je .symbol_finished
986 cmp cl, ')'
987 je .symbol_finished
988 cmp cl, '['
989 je .symbol_finished
990 cmp cl, ']'
991 je .symbol_finished
992 cmp cl, '{'
993 je .symbol_finished
994 cmp cl, '}'
995 je .symbol_finished
996 cmp cl, 39 ; character '
997 je .symbol_finished
998 cmp cl, 96 ; character `
999 je .symbol_finished
1000 cmp cl, '^'
1001 je .symbol_finished
1002 cmp cl, '@'
1003 je .symbol_finished
1004 cmp cl, '~'
1005 je .symbol_finished
1006 cmp cl, ';' ; Start of a comment
1007 je .symbol_finished
1008 cmp cl, 34 ; Opening string quotes
1009 je .symbol_finished
1010
1011 ; Keeping current character
1012 ; Discard old state by moving stack pointer
1013 add rsp, 24 ; 3 * 8 bytes
1014
1015 jmp .symbol_loop ; Append to array
1016
1017 .symbol_finished:
1018 ; Not keeping current character
1019 ; Restore state of the tokenizer
1020 pop r11
1021 pop r10
1022 pop r9
1023
1024 mov rax, rsi
1025 mov [rax], BYTE maltype_symbol ; Mark as a symbol
1026 mov cl, 's' ; used by read_str
1027 ret
1028
1029 ; --------------------------------------------
1030 .handle_string:
1031 ; Get an array to put the string into
1032
1033 call string_new ; Array in RAX
1034
1035 ; Put start of data array into rbx
1036 mov rbx, rax
1037 add rbx, Array.data
1038 ; Put end of data array into rdx
1039 mov edx, DWORD [rax + Array.length] ; Length of array, zero-extended
1040 add rdx, rbx
1041
1042 ; Now read chars from input string and push into output
1043 .string_loop:
1044
1045 call tokenizer_next_char
1046 cmp cl, 0 ; End of characters
1047 je .error
1048
1049 cmp cl, 34 ; Finishing '"'
1050 je .string_done ; Leave '"' in CL
1051
1052 cmp cl, 92 ; Escape '\'
1053 jne .end_string_escape
1054
1055 ; Current character is a '\'
1056 call tokenizer_next_char
1057 cmp cl, 0 ; End of characters
1058 je .error
1059
1060 cmp cl, 'n' ; \n, newline
1061 je .insert_newline
1062
1063 ; Whatever is in cl is now put into string
1064 ; including '"'
1065 jmp .end_string_escape
1066
1067 .insert_newline:
1068 mov cl, 10
1069 jmp .end_string_escape
1070
1071 .end_string_escape:
1072
1073 ; Put CL onto result array
1074 ; NOTE: this doesn't handle long strings (multiple memory blocks)
1075 mov [rbx], cl
1076 inc rbx
1077
1078 jmp .string_loop
1079
1080 .string_done:
1081 ; Calculate the length from rbx
1082 sub rbx, Array.data
1083 sub rbx, rax
1084 mov [rax+Array.length], DWORD ebx
1085 ret
1086
1087 ; ---------------------------------
1088
1089 .handle_tilde:
1090 ; Could have '~' or '~@'. Need to peek at the next char
1091
1092 ; Push current state of the tokenizer
1093 push r9
1094 push r10
1095 push r11
1096 call tokenizer_next_char ; Next char in CL
1097 cmp cl, '@'
1098 jne .tilde_no_amp ; Just '~', not '~@'
1099 ; Got '~@'
1100 mov cl, 1 ; Signals '~@'
1101
1102 ; Discard old state by moving stack pointer
1103 add rsp, 24 ; 3 * 8 bytes
1104 ret
1105
1106 .tilde_no_amp:
1107 mov cl, '~'
1108 ; Restore state of the tokenizer
1109 pop r11
1110 pop r10
1111 pop r9
1112 ; fall through to .found
1113 .found:
1114 ret
1115
1116 .error:
1117 ret
1118