5 ;; Reader macro strings
7 static quote_symbol_string
, db "quote"
8 static quasiquote_symbol_string
, db "quasiquote"
9 static unquote_symbol_string
, db "unquote"
10 static splice_unquote_symbol_string
, db "splice-unquote"
11 static deref_symbol_string
, db "deref"
12 static with_meta_symbol_string
, db "with-meta"
14 ;; Error message strings
16 static error_string_unexpected_end
, db "Error: Unexpected end of input (EOF). Could be a missing ) or ]", 10
17 static error_string_bracket_not_brace
, db "Error: Expecting '}' but got ')'"
19 ;; Symbols for comparison
21 static_symbol nil_symbol
, 'nil'
22 static_symbol true_symbol
, 'true'
23 static_symbol false_symbol
, 'false'
27 ;; Read a string into memory as a form (nested lists and atoms)
28 ;; Note: In this implementation the tokenizer is not done separately
30 ;; Input: Address of string (char array) in RSI
32 ;; Output: Address of object in RAX
35 ;; R12 Address of the start of the current list (starts 0)
36 ;; R13 Address of the current list tail
37 ;; R14 Stack pointer at start. Used for unwinding on error
38 ;; R15 Address of first list. Used for unwinding on error
40 ;; In addition, the tokenizer uses
42 ;; RAX (object return)
44 ;; RCX (character return in CL)
46 ;; R8 ** State must be preserved
51 ;; R14 Original stack pointer on call
52 ;; R15 Top-level list, so all can be released on error
55 ; Initialise tokenizer
58 ; Set current list to zero
61 ; Set first list to zero
64 ; Save stack pointer for unwinding
73 ; Unexpected end of tokens
74 mov rdx
, error_string_unexpected_end.len
75 mov rsi
, error_string_unexpected_end
80 cmp cl, 'i' ; An integer. Cons object in RAX
82 cmp cl, '"' ; A string. Array object in RAX
84 cmp cl, 's' ; A symbol
91 je .return_nil
; Note: if reading a list, cl will be tested in the list reader
96 cmp cl, '}' ; cl tested in map reader
102 cmp cl, ']' ; cl tested in vector reader
108 je .handle_quasiquote
112 je .handle_splice_unquote
122 ; --------------------------------
126 ; Get the first value
127 ; Note that we call rather than jmp because the first
128 ; value needs to be treated differently. There's nothing
129 ; to append to yet...
132 ; rax now contains the first object
133 cmp cl, ')' ; Check if it was end of list
134 jne .list_has_contents
135 mov cl, 0 ; so ')' doesn't propagate to nested lists
137 mov [rax
], BYTE maltype_empty_list
138 ret ; Returns 'nil' given "()"
140 ; If this is a Cons then use it
141 ; If not, then need to allocate a Cons
144 and ch, (block_mask
+ container_mask
) ; Tests block and container type
147 ; If here then not a simple value, so need to allocate
152 call alloc_cons
; Address in rax
154 mov [rax
], BYTE (block_cons
+ container_list
+ content_pointer
)
155 mov [rax
+ Cons.car
], rbx
156 ; Now have Cons in RAX, containing pointer to object as car
160 ; Make sure it's marked as a list
162 or cl, container_list
165 mov r12
, rax
; Start of current list
166 mov r13
, rax
; Set current list
167 cmp r15
, 0 ; Test if first list
169 mov r15
, rax
; Save the first, for unwinding
172 ; Repeatedly get the next value in the list
173 ; (which may be other lists)
174 ; until we get a ')' token
178 call .read_loop
; object in rax
182 cmp cl, ')' ; Check if it was end of list
183 je .list_done
; Have nil object in rax
185 ; Test if this is a Cons value
188 and ch, (block_mask
+ container_mask
) ; Tests block and container type
189 jz .list_loop_is_value
191 ; If here then not a simple value, so need to allocate
196 call alloc_cons
; Address in rax
198 mov [rax
], BYTE (block_cons
+ container_list
+ content_pointer
)
199 mov [rax
+ Cons.car
], rbx
200 ; Now have Cons in RAX, containing pointer to object as car
205 ; Make sure it's marked as a list
207 or cl, container_list
211 mov [r13
+ Cons.typecdr
], BYTE content_pointer
212 mov [r13
+ Cons.cdr
], rax
213 mov r13
, rax
; Set current list
218 ; Release nil object in rax
223 mov [r13
+ Cons.typecdr
], BYTE content_nil
224 mov QWORD [r13
+ Cons.cdr
], QWORD 0
225 mov rax
, r12
; Start of current list
229 ; --------------------------------
233 ; Get the first value
234 ; Note that we call rather than jmp because the first
235 ; value needs to be treated differently. There's nothing
236 ; to append to yet...
239 ; rax now contains the first object
240 cmp cl, '}' ; Check if it was end of map
241 jne .map_has_contents
242 mov cl, 0 ; so '}' doesn't propagate to nested maps
244 mov [rax
], BYTE maltype_empty_map
245 ret ; Returns 'nil' given "()"
247 ; If this is a Cons then use it
248 ; If not, then need to allocate a Cons
251 and ch, (block_mask
+ container_mask
) ; Tests block and container type
254 ; If here then not a simple value, so need to allocate
259 call alloc_cons
; Address in rax
261 mov [rax
], BYTE (block_cons
+ container_map
+ content_pointer
)
262 mov [rax
+ Cons.car
], rbx
263 ; Now have Cons in RAX, containing pointer to object as car
267 ; Make sure it's marked as a map
272 mov r12
, rax
; Start of current map
273 mov r13
, rax
; Set current map
274 cmp r15
, 0 ; Test if first map
276 mov r15
, rax
; Save the first, for unwinding
279 ; Repeatedly get the next value in the map
280 ; (which may be other maps)
281 ; until we get a '}' token
285 call .read_loop
; object in rax
289 cmp cl, '}' ; Check if it was end of map
290 je .map_done
; Have nil object in rax
292 ; Test if this is a Cons value
295 and ch, (block_mask
+ container_mask
) ; Tests block and container type
296 jz .map_loop_is_value
298 ; If here then not a simple value, so need to allocate
303 call alloc_cons
; Address in rax
305 mov [rax
], BYTE (block_cons
+ container_map
+ content_pointer
)
306 mov [rax
+ Cons.car
], rbx
307 ; Now have Cons in RAX, containing pointer to object as car
312 ; Make sure it's marked as a map
318 mov [r13
+ Cons.typecdr
], BYTE content_pointer
319 mov [r13
+ Cons.cdr
], rax
320 mov r13
, rax
; Set current map
325 ; Release nil object in rax
330 mov [r13
+ Cons.typecdr
], BYTE content_nil
331 mov QWORD [r13
+ Cons.cdr
], QWORD 0
332 mov rax
, r12
; Start of current map
336 ; --------------------------------
340 ; Get the first value
341 ; Note that we call rather than jmp because the first
342 ; value needs to be treated differently. There's nothing
343 ; to append to yet...
346 ; rax now contains the first object
347 cmp cl, ']' ; Check if it was end of vector
348 jne .vector_has_contents
349 mov cl, 0 ; so ']' doesn't propagate to nested vectors
350 ; Set vector to empty
351 mov [rax
], BYTE maltype_empty_vector
352 ret ; Returns 'nil' given "()"
353 .
vector_has_contents:
354 ; If this is a Cons then use it
355 ; If not, then need to allocate a Cons
358 and ch, (block_mask
+ container_mask
) ; Tests block and container type
361 ; If here then not a simple value, so need to allocate
366 call alloc_cons
; Address in rax
368 mov [rax
], BYTE (block_cons
+ container_vector
+ content_pointer
)
369 mov [rax
+ Cons.car
], rbx
370 ; Now have Cons in RAX, containing pointer to object as car
374 ; Make sure it's marked as a vector
376 or cl, container_vector
379 mov r12
, rax
; Start of current vector
380 mov r13
, rax
; Set current vector
381 cmp r15
, 0 ; Test if first vector
382 jne .vector_read_loop
383 mov r15
, rax
; Save the first, for unwinding
386 ; Repeatedly get the next value in the vector
387 ; (which may be other vectors)
388 ; until we get a ']' token
392 call .read_loop
; object in rax
396 cmp cl, ']' ; Check if it was end of vector
397 je .vector_done
; Have nil object in rax
399 ; Test if this is a Cons value
402 and ch, (block_mask
+ container_mask
) ; Tests block and container type
403 jz .vector_loop_is_value
405 ; If here then not a simple value, so need to allocate
410 call alloc_cons
; Address in rax
412 mov [rax
], BYTE (block_cons
+ container_vector
+ content_pointer
)
413 mov [rax
+ Cons.car
], rbx
414 ; Now have Cons in RAX, containing pointer to object as car
416 .
vector_loop_is_value:
419 ; Make sure it's marked as a vector
421 or cl, container_vector
425 mov [r13
+ Cons.typecdr
], BYTE content_pointer
426 mov [r13
+ Cons.cdr
], rax
427 mov r13
, rax
; Set current vector
429 jmp .vector_read_loop
432 ; Release nil object in rax
436 ; Terminate the vector
437 mov [r13
+ Cons.typecdr
], BYTE content_nil
438 mov QWORD [r13
+ Cons.cdr
], QWORD 0
439 mov rax
, r12
; Start of current vector
443 ; --------------------------------
445 ; Turn 'a into (quote a)
446 call alloc_cons
; Address in rax
449 ; Get a symbol "quote"
452 mov rsi
, quote_symbol_string
453 mov edx, quote_symbol_string.len
454 call raw_to_string
; Address in rax
459 mov [rax
], BYTE maltype_symbol
460 mov [r12
], BYTE (block_cons
+ container_list
+ content_pointer
)
461 mov [r12
+ Cons.car
], rax
463 ; Get the next object
465 call .read_loop
; object in rax
468 mov r13
, rax
; Put object to be quoted in r13
470 call alloc_cons
; Address in rax
471 mov [rax
], BYTE (block_cons
+ container_list
+ content_pointer
)
472 mov [rax
+ Cons.car
], r13
473 mov [rax
+ Cons.typecdr
], BYTE content_nil
475 ; Cons object in rax. Append to object in r12
476 mov [r12
+ Cons.typecdr
], BYTE content_pointer
477 mov [r12
+ Cons.cdr
], rax
482 ; --------------------------------
484 ; Turn `a into (quasiquote a)
485 call alloc_cons
; Address in rax
488 ; Get a symbol "quasiquote"
491 mov rsi
, quasiquote_symbol_string
492 mov edx, quasiquote_symbol_string.len
493 call raw_to_string
; Address in rax
496 jmp .wrap_next_object
; From there the same as handle_quote
498 ; --------------------------------
500 ; Turn ~a into (unquote a)
501 call alloc_cons
; Address in rax
504 ; Get a symbol "unquote"
507 mov rsi
, unquote_symbol_string
508 mov edx, unquote_symbol_string.len
509 call raw_to_string
; Address in rax
512 jmp .wrap_next_object
; From there the same as handle_quote
514 ; --------------------------------
515 .
handle_splice_unquote:
516 ; Turn ~@a into (unquote a)
517 call alloc_cons
; Address in rax
520 ; Get a symbol "unquote"
523 mov rsi
, splice_unquote_symbol_string
524 mov edx, splice_unquote_symbol_string.len
525 call raw_to_string
; Address in rax
528 jmp .wrap_next_object
; From there the same as handle_quote
530 ; --------------------------------
533 ; Turn @a into (deref a)
535 call alloc_cons
; Address in rax
538 ; Get a symbol "deref"
541 mov rsi
, deref_symbol_string
542 mov edx, deref_symbol_string.len
543 call raw_to_string
; Address in rax
546 jmp .wrap_next_object
; From there the same as handle_quote
548 ; --------------------------------
551 ; Turn ^ a b into (with-meta b a)
553 call alloc_cons
; Address in rax
556 ; Get a symbol "with-meta"
559 mov rsi
, with_meta_symbol_string
560 mov edx, with_meta_symbol_string.len
561 call raw_to_string
; Address in rax
565 mov [rax
], BYTE maltype_symbol
566 mov [r12
], BYTE (block_cons
+ container_list
+ content_pointer
)
567 mov [r12
+ Cons.car
], rax
569 ; Get the next two objects
571 call .read_loop
; object in rax
575 call .read_loop
; in RAX
580 call alloc_cons
; Address in rax
581 mov [rax
], BYTE (block_cons
+ container_list
+ content_pointer
)
582 mov [rax
+ Cons.car
], r13
584 ; Cons object in rax. Append to object in r12
585 mov [r12
+ Cons.typecdr
], BYTE content_pointer
586 mov [r12
+ Cons.cdr
], rax
590 call alloc_cons
; Address in rax
591 mov [rax
], BYTE (block_cons
+ container_list
+ content_pointer
)
593 pop rdi
; First object
594 mov [rax
+ Cons.car
], rdi
596 ; Append to object in R13
597 mov [r13
+ Cons.typecdr
], BYTE content_pointer
598 mov [r13
+ Cons.cdr
], rax
603 ; --------------------------------
606 ; Some symbols are have their own type
613 call compare_char_array
620 call compare_char_array
625 mov rdi
, false_symbol
627 call compare_char_array
632 ; not a special symbol, so return
637 ; symbol in rsi not needed
641 mov [rax
], BYTE maltype_nil
; a nil type
648 mov [rax
], BYTE maltype_true
655 mov [rax
], BYTE maltype_false
658 ; --------------------------------
663 ; Jump here on error with raw string in RSI
664 ; and string length in rdx
671 ; fall through to unwind
673 ; Jump to here cleans up
675 mov rsp
, r14
; Rewind stack pointer
676 cmp r15
, 0 ; Check if there is a list
679 call release_cons
; releases everything recursively
680 ; fall through to return_nil
682 ; Allocates a new Cons object with nil and returns
683 ; Cleanup should happen before jumping here
687 mov [rax
], BYTE maltype_nil
688 mov [rax
+ Cons.typecdr
], BYTE content_nil
693 ;; Initialise the tokenizer
695 ;; Input: Address of string in RSI
697 ;; NOTE: This uses RSI, RAX and RBX, and expects these to be preserved
698 ;; between calls to tokenizer_next_char
700 ;; R9 Address of string
701 ;; R10 Position in data array
702 ;; R11 End of data array
707 ; Put start of data array into r10
710 ; Put end of data array into r11
711 mov r11d
, [rsi
+ Array.
length] ; Length of array, zero-extended
716 ;; Move onto the next chunk of the array
717 ;; This is needed because strings are not stored in one
718 ;; contiguous block of memory, but may use multiple Array
719 ;; objects in a linked list
721 ;; If no chunks are left, then R10 = R11
722 tokenizer_next_chunk:
723 mov r10
, [r9
+ Array.next
]
727 push rsi
; Because symbol reading uses RSI (tokenizer_next.handle_symbol)
733 ; No more chunks left. R10 is zero
737 ;; Moves the next char into CL
738 ;; If no more, puts 0 into CL
740 ; Check if we have reached the end of this chunk
744 ; Hit the end. See if there is another chunk
745 call tokenizer_next_chunk
747 jne .chars_remain
; Success, got another
750 mov cl, 0 ; Null char signals end
755 inc r10
; point to next byte
758 ;; Get the next token
759 ;; Token code is in CL register. Could be:
760 ;; - 0 : Nil, finished
761 ;; - Characters ()[]()'`~^@
762 ;; - Pair '~@', represented by code 1
763 ;; - A string: " in CL, and address in RAX
764 ;; - An integer: 'i' in CL
765 ;; - A symbol: 's' in CL, address in RAX
767 ;; Address of object in RAX
769 ;; May use registers:
777 ; Fetch the next char into CL
778 call tokenizer_next_char
781 je .found
; End, no more tokens
783 ; Here expect to have:
784 ; - The current character in CL
785 ; - Address of next data in r10
786 ; - Address of data end in r11
788 ; Skip whitespace or commas
795 cmp cl, 10 ; Line Feed
797 cmp cl, 13 ; Carriage Return
800 ; Special characters. These are returned in CL as-is
813 cmp cl, 39 ; character '
815 cmp cl, 96 ; character `
821 cmp cl, '~' ; Could be followed by '@'
824 cmp cl, ';' ; Start of a comment
827 cmp cl, 34 ; Opening string quotes
830 ; Could be number or symbol
832 cmp cl, '-' ; Minus sign
836 ; Check for a character 0-9
846 ; Start of a comment. Keep reading until a new line or end
848 ; Fetch the next char into CL
849 call tokenizer_next_char
852 je .found
; End, no more tokens
855 je .next_char
; Next line, start reading again
861 ; Push current state of the tokenizer
866 ; Get the next character
867 call tokenizer_next_char
869 ; Check if it is a number
876 mov ch, '-' ; Put '-' in ch for later
878 ; Discard old state by moving stack pointer
879 add rsp
, 24 ; 3 * 8 bytes
890 mov cl, '-' ; Put back
900 ; Here have a char 0-9 in CL
901 sub cl, '0' ; Convert to number between 0 and 9
905 ; Push current state of the tokenizer
910 ; Peek at next character
911 call tokenizer_next_char
; Next char in CL
918 ; Discard old state by moving stack pointer
919 add rsp
, 24 ; 3 * 8 bytes
926 ; Next char not an int
928 ; Restore state of the tokenizer
933 push rdx
; Save the integer
934 ; Get a Cons object to put the result into
937 pop rdx
; Restore integer
939 ; Check if the number should be negative
945 ; Address of Cons now in RAX
946 mov [rax
], BYTE maltype_integer
948 mov [rax
+ Cons.car
], rdx
950 mov cl, 'i' ; Mark as an integer
953 ; -------------------------------------------
955 ; Read characters until reaching whitespace, special character or end
958 mov rsi
, rax
; Output string in rsi
961 ; Put the current character into the array
962 call string_append_char
964 ; Push current state of the tokenizer
969 call tokenizer_next_char
970 cmp cl, 0 ; End of characters
979 cmp cl, 10 ; Line Feed
981 cmp cl, 13 ; Carriage Return
996 cmp cl, 39 ; character '
998 cmp cl, 96 ; character `
1006 cmp cl, ';' ; Start of a comment
1008 cmp cl, 34 ; Opening string quotes
1011 ; Keeping current character
1012 ; Discard old state by moving stack pointer
1013 add rsp
, 24 ; 3 * 8 bytes
1015 jmp .symbol_loop
; Append to array
1018 ; Not keeping current character
1019 ; Restore state of the tokenizer
1025 mov [rax
], BYTE maltype_symbol
; Mark as a symbol
1026 mov cl, 's' ; used by read_str
1029 ; --------------------------------------------
1031 ; Get an array to put the string into
1033 call string_new
; Array in RAX
1035 ; Put start of data array into rbx
1038 ; Put end of data array into rdx
1039 mov edx, DWORD [rax
+ Array.
length] ; Length of array, zero-extended
1042 ; Now read chars from input string and push into output
1045 call tokenizer_next_char
1046 cmp cl, 0 ; End of characters
1049 cmp cl, 34 ; Finishing '"'
1050 je .string_done
; Leave '"' in CL
1052 cmp cl, 92 ; Escape '\'
1053 jne .end_string_escape
1055 ; Current character is a '\'
1056 call tokenizer_next_char
1057 cmp cl, 0 ; End of characters
1060 cmp cl, 'n' ; \n, newline
1063 ; Whatever is in cl is now put into string
1065 jmp .end_string_escape
1069 jmp .end_string_escape
1073 ; Put CL onto result array
1074 ; NOTE: this doesn't handle long strings (multiple memory blocks)
1081 ; Calculate the length from rbx
1084 mov [rax
+Array.
length], DWORD ebx
1087 ; ---------------------------------
1090 ; Could have '~' or '~@'. Need to peek at the next char
1092 ; Push current state of the tokenizer
1096 call tokenizer_next_char
; Next char in CL
1098 jne .tilde_no_amp
; Just '~', not '~@'
1100 mov cl, 1 ; Signals '~@'
1102 ; Discard old state by moving stack pointer
1103 add rsp
, 24 ; 3 * 8 bytes
1108 ; Restore state of the tokenizer
1112 ; fall through to .found