Merge pull request #377 from asarhaddon/fix-runtests-pre-eval
[jackhill/mal.git] / common-lisp / src / reader.lisp
CommitLineData
8164982f 1(defpackage :reader
d045c9cf 2 (:use :common-lisp
2ce88322
IA
3 :types
4 :alexandria)
0795349b
IA
5 (:import-from :genhash
6 :hashref)
8164982f
IA
7 (:import-from :cl-ppcre
8 :create-scanner
9 :do-matches-as-strings
10 :scan)
11 (:import-from :utils
12 :replace-all)
13 (:export :read-str
14 :eof
15 :unexpected-token))
16
17(in-package :reader)
18
19;; Possible errors that can be raised while reading a string
20(define-condition unexpected-token (error)
21 ((expected :initarg :expected :reader expected-token)
22 (actual :initarg :actual :reader actual-token))
23 (:report (lambda (condition stream)
24 (format stream
25 "Unexpected token (~a) encountered while reading, expected ~a"
26 (actual-token condition)
27 (expected-token condition))))
28 (:documentation "Error raised when an unexpected token is encountered while reading."))
29
30(define-condition eof (error)
31 ((context :initarg :context :reader context))
32 (:report (lambda (condition stream)
33 (format stream
448f74e0 34 "EOF encountered while reading '~a'"
8164982f
IA
35 (context condition))))
36 (:documentation "Error raised when EOF is encountered while reading."))
37
38(defvar *tokenizer-re* (create-scanner "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)")
39 "Regular expression to tokenize Lisp code")
40
41(defvar *number-re* (create-scanner "^(-|\\+)?[\\d]+$")
42 "Regular expression to match a number")
43
44(defvar *string-re* (create-scanner "^\"(?:\\\\.|[^\\\\\"])*\"$")
45 "Regular expression to match a string")
46
47(defvar *whitespace-chars*
48 '(#\Space #\Newline #\Backspace #\Tab
49 #\Linefeed #\Page #\Return #\Rubout #\,)
50 "Characters to treat as whitespace, these are trimmed in `tokenize'")
51
52(defun tokenize (string)
53 "Tokenize given string.
54
55This function extracts all tokens from the string using *tokenizer-re*
56comments are ignored.
57
58Implementation notes: The regex scan generates some empty tokens, not really
59sure why."
60 (let (tokens)
61 (do-matches-as-strings (match *tokenizer-re* string)
62 (let ((token (string-trim *whitespace-chars* match)))
63 (unless (or (zerop (length token))
64 (char= (char token 0) #\;))
65 (push token tokens))))
66 (nreverse tokens)))
67
68;; Reader
69(defstruct (token-reader)
70 (tokens nil))
71
72(defun peek (reader)
73 "Returns the next token in the reader without advancing the token stream."
74 (car (token-reader-tokens reader)))
75
76(defun next (reader)
77 "Returns the next token and advances the token stream."
78 (pop (token-reader-tokens reader)))
79
80(defun consume (reader &optional (token nil token-provided-p))
81 "Consume the next token and advance the token stream.
82
83If the optional argument token is provided the token stream is advanced only
84if token being consumes matches it otherwise and unexpected token error is
85raised"
86 (let ((actual-token (pop (token-reader-tokens reader))))
87 (when (and token-provided-p
88 (not (equal actual-token token)))
2ce88322 89 (error 'unexpected-token :expected token :actual actual-token)))
8164982f
IA
90 reader)
91
92(defun parse-string (token)
e91c55c2 93 ;; read-from-string doesn't handle \n
8164982f
IA
94 (if (and (> (length token) 1)
95 (scan *string-re* token))
e91c55c2
VS
96 (let ((input (subseq token 1 (1- (length token)))))
97 (with-output-to-string (out)
98 (with-input-from-string (in input)
99 (loop while (peek-char nil in nil)
100 do (let ((char (read-char in)))
101 (if (eql char #\\ )
102 (let ((char (read-char in)))
103 (if (eql char #\n)
104 (terpri out)
105 (princ char out)))
106 (princ char out)))))))
2ce88322 107 (error 'eof :context "string")))
8164982f 108
8164982f 109(defun expand-quote (reader)
2ce88322
IA
110 (let ((quote-sym (make-mal-symbol (switch ((next reader) :test #'string=)
111 ("'" "quote")
112 ("`" "quasiquote")
113 ("~" "unquote")
114 ("~@" "splice-unquote")
115 ("@" "deref")))))
116 (make-mal-list (list quote-sym (read-form reader)))))
8164982f 117
448f74e0
IA
118(defun read-mal-sequence (reader &optional (type 'list) &aux forms)
119 (let ((context (string-downcase (symbol-name type)))
120 (delimiter (if (equal type 'list) ")" "]")))
121
122 ;; Consume the opening brace
123 (consume reader)
124
125 (setf forms (loop
126 until (string= (peek reader) delimiter)
127 collect (read-form-or-eof reader context)))
128
8164982f
IA
129 ;; Consume the closing brace
130 (consume reader)
448f74e0
IA
131
132 (apply type forms)))
8164982f
IA
133
134(defun read-hash-map (reader)
448f74e0
IA
135 (let ((map (make-mal-value-hash-table))
136 (context "hash-map"))
137
138 ;; Consume the open brace
139 (consume reader)
140
8164982f 141 (loop
448f74e0
IA
142 until (string= (peek reader) "}")
143 do (setf (hashref (read-form-or-eof reader context) map)
144 (read-form-or-eof reader context)))
145
8164982f
IA
146 ;; Consume the closing brace
147 (consume reader)
448f74e0
IA
148
149 map))
8164982f
IA
150
151(defun read-atom (reader)
152 (let ((token (next reader)))
448f74e0
IA
153 (cond ((string= token "false") mal-false)
154 ((string= token "true") mal-true)
155 ((string= token "nil") mal-nil)
156 ((char= (char token 0) #\") (make-mal-string (parse-string token)))
157 ((char= (char token 0) #\:) (make-mal-keyword token))
158 ((scan *number-re* token) (make-mal-number (read-from-string token)))
159 (t (make-mal-symbol token)))))
160
161(defun read-form-with-meta (reader)
162 (consume reader)
163
164 (let ((meta (read-form-or-eof reader "object meta"))
165 (value (read-form-or-eof reader "object meta")))
166 (make-mal-list (list (make-mal-symbol "with-meta") value meta))))
8164982f
IA
167
168(defun read-form (reader)
2ce88322
IA
169 (switch ((peek reader) :test #'equal)
170 (nil nil)
448f74e0
IA
171 ("(" (make-mal-list (read-mal-sequence reader 'list)))
172 ("[" (make-mal-vector (read-mal-sequence reader 'vector)))
2ce88322
IA
173 ("{" (make-mal-hash-map (read-hash-map reader)))
174 ("^" (read-form-with-meta reader))
175 ("'" (expand-quote reader))
176 ("`" (expand-quote reader))
177 ("~" (expand-quote reader))
178 ("~@" (expand-quote reader))
179 ("@" (expand-quote reader))
180 (t (read-atom reader))))
8164982f 181
448f74e0
IA
182(defun read-form-or-eof (reader context)
183 (or (read-form reader)
184 (error 'eof :context context)))
185
8164982f
IA
186(defun read-str (string)
187 (read-form (make-token-reader :tokens (tokenize string))))