Commit | Line | Data |
---|---|---|
8164982f | 1 | (defpackage :reader |
d045c9cf | 2 | (:use :common-lisp |
2ce88322 IA |
3 | :types |
4 | :alexandria) | |
0795349b IA |
5 | (:import-from :genhash |
6 | :hashref) | |
8164982f IA |
7 | (:import-from :cl-ppcre |
8 | :create-scanner | |
9 | :do-matches-as-strings | |
10 | :scan) | |
11 | (:import-from :utils | |
12 | :replace-all) | |
13 | (:export :read-str | |
14 | :eof | |
15 | :unexpected-token)) | |
16 | ||
17 | (in-package :reader) | |
18 | ||
19 | ;; Possible errors that can be raised while reading a string | |
20 | (define-condition unexpected-token (error) | |
21 | ((expected :initarg :expected :reader expected-token) | |
22 | (actual :initarg :actual :reader actual-token)) | |
23 | (:report (lambda (condition stream) | |
24 | (format stream | |
25 | "Unexpected token (~a) encountered while reading, expected ~a" | |
26 | (actual-token condition) | |
27 | (expected-token condition)))) | |
28 | (:documentation "Error raised when an unexpected token is encountered while reading.")) | |
29 | ||
30 | (define-condition eof (error) | |
31 | ((context :initarg :context :reader context)) | |
32 | (:report (lambda (condition stream) | |
33 | (format stream | |
448f74e0 | 34 | "EOF encountered while reading '~a'" |
8164982f IA |
35 | (context condition)))) |
36 | (:documentation "Error raised when EOF is encountered while reading.")) | |
37 | ||
38 | (defvar *tokenizer-re* (create-scanner "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)") | |
39 | "Regular expression to tokenize Lisp code") | |
40 | ||
41 | (defvar *number-re* (create-scanner "^(-|\\+)?[\\d]+$") | |
42 | "Regular expression to match a number") | |
43 | ||
44 | (defvar *string-re* (create-scanner "^\"(?:\\\\.|[^\\\\\"])*\"$") | |
45 | "Regular expression to match a string") | |
46 | ||
47 | (defvar *whitespace-chars* | |
48 | '(#\Space #\Newline #\Backspace #\Tab | |
49 | #\Linefeed #\Page #\Return #\Rubout #\,) | |
50 | "Characters to treat as whitespace, these are trimmed in `tokenize'") | |
51 | ||
52 | (defun tokenize (string) | |
53 | "Tokenize given string. | |
54 | ||
55 | This function extracts all tokens from the string using *tokenizer-re* | |
56 | comments are ignored. | |
57 | ||
58 | Implementation notes: The regex scan generates some empty tokens, not really | |
59 | sure why." | |
60 | (let (tokens) | |
61 | (do-matches-as-strings (match *tokenizer-re* string) | |
62 | (let ((token (string-trim *whitespace-chars* match))) | |
63 | (unless (or (zerop (length token)) | |
64 | (char= (char token 0) #\;)) | |
65 | (push token tokens)))) | |
66 | (nreverse tokens))) | |
67 | ||
68 | ;; Reader | |
69 | (defstruct (token-reader) | |
70 | (tokens nil)) | |
71 | ||
72 | (defun peek (reader) | |
73 | "Returns the next token in the reader without advancing the token stream." | |
74 | (car (token-reader-tokens reader))) | |
75 | ||
76 | (defun next (reader) | |
77 | "Returns the next token and advances the token stream." | |
78 | (pop (token-reader-tokens reader))) | |
79 | ||
80 | (defun consume (reader &optional (token nil token-provided-p)) | |
81 | "Consume the next token and advance the token stream. | |
82 | ||
83 | If the optional argument token is provided the token stream is advanced only | |
84 | if token being consumes matches it otherwise and unexpected token error is | |
85 | raised" | |
86 | (let ((actual-token (pop (token-reader-tokens reader)))) | |
87 | (when (and token-provided-p | |
88 | (not (equal actual-token token))) | |
2ce88322 | 89 | (error 'unexpected-token :expected token :actual actual-token))) |
8164982f IA |
90 | reader) |
91 | ||
92 | (defun parse-string (token) | |
e91c55c2 | 93 | ;; read-from-string doesn't handle \n |
8164982f IA |
94 | (if (and (> (length token) 1) |
95 | (scan *string-re* token)) | |
e91c55c2 VS |
96 | (let ((input (subseq token 1 (1- (length token))))) |
97 | (with-output-to-string (out) | |
98 | (with-input-from-string (in input) | |
99 | (loop while (peek-char nil in nil) | |
100 | do (let ((char (read-char in))) | |
101 | (if (eql char #\\ ) | |
102 | (let ((char (read-char in))) | |
103 | (if (eql char #\n) | |
104 | (terpri out) | |
105 | (princ char out))) | |
106 | (princ char out))))))) | |
2ce88322 | 107 | (error 'eof :context "string"))) |
8164982f | 108 | |
8164982f | 109 | (defun expand-quote (reader) |
2ce88322 IA |
110 | (let ((quote-sym (make-mal-symbol (switch ((next reader) :test #'string=) |
111 | ("'" "quote") | |
112 | ("`" "quasiquote") | |
113 | ("~" "unquote") | |
114 | ("~@" "splice-unquote") | |
115 | ("@" "deref"))))) | |
116 | (make-mal-list (list quote-sym (read-form reader))))) | |
8164982f | 117 | |
448f74e0 IA |
118 | (defun read-mal-sequence (reader &optional (type 'list) &aux forms) |
119 | (let ((context (string-downcase (symbol-name type))) | |
120 | (delimiter (if (equal type 'list) ")" "]"))) | |
121 | ||
122 | ;; Consume the opening brace | |
123 | (consume reader) | |
124 | ||
125 | (setf forms (loop | |
126 | until (string= (peek reader) delimiter) | |
127 | collect (read-form-or-eof reader context))) | |
128 | ||
8164982f IA |
129 | ;; Consume the closing brace |
130 | (consume reader) | |
448f74e0 IA |
131 | |
132 | (apply type forms))) | |
8164982f IA |
133 | |
134 | (defun read-hash-map (reader) | |
448f74e0 IA |
135 | (let ((map (make-mal-value-hash-table)) |
136 | (context "hash-map")) | |
137 | ||
138 | ;; Consume the open brace | |
139 | (consume reader) | |
140 | ||
8164982f | 141 | (loop |
448f74e0 IA |
142 | until (string= (peek reader) "}") |
143 | do (setf (hashref (read-form-or-eof reader context) map) | |
144 | (read-form-or-eof reader context))) | |
145 | ||
8164982f IA |
146 | ;; Consume the closing brace |
147 | (consume reader) | |
448f74e0 IA |
148 | |
149 | map)) | |
8164982f IA |
150 | |
151 | (defun read-atom (reader) | |
152 | (let ((token (next reader))) | |
448f74e0 IA |
153 | (cond ((string= token "false") mal-false) |
154 | ((string= token "true") mal-true) | |
155 | ((string= token "nil") mal-nil) | |
156 | ((char= (char token 0) #\") (make-mal-string (parse-string token))) | |
157 | ((char= (char token 0) #\:) (make-mal-keyword token)) | |
158 | ((scan *number-re* token) (make-mal-number (read-from-string token))) | |
159 | (t (make-mal-symbol token))))) | |
160 | ||
161 | (defun read-form-with-meta (reader) | |
162 | (consume reader) | |
163 | ||
164 | (let ((meta (read-form-or-eof reader "object meta")) | |
165 | (value (read-form-or-eof reader "object meta"))) | |
166 | (make-mal-list (list (make-mal-symbol "with-meta") value meta)))) | |
8164982f IA |
167 | |
168 | (defun read-form (reader) | |
2ce88322 IA |
169 | (switch ((peek reader) :test #'equal) |
170 | (nil nil) | |
448f74e0 IA |
171 | ("(" (make-mal-list (read-mal-sequence reader 'list))) |
172 | ("[" (make-mal-vector (read-mal-sequence reader 'vector))) | |
2ce88322 IA |
173 | ("{" (make-mal-hash-map (read-hash-map reader))) |
174 | ("^" (read-form-with-meta reader)) | |
175 | ("'" (expand-quote reader)) | |
176 | ("`" (expand-quote reader)) | |
177 | ("~" (expand-quote reader)) | |
178 | ("~@" (expand-quote reader)) | |
179 | ("@" (expand-quote reader)) | |
180 | (t (read-atom reader)))) | |
8164982f | 181 | |
448f74e0 IA |
182 | (defun read-form-or-eof (reader context) |
183 | (or (read-form reader) | |
184 | (error 'eof :context context))) | |
185 | ||
8164982f IA |
186 | (defun read-str (string) |
187 | (read-form (make-token-reader :tokens (tokenize string)))) |