1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2021-2022 Ludovic Courtès <ludo@gnu.org>
4 ;;; This file is part of GNU Guix.
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19 (define-module (guix read-print)
20 #:use-module (ice-9 control)
21 #:use-module (ice-9 match)
22 #:use-module (ice-9 rdelim)
23 #:use-module (ice-9 vlist)
24 #:use-module (srfi srfi-1)
25 #:use-module (srfi srfi-26)
26 #:use-module (srfi srfi-34)
27 #:use-module (srfi srfi-35)
28 #:use-module (guix i18n)
29 #:use-module ((guix diagnostics)
30 #:select (formatted-message
31 &fix-hint &error-location
33 #:export (pretty-print-with-comments
34 pretty-print-with-comments/splice
36 read-with-comments/sequence
44 canonicalize-vertical-space
53 canonicalize-comment))
57 ;;; This module provides a comment-preserving reader and a comment-preserving
58 ;;; pretty-printer smarter than (ice-9 pretty-print).
64 ;;; Comment-preserving reader.
68 ;; The parent class for "blanks".
69 (make-record-type '<blank> '()
71 (format port "#<blank ~a>"
72 (number->string (object-address obj) 16)))
75 (define blank? (record-predicate <blank>))
77 (define <vertical-space>
78 (make-record-type '<vertical-space> '(height)
82 (define vertical-space? (record-predicate <vertical-space>))
83 (define vertical-space (record-type-constructor <vertical-space>))
84 (define vertical-space-height (record-accessor <vertical-space> 'height))
86 (define canonicalize-vertical-space
87 (let ((unit (vertical-space 1)))
89 "Return a vertical space corresponding to a single blank line."
93 (make-record-type '<page-break> '()
97 (define page-break? (record-predicate <page-break>))
99 (let ((break ((record-type-constructor <page-break>))))
106 (make-record-type '<comment> '(str margin?)
110 (define comment? (record-predicate <comment>))
111 (define string->comment (record-type-constructor <comment>))
112 (define comment->string (record-accessor <comment> 'str))
113 (define comment-margin? (record-accessor <comment> 'margin?))
115 (define* (comment str #:optional margin?)
116 "Return a new comment made from STR. When MARGIN? is true, return a margin
117 comment; otherwise return a line comment. STR must start with a semicolon and
118 end with newline, otherwise an error is raised."
119 (when (or (string-null? str)
120 (not (eqv? #\; (string-ref str 0)))
121 (not (string-suffix? "\n" str)))
123 (&message (message "invalid comment string")))))
124 (string->comment str margin?))
126 (define char-set:whitespace-sans-page-break
127 ;; White space, excluding #\page.
128 (char-set-difference char-set:whitespace (char-set #\page)))
131 "Return true if CHR is white space, except for page breaks."
132 (char-set-contains? char-set:whitespace-sans-page-break chr))
134 (define (read-vertical-space port)
135 "Read from PORT until a non-vertical-space character is met, and return a
136 single <vertical-space> record."
137 (let loop ((height 1))
138 (match (read-char port)
139 (#\newline (loop (+ 1 height)))
140 ((? eof-object?) (vertical-space height))
141 ((? space?) (loop height))
142 (chr (unread-char chr port) (vertical-space height)))))
144 (define (read-until-end-of-line port)
145 "Read white space from PORT until the end of line, included."
147 (match (read-char port)
151 (chr (unread-char chr port)))))
153 (define* (read-with-comments port #:key (blank-line? #t))
154 "Like 'read', but include <blank> objects when they're encountered. When
155 BLANK-LINE? is true, assume PORT is at the beginning of a new line."
156 ;; Note: Instead of implementing this functionality in 'read' proper, which
157 ;; is the best approach long-term, this code is a layer on top of 'read',
158 ;; such that we don't have to rely on a specific Guile version.
159 (define dot (list 'dot))
160 (define (dot? x) (eq? x dot))
162 (define (missing-closing-paren-error)
163 (raise (make-compound-condition
164 (formatted-message (G_ "unexpected end of file"))
167 (location (match (port-filename port)
171 (port-column port))))))
173 (hint (G_ "Did you forget a closing parenthesis?")))))))
175 (define (reverse/dot lst)
176 ;; Reverse LST and make it an improper list if it contains DOT.
177 (let loop ((result '())
182 (let ((dotted (reverse rest)))
183 (set-cdr! (last-pair dotted) (car result))
185 ((x . rest) (loop (cons x result) rest)))))
187 (let loop ((blank-line? blank-line?)
188 (return (const 'unbalanced)))
189 (match (read-char port)
193 (cond ((eqv? chr #\newline)
195 (read-vertical-space port)
198 ;; Assume that a page break is on a line of its own and read
199 ;; subsequent white space and newline.
200 (read-until-end-of-line port)
202 ((char-set-contains? char-set:whitespace chr)
203 (loop blank-line? return))
204 ((memv chr '(#\( #\[))
206 (let liip ((lst '()))
209 (((? blank?) . _) #t)
212 (return (reverse/dot lst)))))
213 (if (eof-object? item)
214 (missing-closing-paren-error)
215 (liip (cons item lst))))))
216 ((memv chr '(#\) #\]))
219 (list 'quote (loop #f return)))
221 (list 'quasiquote (loop #f return)))
223 (list (match (peek-char port)
231 (unread-char chr port)
232 (string->comment (read-line port 'concat)
235 (unread-char chr port)
238 (if (eq? chr #\.) dot token))
241 (define (read-with-comments/sequence port)
242 "Read from PORT until the end-of-file is reached and return the list of
243 expressions and blanks that were read."
246 (match (read-with-comments port #:blank-line? blank-line?)
250 (loop (cons blank lst) #t))
252 (loop (cons exp lst) #f)))))
256 ;;; Comment-preserving pretty-printer.
259 (define-syntax vhashq
260 (syntax-rules (quote)
262 ((_ (key (quote (lst ...))) rest ...)
263 (vhash-consq key '(lst ...) (vhashq rest ...)))
264 ((_ (key value) rest ...)
265 (vhash-consq key '((() . value)) (vhashq rest ...)))))
267 (define %special-forms
268 ;; Forms that are indented specially. The number is meant to be understood
269 ;; like Emacs' 'scheme-indent-function' symbol property. When given an
270 ;; alist instead of a number, the alist gives "context" in which the symbol
271 ;; is a special form; for instance, context (modify-phases) means that the
272 ;; symbol must appear within a (modify-phases ...) expression.
286 ('define-syntax-rule 2)
288 ('define-gexp-compiler 2)
289 ('define-record-type 2)
290 ('define-record-type* 4)
302 ('add-after '(((modify-phases) . 3)))
303 ('add-before '(((modify-phases) . 3)))
304 ('replace '(((modify-phases) . 2))) ;different from 'modify-inputs'
306 ('substitute-keyword-arguments 2)
307 ('call-with-input-file 2)
308 ('call-with-output-file 2)
309 ('with-output-to-file 2)
310 ('with-input-from-file 2)
311 ('with-directory-excursion 2)
313 ;; (gnu system) and (gnu services).
314 ('operating-system 1)
315 ('bootloader-configuration 1)
325 ('home-environment 1)))
327 (define %newline-forms
328 ;; List heads that must be followed by a newline. The second argument is
329 ;; the context in which they must appear. This is similar to a special form
330 ;; of 1, except that indent is 1 instead of 2 columns.
332 ('arguments '(package))
333 ('sha256 '(origin source package))
334 ('base32 '(sha256 origin))
335 ('git-reference '(uri origin source))
336 ('search-paths '(package))
337 ('native-search-paths '(package))
338 ('search-path-specification '())
340 ('services '(operating-system))
341 ('set-xorg-configuration '())
342 ('services '(home-environment))
343 ('home-bash-configuration '(service))))
345 (define (prefix? candidate lst)
346 "Return true if CANDIDATE is a prefix of LST."
347 (let loop ((candidate candidate)
355 (and (equal? head1 head2)
356 (loop rest1 rest2))))))))
358 (define (special-form-lead symbol context)
359 "If SYMBOL is a special form in the given CONTEXT, return its number of
360 arguments; otherwise return #f. CONTEXT is a stack of symbols lexically
362 (match (vhash-assq symbol %special-forms)
367 (and (prefix? prefix context) (- level 1))))
370 (define (newline-form? symbol context)
371 "Return true if parenthesized expressions starting with SYMBOL must be
372 followed by a newline."
373 (let ((matches (vhash-foldq* cons '() symbol %newline-forms)))
374 (find (cut prefix? <> context)
377 (define (escaped-string str)
378 "Return STR with backslashes and double quotes escaped. Everything else, in
379 particular newlines, is left as is."
382 ,@(string-fold-right (lambda (chr lst)
384 (#\" (cons* #\\ #\" lst))
385 (#\\ (cons* #\\ #\\ lst))
391 (define %natural-whitespace-string-forms
392 ;; When a string has one of these forms as its parent, only double quotes
393 ;; and backslashes are escaped; newlines, tabs, etc. are left as-is.
394 '(synopsis description G_ N_))
396 (define (printed-string str context)
397 "Return the read syntax for STR depending on CONTEXT."
400 (object->string str))
402 (if (memq head %natural-whitespace-string-forms)
404 (object->string str)))))
406 (define (string-width str)
407 "Return the \"width\" of STR--i.e., the width of the longest line of STR."
408 (apply max (map string-length (string-split str #\newline))))
410 (define (canonicalize-comment comment indent)
411 "Canonicalize COMMENT, which is to be printed at INDENT, ensuring it has the
412 \"right\" number of leading semicolons."
414 comment ;leave top-level comments unchanged
415 (let ((line (string-trim-both
416 (string-trim (comment->string comment) (char-set #\;)))))
417 (string->comment (string-append
418 (if (comment-margin? comment)
420 (if (string-null? line)
421 ";;" ;no trailing space
424 (comment-margin? comment)))))
427 (char-set-complement (char-set #\newline)))
429 (define (print-multi-line-comment str indent port)
430 "Print to PORT STR as a multi-line comment, with INDENT spaces preceding
431 each line except the first one (they're assumed to be already there)."
433 ;; While 'read-with-comments' only returns one-line comments, user-provided
434 ;; comments might span multiple lines, which is why this is necessary.
435 (let loop ((lst (string-tokenize str %not-newline)))
444 (display (make-string indent #\space) port)
447 (define %integer-forms
448 ;; Forms that take an integer as their argument, where said integer should
449 ;; be printed in base other than decimal base.
450 (letrec-syntax ((vhashq (syntax-rules ()
452 ((_ (key value) rest ...)
453 (vhash-consq key value (vhashq rest ...))))))
464 (define (integer->string integer context)
465 "Render INTEGER as a string using a base suitable based on CONTEXT."
466 (define (form-base form)
467 (match (vhash-assq form %integer-forms)
471 (define (octal? form)
472 (= 8 (form-base form)))
477 (match (form-base head)
479 (16 (if (any octal? tail) 8 16))
483 (string-append (match base
487 (number->string integer base)))
489 (define* (pretty-print-with-comments port obj
492 (lambda (comment indent) comment))
493 (format-vertical-space identity)
497 "Pretty-print OBJ to PORT, attempting to at most MAX-WIDTH character columns
498 and assuming the current column is INDENT. Comments present in OBJ are
499 included in the output.
501 Lists longer than LONG-LIST are written as one element per line. Comments are
502 passed through FORMAT-COMMENT before being emitted; a useful value for
503 FORMAT-COMMENT is 'canonicalize-comment'. Vertical space is passed through
504 FORMAT-VERTICAL-SPACE; a useful value of 'canonicalize-vertical-space'."
505 (define (list-of-lists? head tail)
506 ;; Return true if HEAD and TAIL denote a list of lists--e.g., a list of
509 ((thing _ ...) ;proper list
510 (and (not (memq thing
511 '(quote quasiquote unquote unquote-splicing)))
515 (let loop ((indent indent)
517 (delimited? #t) ;true if comes after a delimiter
518 (context '()) ;list of "parent" symbols
520 (define (print-sequence context indent column lst delimited?)
522 (> (length lst) long-list))
524 (let print ((lst lst)
526 (delimited? delimited?)
533 ;; Insert a newline if ITEM is itself a list, or if TAIL is long,
534 ;; but only if ITEM is not the first item. Also insert a newline
536 (and (or (pair? item) long?
538 (not (eq? item #:allow-other-keys))))
539 (not first?) (not delimited?)
540 (not (blank? item))))
544 (display (make-string indent #\space) port))
545 (let ((column (if newline? indent column)))
547 (keyword? item) ;keep #:key value next to one another
550 (or newline? delimited?)
554 (define (sequence-would-protrude? indent lst)
555 ;; Return true if elements of LST written at INDENT would protrude
556 ;; beyond MAX-WIDTH. This is implemented as a cheap test with false
557 ;; negatives to avoid actually rendering all of LST.
560 (>= (+ (string-width str) 2 indent) max-width))
562 (>= (+ (string-width (symbol->string symbol)) indent)
565 (>= (+ 2 indent) max-width))
567 (>= (+ 2 indent) max-width))
572 (define (special-form? head)
573 (special-form-lead head context))
576 ((? comment? comment)
577 (if (comment-margin? comment)
580 (display (comment->string (format-comment comment indent))
583 ;; When already at the beginning of a line, for example because
584 ;; COMMENT follows a margin comment, no need to emit a newline.
585 (unless (= column indent)
587 (display (make-string indent #\space) port))
588 (print-multi-line-comment (comment->string
589 (format-comment comment indent))
591 (display (make-string indent #\space) port)
593 ((? vertical-space? space)
594 (unless delimited? (newline port))
595 (let loop ((i (vertical-space-height (format-vertical-space space))))
599 (display (make-string indent #\space) port)
602 (unless delimited? (newline port))
603 (display #\page port)
605 (display (make-string indent #\space) port)
608 (unless delimited? (display " " port))
610 (loop indent (+ column (if delimited? 1 2)) #t context lst))
612 (unless delimited? (display " " port))
614 (loop indent (+ column (if delimited? 1 2)) #t context lst))
616 (unless delimited? (display " " port))
618 (loop indent (+ column (if delimited? 1 2)) #t context lst))
619 (('unquote-splicing lst)
620 (unless delimited? (display " " port))
622 (loop indent (+ column (if delimited? 2 3)) #t context lst))
624 (unless delimited? (display " " port))
626 (loop indent (+ column (if delimited? 2 3)) #t context lst))
628 (unless delimited? (display " " port))
630 (loop indent (+ column (if delimited? 2 3)) #t context obj))
631 (('ungexp-native obj)
632 (unless delimited? (display " " port))
634 (loop indent (+ column (if delimited? 2 3)) #t context obj))
635 (('ungexp-splicing lst)
636 (unless delimited? (display " " port))
638 (loop indent (+ column (if delimited? 3 4)) #t context lst))
639 (('ungexp-native-splicing lst)
640 (unless delimited? (display " " port))
642 (loop indent (+ column (if delimited? 3 4)) #t context lst))
643 (((? special-form? head) arguments ...)
644 ;; Special-case 'let', 'lambda', 'modify-inputs', etc. so the second
645 ;; and following arguments are less indented.
646 (let* ((lead (special-form-lead head context))
647 (context (cons head context))
648 (head (symbol->string head))
649 (total (length arguments)))
650 (unless delimited? (display " " port))
656 ;; Print the first LEAD arguments.
657 (let* ((indent (+ column 2
658 (if delimited? 0 1)))
660 (if (zero? lead) 0 1)
662 (string-length head)))
663 (initial-indent column))
666 (arguments (take arguments (min lead total)))
671 (display (make-string indent #\space) port)
677 (loop initial-indent column
682 ;; Print the remaining arguments.
683 (let ((column (print-sequence
684 context indent new-column
685 (drop arguments (min lead total))
690 (let* ((overflow? (>= column max-width))
691 (column (if overflow?
693 (+ column (if delimited? 1 2))))
694 (newline? (or (newline-form? head context)
695 (list-of-lists? head tail))) ;'let' bindings
696 (context (cons head context)))
700 (display (make-string indent #\space) port))
701 (unless delimited? (display " " port)))
704 (let* ((new-column (loop column column #t context head))
705 (indent (if (or (>= new-column max-width)
707 (sequence-would-protrude?
708 (+ new-column 1) tail)
713 ;; Insert a newline right after HEAD.
715 (display (make-string indent #\space) port))
718 (print-sequence context indent
719 (if newline? indent new-column)
724 (let* ((str (cond ((string? obj)
725 (printed-string obj context))
727 (integer->string obj context))
729 (object->string obj))))
730 (len (string-width str)))
731 (if (and (> (+ column 1 len) max-width)
735 (display (make-string indent #\space) port)
739 (unless delimited? (display " " port))
741 (+ column (if delimited? 0 1) len))))))))
743 (define (object->string* obj indent . args)
744 "Pretty-print OBJ with INDENT columns as the initial indent. ARGS are
745 passed as-is to 'pretty-print-with-comments'."
746 (call-with-output-string
748 (apply pretty-print-with-comments port obj
752 (define* (pretty-print-with-comments/splice port lst
754 "Write to PORT the expressions and blanks listed in LST."
755 (for-each (lambda (exp)
756 (apply pretty-print-with-comments port exp rest)