From 620c89651ae54f8f35c3d0926f8c2c36c3fdd174 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 19 Oct 2009 22:38:34 +0200 Subject: [PATCH] Add support for R6RS/SRFI-30 nested block comments. Suggested by Andreas Rottmann . * libguile/read.c (flush_ws, scm_read_sharp): Add support for R6RS/SRFI-30 block comments. (scm_read_r6rs_block_comment): New function. * test-suite/tests/reader.test (exception:unterminated-block-comment): Adjust to match both block comment styles. ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests. ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New test. * doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS block comments. * doc/ref/srfi-modules.texi (SRFI-30): New node. --- doc/ref/api-evaluation.texi | 15 ++++++++ doc/ref/srfi-modules.texi | 10 +++++- libguile/read.c | 68 +++++++++++++++++++++++++++++++++--- test-suite/tests/reader.test | 32 +++++++++++++++-- 4 files changed, 117 insertions(+), 8 deletions(-) diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 8abd9f9cf..e50a51546 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -230,6 +230,21 @@ Thus a Guile script often starts like this. More details on Guile scripting can be found in the scripting section (@pxref{Guile Scripting}). +@cindex R6RS block comments +@cindex SRFI-30 block comments +Similarly, Guile (starting from version 2.0) supports nested block +comments as specified by R6RS and +@url{http://srfi.schemers.org/srfi-30/srfi-30.html, SRFI-30}: + +@lisp +(+ #| this is a #| nested |# block comment |# 2) +@result{} 3 +@end lisp + +For backward compatibility, this syntax can be overridden with +@code{read-hash-extend} (@pxref{Reader Extensions, +@code{read-hash-extend}}). + There is one special case where the contents of a comment can actually affect the interpretation of code. When a character encoding declaration, such as @code{coding: utf-8} appears in one of the first diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi index 7c107e710..d3183e2d8 100644 --- a/doc/ref/srfi-modules.texi +++ b/doc/ref/srfi-modules.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008 +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009 @c Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @@ -37,6 +37,7 @@ get the relevant SRFI documents from the SRFI home page * SRFI-18:: Multithreading support * SRFI-19:: Time/Date library. * SRFI-26:: Specializing parameters +* SRFI-30:: Nested multi-line block comments * SRFI-31:: A special form `rec' for recursive evaluation * SRFI-34:: Exception handling. * SRFI-35:: Conditions. @@ -2712,6 +2713,13 @@ or similar is typical. @end example @end deffn +@node SRFI-30 +@subsection SRFI-30 - Nested Multi-line Comments +@cindex SRFI-30 + +Starting from version 2.0, Guile's @code{read} supports SRFI-30/R6RS +nested multi-line comments by default, @ref{Block Comments}. + @node SRFI-31 @subsection SRFI-31 - A special form `rec' for recursive evaluation @cindex SRFI-31 diff --git a/libguile/read.c b/libguile/read.c index 07c8d7163..fc56418df 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -181,8 +181,10 @@ static SCM *scm_read_hash_procedures; || ((_chr) == 'd') || ((_chr) == 'l')) /* Read an SCSH block comment. */ -static inline SCM scm_read_scsh_block_comment (int chr, SCM port); -static SCM scm_read_commented_expression (int chr, SCM port); +static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM); +static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM); +static SCM scm_read_commented_expression (scm_t_wchar, SCM); +static SCM scm_get_hash_procedure (int); /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return zero if the whole token fits in BUF, non-zero otherwise. */ @@ -289,6 +291,13 @@ flush_ws (SCM port, const char *eoferr) case ';': scm_read_commented_expression (c, port); break; + case '|': + if (scm_is_false (scm_get_hash_procedure (c))) + { + scm_read_r6rs_block_comment (c, port); + break; + } + /* fall through */ default: scm_ungetc (c, port); return '#'; @@ -313,7 +322,6 @@ flush_ws (SCM port, const char *eoferr) static SCM scm_read_expression (SCM port); static SCM scm_read_sharp (int chr, SCM port); -static SCM scm_get_hash_procedure (int c); static SCM recsexpr (SCM obj, long line, int column, SCM filename); @@ -990,6 +998,45 @@ scm_read_scsh_block_comment (scm_t_wchar chr, SCM port) return SCM_UNSPECIFIED; } +static SCM +scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port) +{ + /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be + nested. So care must be taken. */ + int nesting_level = 1; + int opening_seen = 0, closing_seen = 0; + + while (nesting_level > 0) + { + int c = scm_getc (port); + + if (c == EOF) + scm_i_input_error (__FUNCTION__, port, + "unterminated `#| ... |#' comment", SCM_EOL); + + if (opening_seen) + { + if (c == '|') + nesting_level++; + opening_seen = 0; + } + else if (closing_seen) + { + if (c == '#') + nesting_level--; + closing_seen = 0; + } + else if (c == '|') + closing_seen = 1; + else if (c == '#') + opening_seen = 1; + else + opening_seen = closing_seen = 0; + } + + return SCM_UNSPECIFIED; +} + static SCM scm_read_commented_expression (scm_t_wchar chr, SCM port) { @@ -1173,8 +1220,19 @@ scm_read_sharp (scm_t_wchar chr, SCM port) default: result = scm_read_sharp_extension (chr, port); if (scm_is_eq (result, SCM_UNSPECIFIED)) - scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S", - scm_list_1 (SCM_MAKE_CHAR (chr))); + { + /* To remain compatible with 1.8 and earlier, the following + characters have lower precedence than `read-hash-extend' + characters. */ + switch (chr) + { + case '|': + return scm_read_r6rs_block_comment (chr, port); + default: + scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S", + scm_list_1 (SCM_MAKE_CHAR (chr))); + } + } else return result; } diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 0eb851508..2ee21c158 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -1,6 +1,6 @@ ;;;; reader.test --- Exercise the reader. -*- Scheme -*- ;;;; -;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008 Free Software Foundation, Inc. +;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009 Free Software Foundation, Inc. ;;;; Jim Blandy ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -18,6 +18,7 @@ ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (test-suite reader) + :use-module (srfi srfi-1) :use-module (test-suite lib)) @@ -26,7 +27,7 @@ (define exception:unexpected-rparen (cons 'read-error "unexpected \")\"$")) (define exception:unterminated-block-comment - (cons 'read-error "unterminated `#! ... !#' comment$")) + (cons 'read-error "unterminated `#. \\.\\.\\. .#' comment$")) (define exception:unknown-character-name (cons 'read-error "unknown character name .*$")) (define exception:unknown-sharp-object @@ -83,6 +84,30 @@ (equal? '(+ 2) (read-string "(+ 2 #! a comment\n!#\n) "))) + (pass-if "R6RS/SRFI-30 block comment" + (equal? '(+ 1 2 3) + (read-string "(+ 1 #| this is a\ncomment |# 2 3)"))) + + (pass-if "R6RS/SRFI-30 nested block comment" + (equal? '(a b c) + (read-string "(a b c #| d #| e |# f |#)"))) + + (pass-if "R6RS/SRFI-30 block comment syntax overridden" + ;; To be compatible with 1.8 and earlier, we should be able to override + ;; this syntax. + (let ((rhp read-hash-procedures)) + (dynamic-wind + (lambda () + (read-hash-extend #\| (lambda args 'not))) + (lambda () + (fold (lambda (x y result) + (and result (eq? x y))) + #t + (read-string "(this is #| a comment)") + `(this is not a comment))) + (lambda () + (set! read-hash-procedures rhp))))) + (pass-if "unprintable symbol" ;; The reader tolerates unprintable characters for symbols. (equal? (string->symbol "\001\002\003") @@ -131,6 +156,9 @@ (pass-if-exception "unterminated block comment" exception:unterminated-block-comment (read-string "(+ 1 #! comment\n...")) + (pass-if-exception "R6RS/SRFI-30 unterminated nested block comment" + exception:unterminated-block-comment + (read-string "(foo #| bar #| |#)")) (pass-if-exception "unknown character name" exception:unknown-character-name (read-string "#\\theunknowncharacter")) -- 2.20.1