(texinfo): add a command-spec for @math{}.
[bpt/guile.git] / module / texinfo.scm
CommitLineData
47f3ce52
AW
1;;;; (texinfo) -- parsing of texinfo into SXML
2;;;;
1d14478f 3;;;; Copyright (C) 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
47f3ce52
AW
4;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com>
5;;;; Copyright (C) 2001,2002 Oleg Kiselyov <oleg at pobox dot com>
6;;;;
7;;;; This file is based on SSAX's SSAX.scm.
8;;;;
9;;;; This library is free software; you can redistribute it and/or
10;;;; modify it under the terms of the GNU Lesser General Public
11;;;; License as published by the Free Software Foundation; either
12;;;; version 3 of the License, or (at your option) any later version.
13;;;;
14;;;; This library is distributed in the hope that it will be useful,
15;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17;;;; Lesser General Public License for more details.
18;;;;
19;;;; You should have received a copy of the GNU Lesser General Public
20;;;; License along with this library; if not, write to the Free Software
21;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22\f
23;;; Commentary:
24;;
25;; @subheading Texinfo processing in scheme
26;;
27;; This module parses texinfo into SXML. TeX will always be the
28;; processor of choice for print output, of course. However, although
29;; @code{makeinfo} works well for info, its output in other formats is
30;; not very customizable, and the program is not extensible as a whole.
31;; This module aims to provide an extensible framework for texinfo
32;; processing that integrates texinfo into the constellation of SXML
33;; processing tools.
34;;
35;; @subheading Notes on the SXML vocabulary
36;;
37;; Consider the following texinfo fragment:
38;;
39;;@example
40;; @@deffn Primitive set-car! pair value
41;; This function...
42;; @@end deffn
43;;@end example
44;;
45;; Logically, the category (Primitive), name (set-car!), and arguments
46;; (pair value) are ``attributes'' of the deffn, with the description as
47;; the content. However, texinfo allows for @@-commands within the
48;; arguments to an environment, like @code{@@deffn}, which means that
49;; texinfo ``attributes'' are PCDATA. XML attributes, on the other hand,
50;; are CDATA. For this reason, ``attributes'' of texinfo @@-commands are
51;; called ``arguments'', and are grouped under the special element, `%'.
52;;
53;; Because `%' is not a valid NCName, stexinfo is a superset of SXML. In
54;; the interests of interoperability, this module provides a conversion
55;; function to replace the `%' with `texinfo-arguments'.
56;;
57;;; Code:
58
59;; Comparison to xml output of texinfo (which is rather undocumented):
60;; Doesn't conform to texinfo dtd
61;; No DTD at all, in fact :-/
62;; Actually outputs valid xml, after transforming %
63;; Slower (although with caching the SXML that problem can go away)
64;; Doesn't parse menus (although menus are shite)
65;; Args go in a dedicated element, FBOFW
66;; Definitions are handled a lot better
67;; Does parse comments
68;; Outputs only significant line breaks (a biggie!)
69;; Nodes are treated as anchors, rather than content organizers (a biggie)
70;; (more book-like, less info-like)
71
72;; TODO
73;; Integration: help, indexing, plain text
74
75(define-module (texinfo)
76 #:use-module (sxml simple)
77 #:use-module (sxml transform)
78 #:use-module (sxml ssax input-parse)
79 #:use-module (srfi srfi-1)
0c65f52c 80 #:use-module (srfi srfi-11)
47f3ce52
AW
81 #:use-module (srfi srfi-13)
82 #:export (call-with-file-and-dir
83 texi-command-specs
84 texi-command-depth
85 texi-fragment->stexi
86 texi->stexi
87 stexi->sxml))
88
89;; Some utilities
90
;; Signal a parse failure by throwing to the `parser-error' key.  PORT
;; (or #f when no port is at hand), MESSAGE and any further values in
;; REST become the arguments of the throw, in that order.
(define (parser-error port message . rest)
  (apply throw (cons* 'parser-error port message rest)))
47f3ce52
AW
93
(define (call-with-file-and-dir filename proc)
  "Open @var{filename} for input and apply the one-argument procedure
@var{proc} to the resulting port.  While @var{proc} runs, the current
directory is @code{(dirname @var{filename})}, so that files which the
document includes by relative path name can be found.  The directory
that was current before the call is restored whenever the dynamic
extent of @var{proc} is left."
  ;; Remember where we started so the exit thunk can go back there.
  (define saved-dir (getcwd))
  (define (enter-file-dir)
    (chdir (dirname filename)))
  (define (read-file)
    ;; We are inside FILENAME's directory here, hence the basename.
    (call-with-input-file (basename filename) proc))
  (define (leave-file-dir)
    (chdir saved-dir))
  (dynamic-wind enter-file-dir read-file leave-file-dir))
106
47f3ce52
AW
107;;========================================================================
108;; Reflection on the XML vocabulary
109
;; The table that drives the parser: one entry per known @-command.
;; The string below documents the format; it is hidden from the reader
;; with a datum comment because a variable definition cannot carry a
;; docstring.
(define texi-command-specs
  #;
"A list of (@var{name} @var{content-model} . @var{args})

@table @var
@item name
The name of an @@-command, as a symbol.

@item content-model
A symbol indicating the syntactic type of the @@-command:
@table @code
@item EMPTY-COMMAND
No content, and no @code{@@end} is coming
@item EOL-ARGS
Unparsed arguments until end of line
@item EOL-TEXT
Parsed arguments until end of line
@item INLINE-ARGS
Unparsed arguments ending with @code{#\\@}}
@item INLINE-TEXT
Parsed arguments ending with @code{#\\@}}
@item INLINE-TEXT-ARGS
Parsed arguments ending with @code{#\\@}}
@item ENVIRON
The tag is an environment tag, expect @code{@@end foo}.
@item TABLE-ENVIRON
Like ENVIRON, but with special parsing rules for its arguments.
@item FRAGMENT
For @code{*fragment*}, the command used for parsing fragments of
texinfo documents.
@end table

@code{INLINE-TEXT} commands will receive their arguments within their
bodies, whereas the @code{-ARGS} commands will receive them in their
attribute list.

@code{EOL-TEXT} receives its arguments in its body.

@code{ENVIRON} commands have both: parsed arguments until the end of
line, received through their attribute list, and parsed text until the
@code{@@end}, received in their bodies.

@code{EOL-TEXT-ARGS} receives its arguments in its attribute list, as in
@code{ENVIRON}.

In addition, @code{ALIAS} can alias one command to another. The alias
will never be seen in parsed stexinfo.

There are four @@-commands that are treated specially. @code{@@include}
is a low-level token that will not be seen by higher-level parsers, so
it has no content-model. @code{@@para} is the paragraph command, which
is only implicit in the texinfo source. @code{@@item} has special
syntax, as noted above, and @code{@@entry} is how this parser treats
@code{@@item} commands within @code{@@table}, @code{@@ftable}, and
@code{@@vtable}.

Also, indexing commands (@code{@@cindex}, etc.) are treated specially.
Their arguments are parsed, but they are needed before entering the
element so that an anchor can be inserted into the text before the index
entry.

@item args
Named arguments to the command, in the same format as the formals for a
lambda. Only present for @code{INLINE-ARGS}, @code{EOL-ARGS},
@code{INLINE-TEXT-ARGS}, @code{ENVIRON}, @code{TABLE-ENVIRON} commands.
@end table"
  '(;; Special commands
    (include #f) ;; this is a low-level token
    (para PARAGRAPH)
    (item ITEM)
    (entry ENTRY . heading)
    (noindent EMPTY-COMMAND)
    (*fragment* FRAGMENT)

    ;; Inline text commands
    (*braces* INLINE-TEXT) ;; FIXME: make me irrelevant
    (bold INLINE-TEXT)
    (sample INLINE-TEXT)
    (samp INLINE-TEXT)
    (code INLINE-TEXT)
    (math INLINE-TEXT)
    (kbd INLINE-TEXT)
    (key INLINE-TEXT)
    (var INLINE-TEXT)
    (env INLINE-TEXT)
    (file INLINE-TEXT)
    (command INLINE-TEXT)
    (option INLINE-TEXT)
    (dfn INLINE-TEXT)
    (cite INLINE-TEXT)
    (acro INLINE-TEXT)
    (email INLINE-TEXT)
    (emph INLINE-TEXT)
    (strong INLINE-TEXT)
    (sc INLINE-TEXT)
    (titlefont INLINE-TEXT)
    (asis INLINE-TEXT)
    (b INLINE-TEXT)
    (i INLINE-TEXT)
    (r INLINE-TEXT)
    (sansserif INLINE-TEXT)
    (slanted INLINE-TEXT)
    (t INLINE-TEXT)

    ;; Inline args commands
    (value INLINE-ARGS . (key))
    (ref INLINE-ARGS . (node #:opt name section info-file manual))
    (xref INLINE-ARGS . (node #:opt name section info-file manual))
    (pxref INLINE-ARGS . (node #:opt name section info-file manual))
    (url ALIAS . uref)
    (uref INLINE-ARGS . (url #:opt title replacement))
    (anchor INLINE-ARGS . (name))
    (dots INLINE-ARGS . ())
    (result INLINE-ARGS . ())
    (bullet INLINE-ARGS . ())
    (copyright INLINE-ARGS . ())
    (tie INLINE-ARGS . ())
    (image INLINE-ARGS . (file #:opt width height alt-text extension))

    ;; Inline parsed args commands
    (acronym INLINE-TEXT-ARGS . (acronym #:opt meaning))

    ;; EOL args elements
    (node EOL-ARGS . (name #:opt next previous up))
    (c EOL-ARGS . all)
    (comment EOL-ARGS . all)
    (setchapternewpage EOL-ARGS . all)
    (sp EOL-ARGS . all)
    (page EOL-ARGS . ())
    (vskip EOL-ARGS . all)
    (syncodeindex EOL-ARGS . all)
    (contents EOL-ARGS . ())
    (shortcontents EOL-ARGS . ())
    (summarycontents EOL-ARGS . ())
    (insertcopying EOL-ARGS . ())
    (dircategory EOL-ARGS . (category))
    (top EOL-ARGS . (title))
    (printindex EOL-ARGS . (type))
    (paragraphindent EOL-ARGS . (indent))

    ;; EOL text commands
    (*ENVIRON-ARGS* EOL-TEXT)
    (itemx EOL-TEXT)
    (set EOL-TEXT)
    (center EOL-TEXT)
    (title EOL-TEXT)
    (subtitle EOL-TEXT)
    (author EOL-TEXT)
    (chapter EOL-TEXT)
    (section EOL-TEXT)
    (appendix EOL-TEXT)
    ;; @appendixsection is the long spelling of @appendixsec; it has a
    ;; depth entry in command-depths, so it needs a spec here as well.
    (appendixsection ALIAS . appendixsec)
    (appendixsec EOL-TEXT)
    (unnumbered EOL-TEXT)
    (unnumberedsec EOL-TEXT)
    (subsection EOL-TEXT)
    (subsubsection EOL-TEXT)
    (appendixsubsec EOL-TEXT)
    (appendixsubsubsec EOL-TEXT)
    (unnumberedsubsec EOL-TEXT)
    (unnumberedsubsubsec EOL-TEXT)
    (chapheading EOL-TEXT)
    (majorheading EOL-TEXT)
    (heading EOL-TEXT)
    (subheading EOL-TEXT)
    (subsubheading EOL-TEXT)

    (deftpx EOL-TEXT-ARGS . (category name . attributes))
    (defcvx EOL-TEXT-ARGS . (category class name))
    (defivarx EOL-TEXT-ARGS . (class name))
    (deftypeivarx EOL-TEXT-ARGS . (class data-type name))
    (defopx EOL-TEXT-ARGS . (category class name . arguments))
    (deftypeopx EOL-TEXT-ARGS . (category class data-type name . arguments))
    (defmethodx EOL-TEXT-ARGS . (class name . arguments))
    (deftypemethodx EOL-TEXT-ARGS . (class data-type name . arguments))
    (defoptx EOL-TEXT-ARGS . (name))
    (defvrx EOL-TEXT-ARGS . (category name))
    (defvarx EOL-TEXT-ARGS . (name))
    (deftypevrx EOL-TEXT-ARGS . (category data-type name))
    (deftypevarx EOL-TEXT-ARGS . (data-type name))
    (deffnx EOL-TEXT-ARGS . (category name . arguments))
    (deftypefnx EOL-TEXT-ARGS . (category data-type name . arguments))
    (defspecx EOL-TEXT-ARGS . (name . arguments))
    (defmacx EOL-TEXT-ARGS . (name . arguments))
    (defunx EOL-TEXT-ARGS . (name . arguments))
    (deftypefunx EOL-TEXT-ARGS . (data-type name . arguments))

    ;; Indexing commands
    (cindex INDEX . entry)
    (findex INDEX . entry)
    (vindex INDEX . entry)
    (kindex INDEX . entry)
    (pindex INDEX . entry)
    (tindex INDEX . entry)

    ;; Environment commands (those that need @end)
    (texinfo ENVIRON . title)
    (ignore ENVIRON . ())
    (ifinfo ENVIRON . ())
    (iftex ENVIRON . ())
    (ifhtml ENVIRON . ())
    (ifxml ENVIRON . ())
    (ifplaintext ENVIRON . ())
    (ifnotinfo ENVIRON . ())
    (ifnottex ENVIRON . ())
    (ifnothtml ENVIRON . ())
    (ifnotxml ENVIRON . ())
    (ifnotplaintext ENVIRON . ())
    (titlepage ENVIRON . ())
    (menu ENVIRON . ())
    (direntry ENVIRON . ())
    (copying ENVIRON . ())
    (example ENVIRON . ())
    (smallexample ENVIRON . ())
    (display ENVIRON . ())
    (smalldisplay ENVIRON . ())
    (verbatim ENVIRON . ())
    (format ENVIRON . ())
    (smallformat ENVIRON . ())
    (lisp ENVIRON . ())
    (smalllisp ENVIRON . ())
    (cartouche ENVIRON . ())
    (quotation ENVIRON . ())

    (deftp ENVIRON . (category name . attributes))
    (defcv ENVIRON . (category class name))
    (defivar ENVIRON . (class name))
    (deftypeivar ENVIRON . (class data-type name))
    (defop ENVIRON . (category class name . arguments))
    (deftypeop ENVIRON . (category class data-type name . arguments))
    (defmethod ENVIRON . (class name . arguments))
    (deftypemethod ENVIRON . (class data-type name . arguments))
    (defopt ENVIRON . (name))
    (defvr ENVIRON . (category name))
    (defvar ENVIRON . (name))
    (deftypevr ENVIRON . (category data-type name))
    (deftypevar ENVIRON . (data-type name))
    (deffn ENVIRON . (category name . arguments))
    (deftypefn ENVIRON . (category data-type name . arguments))
    (defspec ENVIRON . (name . arguments))
    (defmac ENVIRON . (name . arguments))
    (defun ENVIRON . (name . arguments))
    (deftypefun ENVIRON . (data-type name . arguments))

    (table TABLE-ENVIRON . (formatter))
    (itemize TABLE-ENVIRON . (formatter))
    (enumerate TABLE-ENVIRON . (start))
    (ftable TABLE-ENVIRON . (formatter))
    (vtable TABLE-ENVIRON . (formatter))))
359
;; Association list from sectioning command to the depth at which it
;; nests; consulted by texi-command-depth.
(define command-depths
  '((chapter . 1)
    (section . 2)
    (subsection . 3)
    (subsubsection . 4)
    (top . 0)
    (unnumbered . 1)
    (unnumberedsec . 2)
    (unnumberedsubsec . 3)
    (unnumberedsubsubsec . 4)
    (appendix . 1)
    (appendixsec . 2)
    (appendixsection . 2)
    (appendixsubsec . 3)
    (appendixsubsubsec . 4)))

(define (texi-command-depth command max-depth)
  "Return the nesting level of the texinfo sectioning command
@var{command}.  The result is @code{#f} when @var{command} is not a
sectioning command, or when its level is deeper than @var{max-depth}.

Examples:
@example
 (texi-command-depth 'chapter 4)        @result{} 1
 (texi-command-depth 'top 4)            @result{} 0
 (texi-command-depth 'subsection 4)     @result{} 3
 (texi-command-depth 'appendixsubsec 4) @result{} 3
 (texi-command-depth 'subsection 2)     @result{} #f
@end example"
  (let ((level (assq-ref command-depths command)))
    (cond
     ((not level) #f)               ; not a sectioning command
     ((> level max-depth) #f)       ; nests too deep for the caller
     (else level))))
380
;; True (the matching tail of the list below) when whitespace inside
;; COMMAND must be kept as written.  The % entry stands for a command's
;; arguments element.
(define (space-significant? command)
  (let ((ws-preserving '(example smallexample verbatim lisp smalllisp menu %)))
    (memq command ws-preserving)))
385
;; Like a DTD for texinfo: look COMMAND up in texi-command-specs and
;; return its entry.  ALIAS entries are followed until a real spec is
;; reached, so the returned entry may be named differently from
;; COMMAND.  An unknown name raises a parser-error.
(define (command-spec command)
  (let resolve ((name command))
    (let ((entry (assq name texi-command-specs)))
      (if entry
          (if (eq? (cadr entry) 'ALIAS)
              ;; The tail of an ALIAS entry is the target command.
              (resolve (cddr entry))
              entry)
          (parser-error #f "Unknown command" name)))))
47f3ce52
AW
396
;; #t when CONTENT is one of the brace-delimited (inline) content
;; models, #f otherwise.
(define (inline-content? content)
  (if (memq content '(INLINE-TEXT INLINE-ARGS INLINE-TEXT-ARGS))
      #t
      #f))
47f3ce52
AW
401
402
403;;========================================================================
404;; Lower-level parsers and scanners
405;;
406;; They deal with primitive lexical units (Names, whitespaces, tags) and
407;; with pieces of more generic productions. Most of these parsers must
408;; be called in appropriate context. For example, complete-start-command
409;; must be called only when the @-command start has been detected and
410;; its name token has been read.
411
;; #t when STR contains no character other than whitespace.  The empty
;; string therefore counts as whitespace too.
(define (string-whitespace? str)
  (not (string-any (lambda (ch) (not (char-whitespace? ch)))
                   str)))
417
;; Like read-text-line, but allows EOF: a line may be ended by a
;; carriage return, a newline, a CR-LF pair, or the end of input.  The
;; terminator is consumed but is not part of the returned string.  When
;; PORT is already at end of input, the EOF object is returned.
(define read-eof-breaks '(*eof* #\return #\newline))
(define (read-eof-line port)
  (let ((ahead (peek-char port)))
    (cond
     ((eof-object? ahead) ahead)
     (else
      (let ((text (next-token '() read-eof-breaks "reading a line" port)))
        ;; Consume the terminator (\n, \r or EOF); after a \r, also
        ;; swallow the \n of a CR-LF pair.
        (when (and (eq? (read-char port) #\return)
                   (eq? (peek-char port) #\newline))
          (read-char port))
        text)))))
429
47f3ce52
AW
;; Advance PORT past any run of spaces, tabs, carriage returns and
;; newlines.  The result is whatever skip-while returns.
(define skip-whitespace
  (let ((blanks '(#\space #\tab #\return #\newline)))
    (lambda (port)
      (skip-while blanks port))))
432
;; Advance PORT past any run of spaces and tabs, stopping at line
;; breaks.  The result is whatever skip-while returns.
(define skip-horizontal-whitespace
  (let ((blanks '(#\space #\tab)))
    (lambda (port)
      (skip-while blanks port))))
435
436;; command ::= Letter+
437
;; procedure: read-command PORT
;;
;; Read the name of an @-command -- a run of alphabetic characters --
;; starting from the current position in the PORT and return it as a
;; symbol.  A parser-error is signalled when the next character is not
;; alphabetic, which includes the case of PORT being at end of input.
(define (read-command port)
  (let ((first-char (peek-char port)))
    ;; peek-char yields the EOF object at end of input, and
    ;; char-alphabetic? only accepts characters, so check for a
    ;; character first to report EOF as an ordinary parse error.
    (or (and (char? first-char) (char-alphabetic? first-char))
        (parser-error port "Nonalphabetic @-command char: '" first-char "'")))
  (string->symbol
   (next-token-of
    (lambda (c)
      ;; Keep C while it is a letter; anything else (EOF included)
      ;; ends the name.
      (and (char? c) (char-alphabetic? c) c))
    port)))
454
455;; A token is a primitive lexical unit. It is a record with two fields,
456;; token-head and token-kind.
457;;
458;; Token types:
459;; END The end of a texinfo command. If the command is ended by },
460;; token-head will be #f. Otherwise if the command is ended by
461;; @end COMMAND, token-head will be COMMAND. As a special case,
462;; @bye is the end of a special @texinfo command.
463;; START The start of a texinfo command. The token-head will be a
464;; symbol of the @-command name.
465;; INCLUDE An @include directive. The token-head will be empty -- the
466;; caller is responsible for reading the include file name.
467;; ITEM @item commands have an irregular syntax. They end at the
468;; next @item, or at the end of the environment. For that
469;; read-command-token treats them specially.
470
;; Tokens are plain pairs: the car holds the kind, the cdr the head.
(define make-token cons)
(define (token? obj) (pair? obj))
(define (token-kind token) (car token))
(define (token-head token) (cdr token))
475
;; procedure: read-command-token PORT
;;
;; This procedure starts parsing of a command token. The current
;; position in the stream must be #\@. This procedure scans enough of
;; the input stream to figure out what kind of a command token it is
;; seeing. The procedure returns a token structure describing the token:
;;
;;   STRING   for an escaped character such as @@ or @{; the head is
;;            the character as a one-character string
;;   END      for "@end NAME" (head NAME) and for @bye (head texinfo)
;;   ITEM     for @item
;;   INCLUDE  for @include; the head is #f and the file name is left
;;            unread
;;   START    for every other command; the head is the command name
;;
;; A parser-error is signalled for an @end that is not followed by a
;; purely alphabetic name, including an @end at the very end of input.

(define (read-command-token port)
  (assert-curr-char '(#\@) "start of the command" port)
  (if (memq (peek-char port) '(#\! #\. #\? #\@ #\\ #\{ #\}))
      ;; @-commands that escape characters
      (make-token 'STRING (string (read-char port)))
      (let ((name (read-command port)))
        (case name
          ((end)
           ;; got an ending tag
           (let* ((rest (read-eof-line port))
                  ;; read-eof-line returns the EOF object when nothing
                  ;; follows @end; treat that as an empty name so it is
                  ;; rejected below instead of breaking string-trim-both.
                  (command (if (string? rest)
                               (string-trim-both rest)
                               "")))
             (or (and (not (string-null? command))
                      (string-every char-alphabetic? command))
                 (parser-error port "malformed @end" command))
             (make-token 'END (string->symbol command))))
          ((bye)
           ;; the end of the top
           (make-token 'END 'texinfo))
          ((item)
           (make-token 'ITEM 'item))
          ((include)
           (make-token 'INCLUDE #f))
          (else
           (make-token 'START name))))))
510
511;; procedure+: read-verbatim-body PORT STR-HANDLER SEED
512;;
513;; This procedure must be called after we have read a string
514;; "@verbatim\n" that begins a verbatim section. The current position
515;; must be the first position of the verbatim body. This function reads
516;; _lines_ of the verbatim body and passes them to a STR-HANDLER, a
517;; character data consumer.
518;;
519;; The str-handler is a STR-HANDLER, a procedure STRING1 STRING2 SEED.
520;; The first STRING1 argument to STR-HANDLER never contains a newline.
521;; The second STRING2 argument often will. On the first invocation of the
522;; STR-HANDLER, the seed is the one passed to read-verbatim-body
523;; as the third argument. The result of this first invocation will be
524;; passed as the seed argument to the second invocation of the line
525;; consumer, and so on. The result of the last invocation of the
526;; STR-HANDLER is returned by the read-verbatim-body. Note a
527;; similarity to the fundamental 'fold' iterator.
528;;
529;; Within a verbatim section all characters are taken at their face
530;; value. It ends with "\n@end verbatim(\r)?\n".
531
;; Must be called right after the newline after @verbatim.
;;
;; Reads PORT line by line.  Each line of the body is handed to
;; STR-HANDLER as (STR-HANDLER line "\n" seed), and the result becomes
;; the seed for the next line.  Reading stops after the line that
;; consists of "@end verbatim", optionally followed by a carriage
;; return; the seed accumulated so far is returned, and PORT is left
;; just after the newline that ends that line.
(define (read-verbatim-body port str-handler seed)
  ;; The terminator is documented as "@end verbatim(\r)?\n", so accept
  ;; the CR-LF form as well as the plain one.
  (define (end-of-verbatim? line)
    (or (string=? line "@end verbatim")
        (string=? line "@end verbatim\r")))
  (let loop ((seed seed))
    (let ((fragment (next-token '() '(#\newline)
                                "reading verbatim" port)))
      ;; We're reading the char after the 'fragment', which is
      ;; #\newline.
      (read-char port)
      (if (end-of-verbatim? fragment)
          seed
          (loop (str-handler fragment "\n" seed))))))
543
544;; procedure+: read-arguments PORT STOP-CHAR
545;;
546;; This procedure reads and parses a production ArgumentList.
547;; ArgumentList ::= S* Argument (S* , S* Argument)* S*
548;; Argument ::= ([^@{},])*
549;;
550;; Arguments are the things in braces, i.e @ref{my node} has one
551;; argument, "my node". Most commands taking braces actually don't have
552;; arguments, they process text. For example, in
553;; @emph{@strong{emphasized}}, the emph takes text, because the parse
554;; continues into the braces.
555;;
556;; Any whitespace within Argument is replaced with a single space.
557;; Whitespace around an Argument is trimmed.
558;;
559;; The procedure returns a list of arguments. Afterwards the current
560;; character will be after STOP-CHAR (the final #\} for inline commands).
561
;; Read the text up to STOP-CHAR from PORT, consume STOP-CHAR, and
;; return the comma-separated arguments found in that text as a list.
;; Each argument has surrounding whitespace trimmed; an argument that is
;; empty after trimming is represented by #f.  When the whole text is
;; blank, the result is the empty list.
(define (read-arguments port stop-char)
  (let* ((raw (next-token '() (list stop-char)
                          "arguments of @-command" port))
         (delimiter (read-char port)) ;; eat the delimiter
         (fields (map (lambda (piece)
                        (let ((trimmed (string-trim-both piece)))
                          (and (not (string-null? trimmed))
                               trimmed)))
                      (string-split raw #\,))))
    ;; A lone empty field means that no arguments were given at all.
    (if (equal? fields '(#f))
        '()
        fields)))
572
573;; procedure+: complete-start-command COMMAND PORT
574;;
575;; This procedure is to complete parsing of an @-command. The procedure
576;; must be called after the command token has been read. COMMAND is a
577;; TAG-NAME.
578;;
579;; This procedure returns several values:
580;; COMMAND: a symbol.
581;; ARGUMENTS: command's arguments, as an alist.
582;; CONTENT-MODEL: the content model of the command.
583;;
584;; On exit, the current position in PORT will depend on the CONTENT-MODEL.
585;;
586;; Content model Port position
587;; ============= =============
588;; INLINE-TEXT One character after the #\{.
be52f329 589;; INLINE-TEXT-ARGS One character after the #\{.
47f3ce52
AW
590;; INLINE-ARGS The first character after the #\}.
591;; EOL-TEXT The first non-whitespace character after the command.
592;; ENVIRON, TABLE-ENVIRON, EOL-ARGS, EOL-TEXT-ARGS
593;; The first character on the next line.
594;; PARAGRAPH, ITEM, EMPTY-COMMAND
595;; The first character after the command.
596
;; Pair the argument values ARGS, as returned by read-arguments, with
;; the formals ARG-NAMES from a command spec, and return an alist of
;; (NAME VALUE ...) entries in the order of the formals.
;;
;; ARG-NAMES follows lambda-formals conventions: a proper list of
;; names, optionally containing #:opt, after which arguments may be
;; omitted; a dotted tail that collects all remaining arguments; or a
;; bare symbol that collects everything.  An argument given as #f (an
;; empty field) is skipped if optional.  PORT is only used for error
;; reporting: a parser-error is signalled on too few or too many
;; arguments, or when a required argument is empty.
(define (arguments->attlist port args arg-names)
  (let loop ((in args) (names arg-names) (opt? #f) (out '()))
    (cond
     ((symbol? names) ;; a rest arg
      (reverse (if (null? in) out (acons names in out))))
     ((and (not (null? names)) (eq? (car names) #:opt))
      (loop in (cdr names) #t out))
     ((null? in)
      (if (or (null? names) opt?)
          (reverse out)
          (parser-error port "@-command expected more arguments:"
                        args arg-names names)))
     ((null? names)
      (parser-error port "@-command didn't expect more arguments:" in))
     ((not (car in))
      ;; An empty field: fine for an optional argument, an error for a
      ;; required one.  parser-error takes the port first, like every
      ;; other call in this procedure.
      (if opt?
          (loop (cdr in) (cdr names) opt? out)
          (parser-error port "@-command missing required argument"
                        (car names))))
     (else
      (loop (cdr in) (cdr names) opt?
            (acons (car names)
                   (if (list? (car in)) (car in) (list (car in)))
                   out))))))
47f3ce52
AW
620
;; Parse the rest of the line that follows a TABLE-ENVIRON command
;; (@table, @itemize, @enumerate, ...) and return the command's
;; argument alist:
;;
;;   enumerate  ()  for an empty line, otherwise ((start LINE)) where
;;              LINE is a single letter or a run of digits
;;   itemize    ((bullet X)) where X is the line itself when it is one
;;              character long, (bullet) when the line is empty, and
;;              otherwise a one-element list holding the formatter
;;   others     ((formatter (F))) for a line of the form "@F", where F
;;              names a command with an inline content model
;;
;; Anything else raises a parser-error.
(define (parse-table-args command port)
  (let* ((spec-line (string-trim-both (read-text-line port)))
         (len (string-length spec-line))
         (empty? (zero? len))
         (single-char? (= len 1)))
    ;; The formatter is written as "@name"; return NAME as a symbol
    ;; after checking that it designates an inline command.
    (define (formatter-name)
      (if (and (not empty?)
               (char=? (string-ref spec-line 0) #\@))
          (let ((f (string->symbol (substring spec-line 1))))
            (or (inline-content? (cadr (command-spec f)))
                (parser-error
                 port "@item formatter must be INLINE" f))
            f)
          (parser-error port "Invalid @item formatter" spec-line)))
    (case command
      ((enumerate)
       (cond
        (empty? '())
        ((or (and single-char?
                  (char-alphabetic? (string-ref spec-line 0)))
             (string-every char-numeric? spec-line))
         (list (list 'start spec-line)))
        (else
         (parser-error port "Invalid enumerate start" spec-line))))
      ((itemize)
       (list (list 'bullet
                   (cond
                    (single-char? spec-line)
                    (empty? '(bullet))
                    (else (list (formatter-name)))))))
      (else ;; tables of various varieties
       (list (list 'formatter (list (formatter-name))))))))
651
;; Finish reading the start of the @-command COMMAND, whose name has
;; just been read from PORT.  Returns three values: the command name
;; taken from its spec (so aliases are already resolved), the command's
;; argument alist, and its content model.  How much of PORT is consumed
;; depends on the content model.
(define (complete-start-command command port)
  ;; Inline commands must be followed immediately by an open brace.
  (define (open-brace!)
    (assert-curr-char '(#\{) "Inline element lacks {" port))
  ;; Read unparsed arguments up to STOP-CHAR and name them after FORMALS.
  (define (attlist-until stop-char formals)
    (arguments->attlist port (read-arguments port stop-char) formals))

  (let* ((spec (command-spec command))
         (canonical (car spec))
         (model (cadr spec))
         (formals (cddr spec)))
    (define (finish args)
      (values canonical args model))
    (case model
      ((INLINE-TEXT INLINE-TEXT-ARGS)
       (open-brace!)
       (finish '()))
      ((INLINE-ARGS)
       (open-brace!)
       (finish (attlist-until #\} formals)))
      ((EOL-ARGS)
       (finish (attlist-until #\newline formals)))
      ((ENVIRON ENTRY INDEX)
       (skip-horizontal-whitespace port)
       (finish (parse-environment-args canonical port)))
      ((TABLE-ENVIRON)
       (skip-horizontal-whitespace port)
       (finish (parse-table-args canonical port)))
      ((EOL-TEXT)
       (skip-horizontal-whitespace port)
       (finish '()))
      ((EOL-TEXT-ARGS)
       (skip-horizontal-whitespace port)
       (finish (parse-eol-text-args canonical port)))
      ((PARAGRAPH EMPTY-COMMAND ITEM FRAGMENT)
       (finish '()))
      (else ;; INCLUDE shouldn't get here
       (parser-error port "can't happen")))))
688
689;;-----------------------------------------------------------------------------
690;; Higher-level parsers and scanners
691;;
692;; They parse productions corresponding to entire @-commands.
693
;; Skip everything in PORT up to and including "\n@settitle " and any
;; horizontal whitespace after it, leaving it to the command parser to
;; read the title itself.  Raises a parser-error when there is no
;; @settitle line or when the title is missing; returns #f otherwise.
(define (take-until-settitle port)
  (unless (find-string-from-port? "\n@settitle " port)
    (parser-error port "No \\n@settitle found"))
  (skip-horizontal-whitespace port)
  (if (eq? (peek-char port) #\newline)
      (parser-error port "You have a @settitle, but no title")
      #f))
702
703;; procedure+: read-char-data PORT EXPECT-EOF? PRESERVE-WS? STR-HANDLER SEED
704;;
705;; This procedure is to read the CharData of a texinfo document.
706;;
707;; text ::= (CharData | Command)*
708;;
709;; The procedure reads CharData and stops at @-commands (or
710;; environments). It also stops at an open or close brace.
711;;
712;; port
713;; a PORT to read
714;; expect-eof?
715;; a boolean indicating if EOF is normal, i.e., the character
716;; data may be terminated by the EOF. EOF is normal
717;; while processing the main document.
718;; preserve-ws?
719;; a boolean indicating if we are within a whitespace-preserving
720;; environment. If #t, suppress paragraph detection.
721;; str-handler
722;; a STR-HANDLER, see read-verbatim-body
723;; seed
724;; an argument passed to the first invocation of STR-HANDLER.
725;;
726;; The procedure returns two results: SEED and TOKEN. The SEED is the
727;; result of the last invocation of STR-HANDLER, or the original seed if
728;; STR-HANDLER was never called.
729;;
730;; TOKEN can be either an eof-object (this can happen only if expect-eof?
731;; was #t), or a texinfo token denoting the start or end of a tag.
732
;; read-char-data port expect-eof? preserve-ws? str-handler seed
;;
;; Reads character data from PORT, feeding it to STR-HANDLER, until a
;; token is found.  Returns two values: the resulting seed, and either
;; the token or the EOF object.
(define read-char-data
  (let ((stop-chars-with-eof '(*eof* #\{ #\} #\@ #\newline)))
    ;; Pass STR1 and STR2 on to STR-HANDLER, except when both are empty,
    ;; in which case SEED is returned unchanged.
    (define (feed str-handler str1 str2 seed)
      (cond
       ((and (string-null? str1) (string-null? str2)) seed)
       (else (str-handler str1 str2 seed))))

    (lambda (port expect-eof? preserve-ws? str-handler seed)
      ;; EOF is an acceptable place to stop only if the caller says so.
      (define stop-chars
        (if expect-eof?
            stop-chars-with-eof
            (cdr stop-chars-with-eof)))
      (let scan ((acc seed))
        (let* ((text (next-token '() stop-chars "reading char data" port))
               (stop (peek-char port))) ; one of stop-chars
          ;; Hand TEXT, followed by TRAILING, to the string handler.
          (define (flush trailing)
            (feed str-handler text trailing acc))
          (cond
           ((eof-object? stop) ; only if expect-eof?
            (values (flush "") stop))
           ((memq stop '(#\@ #\{ #\}))
            (values (flush "")
                    (case stop
                      ((#\@) (read-command-token port))
                      ((#\{) (make-token 'START '*braces*))
                      ((#\}) (read-char port) (make-token 'END #f)))))
           ((eq? stop #\newline)
            ;; Always significant, unless directly before an end token.
            (let ((next (peek-next-char port)))
              (cond
               ((eof-object? next)
                (or expect-eof?
                    (parser-error port "EOF while reading char data"))
                (values (flush "") next))
               ((eq? next #\@)
                (let* ((token (read-command-token port))
                       (end? (eq? (token-kind token) 'END)))
                  (values (flush (if end? "" " "))
                          token)))
               ((and (not preserve-ws?) (eq? next #\newline))
                ;; paragraph-separator ::= #\newline #\newline+
                (skip-while '(#\newline) port)
                (skip-horizontal-whitespace port)
                (values (flush "")
                        (make-token 'PARA 'para)))
               (else
                ;; An ordinary line break: keep it inside
                ;; whitespace-preserving environments, otherwise fold
                ;; it into a single space, and carry on reading.
                (scan (flush (if preserve-ws? "\n" " ")))))))))))))
778
; procedure+: assert-token TOKEN KIND NAME
; Check that TOKEN really is a token, that its kind is KIND and that its
; head equals NAME.  Returns #t if so; raises a parser-error otherwise.
(define (assert-token token kind name)
  (if (and (token? token)
           (eq? kind (token-kind token))
           (equal? name (token-head token)))
      #t
      (parser-error #f "Expecting @end for " name ", got " token)))
786
787;;========================================================================
788;; Highest-level parsers: Texinfo to SXML
789
790;; These parsers are a set of syntactic forms to instantiate a SSAX
791;; parser. The user tells what to do with the parsed character and
792;; element data. These latter handlers determine if the parsing follows a
793;; SAX or a DOM model.
794
795;; syntax: make-command-parser fdown fup str-handler
796
797;; Create a parser to parse and process one element, including its
798;; character content or children elements. The parser is typically
799;; applied to the root element of a document.
800
801;; fdown
802;; procedure COMMAND ARGUMENTS EXPECTED-CONTENT SEED
803;;
804;; This procedure is to generate the seed to be passed to handlers
805;; that process the content of the element. This is the function
806;; identified as 'fdown' in the denotational semantics of the XML
807;; parser given in the title comments to (sxml ssax).
808;;
809;; fup
810;; procedure COMMAND ARGUMENTS PARENT-SEED SEED
811;;
812;; This procedure is called when parsing of COMMAND is finished.
813;; The SEED is the result from the last content parser (or from
814;; fdown if the element has the empty content). PARENT-SEED is the
815;; same seed as was passed to fdown. The procedure is to generate a
816;; seed that will be the result of the element parser. This is the
817;; function identified as 'fup' in the denotational semantics of
818;; the XML parser given in the title comments to (sxml ssax).
819;;
820;; str-handler
821;; A STR-HANDLER, see read-verbatim-body
822;;
823
824;; The generated parser is a
825;; procedure COMMAND PORT SEED
826;;
827;; The procedure must be called *after* the command token has been read.
828
;; Read the rest of the current line from PORT and return it, trimmed,
;; as the name of the file to include.  An error is raised when no name
;; is given, including when @include is the last thing in the input.
(define (read-include-file-name port)
  (let* ((line (read-eof-line port))
         ;; read-eof-line returns the EOF object at end of input; treat
         ;; that like an empty line instead of passing it on to
         ;; string-trim-both.
         (x (if (string? line) (string-trim-both line) "")))
    (if (string-null? x)
        (error "no file listed")
        x))) ;; fixme: should expand @value{} references
834
(define (sxml->node-name sxml)
  "Return the text of @var{sxml} as a string usable as a node name, that
is, with every brace, at-sign and comma removed."
  (define (forbidden? ch)
    (memq ch '(#\{ #\} #\@ #\,)))
  (list->string
   (remove forbidden?
           (string->list (sxml->string sxml)))))
843
;; Emit an anchor in front of definition and index commands.  For those
;; commands an `anchor' element named "COMMAND-suffix" is built with
;; FDOWN and FUP on top of PARENT-SEED, and the resulting seed is
;; returned.  The suffix is the `name' argument of a definition command,
;; or the node name derived from the `entry' argument of an indexing
;; command.  For every other command PARENT-SEED is returned unchanged.
(define (index command arguments fdown fup parent-seed)
  (define (emit-anchor suffix)
    (let ((anchor-args
           (list (list 'name
                       (string-append (symbol->string command) "-"
                                      suffix)))))
      (fup 'anchor anchor-args parent-seed
           (fdown 'anchor anchor-args 'INLINE-ARGS '()))))
  (cond
   ((memq command
          '(deftp defcv defivar deftypeivar defop deftypeop defmethod
            deftypemethod defopt defvr defvar deftypevr deftypevar deffn
            deftypefn defspec defmac defun deftypefun))
    (emit-anchor (cadr (assq 'name arguments))))
   ((memq command '(cindex findex vindex kindex pindex tindex))
    (emit-anchor (sxml->node-name (assq 'entry arguments))))
   (else parent-seed)))
860
;; Build a parser from the three handlers FDOWN, FUP and STR-HANDLER.
;; The result is a procedure of COMMAND, PORT and SEED.  It runs the
;; named-let `visit', which processes one command and calls itself for
;; nested commands and for @item siblings.
861(define (make-command-parser fdown fup str-handler)
862 (lambda (command port seed)
863 (let visit ((command command) (port port) (sig-ws? #f) (parent-seed seed))
864 (let*-values (((command arguments expected-content)
865 (complete-start-command command port)))
;; `index' may fold an anchor into the parent seed before the command's
;; own seed is produced by fdown.
;;   eof-closes?   -- EOF is accepted inside texinfo, para, *fragment*,
;;                    and inside EOL-TEXT content.
;;   up            -- finish this command: fup with its arguments and
;;                    the (possibly anchored) parent seed.
;;   new-para      -- open a paragraph on top of seed S via fdown.
;;   make-end-para -- given the seed P that was current when a paragraph
;;                    was opened, return the procedure that closes it.
866 (let* ((parent-seed (index command arguments fdown fup parent-seed))
867 (seed (fdown command arguments expected-content parent-seed))
868 (eof-closes? (or (memq command '(texinfo para *fragment*))
869 (eq? expected-content 'EOL-TEXT)))
870 (sig-ws? (or sig-ws? (space-significant? command)))
871 (up (lambda (s) (fup command arguments parent-seed s)))
872 (new-para (lambda (s) (fdown 'para '() 'PARAGRAPH s)))
873 (make-end-para (lambda (p) (lambda (s) (fup 'para '() p s)))))
874
;; EOL-TEXT content is read as one line and then parsed from a string
;; port over that line; every other content kind is parsed straight
;; from PORT.
875 (define (port-for-content)
876 (if (eq? expected-content 'EOL-TEXT)
877 (call-with-input-string (read-text-line port) identity)
878 port))
879
880 (cond
881 ((memq expected-content '(EMPTY-COMMAND INLINE-ARGS EOL-ARGS INDEX
882 EOL-TEXT-ARGS))
883 ;; empty or finished by complete-start-command
884 (up seed))
885 ((eq? command 'verbatim)
886 (up (read-verbatim-body port str-handler seed)))
887 (else
;; Content loop.  State:
;;   port        -- where content is currently read from
;;   expect-eof? -- #f, a true value, or the symbol `include' while an
;;                  included file is being read
;;   end-para    -- closes the currently open paragraph (`identity' when
;;                  no paragraph is open)
;;   need-break? -- true when the next piece of text must start a new
;;                  paragraph
;;   seed        -- the accumulated result
888 (let loop ((port (port-for-content))
889 (expect-eof? eof-closes?)
890 (end-para identity)
891 (need-break? (and (not sig-ws?)
892 (memq expected-content
893 '(ENVIRON TABLE-ENVIRON
894 ENTRY ITEM FRAGMENT))))
895 (seed seed))
896 (cond
;; A paragraph break is pending and the next character is neither `@',
;; `}' nor EOF: close the open paragraph, if any, and open a new one.
;; NOTE(review): skip-whitespace is defined outside this chunk; it
;; presumably consumes leading whitespace and returns a true value --
;; confirm.
897 ((and need-break? (or sig-ws? (skip-whitespace port))
898 (not (memq (peek-char port) '(#\@ #\})))
899 (not (eof-object? (peek-char port))))
900 ;; Even if we have an @, it might be inline -- check
901 ;; that later
902 (let ((seed (end-para seed)))
903 (loop port expect-eof? (make-end-para seed) #f
904 (new-para seed))))
905 (else
906 (let*-values (((seed token)
907 (read-char-data
908 port expect-eof? sig-ws? str-handler seed)))
909 (cond
;; At EOF: when expect-eof? is `include' or #f, only the open paragraph
;; is closed and the seed is returned without calling `up'; otherwise
;; the whole command is finished.
910 ((eof-object? token)
911 (case expect-eof?
912 ((include #f) (end-para seed))
913 (else (up (end-para seed)))))
914 (else
915 (case (token-kind token)
916 ((STRING)
917 ;; this is only @-commands that escape
918 ;; characters: @}, @@, @{ -- new para if need-break
919 (let ((seed ((if need-break? end-para identity) seed)))
920 (loop port expect-eof?
921 (if need-break? (make-end-para seed) end-para) #f
922 (str-handler (token-head token) ""
923 ((if need-break? new-para identity)
924 seed)))))
925 ((END)
926 ;; The end will only have a name if it's for an
927 ;; environment
;; An item/entry may only be closed by the @end of a command whose
;; content type is TABLE-ENVIRON; an ENVIRON is checked against its own
;; @end with assert-token.  In all accepted cases the open paragraph is
;; closed and the command finished.
928 (cond
929 ((memq command '(item entry))
930 (let ((spec (command-spec (token-head token))))
931 (or (eq? (cadr spec) 'TABLE-ENVIRON)
932 (parser-error
933 port "@item not ended by @end table/enumerate/itemize"
934 token))))
935 ((eq? expected-content 'ENVIRON)
936 (assert-token token 'END command)))
937 (up (end-para seed)))
938 ((ITEM)
;; @item inside enumerate/itemize starts an `item' child, and inside
;; any other TABLE-ENVIRON an `entry' child; the child's result is then
;; passed to this command's `up'.  @item seen while already inside an
;; item/entry finishes the current one and visits a sibling of the same
;; kind; the sibling's result is returned as is.
939 (cond
940 ((memq command '(enumerate itemize))
941 (up (visit 'item port sig-ws? (end-para seed))))
942 ((eq? expected-content 'TABLE-ENVIRON)
943 (up (visit 'entry port sig-ws? (end-para seed))))
944 ((memq command '(item entry))
945 (visit command port sig-ws? (up (end-para seed))))
946 (else
947 (parser-error
948 port "@item must be within a table environment"
949 command))))
950 ((PARA)
951 ;; examine valid paragraphs?
952 (loop port expect-eof? end-para (not sig-ws?) seed))
953 ((INCLUDE)
954 ;; Recurse for include files
;; The included file is run through this same loop with expect-eof? set
;; to `include', so its EOF returns the seed; parsing then resumes on
;; the original port.
955 (let ((seed (call-with-file-and-dir
956 (read-include-file-name port)
957 (lambda (port)
958 (loop port 'include end-para
959 need-break? seed)))))
960 (loop port expect-eof? end-para need-break? seed)))
961 ((START) ; Start of an @-command
;; `head' is rebound to the command name stored in the spec.  For an
;; inline command the open paragraph is kept (or a new one is opened
;; when a break was pending); for a non-inline command the paragraph is
;; closed first and a break is requested after the command.
962 (let* ((head (token-head token))
dc7a9cef
AW
963 (spec (command-spec head))
964 (head (car spec))
965 (type (cadr spec))
47f3ce52
AW
966 (inline? (inline-content? type))
967 (seed ((if (and inline? (not need-break?))
968 identity end-para) seed))
969 (end-para (if inline?
970 (if need-break? (make-end-para seed)
971 end-para)
972 identity))
973 (new-para (if (and inline? need-break?)
974 new-para identity)))
975 (loop port expect-eof? end-para (not inline?)
976 (visit head port sig-ws? (new-para seed)))))
977 (else
978 (parser-error port "Unknown token type" token))))))))))))))))
979
980;; procedure: reverse-collect-str-drop-ws fragments
981;;
982;; Given the list of fragments (some of which are text strings), reverse
983;; the list and concatenate adjacent text strings. We also drop
984;; "insignificant" whitespace, that is, whitespace in front, behind and
985;; between elements. The whitespace that is included in character data
986;; is not affected.
(define (reverse-collect-str-drop-ws fragments)
  ;; Put the pending run of strings PENDING (already in document order)
  ;; in front of ACC.  A whitespace-only run becomes a single space, or
  ;; nothing when ACC is still empty (that is, trailing whitespace).
  (define (flush pending blank? acc)
    (cond
     ((null? pending) acc)
     (blank? (if (null? acc) acc (cons " " acc)))
     (else (cons (apply string-append pending) acc))))
  (if (or (null? fragments)                      ; nothing to do
          (and (string? (car fragments))         ; a single ws-only string
               (null? (cdr fragments))
               (string-whitespace? (car fragments))))
      '()
      ;; FRAGMENTS is in reverse document order, so consing onto ACC and
      ;; PENDING restores document order as we go.
      (let walk ((rest fragments) (acc '()) (pending '()) (blank? #t))
        (cond
         ((null? rest)
          (if blank?
              acc                                ; remove leading ws
              (cons (apply string-append pending) acc)))
         ((string? (car rest))
          (walk (cdr rest)
                acc
                (cons (car rest) pending)
                (and blank? (string-whitespace? (car rest)))))
         (else
          (walk (cdr rest)
                (cons (car rest) (flush pending blank? acc))
                '()
                #t))))))
1021
be52f329
AW
;; Split TEXT, the already-collected content of a brace command (a list
;; of strings and sub-elements in document order), into comma-separated
;; arguments and hand them to arguments->attlist together with the
;; argument names found in (cddr SPEC).  PORT is only passed through to
;; arguments->attlist.
;;
;; Loop state:
;;   in  -- fragments still to look at
;;   cur -- fragments of the argument being built, most recent first
;;   out -- finished arguments, most recent first
(define (parse-inline-text-args port spec text)
  (let lp ((in text) (cur '()) (out '()))
    (cond
     ((null? in)
      (if (and (pair? cur)
               (string? (car cur))
               (string-whitespace? (car cur)))
          ;; Drop whitespace-only strings at the end of the last argument.
          (lp in (cdr cur) out)
          (let ((args (reverse (if (null? cur)
                                   out
                                   (cons (reverse cur) out)))))
            (arguments->attlist port args (cddr spec)))))
     ((pair? (car in))
      ;; A sub-element belongs to the current argument as is.
      (lp (cdr in) (cons (car in) cur) out))
     ((string-index (car in) #\,)
      ;; A string with one or more commas: the text before the first
      ;; comma ends the current argument, the text after the last comma
      ;; starts the next one, and anything in between is a complete
      ;; argument on its own.
      (let* ((parts (string-split (car in) #\,))
             (head (string-trim-right (car parts)))
             (rev-tail (reverse (cdr parts)))
             (last (string-trim (car rev-tail))))
        (lp (cdr in)
            ;; The finished argument is pushed onto OUT below, so the
            ;; next argument starts from scratch.  Carrying CUR over here
            ;; would copy the previous argument's fragments into it.
            (if (string-null? last) '() (list last))
            (append (cdr rev-tail)
                    (cons (reverse (if (string-null? head) cur (cons head cur)))
                          out)))))
     (else
      ;; Plain text: trim leading whitespace only at the start of an
      ;; argument.
      (lp (cdr in)
          (cons (if (null? cur) (string-trim (car in)) (car in)) cur)
          out)))))
1050
47f3ce52
AW
;; Return a command parser (see make-command-parser) whose seeds are
;; reversed lists of child nodes.
(define (make-dom-parser)
  ;; fdown: every element starts collecting its children in a fresh,
  ;; empty seed.
  (define (descend command args content seed)
    '())

  ;; fup: tidy the collected children, look up the command name stored
  ;; in the command spec, and cons the finished element onto the
  ;; parent's seed.
  (define (ascend command args parent-seed seed)
    (let* ((kids (reverse-collect-str-drop-ws seed))
           (spec (command-spec command))
           (name (car spec)))
      (cond
       ((eq? (cadr spec) 'INLINE-TEXT-ARGS)
        ;; The children are the arguments: parse them into a % list.
        (cons (list name (cons '% (parse-inline-text-args #f spec kids)))
              parent-seed))
       ((null? args)
        (acons name kids parent-seed))
       (else
        (acons name (acons '% args kids) parent-seed)))))

  ;; str-handler: push the string(s) onto the seed; STRING2 is skipped
  ;; when empty.
  (define (collect-strings string1 string2 seed)
    (if (string-null? string2)
        (cons string1 seed)
        (cons* string2 string1 seed)))

  (make-command-parser descend ascend collect-strings))
1069
;; Parse the arguments of COMMAND from PORT and return them as an alist
;; keyed by the argument names in the command spec.  A spec whose
;; argument names are #f accepts no arguments; #t collects everything
;; under the key `arguments'; a symbol in tail position of the name list
;; collects all remaining arguments.
(define parse-environment-args
  (let ((parser (make-dom-parser)))
    ;; duplicate arguments->attlist to avoid unnecessary splitting
    (lambda (command port)
      (let* ((parsed (cdar (parser '*ENVIRON-ARGS* port '())))
             (spec (command-spec command))
             (command (car spec))
             (names (cddr spec)))
        (cond
         ((not names)
          (if (null? parsed)
              '()
              (parser-error port "@-command doesn't take args" command)))
         ((eq? names #t)
          (list (cons 'arguments parsed)))
         (else
          (let bind ((todo parsed) (names names) (bound '()))
            (cond
             ((null? names)
              (if (null? todo)
                  (reverse! bound)
                  (parser-error port "@-command didn't expect more args"
                                command todo)))
             ((symbol? names)
              ;; Rest argument: it takes everything that is left.
              (reverse! (acons names todo bound)))
             ((null? todo)
              (parser-error port "@-command expects more args"
                            command names))
             (else
              (let* ((arg (car todo))
                     (space (and (string? arg)
                                 (string-index arg #\space))))
                (cond
                 ((not space)
                  ;; A whole fragment is one argument; a *braces* group
                  ;; contributes its contents.
                  (bind (cdr todo) (cdr names)
                        (if (and (pair? arg) (eq? (car arg) '*braces*))
                            (acons (car names) (cdr arg) bound)
                            (cons (list (car names) arg) bound))))
                 ((zero? space)
                  ;; Leading space: skip it and look again.
                  (bind (cons (substring/shared arg (1+ space)) (cdr todo))
                        names
                        bound))
                 (else
                  ;; The text up to the space is this argument; the rest
                  ;; of the string is looked at again.
                  (bind (cons (substring/shared arg (1+ space)) (cdr todo))
                        (cdr names)
                        (cons (list (car names) (substring arg 0 space))
                              bound))))))))))))))
1110
;; Parse the arguments of COMMAND from PORT.  This is exactly
;; parse-environment-args, which perhaps ought to carry a more generic
;; name.
(define (parse-eol-text-args command port)
  (parse-environment-args command port))
1115
1116;; procedure: texi-fragment->stexi STRING-OR-PORT
1117;;
1118;; A DOM parser for a texinfo fragment, given as a string or an input port.
1119;;
1120;; The procedure returns an SXML tree headed by the special tag,
1121;; *fragment*.
1122
(define (texi-fragment->stexi string-or-port)
  "Parse the texinfo commands in @var{string-or-port}, and return the
resultant stexi tree. The head of the tree will be the special command,
@code{*fragment*}."
  ;; Parse everything readable from PORT as a *fragment* and post-process
  ;; the first element of the resulting seed.
  (let ((parse-from
         (lambda (port)
           (let ((parser (make-dom-parser)))
             (postprocess (car (parser '*fragment* port '())))))))
    (cond
     ((input-port? string-or-port)
      (parse-from string-or-port))
     (else
      ;; Anything else is handed to call-with-input-string.
      (call-with-input-string string-or-port parse-from)))))
1132
1133;; procedure: texi->stexi PORT
1134;;
1135;; This is an instance of a SSAX parser above that returns an SXML
1136;; representation of the texinfo document ready to be read at PORT.
1137;;
1138;; The procedure returns an SXML tree. The port points to the
1139;; first character after the @bye, or to the end of the file.
1140
(define (texi->stexi port)
  "Read a full texinfo document from @var{port} and return the parsed
stexi tree. The parsing will start at the @code{@@settitle} and end at
@code{@@bye} or EOF."
  ;; Skip ahead in PORT with take-until-settitle, then parse the rest as
  ;; a `texinfo' element and post-process the first element of the seed.
  (take-until-settitle port)
  (let* ((parse (make-dom-parser))
         (raw (parse 'texinfo port '())))
    (postprocess (car raw))))
1148
;; True when X is a pair whose car is `eq?' to Y; #f for any non-pair X.
(define (car-eq? x y)
  (if (pair? x)
      (eq? (car x) y)
      #f))
;; Build a table of contents for TREE: a list holding a `chapheading'
;; element and a nested `enumerate' of sectioning titles.  Where a
;; sectioning command directly follows a `node' element, the entry is a
;; `ref' to that node instead of the section's own children.
(define (make-contents tree)
  ;; Close the current level: return the unconsumed input and the
  ;; finished enumerate.
  (define (finish in out)
    (values in (cons 'enumerate (reverse! out))))

  ;; Walk IN while looking one element ahead.  OUT holds the items of
  ;; the current level, most recent first; DEPTH is the current level.
  (define (lp in out depth)
    (cond
     ((null? in) (finish in out))
     ((and (pair? (cdr in)) (texi-command-depth (caadr in) 4))
      => (lambda (new-depth)
           (let ((node-name (and (car-eq? (car in) 'node)
                                 (cadr (assq 'name (cdadar in))))))
             (cond
              ((< new-depth depth)
               ;; Shallower section: this level is done.
               (finish in out))
              ((> new-depth depth)
               ;; Deeper section: collect a sub-list and append it to the
               ;; most recent item (or to a fresh, empty item).
               (let ((out-cdr (if (null? out) '() (cdr out)))
                     (out-car (if (null? out) (list 'item) (car out))))
                 (call-with-values
                     (lambda () (lp in '() (1+ depth)))
                   (lambda (new-in new-out)
                     (lp new-in
                         (cons (append out-car (list new-out)) out-cdr)
                         depth)))))
              (else ;; same depth
               (lp (cddr in)
                   (cons
                    (list 'item
                          (cons 'para
                                (if node-name
                                    (list (list 'ref
                                                (list '% (list 'node node-name))))
                                    (cdadr in))))
                    out)
                   depth))))))
     (else (lp (cdr in) out depth))))

  (call-with-values
      (lambda () (lp tree '() 1))
    (lambda (rest contents)
      (list (list 'chapheading "Table of Contents") contents))))
1180
;; Collapse every run of whitespace inside STR to a single space and
;; drop whitespace at both ends.  A single space is kept at the left
;; (right) end when STR starts (ends) with a space character and
;; TRIM-LEFT? (TRIM-RIGHT?) is false.
(define (trim-whitespace str trim-left? trim-right?)
  (let ((keep-left? (and (not trim-left?) (string-prefix? " " str)))
        (keep-right? (and (not trim-right?) (string-suffix? " " str))))
    ;; An empty string at either end of the token list makes string-join
    ;; emit the separating space there.
    (string-join
     (append (if keep-left? '("") '())
             (string-tokenize str)
             (if keep-right? '("") '())))))
1189
;; Post-process the raw parse TREE and return the resulting tree.
;;
;; While walking the tree this
;;   - normalizes whitespace in strings with trim-whitespace, except
;;     below commands for which space-significant? is true;
;;   - removes @set elements and records their values; @value elements
;;     are replaced by the recorded value;
;;   - removes @copying and records it; @insertcopying is replaced by
;;     the recorded content;
;;   - replaces @contents with the output of make-contents on TREE.
;;
;; Errors are signalled for @set without arguments or without a leading
;; string, for @value of an unknown key, and for @insertcopying before
;; any @copying.
(define (postprocess tree)
  ;; IN:      elements still to process
  ;; OUT:     processed elements, most recent first
  ;; STATE:   alist of @set values (string keys) plus the `copying'
  ;;          element itself, whose car serves as its key
  ;; FIRST?:  passed to trim-whitespace as trim-left? for the next string
  ;; SIG-WS?: true when whitespace must be preserved
  ;; Returns two values: the processed list and the final state.
  (define (loop in out state first? sig-ws?)
    (cond
     ((null? in)
      (values (reverse! out) state))
     ((string? (car in))
      (loop (cdr in)
            (cons (if sig-ws? (car in)
                      (trim-whitespace (car in) first? (null? (cdr in))))
                  out)
            state #f sig-ws?))
     ((pair? (car in))
      (case (caar in)
        ((set)
         (if (null? (cdar in)) (error "@set missing arguments" in))
         (if (string? (cadar in))
             (let ((i (string-index (cadar in) #\space)))
               (if i
                   ;; "NAME rest...": the name is the text up to the
                   ;; first space; the value is the remaining text plus
                   ;; any following children.
                   (loop (cdr in) out
                         (acons (substring (cadar in) 0 i)
                                (cons (substring (cadar in) (1+ i)) (cddar in))
                                state)
                         #f sig-ws?)
                   (loop (cdr in) out (acons (cadar in) (cddar in) state)
                         #f sig-ws?)))
             (error "expected a constant to define for @set" in)))
        ((value)
         ;; Splice the recorded value in front of the remaining input so
         ;; that it is processed like ordinary content.
         (loop (fold-right cons (cdr in)
                           (or (and=>
                                (assoc (cadr (assq 'key (cdadar in))) state) cdr)
                               (error "unknown value" (cdadar in) state)))
               out
               state #f sig-ws?))
        ((copying)
         (loop (cdr in) out (cons (car in) state) #f sig-ws?))
        ((insertcopying)
         ;; and=> guards the lookup: without it, cdr would be applied to
         ;; #f when no @copying has been seen, and the error below could
         ;; never be reached.
         (loop (fold-right cons (cdr in)
                           (or (and=> (assoc 'copying state) cdr)
                               (error "copying isn't set yet")))
               out
               state #f sig-ws?))
        ((contents)
         (loop (cdr in) (fold cons out (make-contents tree)) state #f sig-ws?))
        (else
         ;; Any other element: process its own list (tag included) with
         ;; the current state, then continue with the state it returns.
         (let*-values (((kid-out state)
                        (loop (car in) '() state #t
                              (or sig-ws? (space-significant? (caar in))))))
           (loop (cdr in) (cons kid-out out) state #f sig-ws?)))))
     (else ; a symbol
      (loop (cdr in) (cons (car in) out) state #t sig-ws?))))

  (call-with-values
      (lambda () (loop tree '() '() #t #f))
    (lambda (out state) out)))
1244
;; Replace % with texinfo-arguments.
(define (stexi->sxml tree)
  "Transform the stexi tree @var{tree} into sxml. This involves
replacing the @code{%} element that keeps the texinfo arguments with an
element for each argument.

FIXME: right now it just changes % to @code{texinfo-arguments} -- that
doesn't hang with the idea of making a dtd at some point"
  (let ((rename-arguments            ; (% kid ...) => (texinfo-arguments kid ...)
         (lambda (tag . kids) (cons 'texinfo-arguments kids)))
        (keep-text                   ; text passes through untouched
         (lambda (tag text) text))
        (keep-element                ; every other element is rebuilt as is
         (lambda (tag . kids) (cons tag kids))))
    (pre-post-order
     tree
     (list (cons '% rename-arguments)
           (cons '*text* keep-text)
           (cons '*default* keep-element)))))
1258
1259;;; arch-tag: 73890afa-597c-4264-ae70-46fe7756ffb5
1260;;; texinfo.scm ends here