import statprof, sxml, and texinfo from guile-lib
[bpt/guile.git] / module / sxml / simple.scm
CommitLineData
47f3ce52
AW
1;;;; (sxml simple) -- a simple interface to the SSAX parser
2;;;;
3;;;; Copyright (C) 2009 Free Software Foundation, Inc.
4;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>.
5;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm.
6;;;;
7;;;; This library is free software; you can redistribute it and/or
8;;;; modify it under the terms of the GNU Lesser General Public
9;;;; License as published by the Free Software Foundation; either
10;;;; version 3 of the License, or (at your option) any later version.
11;;;;
12;;;; This library is distributed in the hope that it will be useful,
13;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;;;; Lesser General Public License for more details.
16;;;;
17;;;; You should have received a copy of the GNU Lesser General Public
18;;;; License along with this library; if not, write to the Free Software
19;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20;;;;
21\f
22;;; Commentary:
23;;
24;;A simple interface to XML parsing and serialization.
25;;
26;;; Code:
27
28(define-module (sxml simple)
29 #:use-module (sxml ssax)
30 #:use-module (sxml transform)
31 #:use-module (ice-9 optargs)
32 #:use-module (srfi srfi-13)
33 #:export (xml->sxml sxml->xml sxml->string universal-sxslt-rules))
34
35(define* (xml->sxml #:optional (port (current-input-port)))
36 "Use SSAX to parse an XML document into SXML. Takes one optional
37argument, @var{port}, which defaults to the current input port."
38 (ssax:xml->sxml port '()))
39
40;; Universal transformation rules. Works for all XML.
41(define universal-sxslt-rules
42 #;
43 "A set of @code{pre-post-order} rules that transform any SXML tree
44into a form suitable for XML serialization by @code{(sxml transform)}'s
45@code{SRV:send-reply}. Used internally by @code{sxml->xml}."
46 `((@
47 ((*default* . ,(lambda (attr-key . value) ((enattr attr-key) value))))
48 . ,(lambda (trigger . value) (list '@ value)))
49 (*ENTITY* . ,(lambda (tag name) (list "&" name ";")))
50 (*PI* . ,(lambda (pi tag str) (list "<?" tag " " str "?>")))
51 ;; Is this right for entities? I don't have a reference for
52 ;; public-id/system-id at the moment...
53 (*default* . ,(lambda (tag . elems) (apply (entag tag) elems)))
54 (*text* . ,(lambda (trigger str)
55 (if (string? str) (string->escaped-xml str) str)))))
56
57(define* (sxml->xml tree #:optional (port (current-output-port)))
58 "Serialize the sxml tree @var{tree} as XML. The output will be written
59to the current output port, unless the optional argument @var{port} is
60present."
61 (with-output-to-port port
62 (lambda ()
63 (SRV:send-reply
64 (post-order
65 tree
66 universal-sxslt-rules)))))
67
68(define (sxml->string sxml)
69 "Detag an sxml tree @var{sxml} into a string. Does not perform any
70formatting."
71 (string-concatenate-reverse
72 (foldts
73 (lambda (seed tree) ; fdown
74 '())
75 (lambda (seed kid-seed tree) ; fup
76 (append! kid-seed seed))
77 (lambda (seed tree) ; fhere
78 (if (string? tree) (cons tree seed) seed))
79 '()
80 sxml)))
81
82;; The following two functions serialize tags and attributes. They are
83;; being used in the node handlers for the post-order function, see
84;; above.
85
86(define (check-name name)
87 (let* ((str (symbol->string name))
88 (i (string-index str #\:))
89 (head (or (and i (substring str 0 i)) str))
90 (tail (and i (substring str (1+ i)))))
91 (and i (string-index (substring str (1+ i)) #\:)
92 (error "Invalid QName: more than one colon" name))
93 (for-each
94 (lambda (s)
95 (and s
96 (or (char-alphabetic? (string-ref s 0))
97 (eq? (string-ref s 0) #\_)
98 (error "Invalid name starting character" s name))
99 (string-for-each
100 (lambda (c)
101 (or (char-alphabetic? c) (string-index "0123456789.-_" c)
102 (error "Invalid name character" c s name)))
103 s)))
104 (list head tail))))
105
106(define (entag tag)
107 (check-name tag)
108 (lambda elems
109 (if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems)))
110 (list #\< tag (cdar elems)
111 (if (pair? (cdr elems))
112 (list #\> (cdr elems) "</" tag #\>)
113 " />"))
114 (list #\< tag
115 (if (pair? elems)
116 (list #\> elems "</" tag #\>)
117 " />")))))
118
119(define (enattr attr-key)
120 (check-name attr-key)
121 (let ((attr-str (symbol->string attr-key)))
122 (lambda (value)
123 (list #\space attr-str
124 "=\"" (and (not (null? value)) value) #\"))))
125
126(define (make-char-quotator char-encoding)
127 (let ((bad-chars (map car char-encoding)))
128
129 ;; Check to see if str contains one of the characters in charset,
130 ;; from the position i onward. If so, return that character's index.
131 ;; otherwise, return #f
132 (define (index-cset str i charset)
133 (let loop ((i i))
134 (and (< i (string-length str))
135 (if (memv (string-ref str i) charset) i
136 (loop (+ 1 i))))))
137
138 ;; The body of the function
139 (lambda (str)
140 (let ((bad-pos (index-cset str 0 bad-chars)))
141 (if (not bad-pos) str ; str had all good chars
142 (string-concatenate-reverse
143 (let loop ((from 0) (to bad-pos) (out '()))
144 (cond
145 ((>= from (string-length str)) out)
146 ((not to)
147 (cons (substring str from (string-length str)) out))
148 (else
149 (let ((quoted-char
150 (cdr (assv (string-ref str to) char-encoding)))
151 (new-to
152 (index-cset str (+ 1 to) bad-chars)))
153 (loop (1+ to) new-to
154 (if (< from to)
155 (cons* quoted-char (substring str from to) out)
156 (cons quoted-char out)))))))))))))
157
158;; Given a string, check to make sure it does not contain characters
159;; such as '<' or '&' that require encoding. Return either the original
160;; string, or a list of string fragments with special characters
161;; replaced by appropriate character entities.
162
163(define string->escaped-xml
164 (make-char-quotator
165 '((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;") (#\" . "&quot;"))))
166
167;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac
168;;; simple.scm ends here
169