sxml->xml writes directly to a port
[bpt/guile.git] / module / sxml / simple.scm
CommitLineData
47f3ce52
AW
1;;;; (sxml simple) -- a simple interface to the SSAX parser
2;;;;
df3f1090 3;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
47f3ce52
AW
4;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>.
5;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm.
6;;;;
7;;;; This library is free software; you can redistribute it and/or
8;;;; modify it under the terms of the GNU Lesser General Public
9;;;; License as published by the Free Software Foundation; either
10;;;; version 3 of the License, or (at your option) any later version.
11;;;;
12;;;; This library is distributed in the hope that it will be useful,
13;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;;;; Lesser General Public License for more details.
16;;;;
17;;;; You should have received a copy of the GNU Lesser General Public
18;;;; License along with this library; if not, write to the Free Software
19;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20;;;;
21\f
22;;; Commentary:
23;;
24;;A simple interface to XML parsing and serialization.
25;;
26;;; Code:
27
28(define-module (sxml simple)
29 #:use-module (sxml ssax)
30 #:use-module (sxml transform)
31 #:use-module (ice-9 optargs)
32 #:use-module (srfi srfi-13)
d9fff48e 33 #:export (xml->sxml sxml->xml sxml->string))
47f3ce52
AW
34
35(define* (xml->sxml #:optional (port (current-input-port)))
36 "Use SSAX to parse an XML document into SXML. Takes one optional
37argument, @var{port}, which defaults to the current input port."
38 (ssax:xml->sxml port '()))
39
d9fff48e
AW
40(define check-name
41 (let ((*good-cache* (make-hash-table)))
42 (lambda (name)
43 (if (not (hashq-ref *good-cache* name))
44 (let* ((str (symbol->string name))
45 (i (string-index str #\:))
46 (head (or (and i (substring str 0 i)) str))
47 (tail (and i (substring str (1+ i)))))
48 (and i (string-index (substring str (1+ i)) #\:)
49 (error "Invalid QName: more than one colon" name))
50 (for-each
51 (lambda (s)
52 (and s
53 (or (char-alphabetic? (string-ref s 0))
54 (eq? (string-ref s 0) #\_)
55 (error "Invalid name starting character" s name))
56 (string-for-each
57 (lambda (c)
58 (or (char-alphabetic? c) (string-index "0123456789.-_" c)
59 (error "Invalid name character" c s name)))
60 s)))
61 (list head tail))
62 (hashq-set! *good-cache* name #t))))))
63
64;; The following two functions serialize tags and attributes. They are
65;; being used in the node handlers for the post-order function, see
66;; below.
67
68(define (attribute-value->xml value port)
69 (cond
70 ((pair? value)
71 (attribute-value->xml (car value) port)
72 (attribute-value->xml (cdr value) port))
73 ((string? value)
74 (string->escaped-xml value port))
75 ((procedure? value)
76 (with-output-to-port port value))
77 (else
78 (string->escaped-xml
79 (call-with-output-string (lambda (port) (display value port)))
80 port))))
81
82(define (attribute->xml attr value port)
83 (check-name attr)
84 (display attr port)
85 (display "=\"" port)
86 (attribute-value->xml value port)
87 (display #\" port))
88
89(define (element->xml tag attrs body port)
90 (check-name tag)
91 (display #\< port)
92 (display tag port)
93 (if attrs
94 (let lp ((attrs attrs))
95 (if (pair? attrs)
96 (let ((attr (car attrs)))
97 (display #\space port)
98 (if (pair? attr)
99 (attribute->xml (car attr) (cdr attr) port)
100 (error "bad attribute" tag attr))
101 (lp (cdr attrs)))
102 (if (not (null? attrs))
103 (error "bad attributes" tag attrs)))))
104 (if (pair? body)
105 (begin
106 (display #\> port)
107 (let lp ((body body))
108 (cond
109 ((pair? body)
110 (sxml->xml (car body) port)
111 (lp (cdr body)))
112 ((null? body)
113 (display "</" port)
114 (display tag port)
115 (display ">" port))
116 (else
117 (error "bad element body" tag body)))))
118 (display " />" port)))
119
120;; FIXME: ensure name is valid
121(define (entity->xml name port)
122 (display #\& port)
123 (display name port)
124 (display #\; port))
125
126;; FIXME: ensure tag and str are valid
127(define (pi->xml tag str port)
128 (display "<?" port)
129 (display tag port)
130 (display #\space port)
131 (display str port)
132 (display "?>" port))
47f3ce52
AW
133
134(define* (sxml->xml tree #:optional (port (current-output-port)))
135 "Serialize the sxml tree @var{tree} as XML. The output will be written
136to the current output port, unless the optional argument @var{port} is
137present."
d9fff48e
AW
138 (cond
139 ((pair? tree)
140 (if (symbol? (car tree))
141 ;; An element.
142 (let ((tag (car tree)))
143 (case tag
144 ((*TOP*)
145 (sxml->xml (cdr tree) port))
146 ((*ENTITY*)
147 (if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
148 (entity->xml (cadr tree) port)
149 (error "bad *ENTITY* args" (cdr tree))))
150 ((*PI*)
151 (if (and (list? (cdr tree)) (= (length (cdr tree)) 2))
152 (pi->xml (cadr tree) (caddr tree) port)
153 (error "bad *PI* args" (cdr tree))))
154 (else
155 (let* ((elems (cdr tree))
156 (attrs (and (pair? elems) (pair? (car elems))
157 (eq? '@ (caar elems))
158 (cdar elems))))
159 (element->xml tag attrs (if attrs (cdr elems) elems) port)))))
160 ;; A nodelist.
161 (for-each (lambda (x) (sxml->xml x port)) tree)))
162 ((string? tree)
163 (string->escaped-xml tree port))
164 ((null? tree) *unspecified*)
165 ((not tree) *unspecified*)
166 ((eqv? tree #t) *unspecified*)
167 ((procedure? tree)
168 (with-output-to-port port tree))
169 (else
170 (string->escaped-xml
171 (call-with-output-string (lambda (port) (display tree port)))
172 port))))
47f3ce52
AW
173
174(define (sxml->string sxml)
175 "Detag an sxml tree @var{sxml} into a string. Does not perform any
176formatting."
177 (string-concatenate-reverse
178 (foldts
179 (lambda (seed tree) ; fdown
180 '())
181 (lambda (seed kid-seed tree) ; fup
182 (append! kid-seed seed))
183 (lambda (seed tree) ; fhere
184 (if (string? tree) (cons tree seed) seed))
185 '()
186 sxml)))
187
47f3ce52 188(define (make-char-quotator char-encoding)
d9fff48e 189 (let ((bad-chars (list->char-set (map car char-encoding))))
47f3ce52
AW
190
191 ;; Check to see if str contains one of the characters in charset,
192 ;; from the position i onward. If so, return that character's index.
193 ;; otherwise, return #f
194 (define (index-cset str i charset)
d9fff48e
AW
195 (string-index str charset i))
196
47f3ce52 197 ;; The body of the function
d9fff48e 198 (lambda (str port)
47f3ce52 199 (let ((bad-pos (index-cset str 0 bad-chars)))
d9fff48e
AW
200 (if (not bad-pos)
201 (display str port) ; str had all good chars
202 (let loop ((from 0) (to bad-pos))
203 (cond
204 ((>= from (string-length str)) *unspecified*)
205 ((not to)
206 (display (substring str from (string-length str)) port))
207 (else
208 (let ((quoted-char
209 (cdr (assv (string-ref str to) char-encoding)))
210 (new-to
211 (index-cset str (+ 1 to) bad-chars)))
212 (if (< from to)
213 (display (substring str from to) port))
214 (display quoted-char port)
215 (loop (1+ to) new-to))))))))))
47f3ce52
AW
216
217;; Given a string, check to make sure it does not contain characters
218;; such as '<' or '&' that require encoding. Return either the original
219;; string, or a list of string fragments with special characters
220;; replaced by appropriate character entities.
221
222(define string->escaped-xml
223 (make-char-quotator
224 '((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;") (#\" . "&quot;"))))
225
226;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac
227;;; simple.scm ends here
228