Commit | Line | Data |
---|---|---|
47f3ce52 AW |
1 | ;;;; (sxml simple) -- a simple interface to the SSAX parser |
2 | ;;;; | |
df3f1090 | 3 | ;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. |
47f3ce52 AW |
4 | ;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>. |
5 | ;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm. | |
6 | ;;;; | |
7 | ;;;; This library is free software; you can redistribute it and/or | |
8 | ;;;; modify it under the terms of the GNU Lesser General Public | |
9 | ;;;; License as published by the Free Software Foundation; either | |
10 | ;;;; version 3 of the License, or (at your option) any later version. | |
11 | ;;;; | |
12 | ;;;; This library is distributed in the hope that it will be useful, | |
13 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;;;; Lesser General Public License for more details. | |
16 | ;;;; | |
17 | ;;;; You should have received a copy of the GNU Lesser General Public | |
18 | ;;;; License along with this library; if not, write to the Free Software | |
19 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | ;;;; | |
21 | \f | |
22 | ;;; Commentary: | |
23 | ;; | |
24 | ;;A simple interface to XML parsing and serialization. | |
25 | ;; | |
26 | ;;; Code: | |
27 | ||
28 | (define-module (sxml simple) | |
29 | #:use-module (sxml ssax) | |
30 | #:use-module (sxml transform) | |
31 | #:use-module (ice-9 optargs) | |
32 | #:use-module (srfi srfi-13) | |
d9fff48e | 33 | #:export (xml->sxml sxml->xml sxml->string)) |
47f3ce52 AW |
34 | |
35 | (define* (xml->sxml #:optional (port (current-input-port))) | |
36 | "Use SSAX to parse an XML document into SXML. Takes one optional | |
37 | argument, @var{port}, which defaults to the current input port." | |
38 | (ssax:xml->sxml port '())) | |
39 | ||
d9fff48e AW |
40 | (define check-name |
41 | (let ((*good-cache* (make-hash-table))) | |
42 | (lambda (name) | |
43 | (if (not (hashq-ref *good-cache* name)) | |
44 | (let* ((str (symbol->string name)) | |
45 | (i (string-index str #\:)) | |
46 | (head (or (and i (substring str 0 i)) str)) | |
47 | (tail (and i (substring str (1+ i))))) | |
48 | (and i (string-index (substring str (1+ i)) #\:) | |
49 | (error "Invalid QName: more than one colon" name)) | |
50 | (for-each | |
51 | (lambda (s) | |
52 | (and s | |
53 | (or (char-alphabetic? (string-ref s 0)) | |
54 | (eq? (string-ref s 0) #\_) | |
55 | (error "Invalid name starting character" s name)) | |
56 | (string-for-each | |
57 | (lambda (c) | |
58 | (or (char-alphabetic? c) (string-index "0123456789.-_" c) | |
59 | (error "Invalid name character" c s name))) | |
60 | s))) | |
61 | (list head tail)) | |
62 | (hashq-set! *good-cache* name #t)))))) | |
63 | ||
64 | ;; The following two functions serialize tags and attributes. They are | |
65 | ;; being used in the node handlers for the post-order function, see | |
66 | ;; below. | |
67 | ||
68 | (define (attribute-value->xml value port) | |
69 | (cond | |
70 | ((pair? value) | |
71 | (attribute-value->xml (car value) port) | |
72 | (attribute-value->xml (cdr value) port)) | |
73 | ((string? value) | |
74 | (string->escaped-xml value port)) | |
75 | ((procedure? value) | |
76 | (with-output-to-port port value)) | |
77 | (else | |
78 | (string->escaped-xml | |
79 | (call-with-output-string (lambda (port) (display value port))) | |
80 | port)))) | |
81 | ||
82 | (define (attribute->xml attr value port) | |
83 | (check-name attr) | |
84 | (display attr port) | |
85 | (display "=\"" port) | |
86 | (attribute-value->xml value port) | |
87 | (display #\" port)) | |
88 | ||
89 | (define (element->xml tag attrs body port) | |
90 | (check-name tag) | |
91 | (display #\< port) | |
92 | (display tag port) | |
93 | (if attrs | |
94 | (let lp ((attrs attrs)) | |
95 | (if (pair? attrs) | |
96 | (let ((attr (car attrs))) | |
97 | (display #\space port) | |
98 | (if (pair? attr) | |
99 | (attribute->xml (car attr) (cdr attr) port) | |
100 | (error "bad attribute" tag attr)) | |
101 | (lp (cdr attrs))) | |
102 | (if (not (null? attrs)) | |
103 | (error "bad attributes" tag attrs))))) | |
104 | (if (pair? body) | |
105 | (begin | |
106 | (display #\> port) | |
107 | (let lp ((body body)) | |
108 | (cond | |
109 | ((pair? body) | |
110 | (sxml->xml (car body) port) | |
111 | (lp (cdr body))) | |
112 | ((null? body) | |
113 | (display "</" port) | |
114 | (display tag port) | |
115 | (display ">" port)) | |
116 | (else | |
117 | (error "bad element body" tag body))))) | |
118 | (display " />" port))) | |
119 | ||
120 | ;; FIXME: ensure name is valid | |
121 | (define (entity->xml name port) | |
122 | (display #\& port) | |
123 | (display name port) | |
124 | (display #\; port)) | |
125 | ||
126 | ;; FIXME: ensure tag and str are valid | |
127 | (define (pi->xml tag str port) | |
128 | (display "<?" port) | |
129 | (display tag port) | |
130 | (display #\space port) | |
131 | (display str port) | |
132 | (display "?>" port)) | |
47f3ce52 AW |
133 | |
134 | (define* (sxml->xml tree #:optional (port (current-output-port))) | |
135 | "Serialize the sxml tree @var{tree} as XML. The output will be written | |
136 | to the current output port, unless the optional argument @var{port} is | |
137 | present." | |
d9fff48e AW |
138 | (cond |
139 | ((pair? tree) | |
140 | (if (symbol? (car tree)) | |
141 | ;; An element. | |
142 | (let ((tag (car tree))) | |
143 | (case tag | |
144 | ((*TOP*) | |
145 | (sxml->xml (cdr tree) port)) | |
146 | ((*ENTITY*) | |
147 | (if (and (list? (cdr tree)) (= (length (cdr tree)) 1)) | |
148 | (entity->xml (cadr tree) port) | |
149 | (error "bad *ENTITY* args" (cdr tree)))) | |
150 | ((*PI*) | |
151 | (if (and (list? (cdr tree)) (= (length (cdr tree)) 2)) | |
152 | (pi->xml (cadr tree) (caddr tree) port) | |
153 | (error "bad *PI* args" (cdr tree)))) | |
154 | (else | |
155 | (let* ((elems (cdr tree)) | |
156 | (attrs (and (pair? elems) (pair? (car elems)) | |
157 | (eq? '@ (caar elems)) | |
158 | (cdar elems)))) | |
159 | (element->xml tag attrs (if attrs (cdr elems) elems) port))))) | |
160 | ;; A nodelist. | |
161 | (for-each (lambda (x) (sxml->xml x port)) tree))) | |
162 | ((string? tree) | |
163 | (string->escaped-xml tree port)) | |
164 | ((null? tree) *unspecified*) | |
165 | ((not tree) *unspecified*) | |
166 | ((eqv? tree #t) *unspecified*) | |
167 | ((procedure? tree) | |
168 | (with-output-to-port port tree)) | |
169 | (else | |
170 | (string->escaped-xml | |
171 | (call-with-output-string (lambda (port) (display tree port))) | |
172 | port)))) | |
47f3ce52 AW |
173 | |
174 | (define (sxml->string sxml) | |
175 | "Detag an sxml tree @var{sxml} into a string. Does not perform any | |
176 | formatting." | |
177 | (string-concatenate-reverse | |
178 | (foldts | |
179 | (lambda (seed tree) ; fdown | |
180 | '()) | |
181 | (lambda (seed kid-seed tree) ; fup | |
182 | (append! kid-seed seed)) | |
183 | (lambda (seed tree) ; fhere | |
184 | (if (string? tree) (cons tree seed) seed)) | |
185 | '() | |
186 | sxml))) | |
187 | ||
47f3ce52 | 188 | (define (make-char-quotator char-encoding) |
d9fff48e | 189 | (let ((bad-chars (list->char-set (map car char-encoding)))) |
47f3ce52 AW |
190 | |
191 | ;; Check to see if str contains one of the characters in charset, | |
192 | ;; from the position i onward. If so, return that character's index. | |
193 | ;; otherwise, return #f | |
194 | (define (index-cset str i charset) | |
d9fff48e AW |
195 | (string-index str charset i)) |
196 | ||
47f3ce52 | 197 | ;; The body of the function |
d9fff48e | 198 | (lambda (str port) |
47f3ce52 | 199 | (let ((bad-pos (index-cset str 0 bad-chars))) |
d9fff48e AW |
200 | (if (not bad-pos) |
201 | (display str port) ; str had all good chars | |
202 | (let loop ((from 0) (to bad-pos)) | |
203 | (cond | |
204 | ((>= from (string-length str)) *unspecified*) | |
205 | ((not to) | |
206 | (display (substring str from (string-length str)) port)) | |
207 | (else | |
208 | (let ((quoted-char | |
209 | (cdr (assv (string-ref str to) char-encoding))) | |
210 | (new-to | |
211 | (index-cset str (+ 1 to) bad-chars))) | |
212 | (if (< from to) | |
213 | (display (substring str from to) port)) | |
214 | (display quoted-char port) | |
215 | (loop (1+ to) new-to)))))))))) | |
47f3ce52 AW |
216 | |
217 | ;; Given a string, check to make sure it does not contain characters | |
218 | ;; such as '<' or '&' that require encoding. Return either the original | |
219 | ;; string, or a list of string fragments with special characters | |
220 | ;; replaced by appropriate character entities. | |
221 | ||
222 | (define string->escaped-xml | |
223 | (make-char-quotator | |
224 | '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))) | |
225 | ||
226 | ;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac | |
227 | ;;; simple.scm ends here | |
228 |