Commit | Line | Data |
---|---|---|
47f3ce52 AW |
1 | ;;;; (sxml simple) -- a simple interface to the SSAX parser |
2 | ;;;; | |
3 | ;;;; Copyright (C) 2009 Free Software Foundation, Inc. | |
4 | ;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>. | |
5 | ;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm. | |
6 | ;;;; | |
7 | ;;;; This library is free software; you can redistribute it and/or | |
8 | ;;;; modify it under the terms of the GNU Lesser General Public | |
9 | ;;;; License as published by the Free Software Foundation; either | |
10 | ;;;; version 3 of the License, or (at your option) any later version. | |
11 | ;;;; | |
12 | ;;;; This library is distributed in the hope that it will be useful, | |
13 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;;;; Lesser General Public License for more details. | |
16 | ;;;; | |
17 | ;;;; You should have received a copy of the GNU Lesser General Public | |
18 | ;;;; License along with this library; if not, write to the Free Software | |
19 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | ;;;; | |
21 | \f | |
22 | ;;; Commentary: | |
23 | ;; | |
24 | ;;A simple interface to XML parsing and serialization. | |
25 | ;; | |
26 | ;;; Code: | |
27 | ||
28 | (define-module (sxml simple) | |
29 | #:use-module (sxml ssax) | |
30 | #:use-module (sxml transform) | |
31 | #:use-module (ice-9 optargs) | |
32 | #:use-module (srfi srfi-13) | |
33 | #:export (xml->sxml sxml->xml sxml->string universal-sxslt-rules)) | |
34 | ||
35 | (define* (xml->sxml #:optional (port (current-input-port))) | |
36 | "Use SSAX to parse an XML document into SXML. Takes one optional | |
37 | argument, @var{port}, which defaults to the current input port." | |
38 | (ssax:xml->sxml port '())) | |
39 | ||
40 | ;; Universal transformation rules. Works for all XML. | |
41 | (define universal-sxslt-rules | |
42 | #; | |
43 | "A set of @code{pre-post-order} rules that transform any SXML tree | |
44 | into a form suitable for XML serialization by @code{(sxml transform)}'s | |
45 | @code{SRV:send-reply}. Used internally by @code{sxml->xml}." | |
46 | `((@ | |
47 | ((*default* . ,(lambda (attr-key . value) ((enattr attr-key) value)))) | |
48 | . ,(lambda (trigger . value) (list '@ value))) | |
49 | (*ENTITY* . ,(lambda (tag name) (list "&" name ";"))) | |
50 | (*PI* . ,(lambda (pi tag str) (list "<?" tag " " str "?>"))) | |
51 | ;; Is this right for entities? I don't have a reference for | |
52 | ;; public-id/system-id at the moment... | |
53 | (*default* . ,(lambda (tag . elems) (apply (entag tag) elems))) | |
54 | (*text* . ,(lambda (trigger str) | |
55 | (if (string? str) (string->escaped-xml str) str))))) | |
56 | ||
57 | (define* (sxml->xml tree #:optional (port (current-output-port))) | |
58 | "Serialize the sxml tree @var{tree} as XML. The output will be written | |
59 | to the current output port, unless the optional argument @var{port} is | |
60 | present." | |
61 | (with-output-to-port port | |
62 | (lambda () | |
63 | (SRV:send-reply | |
64 | (post-order | |
65 | tree | |
66 | universal-sxslt-rules))))) | |
67 | ||
68 | (define (sxml->string sxml) | |
69 | "Detag an sxml tree @var{sxml} into a string. Does not perform any | |
70 | formatting." | |
71 | (string-concatenate-reverse | |
72 | (foldts | |
73 | (lambda (seed tree) ; fdown | |
74 | '()) | |
75 | (lambda (seed kid-seed tree) ; fup | |
76 | (append! kid-seed seed)) | |
77 | (lambda (seed tree) ; fhere | |
78 | (if (string? tree) (cons tree seed) seed)) | |
79 | '() | |
80 | sxml))) | |
81 | ||
82 | ;; The following two functions serialize tags and attributes. They are | |
83 | ;; being used in the node handlers for the post-order function, see | |
84 | ;; above. | |
85 | ||
86 | (define (check-name name) | |
87 | (let* ((str (symbol->string name)) | |
88 | (i (string-index str #\:)) | |
89 | (head (or (and i (substring str 0 i)) str)) | |
90 | (tail (and i (substring str (1+ i))))) | |
91 | (and i (string-index (substring str (1+ i)) #\:) | |
92 | (error "Invalid QName: more than one colon" name)) | |
93 | (for-each | |
94 | (lambda (s) | |
95 | (and s | |
96 | (or (char-alphabetic? (string-ref s 0)) | |
97 | (eq? (string-ref s 0) #\_) | |
98 | (error "Invalid name starting character" s name)) | |
99 | (string-for-each | |
100 | (lambda (c) | |
101 | (or (char-alphabetic? c) (string-index "0123456789.-_" c) | |
102 | (error "Invalid name character" c s name))) | |
103 | s))) | |
104 | (list head tail)))) | |
105 | ||
106 | (define (entag tag) | |
107 | (check-name tag) | |
108 | (lambda elems | |
109 | (if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems))) | |
110 | (list #\< tag (cdar elems) | |
111 | (if (pair? (cdr elems)) | |
112 | (list #\> (cdr elems) "</" tag #\>) | |
113 | " />")) | |
114 | (list #\< tag | |
115 | (if (pair? elems) | |
116 | (list #\> elems "</" tag #\>) | |
117 | " />"))))) | |
118 | ||
119 | (define (enattr attr-key) | |
120 | (check-name attr-key) | |
121 | (let ((attr-str (symbol->string attr-key))) | |
122 | (lambda (value) | |
123 | (list #\space attr-str | |
124 | "=\"" (and (not (null? value)) value) #\")))) | |
125 | ||
126 | (define (make-char-quotator char-encoding) | |
127 | (let ((bad-chars (map car char-encoding))) | |
128 | ||
129 | ;; Check to see if str contains one of the characters in charset, | |
130 | ;; from the position i onward. If so, return that character's index. | |
131 | ;; otherwise, return #f | |
132 | (define (index-cset str i charset) | |
133 | (let loop ((i i)) | |
134 | (and (< i (string-length str)) | |
135 | (if (memv (string-ref str i) charset) i | |
136 | (loop (+ 1 i)))))) | |
137 | ||
138 | ;; The body of the function | |
139 | (lambda (str) | |
140 | (let ((bad-pos (index-cset str 0 bad-chars))) | |
141 | (if (not bad-pos) str ; str had all good chars | |
142 | (string-concatenate-reverse | |
143 | (let loop ((from 0) (to bad-pos) (out '())) | |
144 | (cond | |
145 | ((>= from (string-length str)) out) | |
146 | ((not to) | |
147 | (cons (substring str from (string-length str)) out)) | |
148 | (else | |
149 | (let ((quoted-char | |
150 | (cdr (assv (string-ref str to) char-encoding))) | |
151 | (new-to | |
152 | (index-cset str (+ 1 to) bad-chars))) | |
153 | (loop (1+ to) new-to | |
154 | (if (< from to) | |
155 | (cons* quoted-char (substring str from to) out) | |
156 | (cons quoted-char out))))))))))))) | |
157 | ||
158 | ;; Given a string, check to make sure it does not contain characters | |
159 | ;; such as '<' or '&' that require encoding. Return either the original | |
160 | ;; string, or a list of string fragments with special characters | |
161 | ;; replaced by appropriate character entities. | |
162 | ||
163 | (define string->escaped-xml | |
164 | (make-char-quotator | |
165 | '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))) | |
166 | ||
167 | ;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac | |
168 | ;;; simple.scm ends here | |
169 |