| 1 | ;;;; (sxml simple) -- a simple interface to the SSAX parser |
| 2 | ;;;; |
| 3 | ;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. |
| 4 | ;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>. |
| 5 | ;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm. |
| 6 | ;;;; |
| 7 | ;;;; This library is free software; you can redistribute it and/or |
| 8 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 9 | ;;;; License as published by the Free Software Foundation; either |
| 10 | ;;;; version 3 of the License, or (at your option) any later version. |
| 11 | ;;;; |
| 12 | ;;;; This library is distributed in the hope that it will be useful, |
| 13 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | ;;;; Lesser General Public License for more details. |
| 16 | ;;;; |
| 17 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 18 | ;;;; License along with this library; if not, write to the Free Software |
| 19 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | ;;;; |
| 21 | \f |
| 22 | ;;; Commentary: |
| 23 | ;; |
| 24 | ;;A simple interface to XML parsing and serialization. |
| 25 | ;; |
| 26 | ;;; Code: |
| 27 | |
| 28 | (define-module (sxml simple) |
| 29 | #:use-module (sxml ssax) |
| 30 | #:use-module (sxml transform) |
| 31 | #:use-module (ice-9 optargs) |
| 32 | #:use-module (srfi srfi-13) |
| 33 | #:export (xml->sxml sxml->xml sxml->string)) |
| 34 | |
| 35 | (define* (xml->sxml #:optional (port (current-input-port))) |
| 36 | "Use SSAX to parse an XML document into SXML. Takes one optional |
| 37 | argument, @var{port}, which defaults to the current input port." |
| 38 | (ssax:xml->sxml port '())) |
| 39 | |
| 40 | (define check-name |
| 41 | (let ((*good-cache* (make-hash-table))) |
| 42 | (lambda (name) |
| 43 | (if (not (hashq-ref *good-cache* name)) |
| 44 | (let* ((str (symbol->string name)) |
| 45 | (i (string-index str #\:)) |
| 46 | (head (or (and i (substring str 0 i)) str)) |
| 47 | (tail (and i (substring str (1+ i))))) |
| 48 | (and i (string-index (substring str (1+ i)) #\:) |
| 49 | (error "Invalid QName: more than one colon" name)) |
| 50 | (for-each |
| 51 | (lambda (s) |
| 52 | (and s |
| 53 | (or (char-alphabetic? (string-ref s 0)) |
| 54 | (eq? (string-ref s 0) #\_) |
| 55 | (error "Invalid name starting character" s name)) |
| 56 | (string-for-each |
| 57 | (lambda (c) |
| 58 | (or (char-alphabetic? c) (string-index "0123456789.-_" c) |
| 59 | (error "Invalid name character" c s name))) |
| 60 | s))) |
| 61 | (list head tail)) |
| 62 | (hashq-set! *good-cache* name #t)))))) |
| 63 | |
| 64 | ;; The following two functions serialize tags and attributes. They are |
| 65 | ;; being used in the node handlers for the post-order function, see |
| 66 | ;; below. |
| 67 | |
| 68 | (define (attribute-value->xml value port) |
| 69 | (cond |
| 70 | ((pair? value) |
| 71 | (attribute-value->xml (car value) port) |
| 72 | (attribute-value->xml (cdr value) port)) |
| 73 | ((string? value) |
| 74 | (string->escaped-xml value port)) |
| 75 | ((procedure? value) |
| 76 | (with-output-to-port port value)) |
| 77 | (else |
| 78 | (string->escaped-xml |
| 79 | (call-with-output-string (lambda (port) (display value port))) |
| 80 | port)))) |
| 81 | |
| 82 | (define (attribute->xml attr value port) |
| 83 | (check-name attr) |
| 84 | (display attr port) |
| 85 | (display "=\"" port) |
| 86 | (attribute-value->xml value port) |
| 87 | (display #\" port)) |
| 88 | |
| 89 | (define (element->xml tag attrs body port) |
| 90 | (check-name tag) |
| 91 | (display #\< port) |
| 92 | (display tag port) |
| 93 | (if attrs |
| 94 | (let lp ((attrs attrs)) |
| 95 | (if (pair? attrs) |
| 96 | (let ((attr (car attrs))) |
| 97 | (display #\space port) |
| 98 | (if (pair? attr) |
| 99 | (attribute->xml (car attr) (cdr attr) port) |
| 100 | (error "bad attribute" tag attr)) |
| 101 | (lp (cdr attrs))) |
| 102 | (if (not (null? attrs)) |
| 103 | (error "bad attributes" tag attrs))))) |
| 104 | (if (pair? body) |
| 105 | (begin |
| 106 | (display #\> port) |
| 107 | (let lp ((body body)) |
| 108 | (cond |
| 109 | ((pair? body) |
| 110 | (sxml->xml (car body) port) |
| 111 | (lp (cdr body))) |
| 112 | ((null? body) |
| 113 | (display "</" port) |
| 114 | (display tag port) |
| 115 | (display ">" port)) |
| 116 | (else |
| 117 | (error "bad element body" tag body))))) |
| 118 | (display " />" port))) |
| 119 | |
| 120 | ;; FIXME: ensure name is valid |
| 121 | (define (entity->xml name port) |
| 122 | (display #\& port) |
| 123 | (display name port) |
| 124 | (display #\; port)) |
| 125 | |
| 126 | ;; FIXME: ensure tag and str are valid |
| 127 | (define (pi->xml tag str port) |
| 128 | (display "<?" port) |
| 129 | (display tag port) |
| 130 | (display #\space port) |
| 131 | (display str port) |
| 132 | (display "?>" port)) |
| 133 | |
| 134 | (define* (sxml->xml tree #:optional (port (current-output-port))) |
| 135 | "Serialize the sxml tree @var{tree} as XML. The output will be written |
| 136 | to the current output port, unless the optional argument @var{port} is |
| 137 | present." |
| 138 | (cond |
| 139 | ((pair? tree) |
| 140 | (if (symbol? (car tree)) |
| 141 | ;; An element. |
| 142 | (let ((tag (car tree))) |
| 143 | (case tag |
| 144 | ((*TOP*) |
| 145 | (sxml->xml (cdr tree) port)) |
| 146 | ((*ENTITY*) |
| 147 | (if (and (list? (cdr tree)) (= (length (cdr tree)) 1)) |
| 148 | (entity->xml (cadr tree) port) |
| 149 | (error "bad *ENTITY* args" (cdr tree)))) |
| 150 | ((*PI*) |
| 151 | (if (and (list? (cdr tree)) (= (length (cdr tree)) 2)) |
| 152 | (pi->xml (cadr tree) (caddr tree) port) |
| 153 | (error "bad *PI* args" (cdr tree)))) |
| 154 | (else |
| 155 | (let* ((elems (cdr tree)) |
| 156 | (attrs (and (pair? elems) (pair? (car elems)) |
| 157 | (eq? '@ (caar elems)) |
| 158 | (cdar elems)))) |
| 159 | (element->xml tag attrs (if attrs (cdr elems) elems) port))))) |
| 160 | ;; A nodelist. |
| 161 | (for-each (lambda (x) (sxml->xml x port)) tree))) |
| 162 | ((string? tree) |
| 163 | (string->escaped-xml tree port)) |
| 164 | ((null? tree) *unspecified*) |
| 165 | ((not tree) *unspecified*) |
| 166 | ((eqv? tree #t) *unspecified*) |
| 167 | ((procedure? tree) |
| 168 | (with-output-to-port port tree)) |
| 169 | (else |
| 170 | (string->escaped-xml |
| 171 | (call-with-output-string (lambda (port) (display tree port))) |
| 172 | port)))) |
| 173 | |
| 174 | (define (sxml->string sxml) |
| 175 | "Detag an sxml tree @var{sxml} into a string. Does not perform any |
| 176 | formatting." |
| 177 | (string-concatenate-reverse |
| 178 | (foldts |
| 179 | (lambda (seed tree) ; fdown |
| 180 | '()) |
| 181 | (lambda (seed kid-seed tree) ; fup |
| 182 | (append! kid-seed seed)) |
| 183 | (lambda (seed tree) ; fhere |
| 184 | (if (string? tree) (cons tree seed) seed)) |
| 185 | '() |
| 186 | sxml))) |
| 187 | |
| 188 | (define (make-char-quotator char-encoding) |
| 189 | (let ((bad-chars (list->char-set (map car char-encoding)))) |
| 190 | |
| 191 | ;; Check to see if str contains one of the characters in charset, |
| 192 | ;; from the position i onward. If so, return that character's index. |
| 193 | ;; otherwise, return #f |
| 194 | (define (index-cset str i charset) |
| 195 | (string-index str charset i)) |
| 196 | |
| 197 | ;; The body of the function |
| 198 | (lambda (str port) |
| 199 | (let ((bad-pos (index-cset str 0 bad-chars))) |
| 200 | (if (not bad-pos) |
| 201 | (display str port) ; str had all good chars |
| 202 | (let loop ((from 0) (to bad-pos)) |
| 203 | (cond |
| 204 | ((>= from (string-length str)) *unspecified*) |
| 205 | ((not to) |
| 206 | (display (substring str from (string-length str)) port)) |
| 207 | (else |
| 208 | (let ((quoted-char |
| 209 | (cdr (assv (string-ref str to) char-encoding))) |
| 210 | (new-to |
| 211 | (index-cset str (+ 1 to) bad-chars))) |
| 212 | (if (< from to) |
| 213 | (display (substring str from to) port)) |
| 214 | (display quoted-char port) |
| 215 | (loop (1+ to) new-to)))))))))) |
| 216 | |
| 217 | ;; Given a string, check to make sure it does not contain characters |
| 218 | ;; such as '<' or '&' that require encoding. Return either the original |
| 219 | ;; string, or a list of string fragments with special characters |
| 220 | ;; replaced by appropriate character entities. |
| 221 | |
| 222 | (define string->escaped-xml |
| 223 | (make-char-quotator |
| 224 | '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))) |
| 225 | |
| 226 | ;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac |
| 227 | ;;; simple.scm ends here |
| 228 | |