#:use-module (sxml transform)
#:use-module (ice-9 optargs)
#:use-module (srfi srfi-13)
- #:export (xml->sxml sxml->xml sxml->string universal-sxslt-rules))
+ #:export (xml->sxml sxml->xml sxml->string))
(define* (xml->sxml #:optional (port (current-input-port)))
"Use SSAX to parse an XML document into SXML. Takes one optional
argument, @var{port}, which defaults to the current input port."
(ssax:xml->sxml port '()))
-;; Universal transformation rules. Works for all XML.
-(define universal-sxslt-rules
- #;
- "A set of @code{pre-post-order} rules that transform any SXML tree
-into a form suitable for XML serialization by @code{(sxml transform)}'s
-@code{SRV:send-reply}. Used internally by @code{sxml->xml}."
- `((@
- ((*default* . ,(lambda (attr-key . value) ((enattr attr-key) value))))
- . ,(lambda (trigger . value) (list '@ value)))
- (*TOP* . ,(lambda (tag . xml) xml))
- (*ENTITY* . ,(lambda (tag name) (list "&" name ";")))
- (*PI* . ,(lambda (pi tag str) (list "<?" tag " " str "?>")))
- ;; Is this right for entities? I don't have a reference for
- ;; public-id/system-id at the moment...
- (*default* . ,(lambda (tag . elems) (apply (entag tag) elems)))
- (*text* . ,(lambda (trigger str)
- (if (string? str) (string->escaped-xml str) str)))))
+(define check-name
+ (let ((*good-cache* (make-hash-table)))
+ (lambda (name)
+ (if (not (hashq-ref *good-cache* name))
+ (let* ((str (symbol->string name))
+ (i (string-index str #\:))
+ (head (or (and i (substring str 0 i)) str))
+ (tail (and i (substring str (1+ i)))))
+ (and i (string-index (substring str (1+ i)) #\:)
+ (error "Invalid QName: more than one colon" name))
+ (for-each
+ (lambda (s)
+ (and s
+ (or (char-alphabetic? (string-ref s 0))
+ (eq? (string-ref s 0) #\_)
+ (error "Invalid name starting character" s name))
+ (string-for-each
+ (lambda (c)
+ (or (char-alphabetic? c) (string-index "0123456789.-_" c)
+ (error "Invalid name character" c s name)))
+ s)))
+ (list head tail))
+ (hashq-set! *good-cache* name #t))))))
+
+;; The following two functions serialize tags and attributes. They are
+;; being used in the node handlers for the post-order function, see
+;; below.
+
+(define (attribute-value->xml value port)
+ (cond
+ ((pair? value)
+ (attribute-value->xml (car value) port)
+ (attribute-value->xml (cdr value) port))
+ ((string? value)
+ (string->escaped-xml value port))
+ ((procedure? value)
+ (with-output-to-port port value))
+ (else
+ (string->escaped-xml
+ (call-with-output-string (lambda (port) (display value port)))
+ port))))
+
+(define (attribute->xml attr value port)
+ (check-name attr)
+ (display attr port)
+ (display "=\"" port)
+ (attribute-value->xml value port)
+ (display #\" port))
+
+(define (element->xml tag attrs body port)
+ (check-name tag)
+ (display #\< port)
+ (display tag port)
+ (if attrs
+ (let lp ((attrs attrs))
+ (if (pair? attrs)
+ (let ((attr (car attrs)))
+ (display #\space port)
+ (if (pair? attr)
+ (attribute->xml (car attr) (cdr attr) port)
+ (error "bad attribute" tag attr))
+ (lp (cdr attrs)))
+ (if (not (null? attrs))
+ (error "bad attributes" tag attrs)))))
+ (if (pair? body)
+ (begin
+ (display #\> port)
+ (let lp ((body body))
+ (cond
+ ((pair? body)
+ (sxml->xml (car body) port)
+ (lp (cdr body)))
+ ((null? body)
+ (display "</" port)
+ (display tag port)
+ (display ">" port))
+ (else
+ (error "bad element body" tag body)))))
+ (display " />" port)))
+
+;; FIXME: ensure name is valid
+(define (entity->xml name port)
+ (display #\& port)
+ (display name port)
+ (display #\; port))
+
+;; FIXME: ensure tag and str are valid
+(define (pi->xml tag str port)
+ (display "<?" port)
+ (display tag port)
+ (display #\space port)
+ (display str port)
+ (display "?>" port))
(define* (sxml->xml tree #:optional (port (current-output-port)))
"Serialize the sxml tree @var{tree} as XML. The output will be written
to the current output port, unless the optional argument @var{port} is
present."
- (with-output-to-port port
- (lambda ()
- (SRV:send-reply
- (post-order
- tree
- universal-sxslt-rules)))))
+ (cond
+ ((pair? tree)
+ (if (symbol? (car tree))
+ ;; An element.
+ (let ((tag (car tree)))
+ (case tag
+ ((*TOP*)
+ (sxml->xml (cdr tree) port))
+ ((*ENTITY*)
+ (if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
+ (entity->xml (cadr tree) port)
+ (error "bad *ENTITY* args" (cdr tree))))
+ ((*PI*)
+ (if (and (list? (cdr tree)) (= (length (cdr tree)) 2))
+ (pi->xml (cadr tree) (caddr tree) port)
+ (error "bad *PI* args" (cdr tree))))
+ (else
+ (let* ((elems (cdr tree))
+ (attrs (and (pair? elems) (pair? (car elems))
+ (eq? '@ (caar elems))
+ (cdar elems))))
+ (element->xml tag attrs (if attrs (cdr elems) elems) port)))))
+ ;; A nodelist.
+ (for-each (lambda (x) (sxml->xml x port)) tree)))
+ ((string? tree)
+ (string->escaped-xml tree port))
+ ((null? tree) *unspecified*)
+ ((not tree) *unspecified*)
+ ((eqv? tree #t) *unspecified*)
+ ((procedure? tree)
+ (with-output-to-port port tree))
+ (else
+ (string->escaped-xml
+ (call-with-output-string (lambda (port) (display tree port)))
+ port))))
(define (sxml->string sxml)
"Detag an sxml tree @var{sxml} into a string. Does not perform any
'()
sxml)))
-;; The following two functions serialize tags and attributes. They are
-;; being used in the node handlers for the post-order function, see
-;; above.
-
-(define (check-name name)
- (let* ((str (symbol->string name))
- (i (string-index str #\:))
- (head (or (and i (substring str 0 i)) str))
- (tail (and i (substring str (1+ i)))))
- (and i (string-index (substring str (1+ i)) #\:)
- (error "Invalid QName: more than one colon" name))
- (for-each
- (lambda (s)
- (and s
- (or (char-alphabetic? (string-ref s 0))
- (eq? (string-ref s 0) #\_)
- (error "Invalid name starting character" s name))
- (string-for-each
- (lambda (c)
- (or (char-alphabetic? c) (string-index "0123456789.-_" c)
- (error "Invalid name character" c s name)))
- s)))
- (list head tail))))
-
-(define (entag tag)
- (check-name tag)
- (lambda elems
- (if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems)))
- (list #\< tag (cdar elems)
- (if (pair? (cdr elems))
- (list #\> (cdr elems) "</" tag #\>)
- " />"))
- (list #\< tag
- (if (pair? elems)
- (list #\> elems "</" tag #\>)
- " />")))))
-
-(define (enattr attr-key)
- (check-name attr-key)
- (let ((attr-str (symbol->string attr-key)))
- (lambda (value)
- (list #\space attr-str
- "=\"" (and (not (null? value)) value) #\"))))
-
(define (make-char-quotator char-encoding)
- (let ((bad-chars (map car char-encoding)))
+ (let ((bad-chars (list->char-set (map car char-encoding))))
;; Check to see if str contains one of the characters in charset,
;; from the position i onward. If so, return that character's index.
;; otherwise, return #f
(define (index-cset str i charset)
- (let loop ((i i))
- (and (< i (string-length str))
- (if (memv (string-ref str i) charset) i
- (loop (+ 1 i))))))
-
+ (string-index str charset i))
+
;; The body of the function
- (lambda (str)
+ (lambda (str port)
(let ((bad-pos (index-cset str 0 bad-chars)))
- (if (not bad-pos) str ; str had all good chars
- (string-concatenate-reverse
- (let loop ((from 0) (to bad-pos) (out '()))
- (cond
- ((>= from (string-length str)) out)
- ((not to)
- (cons (substring str from (string-length str)) out))
- (else
- (let ((quoted-char
- (cdr (assv (string-ref str to) char-encoding)))
- (new-to
- (index-cset str (+ 1 to) bad-chars)))
- (loop (1+ to) new-to
- (if (< from to)
- (cons* quoted-char (substring str from to) out)
- (cons quoted-char out)))))))))))))
+ (if (not bad-pos)
+ (display str port) ; str had all good chars
+ (let loop ((from 0) (to bad-pos))
+ (cond
+ ((>= from (string-length str)) *unspecified*)
+ ((not to)
+ (display (substring str from (string-length str)) port))
+ (else
+ (let ((quoted-char
+ (cdr (assv (string-ref str to) char-encoding)))
+ (new-to
+ (index-cset str (+ 1 to) bad-chars)))
+ (if (< from to)
+ (display (substring str from to) port))
+ (display quoted-char port)
+ (loop (1+ to) new-to))))))))))
;; Given a string, check to make sure it does not contain characters
;; such as '<' or '&' that require encoding. Return either the original