1 (* MLton
20010629 (built Fri Jun
29 11:14:21 2001 on starlinux
) *)
2 (* created this file on Fri Jun
29 13:02:19 2001. *)
3 (* Do not edit this file
. *)
6 (* chunk
: chunk per function
*)
8 (* defines
: [NODEBUG
,MLton_safe
=TRUE
,MLton_detectOverflow
=TRUE
] *)
9 (* detect overflow
: true *)
10 (* fixed heap
: None
*)
12 (* includes
: [mlton
.h
] *)
13 (* inline
: NonRecursive
{product
= 320,small
= 60} *)
14 (* input file
: fxp
.cm
*)
15 (* instrument
: false *)
16 (* instrument Sxml
: false *)
19 (* native commented
: 0 *)
20 (* native copy prop
: true *)
22 (* native ieee fp
: false *)
23 (* native live transfer
: true *)
24 (* native move hoist
: true *)
25 (* native optimize
: 1 *)
26 (* native split
: Some (20000) *)
27 (* polyvariance
: Some ({rounds
= 2,small
= 30,product
= 300}) *)
28 (* print at
fun entry
: false *)
31 (* show types
: false *)
33 (* use basis library
: true *)
34 (* verbosity
: Silent
*)
35 (* start
of ../../Util
/utilTime
.sml
*)
39 (*--------------------------------------------------------------------------*)
40 (* Structure
: UtilTime
*)
44 (* Exceptions raised by functions
in this
structure: *)
47 (*--------------------------------------------------------------------------*)
50 val time
: ('a
-> 'b
) -> 'a
-> 'b
* {usr
:Time
.time
, sys
:Time
.time
, gc
:Time
.time
}
51 val timeN
: int -> ('a
-> 'b
) -> 'a
-> 'b
* {usr
:Time
.time
, sys
:Time
.time
, gc
:Time
.time
}
54 structure UtilTime
: UtilTime
=
56 (*--------------------------------------------------------------------*)
57 (* run f on x
, and measure the runtime
. return the result
and time
. *)
58 (*--------------------------------------------------------------------*)
59 fun time f x
= let val timer
= Timer
.startCPUTimer ()
61 val ptime
= Timer
.checkCPUTimer timer
65 (*--------------------------------------------------------------------*)
66 (* run f n times on x
, and measure the runtime
. return the time
. *)
67 (*--------------------------------------------------------------------*)
69 let fun iter m
= if m
<=1 then f x
else (ignore (f x
); iter (m
-1))
73 (* stop
of ../../Util
/utilTime
.sml
*)
74 (* start
of ../../Util
/utilString
.sml
*)
75 (*--------------------------------------------------------------------------*)
76 (* Structure
: UtilString
*)
77 (*--------------------------------------------------------------------------*)
78 signature UtilString
=
80 val quoteString
: char
-> string -> string
82 val numberNth
: int -> string
83 val prependAnA
: string -> string
85 val nBlanks
: int -> string
86 val padxLeft
: char
-> string * int -> string
87 val padxRight
: char
-> string * int -> string
89 val breakLines
: int -> string -> string list
91 val toUpperFirst
: string -> string
92 val toUpperString
: string -> string
94 val Int2String
: int -> string
96 val Bool2xString
: string * string -> bool -> string
97 val Bool2String
: bool -> string
99 val Option2xString
: string * (('a
-> string) -> 'a
-> string)
100 -> ('a
-> string) -> 'a option
-> string
101 val Option2String0
: ('a
-> string) -> 'a option
-> string
102 val Option2String
: ('a
-> string) -> 'a option
-> string
104 val List2xString
: string * string * string -> ('a
-> string) -> 'a list
-> string
105 val List2String0
: ('a
-> string) -> 'a list
-> string
106 val List2String
: ('a
-> string) -> 'a list
-> string
108 val Vector2xString
: string * string * string -> ('a
-> string) -> 'a vector
-> string
109 val Vector2String
: ('a
-> string) -> 'a vector
-> string
112 structure UtilString
: UtilString
=
114 fun quoteString q s
= let val quote
= String.implode
[q
] in quote^s^quote
end
116 (*--------------------------------------------------------------------*)
117 (* generate a
string with the ordinal number
of n
, by appending
*)
118 (* "st", "nd", "rd" or
"th" to the number
. *)
119 (*--------------------------------------------------------------------*)
121 let val suffix
= case n
mod 9
126 in Int.toString n^suffix
129 (*--------------------------------------------------------------------*)
130 (* is the single character c represented by a
word starting
with a
*)
131 (* vocal
in the alphabet?
(l~ell
->true, k~kay
->false) *)
132 (*--------------------------------------------------------------------*)
149 (*--------------------------------------------------------------------*)
150 (* is character c a vocal?
*)
151 (*--------------------------------------------------------------------*)
161 (*--------------------------------------------------------------------*)
162 (* does a
word require
"an" as undefinite article?
true if: *)
163 (* - it is a single letter that starts
with a vocal
in the alphabet
*)
164 (* - its first two letters are capitals
, i
.e
. it is an abbreviation
, *)
165 (* and its first letter starts
with a vocal
in the alphabet
*)
166 (* - it has more than one letter
, is not an abbreviation
, and either
*)
167 (* + it starts
with a
, i or
o *)
168 (* + it starts
with e
and the second letter is not a
u (europe
) *)
169 (* + it starts
with a u
and continues
with a
vocal (very unlikely
, *)
170 (* only
in c
.s
., like uuencoded or uid
*)
171 (* + it starts
with a u
, continues
with a consonant not followed by
*)
172 (* an
i (like
in unicode
); that is something like un
-... *)
173 (* This ruleset is not complete since it does not cover
, e
.g
., the
*)
174 (* word uninvented
, but sufficient for most cases
. *)
175 (* (Is english pronounciation decidable at all?
) *)
176 (*--------------------------------------------------------------------*)
177 fun extendsAtoAn
word =
178 case String.explode
word
180 |
[c
] => vocalLetter c
181 | c1
::c2
::cs
=> if not (Char.isLower c1
orelse Char.isLower c2
)
183 else case Char.toLower c1
187 | #
"e" => Char.toLower c2
<> #
"u"
188 | #
"u" => if isVocal c2
then false
191 | c3
::_
=> Char.toLower c3
<> #
"i")
194 (*--------------------------------------------------------------------*)
195 (* add an undefinite article to a
word. *)
196 (*--------------------------------------------------------------------*)
197 fun prependAnA
word = if extendsAtoAn
word then "an "^
word else "a "^
word
199 (*--------------------------------------------------------------------*)
200 (* generate a list
/string of n times character c
. *)
201 (*--------------------------------------------------------------------*)
202 fun nCharsC c n
= if n
>0 then c
::nCharsC
c (n
-1) else nil
203 fun nChars c n
= String.implode (nCharsC c n
)
204 val nBlanks
= nChars #
" "
206 (*--------------------------------------------------------------------*)
207 (* add a minimal number
of characters c to the left
/right
of a
string *)
208 (* in order to make its length at least n
. *)
209 (*--------------------------------------------------------------------*)
210 fun padxLeft
c (s
,n
) = (nChars
c (n
-String.size s
))^s
211 fun padxRight
c (s
,n
) = s^
(nChars
c (n
-String.size s
))
212 val padLeft
= padxLeft #
" "
213 val padRight
= padxRight #
" "
215 (*--------------------------------------------------------------------*)
216 (* break a
string into several lines
of length width
. *)
217 (*--------------------------------------------------------------------*)
218 fun breakLines width str
=
220 val tokens
= String.tokens (fn c
=> #
" "=c
) str
221 fun makeLine(toks
,lines
) = if null toks
then lines
222 else (String.concat (rev toks
))::lines
223 fun doit
w (toks
,lines
) nil
= makeLine(toks
,lines
)
224 | doit
w (toks
,lines
) (one
::rest
) =
226 val l
= String.size one
229 if w1
<width
then doit (w1
+1) (" "::one
::toks
,lines
) rest
230 else if w1
=width
then doit
0 (nil
,makeLine(one
::toks
,lines
)) rest
231 else if l
>=width
then doit
0 (nil
,one
::makeLine(toks
,lines
)) rest
232 else doit (l
+1) ([" ",one
],makeLine(toks
,lines
)) rest
234 in List.rev (doit
0 (nil
,nil
) tokens
)
237 (*--------------------------------------------------------------------*)
238 (* convert the first
/all characters
of a
string to upper
case *)
239 (*--------------------------------------------------------------------*)
240 fun toUpperFirst str
=
241 case String.explode str
243 | c
::cs
=> String.implode (Char.toUpper c
::cs
)
244 fun toUpperString str
=
245 String.implode(map
Char.toUpper (String.explode str
))
247 (*--------------------------------------------------------------------*)
248 (* return a
string representation
of an
int, char or unit
. *)
249 (*--------------------------------------------------------------------*)
250 val Int2String
= Int.toString
251 val Char2String
= Char.toString
252 fun Unit2String() = "()"
254 (*--------------------------------------------------------------------*)
255 (* return a
string representation
of a boolean
. *)
256 (*--------------------------------------------------------------------*)
257 fun Bool2xString (t
,f
) b
= if b
then t
else f
258 val Bool2String
= Bool2xString ("true","false")
260 (*--------------------------------------------------------------------*)
261 (* return a
string representation
of an option
. *)
262 (* the first arg is a
string for the NONE
case, the second a function
*)
263 (* that converts x to a
string, given a function for doing so
. *)
264 (*--------------------------------------------------------------------*)
265 fun Option2xString (none
,Some2String
) x2String opt
=
268 | SOME x
=> Some2String x2String x
269 fun Option2String0 x2String
= Option2xString ("",fn f
=> fn x
=> f x
) x2String
270 fun Option2String x2String
= Option2xString ("NONE",fn f
=> fn x
=> "SOME "^f x
) x2String
272 (*--------------------------------------------------------------------*)
273 (* return a
string representation
of list
; start
with pre
, separate
*)
274 (* with sep
and finish
with post
; use X2String for each element
. *)
275 (*--------------------------------------------------------------------*)
276 fun List2xString (pre
,sep
,post
) X2String nil
= pre^post
277 |
List2xString (pre
,sep
,post
) X2String l
=
278 let fun doit nil _
= [post
]
279 |
doit (x
::r
) str
= str
::X2String x
::doit r sep
280 in String.concat (doit l pre
)
282 fun List2String X2String nil
= "[]"
283 | List2String X2String l
=
284 let fun doit nil _
= ["]"]
285 |
doit (x
::r
) str
= str
::X2String x
::doit r
","
286 in String.concat (doit l
"[")
288 fun List2String0 X2String nil
= ""
289 | List2String0 X2String l
=
290 let fun doit nil _
= nil
291 |
doit (x
::r
) str
= str
::X2String x
::doit r
" "
292 in String.concat (doit l
"")
295 (* a compiler bug
in smlnj
110 makes the following uncompilable
: *)
296 (* fun List2String X2String xs
= List2xString ("[",",","]") X2String xs
*)
297 (* fun List2String0 X2String xs
= List2xString (""," ","") X2String xs
*)
299 (*--------------------------------------------------------------------*)
300 (* return a
string representation
of list
; start
with pre
, separate
*)
301 (* with sep
and finish
with post
; use X2String for each element
. *)
302 (*--------------------------------------------------------------------*)
303 fun Vector2xString (pre
,sep
,post
) X2String vec
=
304 if Vector.length vec
=0 then pre^post
306 (pre
::X2String(Vector.sub(vec
,0))::
307 Vector.foldri (fn (_
,x
,yet
) => sep
::X2String x
::yet
) [post
] (vec
,1,NONE
))
308 fun Vector2String X2String vec
= Vector2xString ("#[",",","]") X2String vec
310 (* stop
of ../../Util
/utilString
.sml
*)
311 (* start
of ../../Util
/utilCompare
.sml
*)
312 signature UtilCompare
=
314 type 'a Comparer
= 'a
* 'a
-> order
316 val comparePair
: 'a Comparer
* 'b Comparer
-> ('a
* 'b
) Comparer
317 val compareTriple
: 'a Comparer
* 'b Comparer
* 'c Comparer
-> ('a
* 'b
* 'c
) Comparer
319 val compareOption
: 'a Comparer
-> 'a option Comparer
320 val compareList
: 'a Comparer
-> 'a list Comparer
321 val compareVector
: 'a Comparer
-> 'a vector Comparer
323 val compareInt
: int Comparer
324 val compareIntPair
: (int * int) Comparer
325 val compareIntTriple
: (int * int * int) Comparer
327 val compareWord
: word Comparer
328 val compareWordPair
: (word * word) Comparer
329 val compareWordTriple
: (word * word * word) Comparer
332 structure UtilCompare
: UtilCompare
=
334 type 'a Comparer
= 'a
* 'a
-> order
336 fun comparePair (compareA
,compareB
) ((a1
,b1
),(a2
,b2
)) =
338 of EQUAL
=> compareB(b1
,b2
)
340 fun compareTriple (compareA
,compareB
,compareC
) ((a1
,b1
,c1
),(a2
,b2
,c2
)) =
342 of EQUAL
=> (case compareB(b1
,b2
)
343 of EQUAL
=> compareC(c1
,c2
)
347 val compareInt
= Int.compare
348 fun compareIntPair((x1
,y1
),(x2
,y2
)) =
349 case Int.compare(x1
,x2
)
350 of EQUAL
=> Int.compare (y1
,y2
)
352 fun compareIntTriple((x1
,y1
,z1
),(x2
,y2
,z2
)) =
353 case Int.compare(x1
,x2
)
354 of EQUAL
=> (case Int.compare (y1
,y2
)
355 of EQUAL
=> Int.compare (z1
,z2
)
359 val compareWord
= Word.compare
360 fun compareWordPair((x1
,y1
),(x2
,y2
)) =
361 case Word.compare(x1
,x2
)
362 of EQUAL
=> Word.compare (y1
,y2
)
364 fun compareWordTriple((x1
,y1
,z1
),(x2
,y2
,z2
)) =
365 case Word.compare(x1
,x2
)
366 of EQUAL
=> (case Word.compare (y1
,y2
)
367 of EQUAL
=> Word.compare (z1
,z2
)
371 fun compareOption compareA opts
=
373 of (NONE
,NONE
) => EQUAL
374 |
(NONE
,SOME x
) => LESS
375 |
(SOME x
,NONE
) => GREATER
376 |
(SOME x
,SOME y
) => compareA(x
,y
)
378 fun compareList compA ll
=
379 let fun doit (nil
,nil
) = EQUAL
380 |
doit (nil
,_
) = LESS
381 |
doit (_
,nil
) = GREATER
382 |
doit (a1
::as1
,a2
::as2
) = case compA(a1
,a2
)
383 of EQUAL
=> doit(as1
,as2
)
388 fun compareVector
compA (vec1
,vec2
) =
389 let val (l
,l2
) = (Vector.length vec1
,Vector.length vec2
)
390 in case Int.compare(l
,l2
)
391 of EQUAL
=> let fun doit i
= if i
>=l
then EQUAL
392 else case compA(Vector.sub(vec1
,i
),Vector.sub(vec2
,i
))
393 of EQUAL
=> doit (i
+1)
401 (* stop
of ../../Util
/utilCompare
.sml
*)
402 (* start
of ../../Util
/utilHash
.sml
*)
405 val hashPair
: ('a
-> word) * ('b
-> word) -> 'a
* 'b
-> word
406 val hashTriple
: ('a
-> word) * ('b
-> word) * ('c
-> word) -> 'a
* 'b
* 'c
-> word
408 val hashOption
: ('a
-> word) -> 'a option
-> word
409 val hashList
: ('a
-> word) -> 'a list
-> word
410 val hashVector
: ('a
-> word) -> 'a vector
-> word
412 val hashString
: string -> word
414 val hashInt
: int -> word
415 val hashIntPair
: int * int -> word
416 val hashIntTriple
: int * int * int -> word
418 val hashWord
: word -> word
419 val hashWordPair
: word * word -> word
420 val hashWordTriple
: word * word * word -> word
423 structure UtilHash
: UtilHash
=
425 fun hashPair (hashA
,hashB
) (a
,b
) =
426 0w1327
* hashA a
+ 0w3853
* hashB b
427 fun hashTriple (hashA
,hashB
,hashC
) (a
,b
,c
) =
428 0w1327
* hashA a
+ 0w3853
* hashB b
+ 0w2851
* hashC c
432 fun hashIntPair (i
,j
) =
433 0w1327
* Word.fromInt i
+ 0w3853
* Word.fromInt j
434 fun hashIntTriple (i
,j
,k
) =
435 0w1327
* Word.fromInt i
+ 0w3853
* Word.fromInt j
+ 0w2851
* Word.fromInt k
438 fun hashWordPair (i
,j
) = 0w1327
* i
+ 0w3853
* j
439 fun hashWordTriple (i
,j
,k
) = 0w1327
* i
+ 0w3853
* j
+ 0w2851
* k
441 val hashChar
= Word.fromInt
o ord
445 |
1 => 0w1
+ hashChar(String.sub(s
,0))
446 |
2 => let val w1
= String.sub(s
,0)
447 val w2
= String.sub(s
,1)
448 in 0w2
+ hashChar w1
* 0wx1327
+ hashChar w2
450 | n
=> let val w1
= String.sub(s
,0)
451 val w2
= String.sub(s
,1)
452 val wn
= String.sub(s
,n
-1)
453 in 0w3
+ hashChar w1
* 0wx3853
+ hashChar w2
* 0wx1327
+ hashChar wn
457 fun hashOption hashA opt
=
460 | SOME a
=> 0w1
+ hashA a
462 fun hashList hashA l
=
465 |
[a
] => 0w1
+ hashA a
466 | a1
::a2
::_
=> 0w2
+ 0w3853
* hashA a1
+ 0wx1327
* hashA a2
468 fun hashVector hashA cv
=
469 case Vector.length cv
471 |
1 => 0w1
+ hashA(Vector.sub(cv
,0))
472 |
2 => let val w1
= Vector.sub(cv
,0)
473 val w2
= Vector.sub(cv
,1)
474 in 0w2
+ hashA w1
* 0wx1327
+ hashA w2
476 | n
=> let val w1
= Vector.sub(cv
,0)
477 val w2
= Vector.sub(cv
,1)
478 val wn
= Vector.sub(cv
,n
-1)
479 in 0w3
+ hashA w1
* 0wx3853
+ hashA w2
* 0wx1327
+ hashA wn
483 (* stop
of ../../Util
/utilHash
.sml
*)
484 (* start
of ../../Util
/SymDict
/key
.sml
*)
488 (*--------------------------------------------------------------------------*)
489 (* In order to be used
as a dictinary
/symbol table key
, a
type must have a
*)
490 (* null value
, hash to words
, must be comparable
and printable
. *)
491 (*--------------------------------------------------------------------------*)
497 val hash
: Key
-> word
498 val compare
: Key
* Key
-> order
499 val toString
: Key
-> string
501 (* stop
of ../../Util
/SymDict
/key
.sml
*)
502 (* start
of ../../Util
/utilInt
.sml
*)
503 (*--------------------------------------------------------------------------*)
504 (* Structure
: UtilInt
*)
508 (* Exceptions raised by functions
in this
structure: *)
509 (* appInterval
: none
*)
510 (* insertInt
: none
*)
511 (* insertNewInt
: none
*)
512 (* nextPowerTwo
: none
*)
513 (*--------------------------------------------------------------------------*)
516 val intervalList
: (int * int) -> int list
517 val appInterval
: (int -> unit
) -> (int * int) -> unit
518 val insertInt
: int * int list
-> int list
519 val insertNewInt
: int * int list
-> int list option
520 val powerOfTwo
: int -> int
521 val nextPowerTwo
: int -> int
524 structure UtilInt
: UtilInt
=
526 (*--------------------------------------------------------------------*)
527 (* generate the list
[n
,...,m
] *)
528 (*--------------------------------------------------------------------*)
529 fun intervalList(n
,m
) = if n
>m
then nil
else n
::intervalList(n
+1,m
)
531 (*--------------------------------------------------------------------*)
532 (* apply f to each number
in [n
...m
] *)
533 (*--------------------------------------------------------------------*)
534 fun appInterval
f (n
,m
) =
543 (*--------------------------------------------------------------------*)
544 (* insert an integer into a sorted list without duplicates
. *)
545 (*--------------------------------------------------------------------*)
546 fun insertInt (x
:int,l
) =
548 |
go (l
as y
::ys
) = case Int.compare (x
,y
)
551 | GREATER
=> y
::go ys
555 (*--------------------------------------------------------------------*)
556 (* insert an integer into a sorted list
if it is not yet
in it
. *)
557 (*--------------------------------------------------------------------*)
558 fun insertNewInt (x
:int,l
) =
560 fun go nil
= SOME
[x
]
561 |
go (l
as y
::ys
) = case Int.compare (x
,y
)
562 of LESS
=> SOME(x
::l
)
564 | GREATER
=> case go ys
566 | SOME xys
=> SOME(y
::xys
)
570 (*--------------------------------------------------------------------*)
571 (* compute the power to the base
of two
. *)
572 (*--------------------------------------------------------------------*)
575 else if n
mod 2=0 then let val x
=powerOfTwo (n
div 2) in x
*x
end
576 else let val x
=powerOfTwo (n
-1) in 2*x
end
578 (*--------------------------------------------------------------------*)
579 (* find the smallest p
with 2^p
>= n
. *)
580 (*--------------------------------------------------------------------*)
584 else if m
*m
<2*n
then doit (2*p
,m
*m
)
589 (* stop
of ../../Util
/utilInt
.sml
*)
590 (* start
of ../../Util
/utilError
.sml
*)
596 signature UtilError
=
598 exception InternalError
of string * string * string
599 exception NoSuchFile
of string * string
601 val formatMessage
: int * int -> string list
-> string
604 structure UtilError
: UtilError
=
608 exception InternalError
of string * string * string
609 exception NoSuchFile
of string * string
611 fun formatMessage (indentWidth
,lineWidth
) strs
=
613 val indent
= nBlanks indentWidth
618 fun isSep c
= #
" "=c
orelse #
"\n"=c
orelse #
"\t"=c
620 fun go (w
,yet
) nil
= List.rev ("\n"::yet
)
621 |
go (w
,yet
) (x
::xs
) =
623 val y
= if null xs
then x^dot
else x
624 val l
= String.size y
626 val (w2
,yet2
) = if w1
<=lineWidth
then (w1
,y
::yet
)
627 else (indentWidth
+l
,y
::nl
::yet
)
628 val (w3
,yet3
) = if null xs
then (w2
,yet2
)
629 else (if w2
<lineWidth
then (w2
+1,blank
::yet2
)
630 else (indentWidth
,nl
::yet2
))
634 val tokens
= List.concat (map (String.tokens isSep
) strs
)
635 val fragments
= go (0,nil
) tokens
637 String.concat fragments
640 (* stop
of ../../Util
/utilError
.sml
*)
641 (* start
of ../../Util
/SymDict
/dict
.sml
*)
642 (*--------------------------------------------------------------------------*)
648 (* Exceptions raised by functions
in this
functor: *)
649 (* addByIndex
: NoSuchIndex
*)
650 (* addByKey
: InternalError
*)
651 (* getByIndex
: NoSuchIndex
*)
652 (* getByKey
: InternalError
*)
653 (* getIndex
: InternalError
*)
654 (* getKey
: NoSuchIndex
*)
655 (* hasIndex
: none
*)
656 (* makeDict
: none
*)
657 (* nullDict
: none
*)
658 (* printDict
: none
*)
659 (* usedIndices
: none
*)
660 (*--------------------------------------------------------------------------*)
661 (* A dictionary maps keys to consecutive integers
and additionally holds
*)
662 (* a value
of arbitrary
type for each entry
. *)
663 (*--------------------------------------------------------------------------*)
669 exception NoSuchIndex
671 val nullDict
: string * 'a
-> 'a Dict
672 val makeDict
: string * int * 'a
-> 'a Dict
673 val clearDict
: 'a Dict
* int option
-> unit
675 val hasIndex
: 'a Dict
* Key
-> int option
676 val getIndex
: 'a Dict
* Key
-> int
677 val getKey
: 'a Dict
* int -> Key
679 val getByIndex
: 'a Dict
* int -> 'a
680 val getByKey
: 'a Dict
* Key
-> 'a
682 val setByIndex
: 'a Dict
* int * 'a
-> unit
683 val setByKey
: 'a Dict
* Key
* 'a
-> unit
685 val usedIndices
: 'a Dict
-> int
687 val extractDict
: 'a Dict
-> (Key
* 'a
) array
688 val printDict
: ('a
-> string) -> 'a Dict
-> unit
691 functor Dict (structure Key
: Key
) : Dict
=
693 open UtilError UtilInt
697 exception NoSuchIndex
699 (*--------------------------------------------------------------------*)
700 (* a dictionary can have at most size MAX_WIDTH
. This is because
*)
701 (* arrays may at most have Array
.maxLen elements
. We only use powers
*)
702 (* of two
as sizes
, so we are really only interested
in the position
*)
703 (* of maxLen
's highest bit
. That would be the maximal width for hash
*)
704 (* tables
, and thus we must decrease it by one for obtaining the max
*)
706 (*--------------------------------------------------------------------*)
707 fun highestBit w
= if w
=0w0
then 0 else 1+highestBit(Word.>>(w
,0w1
))
708 val MAX_WIDTH
= highestBit (Word.fromInt Array
.maxLen
)-1
710 type Bucket
= (Key
* int) list
711 val nullBucket
= nil
: Bucket
713 (*--------------------------------------------------------------------*)
714 (* buckets are unsorted
- they are probably small
, so comparing the
*)
715 (* keys might be overkill
. *)
716 (*--------------------------------------------------------------------*)
717 fun addToBucket (ni
as (key
,_
),bucket
) =
720 |
doit (nis
as (ni
' as (key
',_
))::rest
) =
721 case Key
.compare (key
',key
)
722 of LESS
=> ni
'::doit rest
728 fun searchBucket (key
,bucket
) =
731 |
doit ((key
',i
)::rest
) =
732 case Key
.compare (key
',key
)
740 (*--------------------------------------------------------------------*)
741 (* a dictionary consists
of *)
742 (* - a
string desc saying what is stored
in this dictionary
*)
743 (* - an array tab holding for each index its key
and value
*)
744 (* - a hash table
, i
.e
. Bucket array
, of double size than tab
*)
745 (* - a hashFun mapping Key to the range
of the hash table
*)
746 (* - an integer width for computing table sizes
*)
747 (* - an integer size wich is the size
of the value table
*)
748 (* - an integer count holding the next free index
*)
749 (* - a default value for the value table
*)
750 (*--------------------------------------------------------------------*)
751 type 'a Dict
= {desc
: string,
752 tab
: (Key
* 'a
) array ref
,
753 hashTab
: Bucket array ref
,
754 hashFun
: (Key
-> int) ref
,
755 width
: int ref
, (* bit width
*)
756 size
: int ref
, (* tab size
=2^width
, hash size is double
*)
757 count
: int ref
, (* number
of entries
*)
758 def
: 'a (* default for values
*)
760 fun nullDict (desc
,def
) = {desc
= desc
,
761 tab
= ref (Array
.array(1,(Key
.null
,def
))),
762 hashTab
= ref (Array
.array(2,nullBucket
)),
763 hashFun
= ref (fn _
=> 0),
769 (*--------------------------------------------------------------------*)
770 (* how many entries are
in the dictionary?
*)
771 (*--------------------------------------------------------------------*)
772 fun usedIndices ({count
,...}:'a Dict
) = !count
774 (*--------------------------------------------------------------------*)
775 (* what is the table load
, i
.e
. percentage
of number
of entries to
*)
776 (* hash table size
= 100*count
/(2*size
) = 50*count
/size
. *)
777 (*--------------------------------------------------------------------*)
778 fun hashRatio({count
,size
,...}:'a Dict
) = 50 * !count
div !size
781 (*--------------------------------------------------------------------*)
782 (* this is the hash function
. Key
.hash hashes data to arbitrary
*)
783 (* words
, that are mapped to the hash range by this function
, where *)
784 (* mask is the bitmask corresponding to the size
of the hash table
: *)
785 (* 1. square the
word produced by Key
.hash
*)
786 (* 2. take the width bits from the middle
of the square
, these are
*)
787 (* the bit
-places influenced by all input bit
-places
: *)
788 (* - shift to the right by half
of the destination width
*)
789 (* - mask out all bits to the left
of destination
*)
790 (* this is a simple strategy but experiences good results
. *)
791 (*--------------------------------------------------------------------*)
792 fun square (x
:word) = Word.*(x
,x
)
793 fun hashKey(half
,mask
) x
=
794 Word.toInt(Word.andb(mask
,Word.>>(square(Key
.hash x
),half
)))
795 fun makeHashFun(size
,width
) =
797 val mask
= 0w2
*Word.fromInt size
-0w1
798 val half
= Word.fromInt((width
+1) div 2)
803 (*--------------------------------------------------------------------*)
804 (* create a new dictionary for
2^w
, but at least
2 and at most
2^m
*)
805 (* entries
, where m is the value
of MAX_WIDTH
. *)
806 (*--------------------------------------------------------------------*)
807 fun makeDict (desc
,w
,def
) =
809 val width
= Int.min(Int.max(1,w
),MAX_WIDTH
)
810 val size
= Word.toInt(Word.<<(0w1
,Word.fromInt(width
-1)))
812 tab
= ref (Array
.array(size
,(Key
.null
,def
))),
813 hashTab
= ref (Array
.array(2*size
,nullBucket
)),
814 hashFun
= ref (makeHashFun(size
,width
)),
821 (*--------------------------------------------------------------------*)
822 (* clear a dictionary
. If the
2nd arg is SOME w
, use w for resizing
. *)
823 (*--------------------------------------------------------------------*)
824 fun clearDict (dict
:'a Dict
,widthOpt
) =
828 val {tab
=ref tab
,hashTab
=ref hashTab
,size
,count
,def
,...} = dict
829 val _
= appInterval (fn i
=> Array
.update(tab
,i
,(Key
.null
,def
))) (0,!count
-1)
830 val _
= appInterval (fn i
=> Array
.update(hashTab
,i
,nullBucket
)) (0,!size
*2-1)
836 val {tab
,hashTab
,hashFun
,width
,size
,count
,def
,...} = dict
837 val newWidth
= Int.min(Int.max(1,w
),MAX_WIDTH
)
838 val newSize
= Word.toInt(Word.<<(0w1
,Word.fromInt(newWidth
-1)))
839 val _
= tab
:= (Array
.array(newSize
,(Key
.null
,def
)))
840 val _
= hashTab
:= (Array
.array(2*newSize
,nullBucket
))
841 val _
= hashFun
:= (makeHashFun(newSize
,newWidth
))
842 val _
= width
:= newWidth
843 val _
= size
:= newSize
848 (*--------------------------------------------------------------------*)
849 (* grow a dictionary to the double size
. raise InternalError
if the
*)
850 (* dictionary already has maximal size
. *)
851 (*--------------------------------------------------------------------*)
852 fun growDictionary ({desc
,tab
,hashTab
,hashFun
,width
,size
,count
,def
}:'a Dict
) =
855 val _
= if !width
< MAX_WIDTH
then width
:= !width
+1
856 else raise InternalError
857 ("Dict","growDictionary",
858 String.concat
["growing the ",desc
," dictionary ",
859 "exceeded the system maximum size of ",
860 Int.toString Array
.maxLen
," for arrays"])
861 val _
= size
:= !size
*2
862 val _
= tab
:= Array
.array(!size
,(Key
.null
,def
))
863 val _
= hashTab
:= Array
.array(!size
*2,nullBucket
)
864 val _
= hashFun
:= makeHashFun(!size
,!width
)
866 fun addTo (i
,kv
as (key
,_
)) =
868 val idx
= !hashFun key
869 val _
= Array
.update(!hashTab
,idx
,addToBucket((key
,i
),Array
.sub(!hashTab
,idx
)))
870 val _
= Array
.update(!tab
,i
,kv
)
874 Array
.appi
addTo (oldTab
,0,NONE
)
877 (*--------------------------------------------------------------------*)
878 (* lookup the key for an index
of the dictionary
. *)
879 (*--------------------------------------------------------------------*)
880 fun getKey({tab
,count
,...}:'a Dict
,idx
) =
881 if !count
>idx
then #
1(Array
.sub(!tab
,idx
))
882 else raise NoSuchIndex
884 (*--------------------------------------------------------------------*)
885 (* map a Key to its index
in the dictionary
. if it is not
in the
*)
886 (* dictionary yet
, add a new entry
with a new index
. grow the table
*)
887 (* if there is no more free index
in the dictionary
. *)
888 (*--------------------------------------------------------------------*)
889 fun getIndex(dict
as {tab
,hashTab
,hashFun
,size
,count
,def
,...}:'a Dict
,key
) =
892 val bucket
= Array
.sub(!hashTab
,k
)
894 case searchBucket(key
,bucket
)
896 | NONE
=> let val idx
= !count
897 val (k
',buck
') = if !size
>idx
then (k
,bucket
)
898 else let val _
= growDictionary dict
899 val k
' = !hashFun key
900 val buck
' = Array
.sub(!hashTab
,k
')
903 val _
= Array
.update(!hashTab
,k
',addToBucket((key
,idx
),buck
'))
904 val _
= Array
.update(!tab
,idx
,(key
,def
))
905 val _
= count
:= idx
+1
910 (*--------------------------------------------------------------------*)
911 (* does a Key have an entry
in a dictionary?
*)
912 (*--------------------------------------------------------------------*)
913 fun hasIndex({hashTab
,hashFun
,...}:'a Dict
,key
) =
915 val idx
= !hashFun key
916 val bucket
= Array
.sub(!hashTab
,idx
)
918 searchBucket(key
,bucket
)
921 (*--------------------------------------------------------------------*)
922 (* get the value stored for index idx
*)
923 (*--------------------------------------------------------------------*)
924 fun getByIndex({tab
,count
,...}:'a Dict
,idx
) =
925 if !count
>idx
then #
2(Array
.sub(!tab
,idx
))
926 else raise NoSuchIndex
928 (*--------------------------------------------------------------------*)
929 (* get the value stored for a key
*)
930 (*--------------------------------------------------------------------*)
931 fun getByKey(dict
,key
) =
932 getByIndex(dict
,getIndex(dict
,key
))
934 (*--------------------------------------------------------------------*)
935 (* enter a value for index idx
. *)
936 (*--------------------------------------------------------------------*)
937 fun setByIndex({tab
,count
,...}:'a Dict
,idx
,a
) =
938 if !count
>idx
then let val (key
,_
) = Array
.sub(!tab
,idx
)
939 in Array
.update(!tab
,idx
,(key
,a
))
941 else raise NoSuchIndex
943 (*--------------------------------------------------------------------*)
944 (* enter a value for a key
. *)
945 (*--------------------------------------------------------------------*)
946 fun setByKey(dict
,key
,v
) =
947 setByIndex(dict
,getIndex(dict
,key
),v
)
949 (*--------------------------------------------------------------------*)
950 (* extract the contents
of the dictionary to an array
. *)
951 (*--------------------------------------------------------------------*)
952 fun extractDict({count
,tab
,...}:'a Dict
) =
953 Array
.tabulate(!count
,fn i
=> Array
.sub(!tab
,i
))
955 (*--------------------------------------------------------------------*)
956 (* print the contents
of the dictionary
. *)
957 (*--------------------------------------------------------------------*)
958 fun printDict
X2String ({desc
,tab
,count
,...}:'a Dict
) =
959 (print (desc^
" dictionary:\n");
961 (fn (n
,(key
,value
)) =>
962 print (" "^
Int.toString n^
": "^Key
.toString key^
" = "^X2String value^
"\n"))
963 (!tab
,0,SOME (!count
)))
965 (* stop
of ../../Util
/SymDict
/dict
.sml
*)
966 (* start
of ../../Util
/SymDict
/symbolTable
.sml
*)
973 (*--------------------------------------------------------------------------*)
974 (* Functor
: SymbolTable
*)
976 (* Exceptions raised by functions
in this
structure: *)
977 (* getSymIndex
: Key
.InternalError
*)
978 (* getSymKey
: NoSuchSymbol
*)
979 (* hasSymIndex
: none
*)
980 (* makeSymTable
: none
*)
981 (* nullSymTable
: none
*)
982 (* printSymTable
: none
*)
983 (* usedSymbols
: none
*)
984 (*--------------------------------------------------------------------------*)
985 (* A symbol table maps Keys to consecutive integers
. *)
986 (*--------------------------------------------------------------------------*)
992 exception NoSuchSymbol
994 val nullSymTable
: string -> SymTable
995 val makeSymTable
: string * int -> SymTable
996 val clearSymTable
: SymTable
* int option
-> unit
998 val hasSymIndex
: SymTable
* Key
-> int option
999 val getSymIndex
: SymTable
* Key
-> int
1000 val getSymKey
: SymTable
* int -> Key
1001 val usedSymbols
: SymTable
-> int
1003 val assignSymIndex
: SymTable
* Key
* int -> unit
1004 val reserveSymIndex
: SymTable
-> int
1006 val extractSymTable
: SymTable
-> Key vector
1007 val printSymTable
: SymTable
-> unit
1010 functor SymTable (structure Key
: Key
) : SymTable
=
1012 open UtilError UtilInt
1014 exception NoSuchSymbol
1018 (*--------------------------------------------------------------------*)
1019 (* a symbol table can have at most size MAX_WIDTH
. This is because
*)
1020 (* arrays may at most have Array
.maxLen elements
. We only use powers
*)
1021 (* of two
as sizes
, so we are really only interested
in the position
*)
1022 (* of maxLen
's highest bit
. That would be the maximal width for hash
*)
1023 (* tables
, and thus we must decrease it by one for obtaining the max
*)
1025 (*--------------------------------------------------------------------*)
1026 fun highestBit w
= if w
=0w0
then 0 else 1+highestBit(Word.>>(w
,0w1
))
1027 val MAX_WIDTH
= highestBit (Word.fromInt Array
.maxLen
)-1
1029 type Bucket
= (Key
* int) list
1030 val nullBucket
= nil
: Bucket
1032 (*--------------------------------------------------------------------*)
1033 (* buckets are sorted
- though they are probably small
. *)
1034 (*--------------------------------------------------------------------*)
1035 fun addToBucket (ni
as (key
,_
),bucket
) =
1038 |
doit (nis
as (ni
' as (key
',_
))::rest
) =
1039 case Key
.compare (key
',key
)
1040 of LESS
=> ni
'::doit rest
1042 | GREATER
=> ni
::nis
1046 fun searchBucket (key
,bucket
) =
1049 |
doit ((key
',i
)::rest
) =
1050 case Key
.compare (key
',key
)
1051 of LESS
=> doit rest
1058 (*--------------------------------------------------------------------*)
1059 (* a symbol table consists
of *)
1060 (* - an array tab holding for each index its key
*)
1061 (* - a hash table
, i
.e
. Bucket array
, of double size than tab
*)
1062 (* - a hashFun mapping Key to the range
of the hash table
*)
1063 (* - an integer width for computing table sizes
*)
1064 (* - an integer size wich is the size
of the value table
*)
1065 (* - an integer count holding the next free index
*)
1066 (*--------------------------------------------------------------------*)
1067 type SymTable
= {desc
: string,
1068 tab
: Key array ref
,
1069 hash
: Bucket array ref
,
1070 hashFun
: (Key
-> int) ref
,
1071 width
: int ref
, (* bit width
*)
1072 size
: int ref
, (* tab size
=2^width
, hash size is double
*)
1073 count
: int ref (* number
of entries
*)
1076 fun nullSymTable desc
= {desc
= desc
,
1077 tab
= ref (Array
.array(1,Key
.null
)),
1078 hash
= ref (Array
.array(2,nullBucket
)),
1079 hashFun
= ref (fn _
=> 0),
1082 width
= ref
0} : SymTable
1084 (*--------------------------------------------------------------------*)
1085 (* how many entries are
in the symtable?
*)
1086 (*--------------------------------------------------------------------*)
1087 fun usedSymbols ({count
,...}:SymTable
) = !count
1089 (*--------------------------------------------------------------------*)
1090 (* what is the table load
, i
.e
. percentage
of number
of entries to
*)
1091 (* hash table size
= 100*count
/(2*size
) = 50*count
/size
. *)
1092 (*--------------------------------------------------------------------*)
1093 fun hashRatio({count
,size
,...}:SymTable
) = 50 * !count
div !size
1096 (*--------------------------------------------------------------------*)
1097 (* this is the hash function
. Key
.hash hashes data to arbitrary
*)
1098 (* words
, that are mapped to the hash range by this function
, where *)
1099 (* mask is the bitmask corresponding to the size
of the hash table
: *)
1100 (* 1. square the
word produced by Key
.hash
*)
1101 (* 2. take the width bits from the middle
of the square
, these are
*)
1102 (* the bit
-places influenced by all input bit
-places
: *)
1103 (* - shift to the right by half
of the destination width
*)
1104 (* - mask out all bits to the left
of destination
*)
1105 (* this is a simple strategy but experiences good results
. *)
1106 (*--------------------------------------------------------------------*)
1107 fun square (x
:word) = Word.*(x
,x
)
1108 fun hashKey(half
,mask
) x
=
1109 Word.toInt(Word.andb(mask
,Word.>>(square(Key
.hash x
),half
)))
1110 fun makeHashFun(size
,width
) =
1112 val mask
= Word.fromInt(2*size
-1)
1113 val half
= Word.fromInt((width
+1) div 2)
1118 (*--------------------------------------------------------------------*)
1119 (* create a new symtable for
2^w
, but at least
2 and at most
2^m
*)
1120 (* entries
, where m is the value
of MAX_WIDTH
. *)
1121 (*--------------------------------------------------------------------*)
1122 fun makeSymTable (desc
,w
) =
1124 val width
= Int.min(Int.max(1,w
),MAX_WIDTH
)
1125 val size
= Word.toInt(Word.<<(0w1
,Word.fromInt(width
-1)))
1127 tab
= ref (Array
.array(size
,Key
.null
)),
1128 hash
= ref (Array
.array(2*size
,nullBucket
)),
1129 hashFun
= ref (makeHashFun(size
,width
)),
1135 (*--------------------------------------------------------------------*)
1136 (* clear a dictionary
. If the
2nd arg is SOME w
, use w for resizing
. *)
1137 (*--------------------------------------------------------------------*)
1138 fun clearSymTable (symTab
:SymTable
,widthOpt
) =
1142 val {tab
=ref tab
,hash
=ref hash
,size
,count
,...} = symTab
1143 val _
= appInterval (fn i
=> Array
.update(tab
,i
,Key
.null
)) (0,!count
-1)
1144 val _
= appInterval (fn i
=> Array
.update(hash
,i
,nullBucket
)) (0,!size
*2-1)
1150 val {tab
,hash
,hashFun
,width
,size
,count
,...} = symTab
1151 val newWidth
= Int.min(Int.max(1,w
),MAX_WIDTH
)
1152 val newSize
= Word.toInt(Word.<<(0w1
,Word.fromInt(newWidth
-1)))
1153 val _
= tab
:= (Array
.array(newSize
,Key
.null
))
1154 val _
= hash
:= (Array
.array(2*newSize
,nullBucket
))
1155 val _
= hashFun
:= (makeHashFun(newSize
,newWidth
))
1156 val _
= width
:= newWidth
1157 val _
= size
:= newSize
1162 (*--------------------------------------------------------------------*)
1163 (* grow a symtable to the double size
. raise InternalError
if the
*)
1164 (* table already has maximal size
. *)
1165 (*--------------------------------------------------------------------*)
1166 fun growTable ({desc
,tab
,hash
,hashFun
,width
,size
,count
}:SymTable
) =
1168 val newWidth
= if !width
< MAX_WIDTH
then !width
+1
1169 else raise InternalError
1170 ("SymTable","growTable",
1171 String.concat
["growing the ",desc
," symbol table ",
1172 "exceeded the system maximum size of ",
1173 Int.toString Array
.maxLen
," for arrays"])
1174 val newSize
= !size
*2
1177 val newTab
= Array
.array(newSize
,Key
.null
)
1178 val newHash
= Array
.array(2*newSize
,nullBucket
)
1179 val newHashFun
= makeHashFun(newSize
,newWidth
)
1181 fun addToNew (inv
as (i
,key
)) =
1183 val idx
= newHashFun key
1184 val _
= Array
.update(newHash
,idx
,addToBucket((key
,i
),Array
.sub(newHash
,idx
)))
1185 val _
= Array
.update(newTab
,i
,key
)
1188 val _
= Array
.appi
addToNew (!tab
,0,NONE
)
1190 val _
= tab
:= newTab
1191 val _
= hash
:= newHash
1192 val _
= size
:= newSize
1193 val _
= width
:= newWidth
1194 val _
= hashFun
:= newHashFun
1198 (*--------------------------------------------------------------------*)
1199 (* lookup the key for an index
of the symbol table
. *)
1200 (*--------------------------------------------------------------------*)
1201 fun getSymKey({tab
,count
,...}:SymTable
,idx
) =
1202 if !count
>idx
then Array
.sub(!tab
,idx
)
1203 else raise NoSuchSymbol
1205 (*--------------------------------------------------------------------*)
1206 (* map a Key to its index
in the symbol table
. if it is not
in the
*)
1207 (* symbol table yet
, add a new entry
with a new index
. grow the table
*)
1208 (* if there is no more free index
in the table
. *)
1209 (*--------------------------------------------------------------------*)
1210 fun getSymIndex(st
as {tab
,hash
,hashFun
,size
,count
,...}:SymTable
,key
) =
1212 val idx
= !hashFun key
1213 val bucket
= Array
.sub(!hash
,idx
)
1215 case searchBucket(key
,bucket
)
1217 | NONE
=> let val i
= !count
1218 val (idx
',buck
') = if !size
>i
then (idx
,bucket
)
1219 else let val _
= growTable st
1220 val idx
' = !hashFun key
1221 val buck
' = Array
.sub(!hash
,idx
')
1224 val _
= Array
.update(!hash
,idx
',addToBucket((key
,i
),buck
'))
1225 val _
= Array
.update(!tab
,i
,key
)
1226 val _
= count
:= i
+1
1231 (*--------------------------------------------------------------------*)
1232 (* does a Key have an entry
in a symbol table?
*)
1233 (*--------------------------------------------------------------------*)
1234 fun hasSymIndex({hash
,hashFun
,...}:SymTable
,key
) =
1236 val idx
= !hashFun key
1237 val buck
= Array
.sub(!hash
,idx
)
1239 searchBucket(key
,buck
)
1242 (*--------------------------------------------------------------------*)
1243 (* reserve an index for
a (yet unknown
) key
. *)
1244 (*--------------------------------------------------------------------*)
1245 fun reserveSymIndex(st
as {size
,count
=count
as ref i
,...}:SymTable
) =
1247 val _
= if !size
>i
then () else growTable st
1248 val _
= count
:= i
+1
1252 (*--------------------------------------------------------------------*)
1253 (* assign an index to
a (previously reserved
) index
. *)
1254 (*--------------------------------------------------------------------*)
1255 fun assignSymIndex(st
as {count
,hash
,hashFun
,tab
,...}:SymTable
,key
,i
) =
1256 if !count
<=i
then raise NoSuchSymbol
1257 else let val idx
= !hashFun key
1258 val buck
= Array
.sub(!hash
,idx
)
1259 val newBuck
= addToBucket((key
,i
),buck
)
1260 val _
= Array
.update(!hash
,idx
,newBuck
)
1261 val _
= Array
.update(!tab
,i
,key
)
1265 (*--------------------------------------------------------------------*)
1266 (* extract the contents
of a symbol table to a vector
. *)
1267 (*--------------------------------------------------------------------*)
1268 fun extractSymTable({count
,tab
,...}:SymTable
) =
1269 Array
.extract(!tab
,0,SOME(!count
))
1271 (*--------------------------------------------------------------------*)
1272 (* print the contents
of the symbol table
. *)
1273 (*--------------------------------------------------------------------*)
1274 fun printSymTable ({desc
,tab
,count
,...}:SymTable
) =
1275 (print (desc^
" table:\n");
1278 print (" "^
Int.toString n^
": "^Key
.toString key^
"\n"))
1279 (!tab
,0,SOME (!count
)))
1281 (* stop
of ../../Util
/SymDict
/symbolTable
.sml
*)
1282 (* start
of ../../Util
/SymDict
/intListDict
.sml
*)
1289 structure KeyIntList
: Key
=
1294 val hash
= UtilHash
.hashList
Word.fromInt
1295 val compare
= UtilCompare
.compareList
Int.compare
1296 val toString
= UtilString
.List2String
Int.toString
1299 structure IntListDict
= Dict (structure Key
= KeyIntList
)
1300 structure IntListSymTab
= SymTable (structure Key
= KeyIntList
)
1303 (* stop
of ../../Util
/SymDict
/intListDict
.sml
*)
1304 (* start
of ../../Util
/SymDict
/intDict
.sml
*)
1312 structure KeyInt
: Key
=
1317 val hash
= Word.fromInt
1318 val compare
= Int.compare
1319 val toString
= Int.toString
1322 structure IntDict
= Dict (structure Key
= KeyInt
)
1323 structure IntSymTab
= SymTable (structure Key
= KeyInt
)
1326 (* stop
of ../../Util
/SymDict
/intDict
.sml
*)
1327 (* start
of ../../Unicode
/Chars
/uniChar
.sml
*)
1328 (*--------------------------------------------------------------------------*)
1329 (* Structure
: UniChar
*)
1334 (* Exceptions raised by functions
in this
structure: *)
1335 (*--------------------------------------------------------------------------*)
1338 structure Chars
: WORD
1340 type Char = Chars
.word
1341 type Data
= Char list
1342 type Vector = Char vector
1345 val nullVector
: Vector
1347 val hashChar
: Char -> word
1348 val hashData
: Data
-> word
1349 val hashVector
: Vector -> word
1351 val compareChar
: Char * Char -> order
1352 val compareData
: Data
* Data
-> order
1353 val compareVector
: Vector * Vector -> order
1355 val char2Char
: char
-> Char
1356 val Char2char
: Char -> char
1358 val Char2Uni
: Char -> string
1359 val Char2String
: Char -> string
1361 val String2Data
: string -> Data
1362 val Data2String
: Data
-> string
1363 val Latin2String
: Data
-> string
1365 val Data2Vector
: Data
-> Vector
1366 val Vector2Data
: Vector -> Data
1368 val String2Vector
: string -> Vector
1369 val Vector2String
: Vector -> string
1371 val quoteUni
: Char -> string -> string
1372 val quoteChar
: Char -> Char -> string
1373 val quoteData
: Char -> Data
-> string
1374 val quoteVector
: Char -> Vector -> string
1377 structure UniChar
: UniChar
=
1379 val O_VECTOR_PRINTLEN
= 48
1381 structure Chars
= Word
1383 val _
= if Chars
.wordSize
> 21 then ()
1384 else let val str
= ("UniChar: Chars.wordSize is too small.\n"^
1385 "Cannot compile on this system!\n" )
1390 type Char = Chars
.word
1391 type Data
= Char list
1393 type CharInterval
= Char * Char
1394 type CharRange
= CharInterval list
1396 type Vector = Char vector
1398 val nullChar
= 0wx0
:Char
1399 val nullData
= nil
:Data
1400 val nullVector
= Vector.fromList nullData
1402 val hashChar
= Word.fromLargeWord
o Chars
.toLargeWord
1403 val hashData
= UtilHash
.hashList hashChar
1404 val hashVector
= UtilHash
.hashVector hashChar
1406 val compareChar
= Chars
.compare
1407 val compareData
= UtilCompare
.compareList compareChar
1408 val compareVector
= UtilCompare
.compareVector compareChar
1410 val char2Char
= Chars
.fromLargeWord
o Word8.toLargeWord
o Byte
.charToByte
1411 val Char2char
= Byte
.byteToChar
o Word8.fromLargeWord
o Chars
.toLargeWord
1414 "U+"^UtilString
.toUpperString(StringCvt.padLeft #
"0" 4 (Chars
.toString c
))
1419 | _
=> if c
<0wx100
then String.implode
[Char2char c
]
1422 fun String2Data s
= map
char2Char (String.explode s
)
1423 fun Data2String cs
= String.concat (map Char2String cs
)
1424 fun Latin2String cs
= String.implode (map Char2char cs
)
1426 val Data2Vector
= Vector.fromList
1427 fun String2Vector s
= Vector.tabulate(String.size s
,fn i
=> char2Char(String.sub(s
,i
)))
1429 fun Vector2Data vec
= Vector.foldr (op ::) nil vec
1430 fun Vector2String vec
=
1432 val maxlen
= O_VECTOR_PRINTLEN
1433 val len
= Vector.length vec
1435 if len
<=maxlen
orelse maxlen
=0
1436 then Data2String (Vector2Data vec
)
1438 val cs1
= Vector.foldri
1439 (fn (_
,c
,cs
) => c
::cs
) nil (vec
,0,SOME (maxlen
div 2))
1440 val cs2
= Vector.foldri
1441 (fn (_
,c
,cs
) => c
::cs
) nil (vec
,len
-3-maxlen
div 2,NONE
)
1442 in Data2String cs1^
"..."^Data2String cs2
1446 fun quoteUni q s
= let val sQ
= Char2String q
in sQ^s^sQ
end
1447 fun quoteChar q c
= if c
=0wx0
then "entity end" else quoteUni
q (Char2String c
)
1448 fun quoteData q cs
= quoteUni
q (Data2String cs
)
1449 fun quoteVector q v
= quoteUni
q (Vector2String v
)
1453 (* stop
of ../../Unicode
/Chars
/uniChar
.sml
*)
1454 (* start
of ../../Unicode
/Chars
/charVecDict
.sml
*)
1455 structure KeyVector
: Key
=
1457 type Key
= UniChar
.Vector
1459 val null
= UniChar
.nullVector
1460 val compare
= UniChar
.compareVector
1461 val toString
= UniChar
.Vector2String
1462 val hash
= UniChar
.hashVector
1465 structure VectorDict
= Dict (structure Key
= KeyVector
)
1466 (* stop
of ../../Unicode
/Chars
/charVecDict
.sml
*)
1467 (* start
of ../../Util
/SymDict
/stringDict
.sml
*)
1476 structure KeyString
: Key
=
1481 val hash
= UtilHash
.hashString
1482 val compare
= String.compare
1484 fun toString str
= str
1487 structure StringDict
= Dict (structure Key
= KeyString
)
1488 (* stop
of ../../Util
/SymDict
/stringDict
.sml
*)
1489 (* start
of ../../Unicode
/encoding
.sml
*)
1492 signature Encoding
=
1495 NOENC | ASCII | EBCDIC | LATIN1
1496 | UCS4B | UCS4L | UCS4SB | UCS4SL
1497 | UCS2B | UCS2L | UTF16B | UTF16L
1502 val UTF16
: Encoding
1504 val encodingName
: Encoding
-> string
1505 val isEncoding
: string -> Encoding
1506 val switchEncoding
: Encoding
* Encoding
-> Encoding
1509 structure Encoding
: Encoding
=
1514 NOENC | ASCII | EBCDIC | LATIN1
1515 | UCS4B | UCS4L | UCS4SB | UCS4SL
1516 | UCS2B | UCS2L | UTF16B | UTF16L
1523 fun encodingName enc
=
1527 | EBCDIC
=> "EBCDIC"
1528 | LATIN1
=> "ISO-8859-1"
1536 | UTF16B
=> "UTF-16"
1537 | UTF16L
=> "UTF-16"
1539 val encDict
= makeDict("encoding",6,NOENC
)
1541 [(ASCII
,["ANSI_X3.4-1968","ANSI_X3.4-1986","ASCII","US-ASCII","US",
1542 "ISO646-US","ISO-IR-6","ISO_646.IRV:1991","IBM367","CP367"]),
1543 (EBCDIC
,["EBCDIC"]),
1544 (LATIN1
,["ISO_8859-1:1987","ISO-8859-1","ISO_8859-1",
1545 "ISO-IR-100","CP819","IBM819","L1","LATIN1"]),
1546 (UCS2
,["UCS-2","ISO-10646-UCS-2"]),
1547 (UCS4
,["UCS-4","ISO-10646-UCS-4"]),
1551 val _
= app (fn (x
,ys
) => app (fn y
=> setByKey(encDict
,y
,x
)) ys
) encAliases
1552 fun isEncoding name
= getByKey(encDict
,name
)
1554 fun compatAscii new
=
1560 fun compatUcs4 (old
,new
) =
1561 if new
=UCS4
then old
else NOENC
1563 fun switchEncoding(old
,new
) =
1566 | ASCII
=> compatAscii new
1567 | EBCDIC
=> if new
=EBCDIC
then new
else NOENC
1568 | LATIN1
=> compatAscii new
1569 | UCS4B
=> compatUcs4(old
,new
)
1570 | UCS4L
=> compatUcs4(old
,new
)
1571 | UCS4SB
=> compatUcs4(old
,new
)
1572 | UCS4SL
=> compatUcs4(old
,new
)
1573 | UTF16B
=> if new
=UTF16
then old
else if new
=UCS2
then UCS2B
else NOENC
1574 | UTF16L
=> if new
=UTF16
then old
else if new
=UCS2
then UCS2L
else NOENC
1575 | UCS2B
=> if new
=UCS2
then old
else if new
=UTF16
then UTF16B
else NOENC
1576 | UCS2L
=> if new
=UCS2
then old
else if new
=UTF16
then UTF16L
else NOENC
1577 | UTF8
=> compatAscii new
1579 (* stop
of ../../Unicode
/encoding
.sml
*)
1580 (* start
of ../../Unicode
/Encode
/encodeBasic
.sml
*)
1587 (*--------------------------------------------------------------------------*)
1588 (* Structure
: EncodeBasic
*)
1590 (* Exceptions raised by functions
in this
structure: *)
1591 (* closeFile
: none
*)
1592 (* fileName
: none
*)
1593 (* openFile
: NoSuchFile
*)
1594 (* writeByte
: Io
*)
1595 (*--------------------------------------------------------------------------*)
1596 signature EncodeBasic
=
1600 val stdOutFile
: File
1601 val closeFile
: File
-> unit
1602 val fileName
: File
-> string
1603 val openFile
: string -> File
1604 val writeByte
: File
* Word8.word -> File
1607 structure EncodeBasic
: EncodeBasic
=
1611 type outstream
= TextIO.outstream
1612 val closeOut
= TextIO.closeOut
1613 val openOut
= TextIO.openOut
1614 val output1
= TextIO.output1
1615 val stdOut
= TextIO.stdOut
1617 type File
= string * outstream
1619 val stdOutFile
= ("-",stdOut
)
1621 fun closeFile(fname
,s
) = if fname
="-" then () else closeOut s
1622 fun fileName(fname
,_
) = if fname
="-" then "<stdout>" else fname
1623 fun openFile fname
=
1624 if fname
= "-" then (fname
,stdOut
)
1625 else (fname
,openOut fname
)
1626 handle IO
.Io
{name
,cause
,...} => raise NoSuchFile(name
,exnMessage cause
)
1627 fun writeByte (f
as (_
,s
),b
) = f
before output1(s
,chr(Word8.toInt b
))
1629 (* stop
of ../../Unicode
/Encode
/encodeBasic
.sml
*)
1630 (* start
of ../../Unicode
/Encode
/encodeError
.sml
*)
1638 signature EncodeError
=
1640 datatype EncodeError
=
1641 ERR_ILLEGAL_CHAR
of UniChar
.Char * string
1643 val encodeMessage
: EncodeError
-> string list
1645 exception EncodeError
of EncodeBasic
.File
* EncodeError
1648 structure EncodeError
: EncodeError
=
1654 datatype EncodeError
=
1655 ERR_ILLEGAL_CHAR
of UniChar
.Char * string
1657 fun encodeMessage err
=
1659 of ERR_ILLEGAL_CHAR(c
,what
) => [Char2Uni c
,"is not",prependAnA what
,"character"]
1661 exception EncodeError
of EncodeBasic
.File
* EncodeError
1663 (* stop
of ../../Unicode
/Encode
/encodeError
.sml
*)
1664 (* start
of ../../Unicode
/Encode
/encodeMisc
.sml
*)
1666 require
"basis.__word";
1667 require
"basis.__word8";
1668 require
"basis.__word8_vector";
1671 require
"encodeBasic";
1672 require
"encodeError";
1674 signature EncodeMisc
=
1676 val writeCharAscii
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1677 val writeCharEbcdic
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1678 val writeCharLatin1
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1679 val writeCharUcs4B
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1680 val writeCharUcs4L
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1681 val writeCharUcs4SB
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1682 val writeCharUcs4SL
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1683 val writeCharUtf8
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1684 val writeCharUtf16B
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1685 val writeCharUtf16L
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1686 val writeCharUcs2B
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1687 val writeCharUcs2L
: UniChar
.Char * EncodeBasic
.File
-> EncodeBasic
.File
1689 val validCharAscii
: UniChar
.Char -> bool
1690 val validCharEbcdic
: UniChar
.Char -> bool
1691 val validCharLatin1
: UniChar
.Char -> bool
1694 structure EncodeMisc
: EncodeMisc
=
1696 open UniChar EncodeBasic EncodeError
1702 val op && = Chars
.andb
1703 val op >> = Chars
.>>
1704 val op ||
= Word8.orb
1706 fun splitSurrogates (c
: Char) =
1707 (((c
-0wx10000
) >> 0w10
)+0wxD800
,c
&& 0wx3FF
+ 0wxDC00
)
1709 fun Char2Byte c
= Word8.fromLargeWord(Chars
.toLargeWord c
)
1711 (*---------------------------------------------------------------------*)
1713 (*---------------------------------------------------------------------*)
1714 fun validCharAscii (c
: Char) = c
<0wx80
1715 fun writeCharAscii(c
,f
) =
1716 if c
<0wx80
then writeByte(f
,Char2Byte c
)
1717 else raise EncodeError(f
,ERR_ILLEGAL_CHAR(c
,"ASCII"))
1719 (*---------------------------------------------------------------------*)
1721 (*---------------------------------------------------------------------*)
1722 val latin2ebcdicTab
= Word8Vector
.fromList
1723 [0wx00
,0wx01
,0wx02
,0wx03
,0wx37
,0wx2D
,0wx2E
,0wx2F
,
1724 0wx16
,0wx05
,0wx25
,0wx0B
,0wx0C
,0wx0D
,0wx0E
,0wx0F
,
1725 0wx10
,0wx11
,0wx12
,0wx13
,0wx3C
,0wx3D
,0wx32
,0wx26
,
1726 0wx18
,0wx19
,0wx3F
,0wx27
,0wx1C
,0wx1D
,0wx1E
,0wx1F
,
1727 0wx40
,0wx4F
,0wx7F
,0wx7B
,0wx5B
,0wx6C
,0wx50
,0wx7D
,
1728 0wx4D
,0wx5D
,0wx5C
,0wx4E
,0wx6B
,0wx60
,0wx4B
,0wx61
,
1729 0wxF0
,0wxF1
,0wxF2
,0wxF3
,0wxF4
,0wxF5
,0wxF6
,0wxF7
,
1730 0wxF8
,0wxF9
,0wx7A
,0wx5E
,0wx4C
,0wx7E
,0wx6E
,0wx6F
,
1731 0wx7C
,0wxC1
,0wxC2
,0wxC3
,0wxC4
,0wxC5
,0wxC6
,0wxC7
,
1732 0wxC8
,0wxC9
,0wxD1
,0wxD2
,0wxD3
,0wxD4
,0wxD5
,0wxD6
,
1733 0wxD7
,0wxD8
,0wxD9
,0wxE2
,0wxE3
,0wxE4
,0wxE5
,0wxE6
,
1734 0wxE7
,0wxE8
,0wxE9
,0wx4A
,0wxE0
,0wx5A
,0wx5F
,0wx6D
,
1735 0wx79
,0wx81
,0wx82
,0wx83
,0wx84
,0wx85
,0wx86
,0wx87
,
1736 0wx88
,0wx89
,0wx91
,0wx92
,0wx93
,0wx94
,0wx95
,0wx96
,
1737 0wx97
,0wx98
,0wx99
,0wxA2
,0wxA3
,0wxA4
,0wxA5
,0wxA6
,
1738 0wxA7
,0wxA8
,0wxA9
,0wxC0
,0wx6A
,0wxD0
,0wxA1
,0wx07
,
1739 0wx20
,0wx21
,0wx22
,0wx23
,0wx24
,0wx15
,0wx06
,0wx17
,
1740 0wx28
,0wx29
,0wx2A
,0wx2B
,0wx2C
,0wx09
,0wx0A
,0wx1B
,
1741 0wx30
,0wx31
,0wx1A
,0wx33
,0wx34
,0wx35
,0wx36
,0wx08
,
1742 0wx38
,0wx39
,0wx3A
,0wx3B
,0wx04
,0wx14
,0wx3E
,0wxE1
,
1743 0wx41
,0wx42
,0wx43
,0wx44
,0wx45
,0wx46
,0wx47
,0wx48
,
1744 0wx49
,0wx51
,0wx52
,0wx53
,0wx54
,0wx55
,0wx56
,0wx57
,
1745 0wx58
,0wx59
,0wx62
,0wx63
,0wx64
,0wx65
,0wx66
,0wx67
,
1746 0wx68
,0wx69
,0wx70
,0wx71
,0wx72
,0wx73
,0wx74
,0wx75
,
1747 0wx76
,0wx77
,0wx78
,0wx80
,0wx8A
,0wx8B
,0wx8C
,0wx8D
,
1748 0wx8E
,0wx8F
,0wx90
,0wx9A
,0wx9B
,0wx9C
,0wx9D
,0wx9E
,
1749 0wx9F
,0wxA0
,0wxAA
,0wxAB
,0wxAC
,0wxAD
,0wxAE
,0wxAF
,
1750 0wxB0
,0wxB1
,0wxB2
,0wxB3
,0wxB4
,0wxB5
,0wxB6
,0wxB7
,
1751 0wxB8
,0wxB9
,0wxBA
,0wxBB
,0wxBC
,0wxBD
,0wxBE
,0wxBF
,
1752 0wxCA
,0wxCB
,0wxCC
,0wxCD
,0wxCE
,0wxCF
,0wxDA
,0wxDB
,
1753 0wxDC
,0wxDD
,0wxDE
,0wxDF
,0wxEA
,0wxEB
,0wxEC
,0wxED
,
1754 0wxEE
,0wxEF
,0wxFA
,0wxFB
,0wxFC
,0wxFD
,0wxFE
,0wxFF
1756 fun validCharEbcdic (c
: Char) = c
<0wx100
1757 fun writeCharEbcdic(c
,f
) =
1758 if c
<0wx100
then writeByte(f
,Word8Vector
.sub(latin2ebcdicTab
,Chars
.toInt c
))
1759 else raise EncodeError(f
,ERR_ILLEGAL_CHAR(c
,"EBCDIC"))
1761 (*---------------------------------------------------------------------*)
1763 (*---------------------------------------------------------------------*)
1764 fun validCharLatin1 (c
: Char) = c
<0wx100
1765 fun writeCharLatin1(c
,f
) =
1766 if c
<0wx100
then writeByte(f
,Char2Byte c
)
1767 else raise EncodeError(f
,ERR_ILLEGAL_CHAR(c
,"LATIN-1"))
1769 (*---------------------------------------------------------------------*)
1771 (*---------------------------------------------------------------------*)
1772 fun ucs4Bytes c
= (Char2Byte(c
>> 0w24
),
1773 Char2Byte(c
>> 0w16
),
1774 Char2Byte(c
>> 0w8
),
1776 fun writeCharUcs4 perm
=
1777 fn (c
,f
) => let val bytes
= ucs4Bytes c
1778 val (b1
,b2
,b3
,b4
) = perm bytes
1779 val f1
= writeByte(f
,b1
)
1780 val f2
= writeByte(f1
,b2
)
1781 val f3
= writeByte(f2
,b3
)
1782 val f4
= writeByte(f3
,b4
)
1786 fun permUcs4L (b1
,b2
,b3
,b4
) = (b4
,b3
,b2
,b1
)
1787 fun permUcs4SB (b1
,b2
,b3
,b4
) = (b2
,b1
,b4
,b3
)
1788 fun permUcs4SL (b1
,b2
,b3
,b4
) = (b3
,b4
,b1
,b2
)
1790 val writeCharUcs4B
= writeCharUcs4 permUcs4B
1791 val writeCharUcs4L
= writeCharUcs4 permUcs4L
1792 val writeCharUcs4SB
= writeCharUcs4 permUcs4SB
1793 val writeCharUcs4SL
= writeCharUcs4 permUcs4SL
1795 (*---------------------------------------------------------------------*)
1797 (*---------------------------------------------------------------------*)
1798 fun writeCharUtf8(c
,f
) =
1799 if c
<0wx80
then writeByte(f
,Char2Byte c
)
1801 then let val f1
= writeByte(f
,0wxC0 ||
Char2Byte(c
>> 0w6
))
1802 val f2
= writeByte(f1
,0wx80 ||
Char2Byte(c
&& 0wx3F
))
1806 then let val f1
= writeByte(f
, 0wxE0 ||
Char2Byte(c
>> 0w12
))
1807 val f2
= writeByte(f1
,0wx80 ||
Char2Byte((c
>> 0w6
) && 0wx3F
))
1808 val f3
= writeByte(f2
,0wx80 ||
Char2Byte(c
&& 0wx3F
))
1812 then let val f1
= writeByte(f
, 0wxF0 ||
Char2Byte(c
>> 0w18
))
1813 val f2
= writeByte(f1
,0wx80 ||
Char2Byte((c
>> 0w12
) && 0wx3F
))
1814 val f3
= writeByte(f2
,0wx80 ||
Char2Byte((c
>> 0w6
) && 0wx3F
))
1815 val f4
= writeByte(f3
,0wx80 ||
Char2Byte(c
&& 0wx3F
))
1818 else if c
<0wx4000000
1819 then let val f1
= writeByte(f
, 0wxF8 ||
Char2Byte(c
>> 0w24
))
1820 val f2
= writeByte(f1
,0wx80 ||
Char2Byte((c
>> 0w18
) && 0wx3F
))
1821 val f3
= writeByte(f2
,0wx80 ||
Char2Byte((c
>> 0w12
) && 0wx3F
))
1822 val f4
= writeByte(f3
,0wx80 ||
Char2Byte((c
>> 0w6
) && 0wx3F
))
1823 val f5
= writeByte(f4
,0wx80 ||
Char2Byte(c
&& 0wx3F
))
1826 else let val f1
= writeByte(f
, 0wxFC ||
Char2Byte(c
>> 0w30
))
1827 val f2
= writeByte(f1
,0wx80 ||
Char2Byte((c
>> 0w24
) && 0wx3F
))
1828 val f3
= writeByte(f2
,0wx80 ||
Char2Byte((c
>> 0w18
) && 0wx3F
))
1829 val f4
= writeByte(f3
,0wx80 ||
Char2Byte((c
>> 0w12
) && 0wx3F
))
1830 val f5
= writeByte(f4
,0wx80 ||
Char2Byte((c
>> 0w6
) && 0wx3F
))
1831 val f6
= writeByte(f5
,0wx80 ||
Char2Byte(c
&& 0wx3F
))
1835 (*---------------------------------------------------------------------*)
1837 (*---------------------------------------------------------------------*)
1838 fun oneUtf16
isL (c
,f
) =
1839 let val (b1
,b2
) = (Char2Byte(c
>> 0w8
),Char2Byte c
)
1840 in if isL
then writeByte(writeByte(f
,b2
),b1
)
1841 else writeByte(writeByte(f
,b1
),b2
)
1843 fun writeCharUtf16 isL
=
1845 if c
<0wx10000
then oneUtf16
isL (c
,f
)
1846 else let val (hi
,lo
) = splitSurrogates c
1847 val f1
= oneUtf16
isL (hi
,f
)
1848 val f2
= oneUtf16
isL (lo
,f1
)
1851 val writeCharUtf16B
= writeCharUtf16
false
1852 val writeCharUtf16L
= writeCharUtf16
true
1854 (*---------------------------------------------------------------------*)
1856 (*---------------------------------------------------------------------*)
1857 fun writeCharUcs2 isL
=
1860 then let val (b1
,b2
) = (Char2Byte(c
>> 0w8
),Char2Byte c
)
1861 in if isL
then writeByte(writeByte(f
,b2
),b1
)
1862 else writeByte(writeByte(f
,b1
),b2
)
1864 else raise EncodeError(f
,ERR_ILLEGAL_CHAR(c
,"UCS-2"))
1866 val writeCharUcs2B
= writeCharUcs2
false
1867 val writeCharUcs2L
= writeCharUcs2
true
1870 (* stop
of ../../Unicode
/Encode
/encodeMisc
.sml
*)
1871 (* start
of ../../Unicode
/Encode
/encode
.sml
*)
1889 val encNoFile
: EncFile
1890 val encStdOut
: EncFile
1891 val encOpenFile
: string * Encoding
.Encoding
* string -> EncFile
1892 val encCloseFile
: EncFile
-> unit
1893 val encAdapt
: EncFile
* File
-> EncFile
1895 val encPutChar
: EncFile
* UniChar
.Char -> EncFile
1896 val encValidChar
: EncFile
* UniChar
.Char -> bool
1899 structure Encode
: Encode
=
1903 EncodeBasic EncodeError EncodeMisc
1905 type EncFile
= Encoding
* File
1907 val encNoFile
= (NOENC
,stdOutFile
)
1908 val encStdOut
= (LATIN1
,stdOutFile
)
1910 fun encAdapt((enc
,_
),f
) = (enc
,f
)
1912 fun encValidChar((enc
,_
),c
) =
1914 of ASCII
=> validCharAscii c
1915 | EBCDIC
=> validCharEbcdic c
1916 | LATIN1
=> validCharLatin1 c
1919 fun encPutChar((enc
,f
),c
) =
1923 | ASCII
=> (writeCharAscii(c
,f
))
1924 | EBCDIC
=> (writeCharEbcdic(c
,f
))
1925 | LATIN1
=> (writeCharLatin1(c
,f
))
1926 | UCS2B
=> (writeCharUcs2B(c
,f
))
1927 | UCS2L
=> (writeCharUcs2L(c
,f
))
1928 | UCS4B
=> (writeCharUcs4B(c
,f
))
1929 | UCS4L
=> (writeCharUcs4L(c
,f
))
1930 | UCS4SB
=> (writeCharUcs4SB(c
,f
))
1931 | UCS4SL
=> (writeCharUcs4SL(c
,f
))
1932 | UTF8
=> (writeCharUtf8(c
,f
))
1933 | UTF16B
=> (writeCharUtf16B(c
,f
))
1934 | UTF16L
=> (writeCharUtf16L(c
,f
))
1938 fun encCloseFile(_
,f
) = closeFile f
1940 fun encOpenFile (fname
,enc
,name
) =
1945 (case isEncoding name
1946 of NOENC
=> raise NoSuchFile(fname
,"Unsupported encoding \""^name^
"\"")
1949 val f
= openFile fname
1950 val f1
= case outEnc
1951 of UTF16B
=> writeByte(writeByte(f
,0wxFE
),0wxFF
)
1952 | UTF16L
=> writeByte(writeByte(f
,0wxFF
),0wxFE
)
1958 (* stop
of ../../Unicode
/Encode
/encode
.sml
*)
1959 (* start
of nullHard
.sml
*)
1982 structure NullHard
=
1984 fun parseNull uri
= NullParse
.parseDocument uri NONE NullHooks
.nullStart
1987 NullCatOptions NullOptions Options NullParserOptions Uri
1989 val usage
= List.concat
[parserUsage
,[("","")],catalogUsage
,[("","")],nullUsage
]
1991 exception Exit
of OS
.Process
.status
1993 fun null(prog
,args
) =
1996 val hadError
= ref
false
1999 let val _
= TextIO.output(TextIO.stdErr
,msg^
".\n")
2003 let val _
= TextIO.output(TextIO.stdErr
,msg^
".\n")
2004 in raise Exit OS
.Process
.failure
2007 let val _
= printUsage
TextIO.stdOut prog usage
2008 in raise Exit OS
.Process
.success
2010 fun exitVersion prog
=
2011 let val _
= app print
[prog
," version ",Version
.FXP_VERSION
,"\n"]
2012 in raise Exit OS
.Process
.success
2015 fun summOpt prog
= "For a summary of options type "^prog^
" --help"
2016 fun noFile(f
,cause
) = "can't open file '"^f^
"': "^exnMessage cause
2018 val opts
= parseOptions args
2019 val _
= setParserDefaults()
2020 val opts1
= setParserOptions (opts
,optError
)
2021 val _
= setCatalogDefaults()
2022 val opts2
= setCatalogOptions (opts1
,optError
)
2023 val _
= setNullDefaults()
2024 val (vers
,help
,err
,file
) = setNullOptions (opts2
,optError
)
2025 val _
= if !hadError
then exitError (summOpt prog
) else ()
2026 val _
= if vers
then exitVersion prog
else ()
2027 val _
= if help
then exitHelp prog
else ()
2029 of SOME
"-" => O_ERROR_DEVICE
:= TextIO.stdErr
2030 | SOME f
=> (O_ERROR_DEVICE
:= TextIO.openOut f
2031 handle IO
.Io
{cause
,...} => exitError(noFile(f
,cause
)))
2033 val f
= valOf file
handle Option
=> "-"
2034 val uri
= if f
="-" then NONE
else SOME(String2Uri f
)
2035 val status
= parseNull uri
2036 val _
= if isSome err
then TextIO.closeOut (!O_ERROR_DEVICE
) else ()
2039 handle Exit status
=> status
2041 let val _
= TextIO.output
2042 (TextIO.stdErr
,prog^
": Unexpected exception: "^exnMessage exn^
".\n")
2043 in OS
.Process
.failure
2047 structure NullHard
= struct end
2048 (* stop
of nullHard
.sml
*)
2049 (* start
of ../../Util
/options
.sml
*)
2053 OPT_LONG
of string * string option
2054 | OPT_SHORT
of char list
2055 | OPT_NEG
of char list
2057 | OPT_STRING
of string
2058 val parseOptions
: string list
-> Option list
2060 datatype UsageItem
=
2063 | U_ITEM
of string list
* string
2064 type Usage
= UsageItem list
2065 val printUsage
: TextIO.outstream
-> string -> Usage
-> unit
2068 structure Options
: Options
=
2070 exception BadOption
of string
2073 OPT_LONG
of string * string option
2074 | OPT_SHORT
of char list
2075 | OPT_NEG
of char list
2077 | OPT_STRING
of string
2079 datatype UsageItem
=
2082 | U_ITEM
of string list
* string
2083 type Usage
= UsageItem list
2085 fun parseOptions ss
=
2088 if String.isPrefix
"--" opt
2089 then let val opt1
= Substring
.extract(opt
,2,NONE
)
2090 val (key0
,opt2
) = Substring
.splitl (fn c
=> #
"="<>c
) opt1
2091 val key
= if Substring
.isEmpty key0
then raise BadOption opt
2092 else Substring
.string key0
2093 val valOpt
= if Substring
.isPrefix
"=" opt2
2094 then let val val0
= Substring
.triml
1 opt2
2095 in if Substring
.isEmpty val0
2096 then raise BadOption opt
2097 else SOME(Substring
.string val0
)
2100 in OPT_LONG(key
,valOpt
)
2102 handle BadOption s
=> if opt
="--" then OPT_NOOPT
else OPT_STRING opt
2103 else if String.isPrefix
"-" opt
2104 then let val chars
= tl(String.explode opt
)
2105 (* val _
= app (fn c
=> if Char.isAlphaNum c
then ()
2106 else raise BadOption opt
) chars
*)
2108 of nil
=> OPT_STRING opt
2109 | #
"n"::(cs
as _
::_
) => OPT_NEG cs
2110 | _
=> OPT_SHORT chars
2112 handle BadOption s
=> OPT_STRING opt
2116 |
doAll (s
::ss
) = let val opt
= doOne s
2118 of OPT_NOOPT
=> opt
::map OPT_STRING ss
2119 | _
=> opt
::doAll ss
2124 fun printUsage stream prog usage
=
2128 val EMPTY_KEY
= UtilString
.nBlanks KEY_WIDTH
2129 fun appendKeys col nil
= if col
>KEY_WIDTH
then "\n"^EMPTY_KEY
2130 else UtilString
.nBlanks (KEY_WIDTH
-col
)
2131 | appendKeys col
[key
] = key^
" "^
appendKeys (col
+1+String.size key
) nil
2132 | appendKeys
col (key
::keys
) = let val col1
= col
+2+String.size key
2133 in if col1
>KEY_WIDTH
2134 then key^
",\n"^appendKeys
0 keys
2135 else key^
", "^appendKeys col1 keys
2137 fun makeKey keylist
= appendKeys
0 keylist
2138 val makeText
= UtilString
.breakLines(LINE_WIDTH
-KEY_WIDTH
)
2139 fun format (keylist
,text
) =
2140 let val key
= makeKey keylist
2141 in case makeText text
2143 | line
::lines
=> key^line
::map (fn line
=> EMPTY_KEY^line
) lines
2145 val _
= app (fn x
=> TextIO.output(stream
,x
))
2146 ["Usage: ",prog
," [option ...] file\n","where option is one of:\n\n"]
2147 val _
= app (fn item
=> app (fn x
=> TextIO.output(stream
,x^
"\n"))
2150 | U_TITLE txt
=> ["",txt
]
2151 | U_ITEM option
=> format option
)) usage
2155 (* stop
of ../../Util
/options
.sml
*)
2156 (* start
of ../../config
.sml
*)
2159 (*---------------------------------------------------------------------*)
2160 (* The OS command for retrieving a URI from the internet
and storing
*)
2161 (* it
in a
local file
, where *)
2162 (* %1 is replaced by the URI
. *)
2163 (* %2 is replaced by the
local filename
. *)
2164 (* It is recommended that the command exits
with failure
in case the
*)
2165 (* URI cannot be retrieved
. If the command generates a HTML error
*)
2166 (* message
instead (like
, e
.g
., lynx
), this HTML file is considered
*)
2167 (* to be XML
and will probably cause a mess
of parsing errors
. If you
*)
2168 (* don
't need URI retrieval
, use
"exit 1" which always fails
. *)
2169 (* Sensible values are
, e
.g
.: *)
2170 (* val retrieveCommand
= "wget -qO %2 %1" *)
2171 (* val retrieveCommand
= "got_it -o %2 %1" *)
2172 (* val retrieveCommand
= "urlget -s -o %2 %1" *)
2173 (*---------------------------------------------------------------------*)
2174 val retrieveCommand
= "wget -qO %2 %1"
2176 (* stop
of ../../config
.sml
*)
2177 (* start
of ../../Unicode
/Chars
/charClasses
.sml
*)
2178 (*--------------------------------------------------------------------------*)
2179 (* Structure
: CharClasses
*)
2182 (* This implementation uses the UNSAFE array operations
, and does NO
*)
2183 (* range checks
. This is for efficiency reasons
. *)
2184 (* If class
=makeCharClass(lo
,hi
) then a filed
of size hi
-lo
+1 is allo
- *)
2185 (* cated
. In order to lookup a character
, first make sure it
in [lo
..hi
], *)
2186 (* then subtract lo
before calling inCharClass
! *)
2187 (* The same holds for addChar
. *)
2193 (* Exceptions raised by functions
in this
structure: *)
2194 (* addChar
: none
*)
2195 (* addCharClass
: none
*)
2196 (* inCharClass
: none
*)
2197 (* makeCharClass
: none
*)
2198 (*--------------------------------------------------------------------------*)
2199 signature CharClasses
=
2203 type CharInterval
= UniChar
.Char * UniChar
.Char
2204 type CharRange
= CharInterval list
2206 val initialize
: CharInterval
-> MutableClass
2207 val finalize
: MutableClass
-> CharClass
2209 val addChar
: MutableClass
* UniChar
.Char * UniChar
.Char * UniChar
.Char -> unit
2210 val addCharRange
: MutableClass
* UniChar
.Char * UniChar
.Char * CharRange
-> CharRange
2212 val inCharClass
: UniChar
.Char * CharClass
-> bool
2215 structure CharClasses
: CharClasses
=
2219 type CharInterval
= Char * Char
2220 type CharRange
= CharInterval list
2222 val Char2Word
= Word.fromLargeWord
o Chars
.toLargeWord
2224 (*--------------------------------------------------------------------*)
2226 (*--------------------------------------------------------------------*)
2231 val op >> = Chars
.>>
2233 val op ||
= Chars
.orb
2234 val op && = Chars
.andb
2235 val op >>> = Word32
.>>
2236 val op <<< = Word32
.<<
2237 val op &&& = Word32
.andb
2238 val op |||
= Word32
.orb
2239 val op & = Word.andb
2241 val max32
= Word32
.notb
0wx0
2243 (*--------------------------------------------------------------------*)
2244 (* a char class is an array
of words
, interpreted
as bitvectors
. *)
2245 (*--------------------------------------------------------------------*)
2246 type MutableClass
= Word32
.word array
2247 type CharClass
= Word32
.word vector
2249 (*--------------------------------------------------------------------*)
2250 (* each
word in a char class holds
32 entries
. Thus the for a char c
*)
2251 (* is c
div 32 == c
>> 5. The bitmask is a
word of zeros
, only the
*)
2252 (* significant bit for c
, i
.e
. the (c
&& 31==0x1F)th bit set to one
. *)
2253 (*--------------------------------------------------------------------*)
2254 fun indexMask c
= let val idx
= Chars
.toInt(c
>>0w5
)
2255 val mask
= 0wx1
<<< Char2Word c
& 0w31
2259 (*--------------------------------------------------------------------*)
2260 (* generate index
and mask
, then lookup
. *)
2261 (*--------------------------------------------------------------------*)
2262 fun inCharClass(c
,vec
) = let val (idx
,mask
) = indexMask c
2263 in mask
&&& Vector.sub(vec
,idx
) <> 0wx0
2266 (*--------------------------------------------------------------------*)
2267 (* generate a CharClass large enough to
hold (max
-min
+1) characters
. *)
2268 (*--------------------------------------------------------------------*)
2269 fun initialize(min
,max
) =
2270 Array
.array((Chars
.toInt max
-Chars
.toInt min
+1) div 32+1,0wx0
):MutableClass
2271 fun finalize arr
= Array
.extract(arr
,0,NONE
)
2273 (*--------------------------------------------------------------------*)
2274 (* add a single character to a CharClass
. *)
2275 (*--------------------------------------------------------------------*)
2276 fun addChar(cls
,min
,max
,c
) =
2278 val (idx
,new
) = indexMask c
2279 val old
= Array
.sub(cls
,idx
)
2281 Array
.update(cls
,idx
,old|||new
)
2284 (*--------------------------------------------------------------------*)
2285 (* add a full range
of characters to a CharClass
. *)
2286 (* this is the only function that computes the offset
before access
*)
2288 (*--------------------------------------------------------------------*)
2289 fun addCharRange(cls
,min
,max
,range
) =
2293 val (l
,h
) = (lo
-min
,hi
-min
)
2294 val (idxL
,idxH
) = ((Chars
.toInt l
) div 32,(Chars
.toInt h
) div 32)
2295 val (bitL
,bitH
) = (Char2Word l
& 0w31
,Char2Word h
& 0w31
)
2299 val new
= (max32
>>>(0w31
-bitH
+bitL
))<<<bitL
2300 val old
= Array
.sub(cls
,idxL
)
2301 val _
= Array
.update(cls
,idxL
,old|||new
)
2304 else if idxL
<idxH
then
2306 val newL
= max32
<<<bitL
2307 val newH
= max32
>>>(0w31
-bitH
)
2308 val oldL
= Array
.sub(cls
,idxL
)
2309 val oldH
= Array
.sub(cls
,idxH
)
2310 val _
= Array
.update(cls
,idxL
,oldL|||newL
)
2311 val _
= Array
.update(cls
,idxH
,oldH|||newH
)
2312 val _
= UtilInt
.appInterval (fn i
=> Array
.update(cls
,i
,max32
))
2319 |
doAll ((lh
as (lo
,hi
))::lhs
) =
2320 if hi
<lo
then doAll lhs
2321 else if hi
<min
then doAll lhs
2322 else if lo
>max
then lh
::doAll lhs
2323 else if lo
<min
andalso hi
<=max
2324 then (doOne(min
,hi
); doAll lhs
)
2325 else if lo
>=min
andalso hi
<=max
2326 then (doOne lh
; doAll lhs
)
2327 else if lo
>=min
andalso hi
>max
2328 then (doOne(lo
,max
); (max
+0w1
,hi
)::lhs
)
2329 else (doOne(min
,max
); (max
+0w1
,hi
)::lhs
)
2336 (* stop
of ../../Unicode
/Chars
/charClasses
.sml
*)
2337 (* start
of ../../Unicode
/Chars
/uniRanges
.sml
*)
2342 structure UniRanges
=
2344 val digitRange
= [(0wx0030
,0wx0039
),
2359 ] : CharClasses
.CharRange
2361 val digitRange09
= [(0wx0030
,0wx0039
),
2367 ] : CharClasses
.CharRange
2369 val digitRange6F
= [(0wx0966
,0wx096F
),
2378 ] : CharClasses
.CharRange
2380 val baseRange
= [(0wx0041
,0wx005A
),
2582 ] : CharClasses
.CharRange
2584 val ideoRange
= [(0wx3007
,0wx3007
),
2587 ] : CharClasses
.CharRange
2589 val combRange
= [(0wx0300
,0wx0345
),
2684 ] : CharClasses
.CharRange
2686 val extRange
= [(0wx00B7
,0wx00B7
),
2697 ] : CharClasses
.CharRange
2699 val nmsRange
= List.concat
2700 [[(0wx3A
,0wx3A
),(0wx5F
,0wx5F
)](* :_
*),
2704 val nameRange
= List.concat
2705 [[(0wx2D
,0wx2D
),(0wx2E
,0wx2E
)](* -. *),
2711 val pubidRange
= List.concat
2712 [map (fn c
=> (c
,c
)) [0wx0A
,0wx0D
,0wx20
], (* space
,cr
,lf
*)
2713 map (fn c
=> (c
,c
)) (UniChar
.String2Data
"-'()+,./:=?;!*#@$_%"),
2714 [(0wx0030
,0wx0039
),(0wx0041
,0wx005A
),(0wx0061
,0wx007A
)] (* [0-9A
-Za
-z
] *)
2715 ] : CharClasses
.CharRange
2718 [(0wx002D
,0wx002E
), (* -. *)
2719 (0wx0030
,0wx0039
), (* 0-9 *)
2720 (0wx0041
,0wx005A
), (* A
-Z
*)
2721 (0wx005F
,0wx005F
), (* _
*)
2722 (0wx0061
,0wx007A
) (* a
-z
*)
2723 ] : CharClasses
.CharRange
2725 (* stop
of ../../Unicode
/Chars
/uniRanges
.sml
*)
2726 (* start
of ../../Unicode
/Chars
/uniClasses
.sml
*)
2731 (*--------------------------------------------------------------------------*)
2732 (* Structure
: UniClasses
*)
2735 (* read CharClasses
in order to understand how CharClasses are handled
. *)
2741 (* Exceptions raised by functions
in this
structure: *)
2742 (* decValue
: none
*)
2743 (* hexValue
: none
*)
2744 (* isAsciiLetter
: none
*)
2749 (* isPubid
: none
*)
2752 (* isUnicode
: none
*)
2754 (*--------------------------------------------------------------------------*)
2755 signature UniClasses
=
2757 val isName
: UniChar
.Char -> bool
2758 val isNms
: UniChar
.Char -> bool
2759 val isPubid
: UniChar
.Char -> bool
2760 val isS
: UniChar
.Char -> bool
2761 val isEnc
: UniChar
.Char -> bool
2762 val isEncS
: UniChar
.Char -> bool
2763 val isVers
: UniChar
.Char -> bool
2764 val isDec
: UniChar
.Char -> bool
2765 val isHex
: UniChar
.Char -> bool
2766 val isXml
: UniChar
.Char -> bool
2767 val isUnicode
: UniChar
.Char -> bool
2769 val decValue
: UniChar
.Char -> UniChar
.Char option
2770 val hexValue
: UniChar
.Char -> UniChar
.Char option
2772 val isAsciiLetter
: UniChar
.Char -> bool
2775 structure UniClasses
: UniClasses
=
2777 open UniChar CharClasses UniRanges
2779 (*--------------------------------------------------------------------*)
2780 (* initialize the character classes
. *)
2781 (*--------------------------------------------------------------------*)
2783 val nmsTemp
= initialize(0wx0000
,0wx3FFF
)
2784 val restNms
= addCharRange(nmsTemp
,0wx0000
,0wx3FFF
,nmsRange
)
2785 val _
= if restNms
=[(0wxAC00
,0wxD7A3
),(0wx4E00
,0wx9FA5
)] then ()
2786 else print ("Warning: extra characters after computing nms char class.\n")
2788 val nameTemp
= initialize(0wx0000
,0wxFFFF
)
2789 val restName
= addCharRange(nameTemp
,0wx0000
,0wx3FFF
,nameRange
)
2790 val _
= if restName
=[(0wxAC00
,0wxD7A3
),(0wx4E00
,0wx9FA5
)] then ()
2791 else print ("Warning: extra characters after computing name char class.\n")
2793 val pubTemp
= initialize(0wx0000
,0wx007F
)
2794 val restPubid
= addCharRange(pubTemp
,0wx0000
,0wx007F
,pubidRange
)
2795 val _
= if restPubid
=nil
then ()
2796 else print ("Warning: extra characters after computing pubid char class.\n")
2798 val encTemp
= initialize(0wx0000
,0wx007F
)
2799 val restEnc
= addCharRange(encTemp
,0wx0000
,0wx007F
,encRange
)
2800 val _
= if restEnc
=nil
then ()
2801 else print ("Warning: extra characters after computing enc char class.\n")
2803 val nmsClass
= finalize nmsTemp
2804 val nameClass
= finalize nameTemp
2805 val pubClass
= finalize pubTemp
2806 val encClass
= finalize encTemp
2809 (*--------------------------------------------------------------------*)
2810 (* is a character a name start char?
*)
2811 (*--------------------------------------------------------------------*)
2812 fun isNms c
= if c
<0wx4000
then inCharClass(c
,nmsClass
)
2814 c
>=0wx4E00
andalso c
<=0wx9FA5
orelse
2815 c
>=0wxAC00
andalso c
<=0wxD7A3
2817 (*--------------------------------------------------------------------*)
2818 (* is a character a name char?
*)
2819 (*--------------------------------------------------------------------*)
2820 fun isName c
= if c
<0wx4000
then inCharClass(c
,nameClass
)
2822 c
>=0wx4E00
andalso c
<=0wx9FA5
orelse
2823 c
>=0wxAC00
andalso c
<=0wxD7A3
2825 (*--------------------------------------------------------------------*)
2826 (* is a character a pubid char?
*)
2827 (*--------------------------------------------------------------------*)
2828 fun isPubid c
= c
<0wx80
andalso inCharClass(c
,pubClass
)
2830 (*--------------------------------------------------------------------*)
2831 (* is a character valid
in an encoding name
, at its start
, or
in a
*)
2832 (* version number?
*)
2833 (*--------------------------------------------------------------------*)
2835 c
<0wx80
andalso inCharClass(c
,encClass
)
2836 fun isEncS (c
:UniChar
.Char) =
2837 c
>=0wx41
andalso c
<=0wx5A
orelse
2838 c
>=0wx61
andalso c
<=0wx7A
2840 isEnc c
orelse c
=0wx3A (* #
":" *)
2842 (*--------------------------------------------------------------------*)
2843 (* these are the valid Unicode
characters (including surrogates
). *)
2844 (*--------------------------------------------------------------------*)
2845 fun isUnicode (c
:UniChar
.Char) = c
<=0wx10FFFF
2847 (*--------------------------------------------------------------------*)
2848 (* XML characters
if not checked for Unicode char
in advance
. *)
2849 (*--------------------------------------------------------------------*)
2850 fun isXml (c
:UniChar
.Char) =
2851 c
>=0wx0020
andalso c
<=0wxD7FF
orelse
2852 c
>=0wxE000
andalso c
<=0wxFFFD
orelse
2853 c
>=0wx10000
andalso c
<=0wx10FFFF
orelse
2854 c
=0wx9
orelse c
=0wxA
orelse c
=0wxD
2856 (*--------------------------------------------------------------------*)
2857 (* the frontend supresses
0wxD (carriage return
), but its is still
*)
2858 (* present when encoding is recognized
. *)
2859 (*--------------------------------------------------------------------*)
2860 fun isS (c
:UniChar
.Char) =
2868 (*--------------------------------------------------------------------*)
2869 (* is this character an ascii decimal
/hexadecimal digit?
*)
2870 (*--------------------------------------------------------------------*)
2871 fun isDec (c
:UniChar
.Char) =
2872 c
>=0wx30
andalso c
<=0wx39
2873 fun isHex (c
:UniChar
.Char) =
2874 c
>=0wx30
andalso c
<=0wx39
orelse
2875 c
>=0wx41
andalso c
<=0wx46
orelse
2876 c
>=0wx61
andalso c
<=0wx66
2878 (*--------------------------------------------------------------------*)
2879 (* calculate the decimal
/hexadecimal value
of an
ascii (hex
-)digit
. *)
2880 (*--------------------------------------------------------------------*)
2881 fun decValue (c
:UniChar
.Char) =
2883 in if v
<=0wx9
then SOME v
else NONE
2885 fun hexValue (c
:UniChar
.Char) =
2887 in if v
<=0wx9
then SOME v
2888 else (if c
>=0wx41
andalso c
<=0wx46
then SOME(c
-0wx37
)
2889 else if c
>=0wx61
andalso c
<=0wx66
then SOME(c
-0wx57
)
2893 (*--------------------------------------------------------------------*)
2894 (* is c
in [a
-z
]+[A
-Z
]?
*)
2895 (*--------------------------------------------------------------------*)
2896 fun isAsciiLetter (c
:UniChar
.Char) =
2897 c
>=0wx41
andalso c
<=0wx5A
orelse
2898 c
>=0wx61
andalso c
<=0wx7A
2900 (* stop
of ../../Unicode
/Chars
/uniClasses
.sml
*)
2901 (* start
of ../../Unicode
/Uri
/uriDecode
.sml
*)
2902 signature UriDecode
=
2904 val decodeUriLatin
: string -> string
2905 val decodeUriUtf8
: string -> string
2908 structure UriDecode
: UriDecode
=
2910 open UniChar UtilInt
2916 val op && = Word8.andb
2917 val op << = Word8.<<
2918 val op <<< = Chars
.<<
2919 val op ||
= Word8.orb
2920 val op |||
= Chars
.orb
2922 val Byte2Char
= Chars
.fromLargeWord
o Word8.toLargeWord
2925 if #
"0"<=c
andalso #
"9">=c
then SOME (Byte
.charToByte c
-0wx30
)
2926 else if #
"A"<=c
andalso #
"F">=c
then SOME (Byte
.charToByte c
-0wx37
)
2927 else if #
"a"<=c
andalso #
"f">=c
then SOME (Byte
.charToByte c
-0wx57
)
2930 exception Failed
of char list
2934 of c1
::c2
::cs1
=> (case (hexValue c1
,hexValue c2
)
2935 of (SOME b1
,SOME b2
) => ((b1
<< 0w4 || b2
),cs1
)
2936 | _
=> raise Failed cs1
)
2937 | _
=> raise Failed nil
2939 (*--------------------------------------------------------------------*)
2941 (*--------------------------------------------------------------------*)
2942 val byte1switch
= Array
.array(256,1) (* 1 byte
*)
2943 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,0)) (0x80,0xBF) (* Error
*)
2944 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,2)) (0xC0,0xDF) (* 2 bytes
*)
2945 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,3)) (0xE0,0xEF) (* 3 bytes
*)
2946 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,4)) (0xF0,0xF7) (* 4 bytes
*)
2947 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,5)) (0xF8,0xFB) (* 5 bytes
*)
2948 val _
= appInterval (fn i
=> Array
.update(byte1switch
,i
,6)) (0xFC,0xFD) (* 6 bytes
*)
2950 val diff2
= 0wx00003080
2951 val diff3
= diff2
<<< 0wx6 |||
0wx00020080
2952 val diff4
= diff3
<<< 0wx6 |||
0wx00400080
2953 val diff5
= diff4
<<< 0wx6 |||
0wx08000080
2954 val diff6
= diff5
<<< 0wx6 |||
0wx00000080
2955 val diffsByLen
= Vector.fromList
[0w0
,0w0
,diff2
,diff3
,diff4
,diff5
,diff6
]
2959 of #
"%"::cs1
=> getQuads cs1
2960 | c
::cs1
=> (Byte
.charToByte c
,cs1
)
2961 | nil
=> raise Failed nil
2963 fun getBytes(b
,cs
,n
) =
2966 if n
<m
then raise Failed cs
2967 else let val (_
,cs1
) = getByte cs
2972 else let val (b
,cs1
) = getByte cs
handle Failed cs
=> do_err(cs
,m
+1)
2973 val w1
= if b
&& 0wxC0
= 0wx80
then w
<<< 0w6
+ Byte2Char b
2974 else do_err(cs1
,m
+1)
2975 in doit (w1
,cs1
,m
+1)
2977 val (w
,cs1
) = doit (Byte2Char b
,cs
,2)
2978 val diff
= Vector.sub(diffsByLen
,n
)
2981 if c
<0wx100
then (Char2char c
,cs1
)
2982 else raise Failed cs1
2985 fun getCharUtf8 cs
=
2986 let val (b
,cs1
) = getQuads cs
2987 in case Array
.sub(byte1switch
,Word8.toInt b
)
2988 of 0 (* error
*) => raise Failed cs1
2989 |
1 (* 1 byte
*) => (Byte
.byteToChar b
,cs1
)
2990 |
n (* n bytes
*) => getBytes(b
,cs1
,n
)
2993 fun decodeUriUtf8 str
=
2995 val cs
= String.explode str
2997 fun doit yet nil
= yet
2998 | doit
yet (c
::cs
) =
2999 if #
"%"<>c
then doit (c
::yet
) cs
3000 else let val (yet1
,cs1
) = let val (ch
,cs1
) = getCharUtf8 cs
3003 handle Failed cs
=> (yet
,cs
)
3007 String.implode(rev(doit nil cs
))
3010 (*--------------------------------------------------------------------*)
3012 (*--------------------------------------------------------------------*)
3015 of #
"%"::cs1
=> let val (b
,cs2
) = getQuads cs1
3016 in (Byte
.byteToChar b
,cs2
)
3019 | nil
=> raise Failed nil
3021 fun decodeUriLatin str
=
3023 val cs
= String.explode str
3025 fun doit yet nil
= yet
3026 | doit
yet (c
::cs
) =
3027 let val (yet1
,cs1
) = let val (ch
,cs1
) = getChar cs
3030 handle Failed cs
=> (yet
,cs
)
3034 String.implode(rev(doit nil cs
))
3037 (* stop
of ../../Unicode
/Uri
/uriDecode
.sml
*)
3038 (* start
of ../../Unicode
/Uri
/uriEncode
.sml
*)
3039 signature UriEncode
=
3041 val Data2UriUtf8
: UniChar
.Data
-> string
3042 val Data2UriLatin
: UniChar
.Data
-> string
3044 val Vector2UriUtf8
: UniChar
.Vector -> string
3045 val Vector2UriLatin
: UniChar
.Vector -> string
3047 val String2UriUtf8
: string -> string
3048 val String2UriLatin
: string -> string
3051 structure UriEncode
: UriEncode
=
3054 open UniChar UniClasses
3060 val op && = Word8.andb
3061 val op &&& = Chars
.andb
3062 val op >> = Word8.>>
3063 val op >>> = Chars
.>>
3064 val op ||
= Word8.orb
3066 val Char2Byte
= Word8.fromLargeWord
o Chars
.toLargeWord
3068 fun encodeCharUtf8 c
=
3069 if c
<0wx80
then [Char2Byte c
]
3071 then [0wxC0 ||
Char2Byte(c
>>> 0w6
),
3072 0wx80 ||
Char2Byte(c
&&& 0wx3F
)]
3074 then [0wxE0 ||
Char2Byte(c
>>> 0w12
),
3075 0wx80 ||
Char2Byte((c
>>> 0w6
) &&& 0wx3F
),
3076 0wx80 ||
Char2Byte(c
&&& 0wx3F
)]
3078 then [0wxF0 ||
Char2Byte(c
>>> 0w18
),
3079 0wx80 ||
Char2Byte((c
>>> 0w12
) &&& 0wx3F
),
3080 0wx80 ||
Char2Byte((c
>>> 0w6
) &&& 0wx3F
),
3081 0wx80 ||
Char2Byte(c
&&& 0wx3F
)]
3082 else if c
<0wx4000000
3083 then [0wxF8 ||
Char2Byte(c
>>> 0w24
),
3084 0wx80 ||
Char2Byte((c
>>> 0w18
) &&& 0wx3F
),
3085 0wx80 ||
Char2Byte((c
>>> 0w12
) &&& 0wx3F
),
3086 0wx80 ||
Char2Byte((c
>>> 0w6
) &&& 0wx3F
),
3087 0wx80 ||
Char2Byte(c
&&& 0wx3F
)]
3088 else [0wxFC ||
Char2Byte(c
>>> 0w30
),
3089 0wx80 ||
Char2Byte((c
>>> 0w24
) &&& 0wx3F
),
3090 0wx80 ||
Char2Byte((c
>>> 0w18
) &&& 0wx3F
),
3091 0wx80 ||
Char2Byte((c
>>> 0w12
) &&& 0wx3F
),
3092 0wx80 ||
Char2Byte((c
>>> 0w6
) &&& 0wx3F
),
3093 0wx80 ||
Char2Byte(c
&&& 0wx3F
)]
3096 let fun Quad2C b
= if b
<0wxA
then Byte
.byteToChar(b
+0wx30
) else Byte
.byteToChar(b
+0wx37
)
3097 in (Quad2C(b
>> 0w4
),Quad2C(b
&& 0wx0F
))
3100 fun precedesHex (i
,cv
) =
3101 if Vector.length cv
<= i
+2 then false
3102 else let val (c1
,c2
) = (Vector.sub(cv
,i
+1),Vector.sub(cv
,i
+2))
3103 in isHex c1
andalso isHex c2
3106 fun Vector2UriUtf8 cv
=
3107 let val revd
= Vector.foldli
3108 (fn (i
,c
,s
) => if c
<0wx80
andalso (c
<>0wx25
orelse precedesHex(i
,cv
))
3110 else foldl (fn (b
,s
) => let val (c1
,c2
) = Byte2Cc b
3113 s (encodeCharUtf8 c
))
3115 in String.implode (rev revd
)
3118 fun Vector2UriLatin cv
=
3119 let val revd
= Vector.foldli
3120 (fn (i
,c
,s
) => if c
<0wx80
andalso (c
<>0wx25
orelse precedesHex(i
,cv
))
3122 else (if c
>= 0w100
then s
3123 else let val (c1
,c2
) = Byte2Cc (Char2Byte c
)
3127 in String.implode (rev revd
)
3130 val Data2UriUtf8
= Vector2UriUtf8
o Data2Vector
3131 val Data2UriLatin
= Vector2UriLatin
o Data2Vector
3133 val String2UriUtf8
= Vector2UriUtf8
o String2Vector
3134 val String2UriLatin
= Vector2UriLatin
o String2Vector
3137 (* stop
of ../../Unicode
/Uri
/uriEncode
.sml
*)
3138 (* start
of ../../Unicode
/Uri
/uri
.sml
*)
3140 require
"basis.__array";
3141 require
"basis.__byte";
3142 require
"basis.__string";
3143 require
"basis.__vector";
3144 require
"basis.__word";
3145 require
"basis.__word8";
3147 require
"util.unsafe";
3148 require
"util.utilInt";
3159 val hashUri
: Uri
-> word
3160 val compareUri
: Uri
* Uri
-> order
3162 val uriJoin
: Uri
* Uri
-> Uri
3163 val uriSuffix
: Uri
-> string
3165 val Data2Uri
: UniChar
.Data
-> Uri
3166 val Vector2Uri
: UniChar
.Vector -> Uri
3167 val String2Uri
: string -> Uri
3168 val Uri2String
: Uri
-> string
3170 val retrieveUri
: Uri
-> string * string * bool
3173 structure Uri
:> Uri
=
3175 open UniChar UniClasses UriDecode UriEncode UtilError UtilInt
3177 (*--------------------------------------------------------------------*)
3179 (*--------------------------------------------------------------------*)
3184 val Vector2Uri
= Vector2UriUtf8
3185 val Data2Uri
= Data2UriUtf8
3186 val String2Uri
= String2UriUtf8
3187 val Uri2String
= decodeUriUtf8
3192 let fun search i
= if i
<0 then NONE
else case String.sub(s
,i
)
3196 in case search (String.size s
-1)
3198 | SOME i
=> String.extract(s
,i
+1,NONE
)
3202 Char.isAlphaNum c
orelse #
"+"=c
orelse #
"-"=c
orelse #
"."=c
3204 fun uriAbsolute uri
=
3206 if i
>=String.size uri
then false
3207 else let val c
=String.sub(uri
,i
)
3208 in if #
":"=c
then true else if isScheme c
then search (i
+1)
3212 if uri
="" then false
3213 else if Char.isAlpha (String.sub(uri
,0)) then search
1
3216 fun uriRelative uri
= not (uriAbsolute uri
)
3219 if String.isPrefix
"file:" uri
3220 then SOME(String.extract(uri
,5,NONE
))
3221 else if uriRelative uri
then SOME uri
3226 fun search (i
,hadSlash
) =
3227 if i
<0 then if hadSlash
then SOME
0 else NONE
3228 else case String.sub(s
,i
)
3229 of #
"/" => if hadSlash
then NONE
else search(i
-1,true)
3230 | _
=> if hadSlash
then SOME(i
+1) else search(i
-1,false)
3231 val len
= String.size s
3232 val posOpt
= search(len
-1,false)
3235 | SOME i
=> if i
=0 then slash
3236 else String.extract(s
,0,SOME(i
+1))
3241 fun searchScheme i
=
3242 if i
>=String.size uri
then NONE
3243 else let val c
=String.sub(uri
,i
)
3244 in if #
":"=c
then SOME i
else if isScheme c
then searchScheme (i
+1)
3248 if i
>=String.size uri
then NONE
3249 else let val c
=String.sub(uri
,i
)
3250 in if #
"/"=c
then SOME i
else searchSlash (i
+1)
3254 else if not (Char.isAlpha(String.sub(uri
,0))) then ""
3255 else case searchScheme
1
3258 if String.size uri
<=i
+2 then String.extract(uri
,0,SOME(i
+1))
3259 else if #
"/"=String.sub(uri
,i
+1) andalso #
"/"=String.sub(uri
,i
+2)
3260 then case searchSlash (i
+3)
3262 | SOME j
=> String.extract(uri
,0,SOME j
)
3263 else String.extract(uri
,0,SOME(i
+1))
3268 fun searchScheme i
=
3269 if i
>=String.size uri
then NONE
3270 else let val c
=String.sub(uri
,i
)
3271 in if #
":"=c
then SOME i
else if isScheme c
then searchScheme (i
+1)
3276 else if not (Char.isAlpha(String.sub(uri
,0))) then ""
3277 else case searchScheme
1
3279 | SOME i
=> String.extract(uri
,0,SOME(i
+1))
3282 fun uriJoin(abs
,rel
) =
3283 if rel
="" then uriPath abs
3284 else if abs
="" then rel
3285 else if String.isPrefix
"//" rel
then uriScheme abs^rel
3286 else if #
"/"=String.sub(rel
,0) then uriAuth abs^rel
3287 else if uriAbsolute rel
then rel
3288 else uriPath abs^rel
3290 val compareUri
= String.compare
3291 val hashUri
= UtilHash
.hashString
3293 fun convertCommand
str (src
,dst
) =
3295 val s
= Substring
.all str
3297 if Substring
.isEmpty s
then ss
3298 else let val (sl
,sr
) = Substring
.splitr (fn c
=> #
"%"<>c
) s
3299 in if Substring
.isEmpty sl
then sr
::ss
3300 else let val sl
' = Substring
.trimr
1 sl
3301 in case Substring
.first sr
3302 of SOME #
"1" => let val sr
' = Substring
.triml
1 sr
3303 in doit (Substring
.all src
::sr
'::ss
) sl
'
3305 | SOME #
"2" => let val sr
' = Substring
.triml
1 sr
3306 in doit (Substring
.all dst
::sr
'::ss
) sl
'
3308 | _
=> doit (Substring
.all
"%"::sr
::ss
) sl
'
3312 val s
= Substring
.concat ss
3316 fun retrieveRemote uri
=
3318 val tmp
= OS
.FileSys
.tmpName()
3319 val cmd
= convertCommand Config
.retrieveCommand (uri
,tmp
)
3320 val status
= OS
.Process
.system cmd
3321 val _
= if status
= OS
.Process
.success
then ()
3322 else let val _
= (OS
.FileSys
.remove tmp
3323 handle OS
.SysErr _
=> ())
3324 val cmd
= convertCommand
3325 Config
.retrieveCommand ("<uri>",tmp
)
3326 in raise NoSuchFile (uri
,"command '"^cmd^
"' failed")
3328 in (Uri2String uri
,tmp
,true)
3331 fun retrieveUri uri
=
3333 of SOME f
=> (Uri2String uri
,Uri2String f
,false)
3334 | NONE
=> retrieveRemote uri
3336 (* stop
of ../../Unicode
/Uri
/uri
.sml
*)
3337 (* start
of ../../Parser
/version
.sml
*)
3340 val FXP_VERSION
= "1.4.4"
3342 (* stop
of ../../Parser
/version
.sml
*)
3343 (* start
of ../../Util
/utilList
.sml
*)
3347 (*--------------------------------------------------------------------------*)
3348 (* Structure
: UtilList
*)
3352 (* Exceptions raised by functions
in this
structure: *)
3354 (* findAndDelete
: none
*)
3355 (*--------------------------------------------------------------------------*)
3356 signature UtilList
=
3358 val split
: ('a
-> bool) -> 'a list
-> 'a list list
3359 val member
: ''a
-> ''a list
-> bool
3360 val mapAllPairs
: ('a
* 'b
-> 'c
) -> 'a list
* 'b list
-> 'c list
3361 val findAndMap
: ('a
-> 'b option
) -> 'a list
-> 'b option
3362 val findAndDelete
: ('a
-> bool) -> 'a list
-> ('a option
* 'a list
)
3364 val sort
: ('a
* 'a
-> order
) -> 'a list
-> 'a list
3365 val merge
: ('a
* 'a
-> order
) -> 'a list
* 'a list
-> 'a list
3366 val diff
: ('a
* 'a
-> order
) -> 'a list
* 'a list
-> 'a list
3367 val cap
: ('a
* 'a
-> order
) -> 'a list
* 'a list
-> 'a list
3368 val sub
: ('a
* 'a
-> order
) -> 'a list
* 'a list
-> bool
3369 val insert
: ('a
* 'a
-> order
) -> 'a
* 'a list
-> 'a list
3370 val delete
: ('a
* 'a
-> order
) -> 'a
* 'a list
-> 'a list
3371 val elem
: ('a
* 'a
-> order
) -> 'a
* 'a list
-> bool
3374 structure UtilList
: UtilList
=
3376 (*--------------------------------------------------------------------*)
3377 (* split a list into a list
of lists at each element fullfilling p
. *)
3378 (*--------------------------------------------------------------------*)
3380 let val (one
,ls
) = foldr
3381 (fn (a
,(curr
,ls
)) => if p a
then (nil
,curr
::ls
) else (a
::curr
,ls
))
3386 (*--------------------------------------------------------------------*)
3387 (* is x a member
of l?
*)
3388 (*--------------------------------------------------------------------*)
3389 fun member x l
= List.exists (fn y
=> x
=y
) l
3391 (*--------------------------------------------------------------------*)
3392 (* for
[a1
,...,an
] and [b1
,...,bk
], generate
*)
3393 (* [f(a1
,b1
),f(a1
,b2
),...,f(an
,bk
-1),f(an
,bk
)]. *)
3394 (*--------------------------------------------------------------------*)
3395 fun mapAllPairs
f (ass
,bs
) =
3397 (fn (a
,cs
) => foldr (fn (b
,cs
) => f(a
,b
)::cs
) cs bs
)
3400 (*--------------------------------------------------------------------*)
3401 (* find the first element x
of l such that f x
= SOME y
, and return
*)
3402 (* f x
. If there is no such x
, return NONE
. *)
3403 (*--------------------------------------------------------------------*)
3404 fun findAndMap _ nil
= NONE
3405 | findAndMap
f (x
::xs
) = case f x
of NONE
=> findAndMap f xs | y
=> y
3407 (*--------------------------------------------------------------------*)
3408 (* find the first element x
of l such that f x
= true, delete it from
*)
3409 (* l
, and return SOME x
with the modified list
. If there is no such x
*)
3410 (* return (NONE
,l
). *)
3411 (*--------------------------------------------------------------------*)
3412 fun findAndDelete _ nil
= (NONE
,nil
)
3413 | findAndDelete
f (x
::xs
) =
3414 if f x
then (SOME x
,xs
)
3415 else let val (y
,ys
) = findAndDelete f xs
in (y
,x
::ys
) end
3417 (*--------------------------------------------------------------------*)
3418 (* given a function that compares elements
, merge two sorted lists
. *)
3419 (*--------------------------------------------------------------------*)
3420 fun merge
comp (l1
,l2
) =
3424 |
go (l1
as (x1
::r1
),l2
as (x2
::r2
)) =
3426 of LESS
=> x1
::go(r1
,l2
)
3427 | EQUAL
=> go(l1
,r2
)
3428 | GREATER
=> x2
::go(l1
,r2
)
3432 (*--------------------------------------------------------------------*)
3433 (* given a comparing function
, compute the intersection
of two
*)
3434 (* ordered lists
. *)
3435 (*--------------------------------------------------------------------*)
3436 fun cap
comp (l1
,l2
) =
3438 fun go (nil
,l
) = nil
3440 |
go (l1
as (x1
::r1
),l2
as (x2
::r2
)) =
3442 of LESS
=> go(r1
,l2
)
3443 | EQUAL
=> x1
::go(r1
,r2
)
3444 | GREATER
=> go(l1
,r2
)
3448 (*--------------------------------------------------------------------*)
3449 (* given a comparing function
, compute the difference
of two
*)
3450 (* ordered lists
. *)
3451 (*--------------------------------------------------------------------*)
3452 fun diff
comp (l1
,l2
) =
3454 fun go (nil
,l
) = nil
3456 |
go (l1
as (x1
::r1
),l2
as (x2
::r2
)) =
3458 of LESS
=> x1
::go(r1
,l2
)
3459 | EQUAL
=> go(r1
,r2
)
3460 | GREATER
=> go(l1
,r2
)
3464 (*--------------------------------------------------------------------*)
3465 (* given a comparing function
, find out whether an ordered list is
*)
3466 (* contained
in an other ordered list
. *)
3467 (*--------------------------------------------------------------------*)
3468 fun sub
comp (l1
,l2
) =
3470 fun go (nil
,l
) = true
3471 |
go (l
,nil
) = false
3472 |
go (l1
as (x1
::r1
),l2
as (x2
::r2
)) =
3475 | EQUAL
=> go(r1
,r2
)
3476 | GREATER
=> go(l1
,r2
)
3480 (*--------------------------------------------------------------------*)
3481 (* given a function that compares elements
, insert an element into an
*)
3483 (*--------------------------------------------------------------------*)
3484 fun insert
comp (x
,l
) =
3491 | GREATER
=> y
::go ys
3495 (*--------------------------------------------------------------------*)
3496 (* given a function that compares elements
, delete an element from
*)
3497 (* an ordered list
. *)
3498 (*--------------------------------------------------------------------*)
3499 fun delete
comp (x
,l
) =
3506 | GREATER
=> y
::go ys
3510 (*--------------------------------------------------------------------*)
3511 (* given a function that compares elements
, insert an element into an
*)
3513 (*--------------------------------------------------------------------*)
3514 fun elem
comp (x
,l
) =
3525 (*--------------------------------------------------------------------*)
3526 (* merge
-sort a list
of elements comparable
with the function
in the
*)
3527 (* 1st argument
. Preserve duplicate elements
. *)
3528 (*--------------------------------------------------------------------*)
3529 fun sort _ nil
= nil
3531 let fun mergeOne (x
::y
::l
) = merge
comp (x
,y
)::mergeOne l
3533 fun mergeAll
[l
] = l
3534 | mergeAll ls
= mergeAll (mergeOne ls
)
3535 val singles
= map (fn x
=> [x
]) l
3542 (* stop
of ../../Util
/utilList
.sml
*)
3543 (* start
of ../../Parser
/Dfa
/dfaOptions
.sml
*)
3544 signature DfaOptions
=
3546 val O_DFA_INITIAL_WIDTH
: int ref
3547 val O_DFA_MAX_STATES
: int ref
3548 val O_DFA_WARN_TOO_LARGE
: bool ref
3550 val setDfaDefaults
: unit
-> unit
3551 val setDfaOptions
: Options
.Option list
* (string -> unit
) -> Options
.Option list
3553 val dfaUsage
: Options
.Usage
3556 functor DfaOptions () : DfaOptions
=
3558 open Options UtilInt
3560 val O_DFA_INITIAL_WIDTH
= ref
4
3561 val O_DFA_MAX_STATES
= ref
256
3562 val O_DFA_WARN_TOO_LARGE
= ref
true
3564 fun setDfaDefaults() =
3566 val _
= O_DFA_INITIAL_WIDTH
:= 4
3567 val _
= O_DFA_MAX_STATES
:= 256
3568 val _
= O_DFA_WARN_TOO_LARGE
:= true
3573 [U_ITEM(["--dfa-initial-size=n"],"Initial size of DFA transition tables (16)"),
3574 U_ITEM(["--dfa-initial-width=n"],"Same as --dfa-initial-size=2^n (4)"),
3575 U_ITEM(["--dfa-max-size=n"],"Maximal size of DFAs for ambiguous content models (256)"),
3576 U_ITEM(["--dfa-warn-size[=(yes|no)]"],"Warn about too large DFAs (yes)")
3579 fun setDfaOptions(opts
,doError
) =
3581 exception Failed
of string option
3584 if str
="" then raise Failed NONE
3585 else let val cs
= String.explode str
3586 in foldl (fn (c
,n
) => if #
"0">c
orelse #
"9"<c
then raise Failed NONE
3587 else 10*n
+ord c
-48) 0 cs
3588 handle Overflow
=> raise Failed
3589 (SOME("number "^str^
" is too large for this system"))
3592 val yesNo
= "'yes' or 'no'"
3593 fun tooLarge n
= String.concat
["number ",n
," is too large for this system"]
3594 fun mustHave key
= String.concat
["option --",key
," must have an argument"]
3595 fun mustBe key what
= String.concat
3596 ["the argument to option --",key
," must be ",what
]
3598 fun do_yesno(key
,valOpt
,flag
) =
3600 of NONE
=> flag
:= true
3601 | SOME
"yes" => flag
:= true
3602 | SOME
"no" => flag
:= false
3603 | SOME s
=> doError (mustBe key yesNo
)
3605 fun do_num(key
,valOpt
,flag
) =
3607 of NONE
=> doError (mustHave key
)
3608 | SOME s
=> flag
:= getNat s
3609 handle Failed NONE
=> doError (mustBe key
"a number")
3610 |
Failed (SOME s
) => doError s
3612 fun do_dfa_ts(key
,valOpt
,toWidth
) =
3614 of NONE
=> doError (mustHave key
)
3615 | SOME s
=> O_DFA_INITIAL_WIDTH
:= toWidth (getNat s
)
3616 handle Failed NONE
=> doError (mustBe key
"a number")
3617 |
Failed (SOME s
) => doError s
3619 fun do_long(key
,valOpt
) =
3621 of "dfa-initial-size" => true before do_dfa_ts(key
,valOpt
,nextPowerTwo
)
3622 |
"dfa-initial-width" => true before do_dfa_ts(key
,valOpt
,fn i
=> i
)
3623 |
"dfa-max-size" => true before do_num(key
,valOpt
,O_DFA_MAX_STATES
)
3624 |
"dfa-warn-size" => true before do_yesno(key
,valOpt
,O_DFA_WARN_TOO_LARGE
)
3628 |
doit (opt
::opts
) =
3630 of OPT_NOOPT
=> opts
3631 |
OPT_LONG(key
,value
) => if do_long(key
,value
) then doit opts
3633 | OPT_NEG _
=> opt
::doit opts
3634 | OPT_SHORT _
=> opt
::doit opts
3635 | OPT_STRING _
=> opt
::doit opts
3641 (* stop
of ../../Parser
/Dfa
/dfaOptions
.sml
*)
3642 (* start
of ../../Parser
/Params
/parserOptions
.sml
*)
3643 (*--------------------------------------------------------------------------*)
3644 (* Structure
: ParserOptions
*)
3646 (* Depends on
: none
*)
3647 (*--------------------------------------------------------------------------*)
3648 signature ParserOptions
=
3650 structure DfaOptions
: DfaOptions
3652 val O_CHECK_ISO639
: bool ref
3653 val O_CHECK_LANGID
: bool ref
3654 val O_CHECK_PREDEFINED
: bool ref
3655 val O_CHECK_RESERVED
: bool ref
3656 val O_CHECK_VERSION
: bool ref
3658 val O_WARN_MULT_ENUM
: bool ref
3659 val O_WARN_XML_DECL
: bool ref
3660 val O_WARN_ATT_NO_ELEM
: bool ref
3661 val O_WARN_MULT_ENT_DECL
: bool ref
3662 val O_WARN_MULT_NOT_DECL
: bool ref
3663 val O_WARN_MULT_ATT_DEF
: bool ref
3664 val O_WARN_MULT_ATT_DECL
: bool ref
3665 val O_WARN_SHOULD_DECLARE
: bool ref
3666 val O_WARN_NON_ASCII_URI
: bool ref
3668 val O_ERROR_MINIMIZE
: bool ref
3670 val O_VALIDATE
: bool ref
3671 val O_COMPATIBILITY
: bool ref
3672 val O_INTEROPERABILITY
: bool ref
3674 val O_INCLUDE_EXT_PARSED
: bool ref
3675 val O_INCLUDE_PARAM_ENTS
: bool ref
3677 val setParserDefaults
: unit
-> unit
3678 val setParserOptions
: Options
.Option list
* (string -> unit
) -> Options
.Option list
3680 val parserUsage
: Options
.Usage
3683 functor ParserOptions () : ParserOptions
=
3685 structure DfaOptions
= DfaOptions ()
3687 open DfaOptions Options UtilInt UtilList
3689 val O_CHECK_VERSION
= ref
true (* check for conforming xml version?
*)
3690 val O_CHECK_ISO639
= ref
true (* check whether a two
-letter LangCode
*)
3691 (* is acording to ISO
639?
*)
3692 val O_CHECK_LANGID
= ref
true (* check whether a LangCode fullfills
*)
3693 (* IETF RFC
1766?
*)
3694 val O_CHECK_RESERVED
= ref
false(* check for names starting
with xml?
*)
3695 val O_CHECK_PREDEFINED
= ref
true (* check declarations
of predefined
*)
3696 val O_WARN_MULT_ENUM
= ref
true (* check whether a token occurs
*)
3697 (* twice
in the enumerated attribute
*)
3698 (* types
of the same element
*)
3699 val O_WARN_XML_DECL
= ref
false (* warn
if the XML decl is missing?
*)
3701 val O_WARN_ATT_NO_ELEM
= ref
true (* warn for undeclared elements
*)
3702 (* in att def list declarations?
*)
3704 val O_WARN_MULT_ENT_DECL
= ref
true (* warn about redefined entities
*)
3705 val O_WARN_MULT_NOT_DECL
= ref
true (* warn about redefined notations
*)
3706 val O_WARN_SHOULD_DECLARE
= ref
true (* warn
if predefined entities
*)
3707 (* are not declared
in the dtd
*)
3709 val O_WARN_MULT_ATT_DEF
= ref
true (* warn
if an attributes is defd
*)
3710 (* twice for the same element?
*)
3711 val O_WARN_MULT_ATT_DECL
= ref
true (* warn
if there are multiple att
*)
3712 (* def lists for one element?
*)
3713 val O_WARN_NON_ASCII_URI
= ref
true (* warn about non
-ascii chars
in *)
3714 (* system identifiers?
*)
3716 val O_ERROR_MINIMIZE
= ref
true (* try to avoid repeating errors?
*)
3718 val O_VALIDATE
= ref
true
3719 val O_COMPATIBILITY
= ref
true
3720 val O_INTEROPERABILITY
= ref
false
3722 val O_INCLUDE_EXT_PARSED
= ref
false
3723 val O_INCLUDE_PARAM_ENTS
= ref
false
3725 fun setParserDefaults() =
3727 val _
= setDfaDefaults()
3729 val _
= O_CHECK_ISO639
:= false
3730 val _
= O_CHECK_LANGID
:= false
3731 val _
= O_CHECK_PREDEFINED
:= true
3732 val _
= O_CHECK_RESERVED
:= false
3733 val _
= O_CHECK_VERSION
:= true
3735 val _
= O_WARN_MULT_ENUM
:= true
3736 val _
= O_WARN_XML_DECL
:= false
3737 val _
= O_WARN_ATT_NO_ELEM
:= false
3738 val _
= O_WARN_MULT_ENT_DECL
:= false
3739 val _
= O_WARN_MULT_NOT_DECL
:= false
3740 val _
= O_WARN_MULT_ATT_DEF
:= false
3741 val _
= O_WARN_MULT_ATT_DECL
:= false
3742 val _
= O_WARN_SHOULD_DECLARE
:= true
3743 val _
= O_WARN_NON_ASCII_URI
:= true
3745 val _
= O_VALIDATE
:= true
3746 val _
= O_COMPATIBILITY
:= true
3747 val _
= O_INTEROPERABILITY
:= false
3749 val _
= O_ERROR_MINIMIZE
:= true
3751 val _
= O_INCLUDE_EXT_PARSED
:= false
3752 val _
= O_INCLUDE_PARAM_ENTS
:= false
3757 [U_ITEM(["-[n]v","--validate[=(yes|no)]"],"Turn on or off validation (yes)"),
3758 U_ITEM(["-[n]c","--compat[=(yes|no)]","--compatibility[=(yes|no)]"],
3759 "Turn on or off compatibility checking (yes)"),
3760 U_ITEM(["-[n]i","--interop[=(yes|no)]","--interoperability[=(yes|no)]"],
3761 "Turn on or off interoperability checking (no)"),
3763 U_ITEM(["--few-errors[=(yes|no)]"],"Report fewer errors (no)"),
3764 U_ITEM(["--check-reserved[=(yes|no)]"],
3765 "Checking for reserved names (no)"),
3766 U_ITEM(["--check-predef[=(yes|no)]","--check-predefined[=(yes|no)]"],
3767 "Check declaration of predefined entities (yes)"),
3768 U_ITEM(["--check-lang-id[=(yes|no)]"],"Checking language identifiers (no)"),
3769 U_ITEM(["--check-iso639[=(yes|no)]"],"Check ISO 639 language codes (no)"),
3770 U_ITEM(["--check-xml-version[=(yes|no)]"], "Check XML version number (yes)"),
3772 U_ITEM(["--warn-xml-decl[=(yes|no)]"],"Warn if there is no XML declaration (no)"),
3773 U_ITEM(["--warn-att-elem[=(yes|no)]"],
3774 "Warn about attlist declarations for undeclared elements (no)"),
3775 U_ITEM(["--warn-predefined[=(yes|no)]"],
3776 "Warn if the predefined entities are not declared (no)"),
3777 U_ITEM(["--warn-mult-decl[=<arg>]"],"Warn about multiple declarations (none)"),
3778 U_ITEM(["--warn-uri[=(yes|no)]"],"Warn about non-ASCII characters in URIs (yes)"),
3779 U_ITEM(["--warn[=all]"],"Warn about nearly everything"),
3780 U_ITEM(["--warn=none"],"Do not print warnings"),
3782 U_ITEM(["--include-ext[=(yes|no)]","--include-external[=(yes|no)]"],
3783 "Include external entities in non-validating mode (no)"),
3784 U_ITEM(["--include-par[=(yes|no)]","--include-parameter[=(yes|no)]"],
3785 "Include parameter entities and external subset in "^
3786 "non-validating mode (no)"),
3790 fun setParserOptions(opts
,doError
) =
3792 datatype What
= ATT | ATTLIST | ENT | NOT
3794 exception Failed
of string option
3797 if str
="" then raise Failed NONE
3798 else let val cs
= String.explode str
3799 in foldl (fn (c
,n
) => if #
"0">c
orelse #
"9"<c
then raise Failed NONE
3800 else 10*n
+ord c
-48) 0 cs
3801 handle Overflow
=> raise Failed
3802 (SOME("number "^str^
" is too large for this system"))
3805 val allNone
= "'all' or 'none'"
3806 val yesNo
= "'yes' or 'no'"
3807 val yesNoWhat
= "'yes', 'no' or a list of 'att', 'attlist', 'ent' and 'not'"
3808 fun errorMustBe(key
,what
) = doError
3809 (String.concat
["the argument to option --",key
," must be ",what
])
3810 fun errorNoArg key
= doError
3811 (String.concat
["option --",key
," has no argument"])
3813 fun do_mult_decl(key
,valOpt
) =
3815 val all
= [ATT
,ATTLIST
,ENT
,NOT
]
3816 fun setFlags whats
= app (fn (what
,flag
) => flag
:= member what whats
)
3817 [(ATT
,O_WARN_MULT_ATT_DEF
),(ATTLIST
,O_WARN_MULT_ATT_DECL
),
3818 (ENT
,O_WARN_MULT_ENT_DECL
),(NOT
,O_WARN_MULT_NOT_DECL
)]
3820 of NONE
=> setFlags all
3821 | SOME
"yes" => setFlags all
3822 | SOME
"no" => setFlags nil
3823 | SOME s
=> let val fields
= String.fields (fn c
=> #
","=c
) s
3827 |
"attlist" => ATTLIST
3830 | _
=> raise Failed NONE
) fields
3833 handle Failed _
=> errorMustBe(key
,yesNoWhat
)
3836 fun do_noarg(key
,valOpt
,flag
) =
3838 of NONE
=> flag
:= true
3839 | SOME _
=> errorNoArg key
3841 fun do_yesno(key
,valOpt
,flag
) =
3843 of NONE
=> flag
:= true
3844 | SOME
"yes" => flag
:= true
3845 | SOME
"no" => flag
:= false
3846 | SOME s
=> errorMustBe(key
,yesNo
)
3848 fun do_num(key
,valOpt
,flag
) =
3850 of NONE
=> errorMustBe(key
,"a number")
3851 | SOME s
=> flag
:= getNat s
3852 handle Failed NONE
=> errorMustBe(key
,"a number")
3853 |
Failed (SOME s
) => doError s
3855 fun do_warn(key
,valOpt
) =
3856 let val all
= [O_WARN_MULT_ENUM
,O_WARN_ATT_NO_ELEM
,
3857 O_WARN_MULT_ENT_DECL
,O_WARN_MULT_NOT_DECL
,O_WARN_MULT_ATT_DEF
,
3858 O_WARN_MULT_ATT_DECL
,O_WARN_SHOULD_DECLARE
,O_WARN_XML_DECL
]
3859 fun setFlags value
= app (fn flag
=> flag
:= value
) all
3861 of NONE
=> setFlags
true
3862 | SOME
"all" => setFlags
true
3863 | SOME
"none" => setFlags
false
3864 | SOME _
=> errorMustBe(key
,allNone
)
3867 fun do_long(key
,valOpt
) =
3869 of "validate" => true before do_yesno(key
,valOpt
,O_VALIDATE
)
3870 |
"compat" => true before do_yesno(key
,valOpt
,O_COMPATIBILITY
)
3871 |
"compatibility" => true before do_yesno(key
,valOpt
,O_COMPATIBILITY
)
3872 |
"interop" => true before do_yesno(key
,valOpt
,O_INTEROPERABILITY
)
3873 |
"interoperability" => true before do_yesno(key
,valOpt
,O_INTEROPERABILITY
)
3875 |
"few-errors" => true before do_yesno(key
,valOpt
,O_ERROR_MINIMIZE
)
3877 |
"check-reserved" => true before do_yesno(key
,valOpt
,O_CHECK_RESERVED
)
3878 |
"check-predef" => true before do_yesno(key
,valOpt
,O_CHECK_PREDEFINED
)
3879 |
"check-predefined" => true before do_yesno(key
,valOpt
,O_CHECK_PREDEFINED
)
3880 |
"check-lang-id" => true before do_yesno(key
,valOpt
,O_CHECK_LANGID
)
3881 |
"check-iso639" => true before do_yesno(key
,valOpt
,O_CHECK_ISO639
)
3882 |
"check-xml-version" => true before do_yesno(key
,valOpt
,O_CHECK_VERSION
)
3884 |
"warn" => true before do_warn(key
,valOpt
)
3885 |
"warn-xml-decl" => true before do_yesno(key
,valOpt
,O_WARN_XML_DECL
)
3886 |
"warn-att-elem" => true before do_yesno(key
,valOpt
,O_WARN_ATT_NO_ELEM
)
3887 |
"warn-predefined" => true before do_yesno(key
,valOpt
,O_WARN_SHOULD_DECLARE
)
3888 |
"warn-mult-decl" => true before do_mult_decl(key
,valOpt
)
3889 |
"warn-uri" => true before do_yesno(key
,valOpt
,O_WARN_NON_ASCII_URI
)
3891 |
"include-ext" => true before do_yesno(key
,valOpt
,O_INCLUDE_EXT_PARSED
)
3892 |
"include-external" => true before do_yesno(key
,valOpt
,O_INCLUDE_EXT_PARSED
)
3893 |
"include-par" => true before do_yesno(key
,valOpt
,O_INCLUDE_PARAM_ENTS
)
3894 |
"include-parameter" => true before do_yesno(key
,valOpt
,O_INCLUDE_PARAM_ENTS
)
3901 of #
"v" => false before O_VALIDATE
:= true
3902 | #
"c" => false before O_COMPATIBILITY
:= true
3903 | #
"i" => false before O_INTEROPERABILITY
:= true
3905 in List.filter doOne cs
3911 of #
"v" => false before O_VALIDATE
:= false
3912 | #
"c" => false before O_COMPATIBILITY
:= false
3913 | #
"i" => false before O_INTEROPERABILITY
:= false
3915 in List.filter doOne cs
3919 |
doit (opt
::opts
) =
3921 of OPT_NOOPT
=> opts
3922 |
OPT_LONG(key
,value
) => if do_long(key
,value
) then doit opts
3924 | OPT_SHORT cs
=> (case do_short cs
3926 | rest
=> OPT_SHORT rest
::doit opts
)
3927 | OPT_NEG cs
=> (case do_neg cs
3929 | rest
=> OPT_NEG rest
::doit opts
)
3930 | OPT_STRING s
=> opt
::doit opts
3932 val opts1
= setDfaOptions (opts
,doError
)
3937 (* stop
of ../../Parser
/Params
/parserOptions
.sml
*)
3938 (* start
of ../../Util
/intLists
.sml
*)
3939 signature IntLists
=
3941 type IntList
= int list
3943 val emptyIntList
: IntList
3944 val singleIntList
: int -> IntList
3945 val fullIntList
: int -> IntList
3947 val isEmptyIntList
: IntList
-> bool
3948 val inIntList
: int * IntList
-> bool
3949 val subIntList
: IntList
* IntList
-> bool
3951 val compareIntLists
: IntList
* IntList
-> order
3952 val hashIntList
: IntList
-> word
3954 val addIntList
: int * IntList
-> IntList
3955 val delIntList
: int * IntList
-> IntList
3957 val cupIntLists
: IntList
* IntList
-> IntList
3958 val capIntLists
: IntList
* IntList
-> IntList
3959 val diffIntLists
: IntList
* IntList
-> IntList
3961 val IntList2String
: IntList
-> string
3964 structure IntLists
: IntLists
=
3966 open UtilCompare UtilHash UtilInt UtilList UtilString
3968 type IntList
= int list
3970 val emptyIntList
= nil
: IntList
3972 fun fullIntList n
= intervalList(0,n
)
3973 fun singleIntList n
= [n
]
3974 val isEmptyIntList
= null
3976 val inIntList
= elem
Int.compare
3977 val subIntList
= sub
Int.compare
3978 val addIntList
= insert
Int.compare
3979 val delIntList
= delete
Int.compare
3980 val capIntLists
= cap
Int.compare
3981 val cupIntLists
= merge
Int.compare
3982 val diffIntLists
= diff
Int.compare
3983 val compareIntLists
= compareList
Int.compare
3984 val hashIntList
= hashList hashInt
3986 val IntList2String
= List2String
Int.toString
3988 (* stop
of ../../Util
/intLists
.sml
*)
3989 (* start
of ../../Unicode
/Chars
/dataDict
.sml
*)
3998 structure KeyData
: Key
=
4000 type Key
= UniChar
.Data
4002 val null
= UniChar
.nullData
4003 val hash
= UniChar
.hashData
4004 val compare
= UniChar
.compareData
4005 val toString
= UniChar
.Data2String
4008 structure DataDict
= Dict (structure Key
= KeyData
)
4009 structure DataSymTab
= SymTable (structure Key
= KeyData
)
4012 (* stop
of ../../Unicode
/Chars
/dataDict
.sml
*)
4013 (* start
of ../../Parser
/Dfa
/dfaData
.sml
*)
4017 (*--------------------------------------------------------------------------*)
4018 (* Structure
: DfaData
*)
4022 (* Exceptions raised by functions
in this
structure: *)
4023 (* boundsFollow
: none
*)
4024 (* mergeFirst
: ConflictFirst
*)
4025 (* mergeFollow
: ConflictFollow
*)
4026 (*--------------------------------------------------------------------------*)
4031 datatype ContentModel
=
4033 | CM_OPT
of ContentModel
4034 | CM_REP
of ContentModel
4035 | CM_PLUS
of ContentModel
4036 | CM_ALT
of ContentModel list
4037 | CM_SEQ
of ContentModel list
4042 (*--- visible to the parser
---*)
4043 datatype ContentModel
=
4045 | CM_OPT
of ContentModel
4046 | CM_REP
of ContentModel
4047 | CM_PLUS
of ContentModel
4048 | CM_ALT
of ContentModel list
4049 | CM_SEQ
of ContentModel list
4054 val dfaDontCare
= ~
2
4058 exception DfaTooLarge
of int
4059 exception Ambiguous
of Sigma
* int * int
4060 exception ConflictFirst
of Sigma
* State
* State
4061 exception ConflictFollow
of Sigma
* State
* State
4064 type First
= (State
* Sigma
) list
4067 type Info
= State
* Empty
* First
4076 withtype CM
= CM
' * Info
4078 type Row
= Sigma
* Sigma
* State vector
* bool
4079 val nullRow
: Row
= (1,0,Vector.fromList nil
,false)
4081 type Dfa
= Row vector
4083 val emptyDfa
: Dfa
= Vector.fromList
[(1,0,Vector.fromList nil
,true)]
4086 structure DfaData
= DfaBase
: DfaData
4087 (* stop
of ../../Parser
/Dfa
/dfaData
.sml
*)
4088 (* start
of ../../Unicode
/Decode
/decodeFile
.sml
*)
4089 (*--------------------------------------------------------------------------*)
4090 (* Structure
: DecodeBasic
*)
4092 (* Exceptions raised by functions
in this
structure: *)
4093 (* closeFile
: none
*)
4094 (* filePos
: none
*)
4095 (* fileName
: none
*)
4096 (* nextByte
: EndOfFile
*)
4097 (* openFile
: NoSuchFile
*)
4098 (*--------------------------------------------------------------------------*)
4099 signature DecodeFile
=
4101 structure Bytes
: WORD
4104 type Byte
= Bytes
.word
4106 exception EndOfFile
of File
4108 val Char2Byte
: UniChar
.Char -> Byte
4109 val Byte2Char
: Byte
-> UniChar
.Char
4110 val Byte2Hex
: Byte
-> string
4112 val openFile
: Uri
.Uri option
-> File
4113 val closeFile
: File
-> unit
4115 val getByte
: File
-> Byte
* File
4116 val ungetBytes
: File
* Byte list
-> File
4118 val fileUri
: File
-> Uri
.Uri
4119 val fileName
: File
-> string
4122 structure DecodeFile
: DecodeFile
=
4125 UniChar Uri UtilError
4127 structure Bytes
= Word8
4128 type Byte
= Bytes
.word
4130 fun Byte2Char b
= Chars
.fromLargeWord(Bytes
.toLargeWord b
)
4132 "0x"^UtilString
.toUpperString(StringCvt.padLeft #
"0" 2 (Bytes
.toString b
))
4133 fun Char2Byte c
= Bytes
.fromLargeWord(Chars
.toLargeWord c
)
4135 type instream
= TextIO.instream
4136 val closeIn
= TextIO.closeIn
4137 val input
= TextIO.input
4138 val input1
= TextIO.input1
4139 val openIn
= TextIO.openIn
4140 val stdIn
= TextIO.stdIn
4142 (*--------------------------------------------------------------------*)
4143 (* a file
type is stdin or a uri
with its
string representation
and *)
4144 (* the file it is mapped to
. *)
4145 (* a file position is a stream
, an
int position
and a file
type. *)
4146 (* a file is a file position
, a buffer
, its size
and current index
. *)
4147 (*--------------------------------------------------------------------*)
4148 datatype FileType
= STD | FNAME
of (Uri
* string * string * bool)
4149 type FilePos
= FileType
* instream
* int
4150 type File
= FilePos
* Word8Vector
.vector
* int * int
4152 exception EndOfFile
of File
4153 val nullVec
= Word8Vector
.fromList nil
4155 (*--------------------------------------------------------------------*)
4156 (* return the uri
of a file
. *)
4157 (*--------------------------------------------------------------------*)
4158 fun fileUri ((typ
,_
,_
),_
,_
,_
) =
4161 |
FNAME(uri
,_
,_
,_
) => uri
4162 (*--------------------------------------------------------------------*)
4163 (* return the uri
string name
of a file
. *)
4164 (*--------------------------------------------------------------------*)
4165 fun fileName ((typ
,_
,_
),_
,_
,_
) =
4168 |
FNAME(_
,str
,_
,_
) => str
4169 (*--------------------------------------------------------------------*)
4170 (* return the uri
string and the position
in the the file
. *)
4171 (*--------------------------------------------------------------------*)
4172 fun filePos ((typ
,_
,p
),_
,s
,i
) =
4174 of STD
=> ("<stdin>",p
+i
-s
)
4175 |
FNAME(_
,str
,_
,_
) => (str
,p
+i
-s
)
4177 (*--------------------------------------------------------------------*)
4178 (* open a file
; report IO errors by raising NoSuchFile
. *)
4179 (*--------------------------------------------------------------------*)
4180 fun openFile uriOpt
=
4181 let val (typ
,stream
) =
4183 of NONE
=> (STD
,stdIn
)
4184 | SOME uri
=> let val (str
,fname
,tmp
) = retrieveUri uri
4185 in (FNAME(uri
,str
,fname
,tmp
),openIn fname
)
4187 handle IO
.Io
{name
,cause
,...}
4188 => raise NoSuchFile(name
,exnMessage cause
)
4189 in ((typ
,stream
,0),nullVec
,0,0)
4192 (*--------------------------------------------------------------------*)
4193 (* close the file
; ignore IO errors
. *)
4194 (*--------------------------------------------------------------------*)
4195 fun closeStream (typ
,stream
,_
) =
4198 |
FNAME(_
,uri
,fname
,tmp
)
4199 => let val _
= closeIn stream
handle IO
.Io _
=> ()
4200 val _
= (if tmp
andalso OS
.FileSys
.access(fname
,nil
)
4201 then OS
.FileSys
.remove fname
else ())
4202 handle exn
as OS
.SysErr _
=>
4203 TextIO.output(TextIO.stdErr
,String.concat
4204 ["Error removing temporary file ",fname
,"for URI",uri
,
4205 "(",exnMessage exn
,")\n"])
4209 fun closeFile (tsp
,_
,_
,_
) = closeStream tsp
4211 (*--------------------------------------------------------------------*)
4212 (* read a byte from the file
; if at the
end of buffer
, reload it
. *)
4213 (* if a reload fails or returns an IO error
, raise EndOfFile
. --------*)
4214 (*--------------------------------------------------------------------*)
4215 fun getByte (tsp
,vec
,s
,i
) =
4216 if i
<s
then (Word8Vector
.sub(vec
,i
),(tsp
,vec
,s
,i
+1))
4217 else let val (typ
,stream
,pos
) = tsp
4218 val v
= Byte
.stringToBytes (input stream
) handle IO
.Io _
=> nullVec
4219 val s
= Word8Vector
.length v
4220 in if s
=0 then let val _
= closeStream tsp
4221 in raise EndOfFile(tsp
,v
,0,0)
4223 else (Word8Vector
.sub(v
,0),((typ
,stream
,pos
+s
),v
,s
,1))
4226 (*--------------------------------------------------------------------*)
4227 (* un
-get some bytes
. this should only happen
while checking for a
*)
4228 (* byte
-order mark or xml
/text declaration
. It should be efficient
in *)
4229 (* that
case, otherwise might be very space
-consuming
. *)
4230 (*--------------------------------------------------------------------*)
4231 fun ungetBytes ((tsp
,vec
,s
,i
),bs
) =
4232 let val len
= length bs
4233 in if len
<=i
then (tsp
,vec
,s
,i
-len
)
4234 else let val diff
= len
-i
4235 val vec0
= Word8Vector
.fromList(List.take(bs
,diff
))
4236 in (tsp
,Word8Vector
.concat
[vec0
,vec
],s
+diff
,0)
4240 (* stop
of ../../Unicode
/Decode
/decodeFile
.sml
*)
4241 (* start
of ../../Unicode
/Decode
/decodeError
.sml
*)
4247 (*--------------------------------------------------------------------------*)
4248 (* Structure
: DecodeError
*)
4250 (* Exceptions raised by functions
in this
structure: *)
4251 (* decodeMessage
: none
*)
4252 (*--------------------------------------------------------------------------*)
4253 signature DecodeError
=
4255 datatype DecodeError
=
4256 ERR_ILLEGAL_CHAR
of DecodeFile
.Byte
* string
4257 | ERR_NON_UNI_UCS4
of UniChar
.Char
4258 | ERR_EOF_UCS4
of int * DecodeFile
.Byte list
4259 | ERR_NON_DIRECT_UTF7
of DecodeFile
.Byte
4260 | ERR_PADDING_UTF7
of UniChar
.Char
4261 | ERR_ILLFORMED_UTF8
of DecodeFile
.Byte
* int * int
4262 | ERR_ILLEGAL_UTF8
of DecodeFile
.Byte
4263 | ERR_INVALID_UTF8_SEQ
of DecodeFile
.Byte list
4264 | ERR_EOF_UTF8
of int * int
4265 | ERR_NON_UNI_UTF8
of UniChar
.Char * int
4266 | ERR_EOF_UCS2
of DecodeFile
.Byte
4267 | ERR_EOF_UTF16
of DecodeFile
.Byte
4268 | ERR_LOW_SURROGATE
of UniChar
.Char
4269 | ERR_HIGH_SURROGATE
of UniChar
.Char * UniChar
.Char
4270 | ERR_EOF_SURROGATE
of UniChar
.Char
4271 | ERR_NO_ENC_DECL
of string
4272 | ERR_UNSUPPORTED_ENC
of string
4273 | ERR_INCOMPATIBLE_ENC
of string * string
4275 val decodeMessage
: DecodeError
-> string list
4277 exception DecodeError
of DecodeFile
.File
* bool * DecodeError
4280 structure DecodeError
: DecodeError
=
4283 DecodeFile UtilString UniChar
4285 datatype DecodeError
=
4286 ERR_ILLEGAL_CHAR
of DecodeFile
.Byte
* string
4287 | ERR_NON_UNI_UCS4
of UniChar
.Char
4288 | ERR_EOF_UCS4
of int * DecodeFile
.Byte list
4289 | ERR_NON_DIRECT_UTF7
of DecodeFile
.Byte
4290 | ERR_PADDING_UTF7
of UniChar
.Char
4291 | ERR_ILLFORMED_UTF8
of DecodeFile
.Byte
* int * int
4292 | ERR_ILLEGAL_UTF8
of DecodeFile
.Byte
4293 | ERR_INVALID_UTF8_SEQ
of DecodeFile
.Byte list
4294 | ERR_EOF_UTF8
of int * int
4295 | ERR_NON_UNI_UTF8
of UniChar
.Char * int
4296 | ERR_EOF_UCS2
of DecodeFile
.Byte
4297 | ERR_EOF_UTF16
of DecodeFile
.Byte
4298 | ERR_LOW_SURROGATE
of UniChar
.Char
4299 | ERR_HIGH_SURROGATE
of UniChar
.Char * UniChar
.Char
4300 | ERR_EOF_SURROGATE
of UniChar
.Char
4301 | ERR_NO_ENC_DECL
of string
4302 | ERR_UNSUPPORTED_ENC
of string
4303 | ERR_INCOMPATIBLE_ENC
of string * string
4305 fun Char2Hex c
= "0x"^UtilString
.toUpperString(StringCvt.padLeft #
"0" 4 (Chars
.toString c
))
4307 fun decodeMessage err
=
4309 of ERR_ILLEGAL_CHAR(b
,what
) =>
4310 [Byte2Hex b
,"is not",prependAnA what
,"character"]
4312 | ERR_NON_UNI_UCS4 c
=>
4313 ["UCS-4 coded non-Unicode character",Char2Uni c
]
4314 |
ERR_EOF_UCS4(pos
,bytes
) =>
4315 ["End of file after",Int2String pos
,"bytes of UCS-4 character",
4316 "starting with ",List2String0 Byte2Hex bytes
]
4318 | ERR_NON_DIRECT_UTF7 b
=>
4319 ["Indirect UTF-7 character ",Byte2Hex b
,"in non-shifted mode"]
4320 | ERR_PADDING_UTF7 pad
=>
4321 ["Non-zero padding",Char2Hex pad
,"at end of UTF-7 shifted sequence"]
4323 |
ERR_ILLFORMED_UTF8 (b
,len
,pos
) =>
4324 [numberNth pos
,"byte",Byte2Hex b
,"of a",Int2String len^
"-byte",
4325 "UTF-8 sequence does not start with bits 10"]
4326 | ERR_ILLEGAL_UTF8 b
=>
4327 ["Byte",Byte2Hex b
,"is neither ASCII nor does it start",
4328 "a valid multi-byte UTF-8 sequence"]
4329 |
ERR_EOF_UTF8 (len
,pos
) =>
4330 ["End of file terminates a ",Int2String len^
"-byte",
4331 "UTF-8 sequence before the ",numberNth pos
,"byte"]
4332 |
ERR_NON_UNI_UTF8 (c
,len
) =>
4333 [Int2String len^
"-byte UTF-8 sequence decodes to non-Unicode character",Char2Uni c
]
4334 | ERR_INVALID_UTF8_SEQ bs
=>
4335 ["Invalid UTF-8 sequence",List2xString (""," ","") Byte2Hex bs
]
4338 ["End of file before second byte of UCS-2 character starting with",Byte2Hex b
]
4339 | ERR_EOF_UTF16 b
=>
4340 ["End of file before second byte of UTF-16 character starting with",Byte2Hex b
]
4342 | ERR_LOW_SURROGATE c
=>
4343 ["Low surrogate",Char2Uni c
,"without preceding high surrogate"]
4344 |
ERR_HIGH_SURROGATE (c
,c1
) =>
4345 ["High surrogate",Char2Uni c
,"followed by",Char2Uni c1
,"instead of low surrogate"]
4346 | ERR_EOF_SURROGATE c
=>
4347 ["High surrogate",Char2Uni c
,"followed by the end of file"]
4349 | ERR_NO_ENC_DECL auto
=>
4350 ["Couldn't parse encoding declaration but auto-detected encoding",auto
,"required so"]
4351 | ERR_UNSUPPORTED_ENC enc
=>
4352 ["Unsupported encoding",enc
]
4353 |
ERR_INCOMPATIBLE_ENC (enc
,auto
) =>
4354 ["Encoding",enc
,"is incompatible with auto-detected encoding",auto
]
4356 exception DecodeError
of File
* bool * DecodeError
4360 (* stop
of ../../Unicode
/Decode
/decodeError
.sml
*)
4361 (* start
of ../../Unicode
/Decode
/decodeUtil
.sml
*)
4363 require
"basis.__word";
4364 require
"basis.__word8";
4367 require
"decodeBasic";
4368 require
"decodeError";
4371 (*--------------------------------------------------------------------------*)
4372 (* Structure
: DecodeUtil
*)
4374 (* Exceptions raised by functions
in this
structure: *)
4375 (* combineSurrogates
: none
*)
4376 (* combineUcs4big
: none
*)
4377 (* combineUcs4little
: none
*)
4378 (* combineUcs4strangeBig
: none
*)
4379 (* combineUcs4strangeLittle
: none
*)
4380 (* combineUtf16big
: none
*)
4381 (* combineUtf16little
: none
*)
4382 (* isLowSurrogate
: none
*)
4383 (* isHighSurrogate
: none
*)
4384 (* isSurrogate
: none
*)
4385 (*--------------------------------------------------------------------------*)
4386 signature DecodeUtil
=
4388 val isSurrogate
: UniChar
.Char -> bool
4389 val isLowSurrogate
: UniChar
.Char -> bool
4390 val isHighSurrogate
: UniChar
.Char -> bool
4391 val combineSurrogates
: UniChar
.Char * UniChar
.Char -> UniChar
.Char
4394 structure DecodeUtil
: DecodeUtil
=
4396 open UniChar DecodeFile DecodeError
4398 fun isSurrogate c
= Chars
.orb(c
,0wx7FF
)=0wxDFFF
4399 fun isLowSurrogate c
= Chars
.orb(c
,0wx3FF
)=0wxDFFF
4400 fun isHighSurrogate c
= Chars
.orb(c
,0wx3FF
)=0wxDBFF
4401 fun combineSurrogates(hi
,lo
) = (hi
-0wxD800
)*0wx400
+lo
+0wx2400
: Char
4403 (* stop
of ../../Unicode
/Decode
/decodeUtil
.sml
*)
4404 (* start
of ../../Unicode
/Decode
/decodeUcs2
.sml
*)
4412 signature DecodeUcs2
=
4414 val getCharUcs2b
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4415 val getCharUcs2l
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4418 structure DecodeUcs2
: DecodeUcs2
=
4422 DecodeFile DecodeError DecodeUtil
4424 fun getCharUcs2b f
=
4426 val (b1
,f1
) = getByte f
4427 val (b2
,f2
) = getByte f1
handle exn
as EndOfFile f
4428 => raise DecodeError(f
,true,ERR_EOF_UCS2 b1
)
4429 val c
= Chars
.orb(Chars
.<<(Byte2Char b1
,0w8
),Byte2Char b2
)
4433 fun getCharUcs2l f
=
4435 val (b1
,f1
) = getByte f
4436 val (b2
,f2
) = getByte f1
handle exn
as EndOfFile f
4437 => raise DecodeError(f
,true,ERR_EOF_UCS2 b1
)
4438 val c
= Chars
.orb(Chars
.<<(Byte2Char b2
,0w8
),Byte2Char b1
)
4442 (* stop
of ../../Unicode
/Decode
/decodeUcs2
.sml
*)
4443 (* start
of ../../Unicode
/Decode
/decodeMisc
.sml
*)
4444 signature DecodeMisc
=
4446 val getCharAscii
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4447 val getCharEbcdic
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4448 val getCharEof
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4449 val getCharLatin1
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4452 structure DecodeMisc
: DecodeMisc
=
4455 UniChar DecodeFile DecodeError
4457 fun getCharEof f
= raise EndOfFile f
4459 (*--------------------------------------------------------------------*)
4460 (* ASCII characters must be lower than
0wx80
*)
4461 (*--------------------------------------------------------------------*)
4462 fun getCharAscii f
=
4463 let val (b
,f1
) = getByte f
4464 in if b
<0wx80
then (Byte2Char b
,f1
)
4465 else raise DecodeError(f1
,false,ERR_ILLEGAL_CHAR(b
,"ASCII"))
4468 (*--------------------------------------------------------------------*)
4469 (* LATIN
-1 is the first plane
of Unicode
. *)
4470 (*--------------------------------------------------------------------*)
4471 fun getCharLatin1 f
= let val (b
,f1
) = getByte f
4475 (*--------------------------------------------------------------------*)
4476 (* EBCDIC is mapped to the first plane
of Unicode
. *)
4477 (*--------------------------------------------------------------------*)
4478 (* according to rfc
-1345 (and gnu recode experiments
) *)
4479 val ebcdic2latinTab
= Vector.fromList
4480 [0wx00
,0wx01
,0wx02
,0wx03
,0wx9C
,0wx09
,0wx86
,0wx7F
,
4481 0wx97
,0wx8D
,0wx8E
,0wx0B
,0wx0C
,0wx0D
,0wx0E
,0wx0F
,
4482 0wx10
,0wx11
,0wx12
,0wx13
,0wx9D
,0wx85
,0wx08
,0wx87
,
4483 0wx18
,0wx19
,0wx92
,0wx8F
,0wx1C
,0wx1D
,0wx1E
,0wx1F
,
4484 0wx80
,0wx81
,0wx82
,0wx83
,0wx84
,0wx0A
,0wx17
,0wx1B
,
4485 0wx88
,0wx89
,0wx8A
,0wx8B
,0wx8C
,0wx05
,0wx06
,0wx07
,
4486 0wx90
,0wx91
,0wx16
,0wx93
,0wx94
,0wx95
,0wx96
,0wx04
,
4487 0wx98
,0wx99
,0wx9A
,0wx9B
,0wx14
,0wx15
,0wx9E
,0wx1A
,
4488 0wx20
,0wxA0
,0wxA1
,0wxA2
,0wxA3
,0wxA4
,0wxA5
,0wxA6
,
4489 0wxA7
,0wxA8
,0wx5B
,0wx2E
,0wx3C
,0wx28
,0wx2B
,0wx21
,
4490 0wx26
,0wxA9
,0wxAA
,0wxAB
,0wxAC
,0wxAD
,0wxAE
,0wxAF
,
4491 0wxB0
,0wxB1
,0wx5D
,0wx24
,0wx2A
,0wx29
,0wx3B
,0wx5E
,
4492 0wx2D
,0wx2F
,0wxB2
,0wxB3
,0wxB4
,0wxB5
,0wxB6
,0wxB7
,
4493 0wxB8
,0wxB9
,0wx7C
,0wx2C
,0wx25
,0wx5F
,0wx3E
,0wx3F
,
4494 0wxBA
,0wxBB
,0wxBC
,0wxBD
,0wxBE
,0wxBF
,0wxC0
,0wxC1
,
4495 0wxC2
,0wx60
,0wx3A
,0wx23
,0wx40
,0wx27
,0wx3D
,0wx22
,
4496 0wxC3
,0wx61
,0wx62
,0wx63
,0wx64
,0wx65
,0wx66
,0wx67
,
4497 0wx68
,0wx69
,0wxC4
,0wxC5
,0wxC6
,0wxC7
,0wxC8
,0wxC9
,
4498 0wxCA
,0wx6A
,0wx6B
,0wx6C
,0wx6D
,0wx6E
,0wx6F
,0wx70
,
4499 0wx71
,0wx72
,0wxCB
,0wxCC
,0wxCD
,0wxCE
,0wxCF
,0wxD0
,
4500 0wxD1
,0wx7E
,0wx73
,0wx74
,0wx75
,0wx76
,0wx77
,0wx78
,
4501 0wx79
,0wx7A
,0wxD2
,0wxD3
,0wxD4
,0wxD5
,0wxD6
,0wxD7
,
4502 0wxD8
,0wxD9
,0wxDA
,0wxDB
,0wxDC
,0wxDD
,0wxDE
,0wxDF
,
4503 0wxE0
,0wxE1
,0wxE2
,0wxE3
,0wxE4
,0wxE5
,0wxE6
,0wxE7
,
4504 0wx7B
,0wx41
,0wx42
,0wx43
,0wx44
,0wx45
,0wx46
,0wx47
,
4505 0wx48
,0wx49
,0wxE8
,0wxE9
,0wxEA
,0wxEB
,0wxEC
,0wxED
,
4506 0wx7D
,0wx4A
,0wx4B
,0wx4C
,0wx4D
,0wx4E
,0wx4F
,0wx50
,
4507 0wx51
,0wx52
,0wxEE
,0wxEF
,0wxF0
,0wxF1
,0wxF2
,0wxF3
,
4508 0wx5C
,0wx9F
,0wx53
,0wx54
,0wx55
,0wx56
,0wx57
,0wx58
,
4509 0wx59
,0wx5A
,0wxF4
,0wxF5
,0wxF6
,0wxF7
,0wxF8
,0wxF9
,
4510 0wx30
,0wx31
,0wx32
,0wx33
,0wx34
,0wx35
,0wx36
,0wx37
,
4511 0wx38
,0wx39
,0wxFA
,0wxFB
,0wxFC
,0wxFD
,0wxFE
,0wxFF
4514 fun ebcdic2latin b
= Vector.sub(ebcdic2latinTab
,Word8.toInt b
)
4516 fun getCharEbcdic f
= let val (b
,f1
) = getByte f
4517 in (ebcdic2latin b
,f1
)
4520 (* stop
of ../../Unicode
/Decode
/decodeMisc
.sml
*)
4521 (* start
of ../../Unicode
/Decode
/decodeUcs4
.sml
*)
4530 signature DecodeUcs4
=
4532 val getCharUcs4b
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4533 val getCharUcs4l
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4534 val getCharUcs4sb
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4535 val getCharUcs4sl
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4538 structure DecodeUcs4
: DecodeUcs4
=
4542 DecodeFile DecodeError DecodeUtil
4544 fun getCharUcs4b f
=
4546 val (b1
,f1
) = getByte f
4547 val (b2
,f2
) = getByte f1
handle EndOfFile f
4548 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
]))
4549 val (b3
,f3
) = getByte f2
handle EndOfFile f
4550 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
]))
4551 val (b4
,f4
) = getByte f3
handle EndOfFile f
4552 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
,b3
]))
4553 val c
= Chars
.orb(Chars
.orb(Chars
.<<(Byte2Char b1
,0w24
),
4554 Chars
.<<(Byte2Char b2
,0w16
)),
4555 Chars
.orb(Chars
.<<(Byte2Char b3
,0w08
),
4557 in if isUnicode c
then (c
,f4
)
4558 else raise DecodeError(f4
,false,ERR_NON_UNI_UCS4 c
)
4561 fun getCharUcs4l f
=
4563 val (b1
,f1
) = getByte f
4564 val (b2
,f2
) = getByte f1
handle EndOfFile f
4565 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
]))
4566 val (b3
,f3
) = getByte f2
handle EndOfFile f
4567 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
]))
4568 val (b4
,f4
) = getByte f3
handle EndOfFile f
4569 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
,b3
]))
4570 val c
= Chars
.orb(Chars
.orb(Chars
.<<(Byte2Char b4
,0w24
),
4571 Chars
.<<(Byte2Char b3
,0w16
)),
4572 Chars
.orb(Chars
.<<(Byte2Char b2
,0w08
),
4574 in if isUnicode c
then (c
,f4
)
4575 else raise DecodeError(f4
,false,ERR_NON_UNI_UCS4 c
)
4578 fun getCharUcs4sb f
=
4580 val (b1
,f1
) = getByte f
4581 val (b2
,f2
) = getByte f1
handle EndOfFile f
4582 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
]))
4583 val (b3
,f3
) = getByte f2
handle EndOfFile f
4584 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
]))
4585 val (b4
,f4
) = getByte f3
handle EndOfFile f
4586 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
,b3
]))
4587 val c
= Chars
.orb(Chars
.orb(Chars
.<<(Byte2Char b2
,0w24
),
4588 Chars
.<<(Byte2Char b1
,0w16
)),
4589 Chars
.orb(Chars
.<<(Byte2Char b4
,0w08
),
4591 in if isUnicode c
then (c
,f4
)
4592 else raise DecodeError(f4
,false,ERR_NON_UNI_UCS4 c
)
4595 fun getCharUcs4sl f
=
4597 val (b1
,f1
) = getByte f
4598 val (b2
,f2
) = getByte f1
handle EndOfFile f
4599 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
]))
4600 val (b3
,f3
) = getByte f2
handle EndOfFile f
4601 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
]))
4602 val (b4
,f4
) = getByte f3
handle EndOfFile f
4603 => raise DecodeError(f
,true,ERR_EOF_UCS4(1,[b1
,b2
,b3
]))
4604 val c
= Chars
.orb(Chars
.orb(Chars
.<<(Byte2Char b3
,0w24
),
4605 Chars
.<<(Byte2Char b4
,0w16
)),
4606 Chars
.orb(Chars
.<<(Byte2Char b1
,0w08
),
4608 in if isUnicode c
then (c
,f4
)
4609 else raise DecodeError(f4
,false,ERR_NON_UNI_UCS4 c
)
4613 (* stop
of ../../Unicode
/Decode
/decodeUcs4
.sml
*)
4614 (* start
of ../../Unicode
/Decode
/decodeUtf16
.sml
*)
4622 signature DecodeUtf16
=
4624 val getCharUtf16b
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4625 val getCharUtf16l
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4628 structure DecodeUtf16
: DecodeUtf16
=
4632 DecodeFile DecodeError DecodeUtil
4634 fun getCharUtf16b f
=
4636 val (b1
,f1
) = getByte f
4637 val (b2
,f2
) = getByte f1
handle exn
as EndOfFile f
4638 => raise DecodeError(f
,true,ERR_EOF_UTF16 b1
)
4639 val c
= Chars
.orb(Chars
.<<(Byte2Char b1
,0w8
),Byte2Char b2
)
4641 if isSurrogate c
then (* Chars
.orb(c
,0wx7FF
)=0wxDFFF
*)
4642 if isLowSurrogate c
then raise DecodeError(f2
,false,ERR_LOW_SURROGATE c
)
4644 val (b3
,f3
) = getByte f2
handle exn
as EndOfFile f
4645 => raise DecodeError(f
,true,ERR_EOF_SURROGATE c
)
4646 val (b4
,f4
) = getByte f3
handle exn
as EndOfFile f
4647 => raise DecodeError(f
,true,ERR_EOF_UTF16 b3
)
4648 val c1
= Chars
.orb(Chars
.<<(Byte2Char b3
,0w8
),Byte2Char b4
)
4649 in if isLowSurrogate c1
then (combineSurrogates(c
,c1
),f4
)
4650 else raise DecodeError(f4
,false,ERR_HIGH_SURROGATE(c
,c1
))
4655 fun getCharUtf16l f
=
4657 val (b1
,f1
) = getByte f
4658 val (b2
,f2
) = getByte f1
handle exn
as EndOfFile f
4659 => raise DecodeError(f
,true,ERR_EOF_UTF16 b1
)
4660 val c
= Chars
.orb(Chars
.<<(Byte2Char b2
,0w8
),Byte2Char b1
)
4662 if isSurrogate c
then
4663 if isLowSurrogate c
then raise DecodeError(f2
,false,ERR_LOW_SURROGATE c
)
4665 val (b3
,f3
) = getByte f2
handle exn
as EndOfFile f
4666 => raise DecodeError(f
,true,ERR_EOF_SURROGATE c
)
4667 val (b4
,f4
) = getByte f3
handle exn
as EndOfFile f
4668 => raise DecodeError(f
,true,ERR_EOF_UTF16 b3
)
4669 val c1
= Chars
.orb(Chars
.<<(Byte2Char b4
,0w8
),Byte2Char b3
)
4670 in if isLowSurrogate c1
then (combineSurrogates(c
,c1
),f4
)
4671 else raise DecodeError(f4
,false,ERR_HIGH_SURROGATE(c
,c1
))
4676 (* stop
of ../../Unicode
/Decode
/decodeUtf16
.sml
*)
4677 (* start
of ../../Unicode
/Decode
/decodeUtf8
.sml
*)
4678 signature DecodeUtf8
=
4680 val getCharUtf8
: DecodeFile
.File
-> UniChar
.Char * DecodeFile
.File
4683 structure DecodeUtf8
: DecodeUtf8
=
4686 UniChar UniClasses UtilError UtilInt
4687 DecodeFile DecodeError DecodeUtil
4689 val THIS_MODULE
= "DecodeUtf8"
4695 val op && = Bytes
.andb
4696 val op <<< = Chars
.<<
4697 val op |||
= Chars
.orb
4699 val byte1switch
= Vector.tabulate
4702 else if i
<0xC0 then 0
4703 else if i
<0xE0 then 2
4704 else if i
<0xF0 then 3
4705 else if i
<0xF8 then 4
4706 else if i
<0xFC then 5
4707 else if i
<0xFE then 6
4710 val diff2
: Char = 0wx00003080
4711 val diff3
: Char = diff2
<<< 0wx6 |||
0wx00020080
4712 val diff4
: Char = diff3
<<< 0wx6 |||
0wx00400080
4713 val diff5
: Char = diff4
<<< 0wx6 |||
0wx08000080
4714 val diff6
: Char = diff5
<<< 0wx6 |||
0wx00000080
4717 let val (b1
,f1
) = getByte f
4718 in if b1
<0wx80
then (Byte2Char b1
,f1
)
4719 else let val n
= Vector.sub(byte1switch
,Word8.toInt b1
)
4721 of 0 (* error
*) => raise DecodeError(f1
,false,ERR_ILLEGAL_UTF8 b1
)
4722 |
1 => (Byte2Char b1
,f1
)
4725 val (b2
,f2
) = getByte f1
handle EndOfFile f
4726 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,2))
4727 in if b2
&& 0wxC0
<> 0wx80
4728 then raise DecodeError(f2
,false,ERR_ILLFORMED_UTF8(b2
,n
,2))
4729 else let val c
= Byte2Char b1
<<< 0w6
+ Byte2Char b2
- diff2
4730 in if c
>=0wx80
then (c
,f2
)
4731 else raise DecodeError(f2
,false,ERR_INVALID_UTF8_SEQ
[b1
,b2
])
4736 val (b2
,f2
) = getByte f1
handle EndOfFile f
4737 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,2))
4738 val (b3
,f3
) = getByte f2
handle EndOfFile f
4739 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,3))
4741 if b2
&& 0wxC0
<> 0wx80
4742 then raise DecodeError(f3
,false,ERR_ILLFORMED_UTF8(b2
,n
,2))
4743 else if b3
&& 0wxC0
<> 0wx80
4744 then raise DecodeError(f3
,false,ERR_ILLFORMED_UTF8(b2
,n
,3))
4745 else let val c
= (Byte2Char b1
<<< 0w12
+
4746 Byte2Char b2
<<< 0w06
+
4747 Byte2Char b3
- diff3
)
4748 in if c
>=0wx800
then (c
,f3
)
4749 else raise DecodeError
4750 (f3
,false,ERR_INVALID_UTF8_SEQ
[b1
,b2
,b3
])
4755 val (b2
,f2
) = getByte f1
handle EndOfFile f
4756 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,2))
4757 val (b3
,f3
) = getByte f2
handle EndOfFile f
4758 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,3))
4759 val (b4
,f4
) = getByte f3
handle EndOfFile f
4760 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,4))
4762 if b2
&& 0wxC0
<> 0wx80
4763 then raise DecodeError(f4
,false,ERR_ILLFORMED_UTF8(b2
,n
,2))
4764 else if b3
&& 0wxC0
<> 0wx80
4765 then raise DecodeError(f4
,false,ERR_ILLFORMED_UTF8(b2
,n
,3))
4766 else if b4
&& 0wxC0
<> 0wx80
4767 then raise DecodeError(f4
,false,ERR_ILLFORMED_UTF8(b2
,n
,4))
4768 else let val c
= (Byte2Char b1
<<< 0w18
+
4769 Byte2Char b2
<<< 0w12
+
4770 Byte2Char b3
<<< 0w06
+
4771 Byte2Char b4
- diff4
)
4773 if c
>=0wx100000
andalso c
<=0wx10FFFF
then (c
,f4
)
4775 then raise DecodeError
4776 (f4
,false,ERR_INVALID_UTF8_SEQ
[b1
,b2
,b3
,b4
])
4777 else raise DecodeError
4778 (f4
,false,ERR_NON_UNI_UTF8(c
,n
))
4783 val (b2
,f2
) = getByte f1
handle EndOfFile f
4784 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,2))
4785 val (b3
,f3
) = getByte f2
handle EndOfFile f
4786 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,3))
4787 val (b4
,f4
) = getByte f3
handle EndOfFile f
4788 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,4))
4789 val (b5
,f5
) = getByte f4
handle EndOfFile f
4790 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,5))
4792 if b2
&& 0wxC0
<> 0wx80
4793 then raise DecodeError(f5
,false,ERR_ILLFORMED_UTF8(b2
,n
,2))
4794 else if b3
&& 0wxC0
<> 0wx80
4795 then raise DecodeError(f5
,false,ERR_ILLFORMED_UTF8(b2
,n
,3))
4796 else if b4
&& 0wxC0
<> 0wx80
4797 then raise DecodeError(f5
,false,ERR_ILLFORMED_UTF8(b2
,n
,4))
4798 else if b5
&& 0wxC0
<> 0wx80
4799 then raise DecodeError(f5
,false,ERR_ILLFORMED_UTF8(b2
,n
,5))
4800 else let val c
= (Byte2Char b1
<<< 0w24
+
4801 Byte2Char b2
<<< 0w18
+
4802 Byte2Char b3
<<< 0w12
+
4803 Byte2Char b4
<<< 0w06
+
4804 Byte2Char b5
- diff5
)
4806 then raise DecodeError
4807 (f5
,false,ERR_INVALID_UTF8_SEQ
[b1
,b2
,b3
,b4
,b5
])
4808 else raise DecodeError
4809 (f5
,false,ERR_NON_UNI_UTF8(c
,n
))
4814 val (b2
,f2
) = getByte f1
handle EndOfFile f
4815 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,2))
4816 val (b3
,f3
) = getByte f2
handle EndOfFile f
4817 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,3))
4818 val (b4
,f4
) = getByte f3
handle EndOfFile f
4819 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,4))
4820 val (b5
,f5
) = getByte f4
handle EndOfFile f
4821 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,5))
4822 val (b6
,f6
) = getByte f5
handle EndOfFile f
4823 => raise DecodeError(f
,true,ERR_EOF_UTF8(n
,6))
4825 if b2
&& 0wxC0
<> 0wx80
4826 then raise DecodeError(f6
,false,ERR_ILLFORMED_UTF8(b2
,n
,2))
4827 else if b3
&& 0wxC0
<> 0wx80
4828 then raise DecodeError(f6
,false,ERR_ILLFORMED_UTF8(b2
,n
,3))
4829 else if b4
&& 0wxC0
<> 0wx80
4830 then raise DecodeError(f6
,false,ERR_ILLFORMED_UTF8(b2
,n
,4))
4831 else if b5
&& 0wxC0
<> 0wx80
4832 then raise DecodeError(f6
,false,ERR_ILLFORMED_UTF8(b2
,n
,5))
4833 else if b6
&& 0wxC0
<> 0wx80
4834 then raise DecodeError(f6
,false,ERR_ILLFORMED_UTF8(b2
,n
,6))
4835 else let val c
= (Byte2Char b1
<<< 0w30
+
4836 Byte2Char b2
<<< 0w24
+
4837 Byte2Char b3
<<< 0w18
+
4838 Byte2Char b4
<<< 0w12
+
4839 Byte2Char b5
<<< 0w06
+
4840 Byte2Char b6
- diff6
)
4842 then raise DecodeError
4843 (f6
,false,ERR_INVALID_UTF8_SEQ
[b1
,b2
,b3
,b4
,b5
,b6
])
4844 else raise DecodeError
4845 (f6
,false,ERR_NON_UNI_UTF8(c
,n
))
4848 | _
=> raise InternalError(THIS_MODULE
,"getCharUtf8",
4849 "byte1switch holds "^
Int.toString n^
4850 ">6 for byte "^Bytes
.toString b1
)
4854 (* stop
of ../../Unicode
/Decode
/decodeUtf8
.sml
*)
4855 (* start
of ../../Unicode
/Decode
/decode
.sml
*)
4856 (*--------------------------------------------------------------------------*)
4857 (* Structure
: Decode
*)
4859 (* Exceptions raised by functions
in this
structure: *)
4860 (* checkEncoding
: NoSuchFile
*)
4861 (* encCloseFile
: none
*)
4862 (* encFileName
: none
*)
4863 (*--------------------------------------------------------------------------*)
4866 structure Error
: DecodeError
4870 exception DecEof
of DecFile
4871 exception DecError
of DecFile
* bool * Error
.DecodeError
4873 val decUri
: DecFile
-> Uri
.Uri
4874 val decName
: DecFile
-> string
4875 val decEncoding
: DecFile
-> Encoding
.Encoding
4877 val decOpenXml
: Uri
.Uri option
-> DecFile
4878 val decOpenUni
: Uri
.Uri option
* Encoding
.Encoding
-> DecFile
4879 val decClose
: DecFile
-> DecFile
4881 val decCommit
: DecFile
-> unit
4882 val decSwitch
: DecFile
* string -> DecFile
4884 val decGetChar
: DecFile
-> UniChar
.Char * DecFile
4885 val decGetArray
: DecFile
-> UniChar
.Char array
-> int * DecFile
* Error
.DecodeError option
4888 structure Decode
: Decode
=
4890 structure Error
= DecodeError
4892 UniChar Encoding Error
4893 DecodeFile DecodeMisc DecodeUcs2 DecodeUcs4
4894 DecodeUtf16 DecodeUtf8 DecodeUtil
4896 type DecFile
= Encoding
* File
4897 exception DecEof
of DecFile
4898 exception DecError
of DecFile
* bool * DecodeError
4900 (*--------------------------------------------------------------------*)
4901 (* close an encoded entity
. *)
4902 (*--------------------------------------------------------------------*)
4903 fun decClose (_
,f
) = (NOENC
,f
) before closeFile f
4904 (*--------------------------------------------------------------------*)
4905 (* get the uri
string of an encoded entity
. *)
4906 (*--------------------------------------------------------------------*)
4907 fun decName (_
,f
) = fileName f
4908 (*--------------------------------------------------------------------*)
4909 (* get the uri
of an encoded entity
. *)
4910 (*--------------------------------------------------------------------*)
4911 fun decUri (_
,f
) = fileUri f
4912 (*--------------------------------------------------------------------*)
4913 (* get the encoding
of an encoded entity
. *)
4914 (*--------------------------------------------------------------------*)
4915 fun decEncoding (enc
,_
) = enc
4917 (*--------------------------------------------------------------------*)
4918 (* commit the auto
-detected encoding
. *)
4919 (*--------------------------------------------------------------------*)
4920 fun decCommit (enc
,f
) =
4925 | _
=> raise DecError((enc
,f
),false,ERR_NO_ENC_DECL(encodingName enc
))
4927 (*--------------------------------------------------------------------*)
4928 (* change to another
- compatible
- encoding
. *)
4929 (*--------------------------------------------------------------------*)
4930 fun decSwitch ((enc
,f
),decl
) =
4932 val decEnc
= isEncoding decl
4933 val _
= if decEnc
<>NOENC
then ()
4934 else raise DecError((enc
,f
),false,ERR_UNSUPPORTED_ENC decl
)
4935 val newEnc
= switchEncoding(enc
,decEnc
)
4936 val _
= if decEnc
<>NOENC
orelse enc
=NOENC
then ()
4937 else raise DecError((enc
,f
),false,ERR_INCOMPATIBLE_ENC(encodingName enc
,decl
))
4941 (*--------------------------------------------------------------------*)
4942 (* get a character from an encoded entity
. *)
4943 (*--------------------------------------------------------------------*)
4944 fun decGetChar (enc
,f
) =
4947 of NOENC
=> raise EndOfFile f
4948 | ASCII
=> getCharAscii f
4949 | EBCDIC
=> getCharEbcdic f
4950 | LATIN1
=> getCharLatin1 f
4951 | UCS2B
=> getCharUcs2b f
4952 | UCS2L
=> getCharUcs2l f
4953 | UCS4B
=> getCharUcs4b f
4954 | UCS4L
=> getCharUcs4l f
4955 | UCS4SB
=> getCharUcs4sb f
4956 | UCS4SL
=> getCharUcs4sl f
4957 | UTF8
=> getCharUtf8 f
4958 | UTF16B
=> getCharUtf16b f
4959 | UTF16L
=> getCharUtf16l f
4962 handle EndOfFile f
=> raise DecEof(NOENC
,f
)
4963 |
DecodeError(f
,eof
,err
) => raise DecError((enc
,f
),eof
,err
)
4965 (*--------------------------------------------------------------------*)
4966 (* Load new characters
, depending on the current entity
's encoding
. *)
4967 (*--------------------------------------------------------------------*)
4968 fun decGetArray (enc
,f
) arr
=
4970 (*--------------------------------------------------------------*)
4971 (* Load the buffer
with len new characters
, or until the entity
*)
4972 (* end is reached
. Close the current file
in that
case. *)
4973 (* Local
exception Ended is needed
in order to preserve tail
*)
4975 (*--------------------------------------------------------------*)
4976 fun loadArray getChar
=
4978 val ende
= Array
.length arr
4979 exception Error
of int * exn
4981 if idx
=ende
then (ende
,(enc
,f
),NONE
)
4982 else let val (c
,f1
) = getChar f
handle exn
=> raise Error (idx
,exn
)
4983 val _
= Array
.update(arr
,idx
,c
)
4986 in doit (0,f
) handle Error(idx
,exn
)
4988 of EndOfFile f
=> (idx
,(NOENC
,f
),NONE
)
4989 |
DecodeError (f
,_
,err
) => (idx
,(enc
,f
),SOME err
)
4993 of NOENC
=> (0,(NOENC
,f
),NONE
)
4994 | ASCII
=> loadArray getCharAscii
4995 | EBCDIC
=> loadArray getCharEbcdic
4996 | LATIN1
=> loadArray getCharLatin1
4997 | UCS2B
=> loadArray getCharUcs2b
4998 | UCS2L
=> loadArray getCharUcs2l
4999 | UCS4B
=> loadArray getCharUcs4b
5000 | UCS4L
=> loadArray getCharUcs4l
5001 | UCS4SB
=> loadArray getCharUcs4sb
5002 | UCS4SL
=> loadArray getCharUcs4sl
5003 | UTF8
=> loadArray getCharUtf8
5004 | UTF16B
=> loadArray getCharUtf16b
5005 | UTF16L
=> loadArray getCharUtf16l
5009 (*--------------------------------------------------------------------*)
5010 (* open an XML file
and try to auto
-detect its encoding
. *)
5011 (*--------------------------------------------------------------------*)
5012 (* Auto
-detection
of the encoding
of XML entities according to App
. F
*)
5013 (* of the XML recommendation
. *)
5015 (* The file is opened
in basic mode
and upto four bytes are read from
*)
5016 (* it
in order to detect the encoding
: if they constitute a prefix
of *)
5017 (* "<?xml" in a recognized encoding
, this encoding is taken
. *)
5018 (*--------------------------------------------------------------------*)
5019 (* read upto four bytes from the file
, detect the encoding
, and unget
*)
5020 (* the read bytes
. Return the resulting encoded file
and its encoding
*)
5021 (*--------------------------------------------------------------------*)
5022 (**************************************************************************)
5023 (* NB
24.08.2000 Autodetection
of encoding is affected by the
*)
5024 (* XML
1.0 Specification
Errata (10.08.2000) E
44 *)
5026 (* The first four bytes read are interpreted according to
: *)
5028 (* "Append the following to the second paragraph: *)
5029 (* The notation ## is used to denote any byte value except 00. *)
5030 (* Adjust the itemized list of detection cases to read as follows: *)
5032 (* With a Byte Order Mark: *)
5033 (* 00 00 FE FF: UCS-4, big-endian machine (1234 order) *)
5034 (* FF FE 00 00: UCS-4, little-endian machine (4321 order) *)
5035 (* FE FF 00 ##: UTF-16, big-endian *)
5036 (* FF FE ## 00: UTF-16, little-endian *)
5037 (* EF BB BF: UTF-8 *)
5038 (* Without a Byte Order Mark: *)
5039 (* 00 00 00 3C: UCS-4, big-endian machine (1234 order) *)
5040 (* 3C 00 00 00: UCS-4, little-endian machine (4321 order) *)
5041 (* 00 00 3C 00: UCS-4, unusual octet order (2143) *)
5042 (* 00 3C 00 00: UCS-4, unusual octet order (3412) *)
5047 (* 00 0D ## ## or *)
5048 (* 00 0A ## ##: Big-endian UTF-16 or ISO-10646-UCS-2. Note that, absent *)
5049 (* an encoding declaration, these cases are strictly *)
5050 (* speaking in error. *)
5055 (* 0D 00 ## ## or *)
5056 (* 0A 00 ## ##: Little-endian UTF-16 or ISO-10646-UCS-2. Note that, absent*)
5057 (* an encoding declaration, these cases are strictly *)
5058 (* speaking in error. *)
5059 (* 3C 3F 78 6D: UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, *)
5060 (* EUC, or any other 7-bit, 8-bit, or mixed-width encoding *)
5061 (* which ensures that the characters of ASCII have their *)
5062 (* normal positions, width, and values; the actual encoding *)
5063 (* declaration must be read to detect which of these *)
5064 (* applies, but since all of these encodings use the same *)
5065 (* bit patterns for the ASCII characters, the encoding *)
5066 (* declaration itself may be read reliably *)
5067 (* 4C 6F A7 94: EBCDIC (in some flavor; the full encoding declaration *)
5068 (* must be read to tell which code page is in use) *)
5069 (* other: UTF-8 without an encoding declaration, or else the data stream *)
5070 (* is corrupt, fragmentary, or enclosed in a wrapper of some kind *)
5071 (**************************************************************************)
5075 fun decOpenXml uri =
5077 fun get4Bytes (n,f) =
5079 else let val (b,f1) = getByte f
5080 val (bs,f2) = get4Bytes (n+1,f1)
5083 handle EndOfFile f => (nil,f)
5088 [0wx0,0wx0,0wxFE,0wxFF] => (UCS4B,nil)
5089 | [0wxFF,0wxFE,0wx0,0wx0] => (UCS4L,nil)
5090 | [0wxFE,0wxFF,0wx0,b4] =>
5091 if b4 <> 0wx0 then (UTF16B,[0wx0,b4])
5093 | [0wxFF,0wxFE,b3,0wx0] =>
5094 if b3 <> 0wx0 then (UTF16L,[b3,0wx0])
5096 | [0wxEF,0wxBB,0wxBF,b4] => (UTF8,[b4])
5097 | [0wx0,0wx0,0wx0,0wx3C] => (UCS4B,bs)
5098 | [0wx3C,0wx0,0wx0,0wx0] => (UCS4L,bs)
5099 | [0wx0,0wx0,0wx3C,0wx0] => (UCS4SB,bs)
5100 | [0wx0,0wx3C,0wx0,0wx0] => (UCS4SL,bs)
5101 | [0wx0,b2,b3,b4] =>
5102 if (b2=0wx3C orelse b2=0wx25 orelse b2=0wx20
5103 orelse b2=0wx09 orelse b2=0wx0D orelse b2=0wx0A)
5104 andalso (b3<>0wx0 orelse b4<>0wx0) then (UTF16B,bs)
5106 | [b1,0wx0,b3,b4] =>
5107 if (b1=0wx3C orelse b1=0wx25 orelse b1=0wx20
5108 orelse b1=0wx09 orelse b1=0wx0D orelse b1=0wx0A)
5109 andalso (b3<>0wx0 orelse b4<>0wx0) then (UTF16L,bs)
5111 | [0wx4C,0wx6F,0wxA7,0wx94] => (EBCDIC,bs)
5114 val f = openFile uri
5115 val (bs,f1) = get4Bytes(0,f)
5116 val (enc,unget) = detect bs
5117 in (enc,ungetBytes(f1,unget))
5120 (*--------------------------------------------------------------------*)
5121 (* open a Unicode file. Check whether it starts with a byte order *)
5122 (* mark. If yes, chose UTF16 encoding, otherwise use the default that *)
5123 (* is provided as second argument. *)
5125 (* return the encoded file, a list of bytes looked ahead and the *)
5127 (*--------------------------------------------------------------------*)
5128 fun decOpenUni (uri,default) =
5131 (default,ungetBytes(f,bs))
5133 let val (b1,f1) = getByte f
5135 of 0wxFE => (let val (b2,f2) = getByte f1
5136 in if b2 = 0wxFF then (UTF16B,f2)
5137 else def(f2,[b1,b2])
5138 end handle EndOfFile f => def(f,[b1]))
5139 | 0wxFF => (let val (b2,f2) = getByte f1
5140 in if b2 = 0wxFE then (UTF16L,f2)
5141 else def(f2,[b1,b2])
5142 end handle EndOfFile f => def(f,[b1]))
5144 end handle EndOfFile f => def(f,nil)
5145 val f = openFile uri
5146 val (enc,f1) = detect f
5151 (* stop of ../../Unicode/Decode/decode.sml *)
5152 (* start of ../../Parser/Error/errorData.sml *)
5153 structure ErrorData =
5155 (*--------------------------------------------------------------------*)
5156 (* a position holds the filename, line and column number. *)
5157 (*--------------------------------------------------------------------*)
5158 type Position = string * int * int
5159 val nullPosition = ("",0,0)
5162 EXP_CHAR of UniChar.Char
5163 | EXP_DATA of UniChar.Data
5164 | EXP_STRING of string
5165 type Expected = ExpItem list
5166 type Found = UniChar.Data
5172 | LOC_ATT_DEFAULT of Position
5205 datatype EntityClass =
5214 | IT_CHAR of UniChar.Char
5217 | IT_DATA of UniChar.Data
5238 ERR_EMPTY of Location
5239 | ERR_ENDED_BY_EE of Location
5240 | ERR_EXPECTED of Expected * Found
5241 | ERR_NON_XML_CHAR of UniChar.Char
5243 | ERR_NON_XML_CHARREF of UniChar.Char
5245 (* other well-formedness errors *)
5246 | ERR_CANT_PARSE of Location
5247 | ERR_ELEM_ENT_NESTING of UniChar.Data
5248 | ERR_ELEM_TYPE_MATCH of UniChar.Data * UniChar.Data
5249 | ERR_OMITTED_END_TAG of UniChar.Data
5250 | ERR_IGNORED_END_TAG of UniChar.Data * UniChar.Data
5251 | ERR_ENDED_IN_PROLOG
5252 | ERR_FORBIDDEN_HERE of Item * Location
5253 | ERR_ILLEGAL_ENTITY of EntityClass * UniChar.Data * Location
5255 | ERR_MULT_ATT_SPEC of UniChar.Data
5256 | ERR_RECURSIVE_ENTITY of EntityClass * UniChar.Data
5257 | ERR_UNDEC_ENTITY of EntityClass * UniChar.Data
5259 (* validity errors concerning attributes *)
5260 | ERR_AT_LEAST_ONE of Item
5261 | ERR_AT_MOST_ONE of Item
5262 | ERR_ATT_IS_NOT of UniChar.Data * Item
5263 | ERR_EXACTLY_ONE of Item
5264 | ERR_FIXED_VALUE of UniChar.Data * UniChar.Vector * UniChar.Vector
5266 | ERR_MISSING_ATT of UniChar.Data
5267 | ERR_MULT_ID_ELEM of UniChar.Data
5268 | ERR_MUST_BE_AMONG of Item * UniChar.Data * UniChar.Data list
5269 | ERR_MUST_BE_UNPARSED of UniChar.Data * Location
5270 | ERR_REPEATED_ID of UniChar.Data
5271 | ERR_UNDECL_ATT of UniChar.Data * UniChar.Data
5272 | ERR_UNDECL_ID of UniChar.Data * Position list
5274 (* validity errors concerning elements *)
5275 | ERR_BAD_ELEM of UniChar.Data * UniChar.Data
5276 | ERR_ELEM_CONTENT of Item
5277 | ERR_EMPTY_TAG of UniChar.Data
5278 | ERR_ENDED_EARLY of UniChar.Data
5279 | ERR_MULT_MIXED of UniChar.Data
5280 | ERR_NONEMPTY of UniChar.Data
5281 | ERR_REDEC_ELEM of UniChar.Data
5282 | ERR_ROOT_ELEM of UniChar.Data * UniChar.Data
5284 (* other validity errors *)
5285 | ERR_DECL_ENT_NESTING of Location
5287 | ERR_GROUP_ENT_NESTING of Location
5289 | ERR_STANDALONE_DEF of UniChar.Data
5290 | ERR_STANDALONE_ELEM of UniChar.Data
5291 | ERR_STANDALONE_ENT of EntityClass *UniChar.Data
5292 | ERR_STANDALONE_NORM of UniChar.Data
5293 | ERR_UNDECLARED of Item * UniChar.Data * Location
5295 (* miscellaneous errors *)
5296 | ERR_DECL_PREDEF of UniChar.Data * UniChar.Vector
5297 | ERR_NO_SUCH_FILE of string * string
5298 | ERR_RESERVED of UniChar.Data * Item
5299 | ERR_VERSION of string
5302 (* compatibility errors *)
5303 | ERR_AMBIGUOUS of UniChar.Data * int * int
5304 | ERR_MUST_ESCAPE of UniChar.Char
5306 (* interoperability errors *)
5307 | ERR_EMPTY_TAG_INTER of UniChar.Data
5308 | ERR_MUST_BE_EMPTY of UniChar.Data
5310 (* decoding errors *)
5311 | ERR_DECODE_ERROR of Decode.Error.DecodeError
5316 | WARN_MULT_DECL of Item * UniChar.Data
5317 | WARN_SHOULD_DECLARE of UniChar.Data list
5319 | WARN_ATT_UNDEC_ELEM of UniChar.Data
5320 | WARN_MULT_ATT_DECL of UniChar.Data
5321 | WARN_MULT_ATT_DEF of UniChar.Data * UniChar.Data
5322 | WARN_ENUM_ATTS of UniChar.Data * UniChar.Data list
5324 | WARN_DFA_TOO_LARGE of UniChar.Data * int
5326 | WARN_NON_ASCII_URI of UniChar.Char
5328 (* stop of ../../Parser/Error/errorData.sml *)
5329 (* start of ../../Parser/Error/errorString.sml *)
5342 signature ErrorString =
5344 val errorChar2String : UniChar.Char -> string
5345 val errorData2String : UniChar.Data -> string
5346 val errorVector2String : UniChar.Vector -> string
5348 val quoteErrorChar0 : UniChar.Char -> string
5349 val quoteErrorChar : UniChar.Char -> string
5350 val quoteErrorData : UniChar.Data -> string
5351 val quoteErrorString : string -> string
5352 val quoteErrorVector : UniChar.Vector -> string
5354 val Position2String : ErrorData.Position -> string
5356 val Expected2String : ErrorData.Expected -> string
5357 val Found2String : ErrorData.Found -> string
5359 val Item2String : ErrorData.Item -> string
5360 val AnItem2String : ErrorData.Item -> string
5362 val Location2String : ErrorData.Location -> string
5363 val InLocation2String : ErrorData.Location -> string
5365 val EntityClass2String : ErrorData.EntityClass -> string
5368 structure ErrorString : ErrorString =
5371 ErrorData UniChar UtilString
5374 fun errorChar2String c =
5378 | _ => if c>=0wx20 andalso c<0wx100 then String.implode [Char2char c]
5379 else "U
+"^UtilString.toUpperString
5380 (StringCvt.padLeft #"0" 4 (Chars.toString c))
5382 fun errorData2String cs =
5383 String.concat (map errorChar2String cs)
5384 fun errorVector2String vec =
5385 errorData2String (Vector.foldr (op ::) nil vec)
5388 fun quoteErrorChar0 c = QUOTE^errorChar2String c^QUOTE
5389 fun quoteErrorChar c = if c=0wx0 then "entity
end" else QUOTE^errorChar2String c^QUOTE
5390 fun quoteErrorData cs = QUOTE^errorData2String cs^QUOTE
5391 fun quoteErrorString s = QUOTE^s^QUOTE
5392 fun quoteErrorVector v = QUOTE^errorVector2String v^QUOTE
5394 fun Position2String (fname,l,c) =
5396 else String.concat ["[",fname,":",Int2String l,".",Int2String c,"]"]
5398 fun ExpItem2String exp =
5400 of EXP_CHAR c => quoteErrorChar c
5401 | EXP_DATA cs => quoteErrorData cs
5404 fun Expected2String exp =
5407 | [one] => ExpItem2String one
5408 | _ => let val l=List.length exp
5409 in List2xString ("",", ","") ExpItem2String (List.take (exp,l-1))
5410 ^" or
"^ExpItem2String (List.last exp)
5412 fun Found2String fnd =
5414 of [0wx0] => "entity
end"
5415 | cs => quoteErrorData cs
5417 fun Location2String loc =
5419 of LOC_NONE => "nothing
"
5420 | LOC_AFTER_DTD => "document instance
"
5421 | LOC_ATT_DECL => "attribute list declaration
"
5422 | LOC_ATT_DEFAULT pos => "default value declared at
"^Position2String pos
5423 | LOC_ATT_VALUE => "attribute value
"
5424 | LOC_CDATA => "CDATA section
"
5425 | LOC_CHOICE => "choice list
"
5426 | LOC_COMMENT => "comment
"
5427 | LOC_CONTENT => "content
"
5428 | LOC_DECL => "declaration
"
5429 | LOC_DOC_DECL => "document
type declaration
"
5430 | LOC_ELEM_DECL => "element
type declaration
"
5431 | LOC_ENCODING => "encoding name
"
5432 | LOC_ENT_DECL => "entity declaration
"
5433 | LOC_ENT_VALUE => "entity value
"
5434 | LOC_EPILOG => "epilog
"
5435 | LOC_ETAG => "end-tag
"
5436 | LOC_IGNORED => "ignored section
"
5437 | LOC_INCLUDED => "included section
"
5438 | LOC_INT_DECL => "declaration
in the internal subset
"
5439 | LOC_INT_SUBSET => "internal subset
"
5440 | LOC_LITERAL => "literal
"
5441 | LOC_MIXED => "Mixed list
"
5442 | LOC_NOT_DECL => "notation declaration
"
5443 | LOC_OUT_COND => "outside a conditional section
"
5444 | LOC_PROLOG => "prolog
"
5445 | LOC_PROC => "processing instruction
"
5446 | LOC_PUB_LIT => "public identifier
"
5447 | LOC_SEQ => "sequence list
"
5448 | LOC_STAG => "start
-tag
"
5449 | LOC_SUBSET => "declaration subset
"
5450 | LOC_SYS_LIT => "system identifier
"
5451 | LOC_TEXT_DECL => "text declaration
"
5452 | LOC_VERSION => "version number
"
5453 | LOC_XML_DECL => "XML declaration
"
5454 fun InLocation2String loc =
5457 | LOC_AFTER_DTD => "after the DTD
"
5458 | LOC_CONTENT => "in content
"
5459 | LOC_ATT_DEFAULT pos => "in default value declared at
"^Position2String pos
5460 | LOC_DOC_DECL => "in the document
type declaration
"
5461 | LOC_EPILOG => "after the root element
"
5462 | LOC_INT_SUBSET => "in the internal subset
"
5463 | LOC_OUT_COND => "outside a conditional section
"
5464 | LOC_PROLOG => "in prolog
"
5465 | LOC_SUBSET => "in the declaration subset
"
5466 | LOC_XML_DECL => "in the XML declaration
"
5467 | _ => "in "^prependAnA (Location2String loc)
5469 fun EntityClass2String ent =
5471 of ENT_GENERAL => "general
"
5472 | ENT_PARAMETER => "parameter
"
5473 | ENT_UNPARSED => "unparsed
"
5474 | ENT_EXTERNAL => "external
"
5476 fun Item2String item =
5478 of IT_ATT_NAME => "attribute name
"
5479 | IT_CDATA => "CDATA section
"
5480 | IT_CHAR c => "character
"^quoteErrorChar c
5481 | IT_CHAR_REF => "character reference
"
5482 | IT_COND => "conditional section
"
5483 | IT_DATA cs => if null cs then "character data
" else quoteErrorData cs
5484 | IT_DECL => "declaration
"
5485 | IT_DTD => "document
type declaration
"
5486 | IT_ELEM => "element
type"
5487 | IT_ENT_NAME => "entity name
"
5488 | IT_ETAG => "end-tag
"
5489 | IT_GEN_ENT => "general entity
"
5490 | IT_ID_NAME => "ID name
"
5491 | IT_LANG_ID => "language identifier
"
5493 | IT_NMTOKEN => "name token
"
5494 | IT_NOT_NAME => "notation name
"
5495 | IT_NOTATION => "notation
"
5496 | IT_PAR_ENT => "parameter entity
"
5497 | IT_PAR_REF => "parameter entity reference
"
5498 | IT_REF => "reference
"
5499 | IT_STAG => "start
-tag
"
5500 | IT_TARGET => "target name
"
5502 fun AnItem2String item =
5504 of IT_CHAR c => Item2String item
5505 | IT_DATA cs => Item2String item
5506 | _ => prependAnA (Item2String item)
5509 (* stop of ../../Parser/Error/errorString.sml *)
5510 (* start of ../../Parser/Error/errorMessage.sml *)
5519 signature ErrorMessage =
5521 val errorMessage : ErrorData.Error -> string list
5522 val warningMessage : ErrorData.Warning -> string list
5525 structure ErrorMessage : ErrorMessage =
5530 ErrorData ErrorString
5532 val quoteChar0 = quoteErrorChar0
5533 val quoteChar = quoteErrorChar
5534 val quoteData = quoteErrorData
5535 val quoteString = quoteErrorString
5536 val quoteVector = quoteErrorVector
5538 fun errorMessage err =
5541 of ERR_EMPTY loc => ["Empty
",Location2String loc]
5542 | ERR_ENDED_BY_EE loc => [toUpperFirst (Location2String loc),"ended by entity
end"]
5543 | ERR_EXPECTED (exp,found) =>
5544 ["Expected
",Expected2String exp,"but found
",Found2String found]
5545 | ERR_MISSING_WHITE => ["Missing white space
"]
5546 | ERR_NON_XML_CHAR c => ["Non
-XML character
",quoteChar0 c]
5547 | ERR_NON_XML_CHARREF c => ["Reference to non
-XML character
",quoteChar0 c]
5549 (* other well-formedness errors *)
5550 | ERR_CANT_PARSE loc => ["Cannot parse
",Location2String loc]
5551 | ERR_ELEM_ENT_NESTING elem =>
5552 ["The first
and last character
of element
",quoteData elem,
5553 "are
in different entities
"]
5554 | ERR_ELEM_TYPE_MATCH (elem,other) =>
5555 ["Element
",quoteData elem,"was ended by an
end-tag for
",quoteData other]
5556 | ERR_IGNORED_END_TAG(elem,other) =>
5557 ["An
end-tag for element
type",quoteData other,"is not allowed
in the
",
5558 "content
of element
",quoteData elem]
5559 | ERR_OMITTED_END_TAG elem =>
5560 ["Element
",quoteData elem,"has no
end-tag
"]
5561 | ERR_ENDED_IN_PROLOG => ["Document entity ended
in prolog
"]
5562 | ERR_FORBIDDEN_HERE(what,loc) =>
5563 [AnItem2String what,"is not allowed
",InLocation2String loc]
5564 | ERR_ILLEGAL_ENTITY(what,ent,loc) =>
5565 ["Reference to
",EntityClass2String what,"entity
",quoteData ent,InLocation2String loc]
5566 | ERR_MULTIPLE_DTD => ["Repeated document
type declaration
"]
5567 | ERR_MULT_ATT_SPEC att =>
5568 ["A value for attribute
",quoteData att,"was already specified
in this tag
"]
5569 | ERR_RECURSIVE_ENTITY(what,ent) =>
5570 ["Reference to
",EntityClass2String what,"entity
",quoteData ent,
5571 "that is already
open"]
5572 | ERR_UNDEC_ENTITY(what,ent) =>
5573 ["Reference to undeclared
",EntityClass2String what,"entity
",quoteData ent]
5575 (* validity errors concerning attributes *)
5576 | ERR_AT_LEAST_ONE what => ["At least one
",Item2String what,"must be specified
"]
5577 | ERR_AT_MOST_ONE what => ["Only one
",Item2String what,"may be specified
"]
5578 | ERR_ATT_IS_NOT(cs,what) => [quoteData cs,"is not
",AnItem2String what]
5579 | ERR_EXACTLY_ONE what => [toUpperFirst (AnItem2String what),"must be specified
"]
5580 | ERR_FIXED_VALUE(att,value,fixed) =>
5581 ["Attribute
",quoteData att,"has the value
",quoteVector value,
5582 "but was declared
with a fixed default value
of",quoteVector fixed]
5584 ["An ID attribute must have a default value
of #IMPLIED or #REQUIRED
"]
5585 | ERR_MISSING_ATT att =>
5586 ["No value was specified for required attribute
",quoteData att]
5587 | ERR_MULT_ID_ELEM elem =>
5588 ["Element
type",quoteData elem,"already has an ID attribute
"]
5589 | ERR_MUST_BE_AMONG (what,x,ys) =>
5590 [toUpperFirst (Item2String what),quoteData x,"is none
of",
5591 List2xString ("",",","") quoteData ys]
5592 | ERR_MUST_BE_UNPARSED (name,loc) =>
5593 [quoteData name,InLocation2String loc,"is not the name
of an unparsed entity
"]
5594 | ERR_REPEATED_ID name =>
5595 ["ID name
",quoteData name,"already occurred
as an attribute value
"]
5596 | ERR_UNDECL_ATT(att,elem) =>
5597 ["Attribute
",quoteData att,"was not declared for element
type",quoteData elem]
5598 | ERR_UNDECL_ID(name,refs) =>
5599 (if null refs then ["Reference to non
-existent ID
",quoteData name]
5600 else ["Reference to non
-existent ID
",quoteData name,
5601 "(also referenced at
",List2xString ("",", ",")") Position2String refs])
5603 (* validity errors concerning elements *)
5604 | ERR_BAD_ELEM (curr,elem) =>
5605 ["Element
type",quoteData elem,"not allowed at this point
",
5606 "in the content
of element
",quoteData curr]
5607 | ERR_ELEM_CONTENT what =>
5608 [toUpperFirst (AnItem2String what),"is not allowed
in element content
"]
5609 | ERR_EMPTY_TAG elem =>
5610 ["Empty
-element tag for element
type",quoteData elem,
5611 "whose content model requires non
-empty content
"]
5612 | ERR_ENDED_EARLY elem =>
5613 ["Element
",quoteData elem,"ended
before its content was completed
"]
5614 | ERR_MULT_MIXED elem =>
5615 ["Element
type",quoteData elem,"already occurred
in this mixed
-content declaration
"]
5616 | ERR_NONEMPTY elem =>
5617 ["The
end-tag for element
",quoteData elem,"with declared EMPTY content
",
5618 "must follow immediately after its start
-tag
"]
5619 | ERR_REDEC_ELEM elem => ["Element
type",quoteData elem,"was already declared
"]
5620 | ERR_ROOT_ELEM (dec,root) =>
5621 ["Document element
",quoteData root,"does not match the name
",
5622 quoteData dec,"in the document
type declaration
"]
5624 (* other validity errors *)
5625 | ERR_DECL_ENT_NESTING loc =>
5626 ["The first
and last character
of this
",Location2String loc,
5627 "are not
in the same entity replacement text
"]
5628 | ERR_EE_INT_SUBSET =>
5629 ["An entity
end is not allowed
in a declaration
in the internal subset
"]
5630 | ERR_GROUP_ENT_NESTING loc =>
5631 ["The opening
and closing parentheses
of this
",Location2String loc,
5632 "are not
in the same entity replacement text
"]
5634 ["There is no document
type declaration
. Switching to semi
-validating mode
",
5635 "(will not check for declaredness
of entities
, elements
, etc
.)"]
5636 | ERR_STANDALONE_DEF att =>
5637 ["Externally declared attribute
",quoteData att,"was defaulted
,",
5638 "although the standalone declaration is
",quoteString "yes
"]
5639 | ERR_STANDALONE_ELEM elem =>
5640 ["White space occurred
in the content
of externally declared
",
5641 "element
",quoteData elem,"with declared element content
",
5642 "although the standalone declaration is
",quoteString "yes
"]
5643 | ERR_STANDALONE_ENT(what,ent) =>
5644 ["Reference to externally declared
",EntityClass2String what,"entity
",
5645 quoteData ent^",","although the standalone declaration is
",quoteString "yes
"]
5646 | ERR_STANDALONE_NORM att =>
5647 ["The value for externally declared attribute
",
5648 quoteData att,"was changed
as a result
of normalization
,",
5649 "although the standalone declaration is
",quoteString "yes
"]
5650 | ERR_UNDECLARED (what,x,loc) =>
5651 ["Undeclared
",Item2String what,quoteData x,InLocation2String loc]
5653 (* miscellaneous errors *)
5654 | ERR_DECL_PREDEF(ent,def) =>
5655 ["General entity
",quoteData ent,"must be declared
as internal entity
",
5656 "with replacement text
",quoteVector def]
5657 | ERR_NO_SUCH_FILE(f,msg) => ["Could not
open file
",quoteString f,"("^msg^")"]
5658 | ERR_RESERVED(name,what) =>
5659 [quoteData name,"is reserved for standardization
and therefore not allowed
as",
5661 | ERR_VERSION version =>
5662 ["XML version
",quoteString version,"is not supported
"]
5664 ["Attribute
",quoteString "xml
:space
","must be given an enumeration
type",
5665 "with values
",quoteString "default
","and",quoteString "preserve
","only
"]
5667 (* compatibility errors *)
5668 | ERR_AMBIGUOUS(a,n1,n2) =>
5669 ["Content model is ambiguous
: conflict between the
",numberNth n1,
5670 "and the
",numberNth n2,"occurrence
of element
",quoteData a^".",
5671 "Using an approximation instead
"]
5672 | ERR_MUST_ESCAPE c => ["Character
",quoteChar c,"must be escaped for compatibility
"]
5674 (* interoperability errors *)
5675 | ERR_EMPTY_TAG_INTER elem =>
5676 ["Empty
-element tag for element
",quoteData elem,"with non
-EMPTY declared content
"]
5677 | ERR_MUST_BE_EMPTY elem =>
5678 ["An empty
-element tag must be used for element
type",
5679 quoteData elem,"with EMPTY declared content
"]
5681 (* decoding errors *)
5682 | ERR_DECODE_ERROR err => "Decoding error
:"::Decode.Error.decodeMessage err
5684 fun warningMessage warn =
5686 of WARN_NO_XML_DECL => ["Document entity has no XML declaration
"]
5688 | WARN_MULT_DECL(what,name) =>
5689 ["Repeated declaration for
",Item2String what,quoteData name]
5690 | WARN_SHOULD_DECLARE(ents) =>
5691 let val (one,more) = (hd ents,tl ents)
5693 of nil => ["The predefined entity
",quoteData one,"should have been declared
"]
5694 | _ => ["The predefined entities
",List2xString ("",", ","") quoteData more,
5695 "and",quoteData one,"should have been declared
"]
5698 | WARN_ATT_UNDEC_ELEM elem =>
5699 ["Attribute
-list declaration for undeclared element
type",quoteData elem]
5700 | WARN_MULT_ATT_DECL elem =>
5701 ["Repeated attribute
-list declaration for element
type",quoteData elem]
5702 | WARN_MULT_ATT_DEF(elem,att) =>
5703 ["Repeated definition
of attribute
",quoteData att,"for element
type",quoteData elem]
5704 | WARN_ENUM_ATTS(elem,names) =>
5705 ["The following name tokens occur more than once
in the enumerated attribute
",
5706 "types
of element
",quoteData elem^":",List2xString ("",", ","") quoteData names]
5708 | WARN_DFA_TOO_LARGE (elem,max) =>
5709 ["The finite state machine for the content model
of element
type",
5710 quoteData elem,"would have more than the maximal allowed number
of",
5711 Int2String max,"states
. Using an approximation instead
"]
5713 | WARN_NON_ASCII_URI c =>
5714 ["System identifier contains non
-ASCII character
",quoteChar c]
5717 (* stop of ../../Parser/Error/errorMessage.sml *)
5718 (* start of ../../Parser/Error/errorUtil.sml *)
5721 signature ErrorUtil =
5723 val isFatalError : ErrorData.Error -> bool
5724 val isDecodeError : ErrorData.Error -> bool
5725 val isSyntaxError : ErrorData.Error -> bool
5726 val isValidityError : ErrorData.Error -> bool
5727 val isWellFormedError : ErrorData.Error -> bool
5730 structure ErrorUtil : ErrorUtil =
5734 fun isDecodeError err =
5736 of ERR_DECODE_ERROR _ => true
5739 fun isSyntaxError err =
5741 of ERR_EMPTY _ => true
5742 | ERR_ENDED_BY_EE _ => true
5743 | ERR_EXPECTED _ => true
5744 | ERR_MISSING_WHITE => true
5745 | ERR_NON_XML_CHAR _ => true
5746 | ERR_NON_XML_CHARREF _ => true
5749 fun isWellFormedError err =
5751 of ERR_CANT_PARSE _ => true
5752 | ERR_ELEM_ENT_NESTING _ => true
5753 | ERR_ELEM_TYPE_MATCH _ => true
5754 | ERR_OMITTED_END_TAG _ => true
5755 | ERR_IGNORED_END_TAG _ => true
5756 | ERR_ENDED_IN_PROLOG => true
5757 | ERR_FORBIDDEN_HERE _ => true
5758 | ERR_ILLEGAL_ENTITY _ => true
5759 | ERR_MULTIPLE_DTD => true
5760 | ERR_MULT_ATT_SPEC _ => true
5761 | ERR_RECURSIVE_ENTITY _ => true
5762 | ERR_UNDEC_ENTITY _ => true
5763 | _ => isSyntaxError err
5765 fun isFatalError err =
5767 of ERR_NO_SUCH_FILE _ => true
5768 | _ => isWellFormedError err
5770 fun isValidityError err =
5772 of ERR_AT_LEAST_ONE _ => true
5773 | ERR_AT_MOST_ONE _ => true
5774 | ERR_ATT_IS_NOT _ => true
5775 | ERR_EXACTLY_ONE _ => true
5776 | ERR_FIXED_VALUE _ => true
5777 | ERR_ID_DEFAULT => true
5778 | ERR_MISSING_ATT _ => true
5779 | ERR_MULT_ID_ELEM _ => true
5780 | ERR_MUST_BE_AMONG _ => true
5781 | ERR_MUST_BE_UNPARSED _ => true
5782 | ERR_REPEATED_ID _ => true
5783 | ERR_UNDECL_ATT _ => true
5784 | ERR_UNDECL_ID _ => true
5785 | ERR_BAD_ELEM _ => true
5786 | ERR_ELEM_CONTENT _ => true
5787 | ERR_EMPTY_TAG _ => true
5788 | ERR_ENDED_EARLY _ => true
5789 | ERR_MULT_MIXED _ => true
5790 | ERR_NONEMPTY _ => true
5791 | ERR_REDEC_ELEM _ => true
5792 | ERR_ROOT_ELEM _ => true
5793 | ERR_DECL_ENT_NESTING _ => true
5794 | ERR_EE_INT_SUBSET => true
5795 | ERR_GROUP_ENT_NESTING _ => true
5796 | ERR_NO_DTD => true
5797 | ERR_STANDALONE_DEF _ => true
5798 | ERR_STANDALONE_ELEM _ => true
5799 | ERR_STANDALONE_ENT _ => true
5800 | ERR_STANDALONE_NORM _ => true
5801 | ERR_UNDECLARED _ => true
5804 (* stop of ../../Parser/Error/errorUtil.sml *)
5805 (* start of ../../Parser/Error/expected.sml *)
5810 structure Expected =
5813 open UniChar ErrorData
5815 val expAnElemName = [EXP_STRING "an element name
"]
5816 val expAnEntName = [EXP_STRING "an entity name
"]
5817 val expAName = [EXP_STRING "a name
"]
5818 val expANameToken = [EXP_STRING "a name token
"]
5819 val expANotName = [EXP_STRING "a notation name
"]
5820 val expATarget = [EXP_STRING "a target name
"]
5821 val expAttDefKey = [EXP_DATA (String2Data "REQUIRED
"),EXP_DATA (String2Data "IMPLIED
"),
5822 EXP_DATA (String2Data "FIXED
")]
5823 val expAttNameGt = [EXP_STRING "an attribute name
",EXP_CHAR 0wx3E]
5824 val expAttSTagEnd = [EXP_STRING "an attribute name
",EXP_CHAR 0wx3E,
5825 EXP_DATA(String2Data "/>")]
5826 val expAttType = [EXP_CHAR 0wx28,EXP_DATA (String2Data "CDATA
"),
5827 EXP_DATA (String2Data "ID
"),EXP_DATA (String2Data "IDREF
"),
5828 EXP_DATA (String2Data "IDREFS
"),EXP_DATA (String2Data "ENTITY
"),
5829 EXP_DATA (String2Data "ENTITIES
"),EXP_DATA (String2Data "NMTOKEN
"),
5830 EXP_DATA (String2Data "NMTOKENS
"),EXP_DATA (String2Data "NOTATION
")]
5831 val expBarRpar = [EXP_CHAR 0wx29,EXP_CHAR 0wx7C]
5832 val expCdata = [EXP_DATA (String2Data "CDATA
")]
5833 fun expConCRpar c = [EXP_CHAR 0wx29,EXP_CHAR c]
5834 val expConRpar = [EXP_CHAR 0wx29,EXP_CHAR 0wx2C,EXP_CHAR 0wx7C]
5835 val expCondStatus = [EXP_DATA (String2Data "IGNORE
"),EXP_DATA (String2Data "INCLUDE
")]
5836 val expContSpec = [EXP_CHAR 0wx28,EXP_DATA (String2Data "ANY
"),
5837 EXP_DATA (String2Data "EMPTY
")]
5838 val expElemLpar = [EXP_STRING "an element name
",EXP_CHAR 0wx28]
5839 val expEncStand = [EXP_DATA (String2Data "encoding
"),
5840 EXP_DATA (String2Data "standalone
")]
5841 val expDash = [EXP_CHAR 0wx2D]
5842 val expDashDocLbrk = [EXP_CHAR 0wx2D,EXP_CHAR 0wx5B,EXP_DATA (String2Data "DOCTYPE
")]
5843 val expDashLbrack = [EXP_CHAR 0wx2D,EXP_CHAR 0wx5B]
5844 val expDigitX = [EXP_STRING "a digit
",EXP_CHAR 0wx78]
5845 val expEncoding = [EXP_DATA (String2Data "encoding
")]
5846 val expEncVers = [EXP_DATA (String2Data "encoding
"),EXP_DATA (String2Data "version
")]
5847 val expEntNamePero = [EXP_STRING "an entity name
",EXP_CHAR 0wx25]
5848 val expEq = [EXP_CHAR 0wx3D]
5849 val expExclQuest = [EXP_CHAR 0wx21,EXP_CHAR 0wx3F]
5850 val expExtId = [EXP_DATA (String2Data "PUBLIC
"),EXP_DATA (String2Data "SYSTEM
")]
5851 val expGt = [EXP_CHAR 0wx3E]
5852 val expGtNdata = [EXP_CHAR 0wx3E,EXP_DATA (String2Data "NDATA
")]
5853 val expHexDigit = [EXP_STRING "a hexadecimal digit
"]
5854 val expInSubset = [EXP_CHAR 0wx3C,EXP_CHAR 0wx5D,EXP_CHAR 0wx25,
5855 EXP_STRING "white space
"]
5856 val expLbrack = [EXP_CHAR 0wx5B]
5857 val expLitQuote = [EXP_CHAR 0wx22,EXP_CHAR 0wx27]
5858 val expLitQuotExt = [EXP_CHAR 0wx22,EXP_CHAR 0wx27,
5859 EXP_DATA (String2Data "PUBLIC
"),EXP_DATA (String2Data "SYSTEM
")]
5860 val expLpar = [EXP_CHAR 0wx28]
5861 val expNoYes = [EXP_DATA (String2Data "no
"),EXP_DATA (String2Data "yes
")]
5862 val expPcdata = [EXP_DATA (String2Data "PCDATA
")]
5863 val expProcEnd = [EXP_DATA (String2Data "?
>")]
5864 val expQuoteRni = [EXP_CHAR 0wx22,EXP_CHAR 0wx27,EXP_CHAR 0wx23]
5865 val expRbrack = [EXP_CHAR 0wx5D]
5866 val expRep = [EXP_CHAR 0wx2A]
5867 val expSemi = [EXP_CHAR 0wx3B]
5868 val expStandOpt = [EXP_DATA (String2Data "standalone
"),EXP_DATA (String2Data "?
>")]
5869 val expStartEnc = [EXP_STRING "a letter
"]
5870 val expStartMarkup = [EXP_DATA (String2Data "--"),EXP_DATA (String2Data "ATTLIST
"),
5871 EXP_DATA (String2Data "ELEMENT
"),EXP_DATA (String2Data "ENTITY
"),
5872 EXP_DATA (String2Data "NOTATION
")]
5873 val expVersion = [EXP_DATA (String2Data "version
")]
5876 (* stop of ../../Parser/Error/expected.sml *)
5877 (* start of ../../Parser/Error/errors.sml *)
5882 ErrorData ErrorMessage ErrorString ErrorUtil Expected
5884 (* stop of ../../Parser/Error/errors.sml *)
5885 (* start of ../../Parser/Base/baseData.sml *)
5886 (*--------------------------------------------------------------------------*)
5887 (* Structure: BaseData *)
5888 (*--------------------------------------------------------------------------*)
5890 structure BaseData =
5894 (*--- external ids may have a public id and must have a system id ---*)
5895 (*--- for notations, however, also the system id can be optional ----*)
5896 datatype ExternalId =
5897 EXTID of (string * UniChar.Char) option * (Uri.Uri * Uri.Uri * UniChar.Char) option
5899 (*--- external ids may have a public id and must have a system id ---*)
5900 type NotationInfo = ExternalId option
5902 (*--- replacement of a general entity ---*)
5903 datatype GenEntity =
5905 | GE_INTERN of UniChar.Vector * UniChar.Vector
5906 | GE_EXTERN of ExternalId
5907 | GE_UNPARSED of ExternalId * int * Errors.Position
5908 type GenEntInfo = GenEntity * bool
5910 fun isExtGen (GE_EXTERN _) = true
5911 | isExtGen _ = false
5913 (*--- replacement of a parameter entity ---*)
5914 datatype ParEntity =
5916 | PE_INTERN of UniChar.Vector * UniChar.Vector
5917 | PE_EXTERN of ExternalId
5918 type ParEntInfo = ParEntity * bool
5920 fun isExtPar (PE_EXTERN _) = true
5921 | isExtPar _ = false
5923 (*--- declared type of an attribute ---*)
5933 | AT_GROUP of int list
5934 | AT_NOTATION of int list
5936 (*--- typed attribute value ---*)
5938 AV_CDATA of UniChar.Vector
5939 | AV_NMTOKEN of UniChar.Data
5940 | AV_NMTOKENS of UniChar.Data list
5943 | AV_IDREFS of int list
5945 | AV_ENTITIES of int list
5946 | AV_GROUP of int list * int
5947 | AV_NOTATION of int list * int
5949 fun isIdType at = at=AT_ID
5951 (*--- default values of attributes ---*)
5952 datatype AttDefault =
5955 | AD_DEFAULT of (UniChar.Vector * UniChar.Vector * AttValue option)
5956 * (Errors.Position * bool ref)
5957 | AD_FIXED of (UniChar.Vector * UniChar.Vector * AttValue option)
5958 * (Errors.Position * bool ref)
5960 (*--- attribute definition (list) ---*)
5961 (*--- the boolean says whether it was externally declared ---*)
5962 type AttDef = int * AttType * AttDefault * bool
5963 type AttDefList = AttDef list
5965 (*--- content specification ---*)
5966 fun defaultAttDef idx = (idx,AT_CDATA,AD_IMPLIED,false)
5968 (*--- content specification ---*)
5969 datatype ContentSpec =
5972 | CT_MIXED of int list
5973 | CT_ELEMENT of DfaData.ContentModel * DfaData.Dfa
5978 | CT_MIXED _ => true
5981 type ElemInfo = {decl : (ContentSpec * bool) option,
5982 atts : (AttDefList * bool) option,
5985 val nullElemInfo : ElemInfo = {decl=NONE,
5989 (*--------------------------------------------------------------------*)
5990 (* the id info tells whether an id value has occurred for a name and *)
5991 (* the list of all positions where it occurred as an idref value. *)
5992 (*--------------------------------------------------------------------*)
5993 type IdInfo = bool * Errors.Position list
5994 val nullIdInfo : IdInfo = (false,nil)
5996 (* stop of ../../Parser/Base/baseData.sml *)
5997 (* start of ../../Parser/Dfa/dfaString.sml *)
6003 (*--------------------------------------------------------------------------*)
6004 (* Structure: DfaString *)
6007 (* This structure is needed for debugging of content models and tables. *)
6013 (* Exceptions raised by functions in this structure: *)
6014 (* Table2String : none *)
6015 (* ContentModel2String : none *)
6016 (*--------------------------------------------------------------------------*)
6017 signature DfaString =
6019 val ContentModel2String : (int -> string) -> DfaData.ContentModel -> string
6020 val Dfa2String : (int -> string) -> DfaData.Dfa -> string
6023 structure DfaString : DfaString =
6025 open DfaBase UtilString
6027 fun State2String q = if q=dfaError then "Error
" else Int2String q
6029 fun Info2String Elem2String (q,mt,fst) = String.concat
6030 (State2String q::Bool2xString ("[empty
]","") mt
6031 ::map (fn (q,a) => " "^Elem2String a^"->"^State2String q) fst)
6033 fun ContentModel2String Elem2String cm =
6035 of CM_ELEM i => Elem2String i
6036 | CM_OPT cm => ContentModel2String Elem2String cm^"?
"
6037 | CM_REP cm => ContentModel2String Elem2String cm^"*"
6038 | CM_PLUS cm => ContentModel2String Elem2String cm^"+"
6039 | CM_ALT cms => List2xString ("(","|
",")") (ContentModel2String Elem2String) cms
6040 | CM_SEQ cms => List2xString ("(",",",")") (ContentModel2String Elem2String) cms
6042 fun CM2String Elem2String =
6043 let fun cm2s indent cm =
6045 of (ELEM a,info) => String.concat
6046 [indent,Elem2String a," ",Info2String Elem2String info,"\n"]
6047 | (OPT cm',info) => String.concat
6048 [indent,"?
",Info2String Elem2String info,"\n",cm2s (indent^" ") cm']
6049 | (REP cm',info) => String.concat
6050 [indent,"* ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm']
6051 | (PLUS cm',info) => String.concat
6052 [indent,"+ ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm']
6053 | (ALT cms,info) => String.concat
6054 (indent^"|
"::Info2String Elem2String info::"\n"
6055 ::map (cm2s (indent^" ")) cms)
6056 | (SEQ cms,info) => String.concat
6057 (indent^", "::Info2String Elem2String info::"\n"
6058 ::map (cm2s (indent^" ")) cms)
6062 fun Row2String Elem2String (lo,hi,tab,fin) =
6065 (fn (i,q,yet) => if q<0 then yet
6066 else " "::Elem2String (i+lo)::"->"::State2String q::yet)
6067 (if fin then [" [Final
]"] else nil)
6070 fun Dfa2String Elem2String tab =
6073 (fn (q,row,yet) => State2String q::":"::Row2String Elem2String row::yet)
6076 (* stop of ../../Parser/Dfa/dfaString.sml *)
6077 (* start of ../../Parser/Base/baseString.sml *)
6090 (*--------------------------------------------------------------------------*)
6091 (* Structure: BaseString *)
6099 (* Exceptions raised by functions in this structure: *)
6100 (* ElemInfo2xString : InternalError *)
6101 (* ExternalId2String : none *)
6102 (* GenEntity2xString : none *)
6103 (* Notation2String : none *)
6104 (* IdInfo2String : none *)
6105 (* ParEntity2String : none *)
6106 (*--------------------------------------------------------------------------*)
6107 signature BaseString =
6109 val ExternalId2String : BaseData.ExternalId -> string
6110 val NotationInfo2String : BaseData.NotationInfo -> string
6112 val GenEntity2xString : (int -> string) -> BaseData.GenEntity -> string
6113 val ParEntity2String : BaseData.ParEntity -> string
6115 val ElemInfo2xString : (int -> string) * (int -> string) * (int -> string)
6116 * (int -> string) * (int -> string) -> BaseData.ElemInfo -> string
6118 val IdInfo2String : BaseData.IdInfo -> string
6121 structure BaseString : BaseString =
6125 Errors UniChar DfaString
6128 val THIS_MODULE = "BaseString
"
6130 fun ExternalId2String (EXTID id) =
6132 of (SOME(p,pq),SOME(rel,s,sq)) => String.concat
6133 ["PUBLIC
",quoteUni pq p,
6134 " ",quoteUni sq (Uri2String rel),
6135 " @
",quoteUni sq (Uri2String s)]
6136 | (SOME(p,pq),NONE) => String.concat
6137 ["PUBLIC
",quoteUni pq p]
6138 | (NONE,SOME(rel,s,sq)) => String.concat
6139 ["SYSTEM
",quoteUni sq (Uri2String rel),
6140 " @
",quoteUni sq (Uri2String s)]
6141 | (NONE,NONE) => "<none
>"
6142 fun NotationInfo2String not =
6144 of NONE => "undeclared
"
6145 | SOME extId => ExternalId2String extId
6147 fun GenEntity2xString NotIdx2String ge =
6149 of GE_NULL => "NULL
"
6150 | GE_INTERN(lit,cv) => let val quote = Vector.sub(lit,0)
6151 in String.concat ["INTERN
",Vector2String lit,
6152 " - ",quoteVector quote cv]
6154 | GE_EXTERN id => "EXTERN
"^ExternalId2String id
6155 | GE_UNPARSED(id,not,_) => "UNPARSED
"^ExternalId2String id^" "^NotIdx2String not
6157 fun ParEntity2String pe =
6159 of PE_NULL => "NULL
"
6160 | PE_INTERN(lit,cv) => let val quote = Vector.sub(lit,0)
6161 in String.concat ["INTERN
",Vector2String lit,
6162 " - ",quoteVector quote cv]
6164 | PE_EXTERN id => "EXTERN
"^ExternalId2String id
6166 fun ContentSpec2String Elem2String cs =
6169 | CT_EMPTY => "EMPTY
"
6170 | CT_MIXED is => List2xString ("MIXED (","|
",")") Elem2String is
6171 | CT_ELEMENT(cm,_) => "ELEMENT
"^ContentModel2String Elem2String cm
6173 fun AttValue2xString (Att2String,Ent2String,Id2String,Not2String) quote av =
6174 quoteUni quote (case av
6175 of AV_CDATA buf => Vector2String buf
6176 | AV_NMTOKEN cs => Data2String cs
6177 | AV_NMTOKENS css => List2xString (""," ","") Data2String css
6178 | AV_ID idx => Id2String idx
6179 | AV_IDREF idx => Id2String idx
6180 | AV_IDREFS idxs => List2xString (""," ","") Id2String idxs
6181 | AV_ENTITY idx => Ent2String idx
6182 | AV_ENTITIES idxs => List2xString (""," ","") Ent2String idxs
6183 | AV_GROUP(_,idx) => Att2String idx
6184 | AV_NOTATION(_,idx) => Not2String idx)
6186 fun AttDefault2xString funs ad =
6188 of AD_DEFAULT ((lit,cv,av),_) =>
6189 let val quote = Vector.sub(lit,0)
6190 in String.concat [quoteVector quote cv," ",
6191 Option2String0 (AttValue2xString funs quote) av]
6193 | AD_FIXED ((lit,cv,av),_) =>
6194 let val quote = Vector.sub(lit,0)
6195 in String.concat ["#FIXED
",quoteVector quote cv," ",
6196 Option2String0 (AttValue2xString funs quote) av]
6198 | AD_IMPLIED => "#IMPLIED
"
6199 | AD_REQUIRED => "#REQUIRED
"
6201 fun AttType2xString (Att2String,Not2String) at =
6203 of AT_CDATA => "CDATA
"
6204 | AT_NMTOKEN => "NMTOKEN
"
6205 | AT_NMTOKENS => "NMTOKENS
"
6207 | AT_IDREF => "IDREF
"
6208 | AT_IDREFS => "IDREFS
"
6209 | AT_ENTITY => "ENTITY
"
6210 | AT_ENTITIES => "ENTITIES
"
6211 | AT_GROUP idxs => List2xString ("(","|
",")") Att2String idxs
6212 | AT_NOTATION idxs => List2xString ("NOTATION(","|
",")") Not2String idxs
6214 fun AttDef2xString (funs as (Att2String,_,_,Not2String)) (idx,attType,default,ext) =
6215 String.concat [Att2String idx," ",
6216 AttType2xString (Att2String,Not2String) attType," ",
6217 AttDefault2xString funs default,
6218 Bool2xString ("[external
]","") ext]
6220 fun AttDefList2xString funs adl = List2xString ("",",","") (AttDef2xString funs) adl
6222 fun ElemInfo2xString (Att2String,Elem2String,Ent2String,Id2String,Not2String)
6223 ({decl,atts,...}:ElemInfo) =
6224 let val dec = case decl
6225 of NONE => "elem undeclared
"
6226 | SOME(cont,ext) => String.concat
6227 ["elem declared
",if ext then "ex
" else "in","ternally
: ",
6228 ContentSpec2String Elem2String cont]
6230 of NONE => "no atts declared
"
6231 | SOME(defs,hadId) => String.concat
6232 ["atts were declared
",if hadId then "(has id attribute
): " else ": ",
6233 AttDefList2xString (Att2String,Ent2String,Id2String,Not2String) defs]
6237 fun IdInfo2String (decl,refs) =
6238 Bool2xString ("declared
","undeclared
") decl^"/"^
6239 (if null refs then "no references
"
6240 else List2xString ("references
: ",", ","") Position2String refs)
6243 (* stop of ../../Parser/Base/baseString.sml *)
6244 (* start of ../../Parser/Base/base.sml *)
6254 (* stop of ../../Parser/Base/base.sml *)
6255 (* start of ../../Parser/Params/dtd.sml *)
6256 (*--------------------------------------------------------------------------*)
6257 (* Structure: Dtd *)
6259 (* Exceptions raised by functions in this structure: *)
6260 (* AttNot2Index : none *)
6261 (* Element2Index : none *)
6262 (* GenEnt2Index : none *)
6263 (* Id2Index : none *)
6264 (* Index2AttNot : NoSuchIndex *)
6265 (* Index2Element : NoSuchIndex *)
6266 (* Index2GenEnt : NoSuchIndex *)
6267 (* Index2Id : NoSuchIndex *)
6268 (* Index2ParEnt : NoSuchIndex *)
6269 (* ParEnt2Index : none *)
6270 (* entitiesWellformed : none *)
6271 (* getElement : NoSuchIndex *)
6272 (* getGenEnt : NoSuchIndex *)
6273 (* getId : NoSuchIndex *)
6274 (* getNotation : NoSuchIndex *)
6275 (* getParEnt : NoSuchIndex *)
6276 (* hasNotation : NoSuchIndex *)
6277 (* initDtdTables : none *)
6278 (* maxUsedElem : none *)
6279 (* maxUsedId : none *)
6280 (* printAttNotTable : none *)
6281 (* printIdTable : none *)
6282 (* printParEntTable : none *)
6283 (* printxElementTable : none *)
6284 (* printxGenEntTable : none *)
6285 (* setElement : NoSuchIndex *)
6286 (* setGenEnt : NoSuchIndex *)
6287 (* setId : NoSuchIndex *)
6288 (* setNotation : NoSuchIndex *)
6289 (* setParEnt : NoSuchIndex *)
6290 (*--------------------------------------------------------------------------*)
6295 val hasDtd : Dtd -> bool
6296 val hasExternal : Dtd -> bool
6297 val standsAlone : Dtd -> bool
6299 val setHasDtd : Dtd -> unit
6300 val setExternal : Dtd -> unit
6301 val setStandAlone : Dtd -> bool -> unit
6303 val entitiesWellformed : Dtd -> bool
6305 val validPredef : int -> UniChar.Vector
6306 val isRedefined : Dtd -> int -> bool
6307 val setRedefined : Dtd -> int -> unit
6308 val notRedefined : Dtd -> UniChar.Data list
6310 val AttNot2Index : Dtd -> UniChar.Data -> int
6311 val Element2Index : Dtd -> UniChar.Data -> int
6312 val Id2Index : Dtd -> UniChar.Data -> int
6313 val GenEnt2Index : Dtd -> UniChar.Data -> int
6314 val ParEnt2Index : Dtd -> UniChar.Data -> int
6316 val Index2Element : Dtd -> int -> UniChar.Data
6317 val Index2Id : Dtd -> int -> UniChar.Data
6318 val Index2GenEnt : Dtd -> int -> UniChar.Data
6319 val Index2AttNot : Dtd -> int -> UniChar.Data
6320 val Index2ParEnt : Dtd -> int -> UniChar.Data
6322 val getId : Dtd -> int -> Base.IdInfo
6323 val getElement : Dtd -> int -> Base.ElemInfo
6324 val getGenEnt : Dtd -> int -> Base.GenEntInfo
6325 val getNotation : Dtd -> int -> Base.NotationInfo
6326 val getParEnt : Dtd -> int -> Base.ParEntInfo
6328 val hasNotation : Dtd -> int -> bool
6330 val setId : Dtd -> int * Base.IdInfo -> unit
6331 val setElement : Dtd -> int * Base.ElemInfo -> unit
6332 val setGenEnt : Dtd -> int * Base.GenEntInfo -> unit
6333 val setNotation : Dtd -> int * Base.ExternalId -> unit
6334 val setParEnt : Dtd -> int * Base.ParEntInfo -> unit
6336 val maxUsedId : Dtd -> int
6337 val maxUsedElem : Dtd -> int
6338 val maxUsedGen : Dtd -> int
6340 val initDtdTables : unit -> Dtd
6341 val printDtdTables : Dtd -> unit
6343 val printAttNotTable : Dtd -> unit
6344 val printIdTable : Dtd -> unit
6345 val printElementTable : Dtd -> unit
6346 val printGenEntTable : Dtd -> unit
6347 val printParEntTable : Dtd -> unit
6349 val defaultIdx : int
6350 val preserveIdx : int
6351 val xmlLangIdx : int
6352 val xmlSpaceIdx : int
6353 val xmlSpaceType : Base.AttType
6356 structure Dtd :> Dtd =
6363 val O_TS_ELEM = ref 6 (* Initial size of element table *)
6364 val O_TS_GEN_ENT = ref 6 (* Initial size of general entity table *)
6365 val O_TS_ID = ref 6 (* Initial size of id attribute table *)
6366 val O_TS_ATT_NOT = ref 6 (* Initial size of notation table *)
6367 val O_TS_PAR_ENT = ref 6 (* Initial size of parameter entity table *)
6369 (*--------------------------------------------------------------------*)
6370 (* this is how the predefined entities must be declared. *)
6371 (*--------------------------------------------------------------------*)
6372 val predefined = Vector.fromList
6373 (map (fn (x,y,z) => (String2Data x,String2Vector y,String2Vector z))
6375 ("amp
" ,"'&#
38;'","&#
38;"),
6376 ("lt
" ,"'&#
60;'","&#
60;"),
6377 ("gt
" ,"'&#
62;'","&#
62;"),
6378 ("apos
","\"'\"" ,"'" ),
6379 ("quot
","'\"'" ,"\"" )])
6380 fun validPredef i = #3(Vector.sub(predefined,i))
6382 (*--------------------------------------------------------------------*)
6383 (* this type holds all information relevent to the DTD. *)
6384 (*--------------------------------------------------------------------*)
6385 type Dtd = {hasDtdFlag : bool ref,
6386 standAloneFlag : bool ref,
6387 externalFlag : bool ref,
6388 elDict : ElemInfo DataDict.Dict,
6389 genDict : GenEntInfo DataDict.Dict,
6390 idDict : IdInfo DataDict.Dict,
6391 notDict : NotationInfo DataDict.Dict,
6392 parDict : ParEntInfo DataDict.Dict,
6393 preRedef : bool array
6396 fun newDtd() = {hasDtdFlag = ref false,
6397 standAloneFlag = ref false,
6398 externalFlag = ref false,
6399 elDict = nullDict ("element
",nullElemInfo),
6400 idDict = nullDict ("ID name
",nullIdInfo),
6401 genDict = nullDict ("general entity
",(GE_NULL,false)),
6402 notDict = nullDict ("attribute
and notation
",NONE:NotationInfo),
6403 parDict = nullDict ("parameter entity
",(PE_NULL,false)),
6404 preRedef = Array.array(6,false)
6407 val default = String2Data "default
"
6408 val preserve = String2Data "preserve
"
6409 val xmlLang = String2Data "xml
:lang
"
6410 val xmlSpace = String2Data "xml
:space
"
6412 (*--------------------------------------------------------------------*)
6413 (* standalone status, existance of a DTD and of external declarations *)
6414 (* externalFlag is true if there is an external subset or a (not nece-*)
6415 (* ssarily external) parameter entity reference in the DTD. (cf. 4.1) *)
6416 (*--------------------------------------------------------------------*)
6417 fun standsAlone (dtd:Dtd) = !(#standAloneFlag dtd)
6418 fun hasExternal (dtd:Dtd) = !(#externalFlag dtd)
6419 fun hasDtd (dtd:Dtd) = !(#hasDtdFlag dtd)
6421 fun setHasDtd (dtd:Dtd) = #hasDtdFlag dtd := true
6422 fun setExternal (dtd:Dtd) = #externalFlag dtd := true
6423 fun setStandAlone (dtd:Dtd) x = #standAloneFlag dtd := x
6426 (*--------------------------------------------------------------------*)
6428 (* Well-Formedness Constraint: Entity Declared *)
6429 (* In a document without any DTD, a document with only an internal *)
6430 (* DTD subset which contains no parameter entity references, or a *)
6431 (* document with "standalone
='yes
'", the Name given in the entity *)
6432 (* reference must match that in an entity declaration ... Note that *)
6433 (* if entities are declared in the external subset or in external *)
6434 (* parameter entities, a non-validating processor is not obligated *)
6435 (* to read and process their declarations; for such documents, the *)
6436 (* rule that an entity must be declared is a well-formedness *)
6437 (* constraint only if standalone='yes'. *)
6439 (* Thus a reference to an undeclared entity is a well-formedness *)
6440 (* error if either #hasDtdFlag or #externalFlag is false, or if *)
6441 (* #standaloneFlag is true *)
6442 (*--------------------------------------------------------------------*)
6443 (* bug fixed 080600: changed !hasDtdFlag to not(!hasDtdFlag) *)
6444 (*--------------------------------------------------------------------*)
6445 fun entitiesWellformed ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) =
6446 not (!hasDtdFlag andalso !externalFlag) orelse !standAloneFlag
6448 fun initStandalone ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) =
6449 (hasDtdFlag := false; standAloneFlag := false; externalFlag := false)
6451 (*--------------------------------------------------------------------*)
6452 (* this array tells whether the predefined entities (index 1-5) have *)
6453 (* been declared in the dtd. *)
6454 (*--------------------------------------------------------------------*)
6455 fun isRedefined (dtd:Dtd) i = Array.sub(#preRedef dtd,i)
6456 fun setRedefined (dtd:Dtd) i = Array.update(#preRedef dtd,i,true)
6457 fun notRedefined dtd = List.mapPartial
6458 (fn i => if isRedefined dtd i then NONE else SOME(#1(Vector.sub(predefined,i))))
6461 fun AttNot2Index (dtd:Dtd) name = getIndex(#notDict dtd,name)
6462 fun Element2Index (dtd:Dtd) name = getIndex(#elDict dtd,name)
6463 fun GenEnt2Index (dtd:Dtd) name = getIndex(#genDict dtd,name)
6464 fun Id2Index (dtd:Dtd) name = getIndex(#idDict dtd,name)
6465 fun ParEnt2Index (dtd:Dtd) name = getIndex(#parDict dtd,name)
6467 fun Index2AttNot (dtd:Dtd) idx = getKey(#notDict dtd,idx)
6468 fun Index2Element (dtd:Dtd) idx = getKey(#elDict dtd,idx)
6469 fun Index2GenEnt (dtd:Dtd) idx = getKey(#genDict dtd,idx)
6470 fun Index2Id (dtd:Dtd) idx = getKey(#idDict dtd,idx)
6471 fun Index2ParEnt (dtd:Dtd) idx = getKey(#parDict dtd,idx)
6473 fun getElement (dtd:Dtd) idx = getByIndex(#elDict dtd,idx)
6474 fun getGenEnt (dtd:Dtd) idx = getByIndex(#genDict dtd,idx)
6475 fun getId (dtd:Dtd) idx = getByIndex(#idDict dtd,idx)
6476 fun getNotation (dtd:Dtd) idx = getByIndex(#notDict dtd,idx)
6477 fun getParEnt (dtd:Dtd) idx = getByIndex(#parDict dtd,idx)
6479 fun hasNotation (dtd:Dtd) idx = isSome(getByIndex(#notDict dtd,idx))
6481 fun setElement (dtd:Dtd) (idx,el) = setByIndex(#elDict dtd,idx,el)
6482 fun setGenEnt (dtd:Dtd) (idx,ge) = setByIndex(#genDict dtd,idx,ge)
6483 fun setId (dtd:Dtd) (idx,a) = setByIndex(#idDict dtd,idx,a)
6484 fun setNotation (dtd:Dtd) (idx,nt) = setByIndex(#notDict dtd,idx,SOME nt)
6485 fun setParEnt (dtd:Dtd) (idx,pe) = setByIndex(#parDict dtd,idx,pe)
6487 fun maxUsedElem (dtd:Dtd) = usedIndices(#elDict dtd)-1
6488 fun maxUsedGen (dtd:Dtd) = usedIndices(#genDict dtd)-1
6489 fun maxUsedId (dtd:Dtd) = usedIndices(#idDict dtd)-1
6491 (*--------------------------------------------------------------------*)
6492 (* initialize the attribute tables. Make sure that indices 0...3 are *)
6493 (* assigned to "default
", "preserve
", "xml
:lang
" and "xml
:space
". *)
6494 (*--------------------------------------------------------------------*)
6495 fun initAttNotTable (dtd as {idDict,notDict,...}:Dtd) =
6497 val _ = clearDict(notDict,SOME(!O_TS_ATT_NOT))
6498 val _ = clearDict(idDict,SOME(!O_TS_ID))
6499 val _ = AttNot2Index dtd default
6500 val _ = AttNot2Index dtd preserve
6501 val _ = AttNot2Index dtd xmlLang
6502 val _ = AttNot2Index dtd xmlSpace
6505 fun initElementTable (dtd:Dtd) = clearDict(#elDict dtd,SOME(!O_TS_ELEM))
6506 (*--------------------------------------------------------------------*)
6507 (* reserve 0 for gen entity -, i.e., the document entity. *)
6508 (* reserve 1 for gen entity amp, i.e., "&#
38;#
38;" *)
6509 (* reserve 2 for gen entity lt, i.e., "&#
38;#
60;" *)
6510 (* reserve 3 for gen entity gt, i.e., "&#
62;" *)
6511 (* reserve 4 for gen entity apos, i.e., "&#
39;" *)
6512 (* reserve 5 for gen entity quot, i.e., "&#
34;" *)
6513 (* reserve 0 for par entity -, i.e., the external dtd subset. *)
6517 (* ... except that well-formed documents need not declare any of *)
6518 (* the following entities: amp, lt, gt, apos, quot. *)
6522 (* <!ENTITY lt "&#
38;#
60;"> *)
6523 (* <!ENTITY gt "&#
62;"> *)
6524 (* <!ENTITY amp "&#
38;#
38;"> *)
6525 (* <!ENTITY apos "&#
39;"> *)
6526 (* <!ENTITY quot "&#
34;"> *)
6527 (*--------------------------------------------------------------------*)
6528 fun initEntityTables (dtd as {genDict,parDict,preRedef,...}:Dtd) =
6530 val _ = clearDict(genDict,SOME(!O_TS_GEN_ENT))
6531 val _ = clearDict(parDict,SOME(!O_TS_PAR_ENT))
6532 val _ = map (fn i => Array.update(preRedef,i,false)) [1,2,3,4,5]
6533 val _ = GenEnt2Index dtd [0wx2D] (* "-" *)
6534 val _ = ParEnt2Index dtd [0wx2D] (* "-" *)
6536 (fn (_,(name,lit,cs))
6537 => (setGenEnt dtd (GenEnt2Index dtd name,(GE_INTERN(lit,cs),false))))
6542 fun initDtdTables() =
6545 val _ = initAttNotTable dtd
6546 val _ = initElementTable dtd
6547 val _ = initEntityTables dtd
6548 val _ = initStandalone dtd
6553 val dtd = initDtdTables()
6555 val defaultIdx = AttNot2Index dtd default
6556 val preserveIdx = AttNot2Index dtd preserve
6557 val xmlLangIdx = AttNot2Index dtd xmlLang
6558 val xmlSpaceIdx = AttNot2Index dtd xmlSpace
6559 val xmlSpaceType = AT_GROUP (IntLists.addIntList (preserveIdx,[defaultIdx]))
6562 fun printAttNotTable (dtd:Dtd) =
6563 printDict NotationInfo2String (#notDict dtd)
6564 fun printElementTable dtd =
6565 printDict (ElemInfo2xString (UniChar.Data2String o (Index2AttNot dtd),
6566 UniChar.Data2String o (Index2Element dtd),
6567 UniChar.Data2String o (Index2GenEnt dtd),
6568 UniChar.Data2String o (Index2Id dtd),
6569 UniChar.Data2String o (Index2AttNot dtd))) (#elDict dtd)
6570 fun printGenEntTable dtd =
6571 printDict (fn (ent,ext) => GenEntity2xString (Data2String o (Index2AttNot dtd)) ent
6572 ^(if ext then "[external
]" else "")) (#genDict dtd)
6573 fun printIdTable (dtd:Dtd) = printDict (IdInfo2String) (#idDict dtd)
6574 fun printParEntTable (dtd:Dtd) =
6575 printDict (fn (ent,ext) => ParEntity2String ent
6576 ^(if ext then "[external
]" else "")) (#parDict dtd)
6578 fun printDtdTables dtd = (printAttNotTable dtd;
6579 printElementTable dtd;
6580 printGenEntTable dtd;
6582 printParEntTable dtd)
6584 (* stop of ../../Parser/Params/dtd.sml *)
6585 (* start of ../../Parser/Params/hookData.sml *)
6586 structure HookData =
6588 type StartEnd = Errors.Position * Errors.Position
6589 (*--------------------------------------------------------------------*)
6590 (* a text declaration consists of a version info and an encoding decl.*)
6591 (* an xml declaration has an additional standalone decl. *)
6592 (*--------------------------------------------------------------------*)
6593 type TextDecl = string option * string option
6594 type XmlDecl = string option * string option * bool option
6596 type XmlInfo = Uri.Uri * Encoding.Encoding * XmlDecl option
6597 type ExtSubsetInfo = Uri.Uri * Encoding.Encoding * TextDecl option
6598 type SubsetInfo = Errors.Position
6599 type EndDtdInfo = Errors.Position
6601 type ErrorInfo = Errors.Position * Errors.Error
6602 type WarningInfo = Errors.Position * Errors.Warning
6603 type NoFileInfo = string * string
6605 type CommentInfo = StartEnd * UniChar.Vector
6606 type ProcInstInfo = StartEnd * UniChar.Data * Errors.Position * UniChar.Vector
6608 type DtdInfo = int * Base.ExternalId option
6610 datatype AttPresent =
6613 | AP_DEFAULT of UniChar.Vector * UniChar.Vector * Base.AttValue option
6614 | AP_PRESENT of UniChar.Vector * UniChar.Vector * Base.AttValue option
6616 type AttSpec = int * AttPresent * (UniChar.Data * UniChar.Data) option
6617 type AttSpecList = AttSpec list
6619 type EndTagInfo = StartEnd * int * (int * UniChar.Data) option
6620 type StartTagInfo = StartEnd * int * AttSpecList * UniChar.Data * bool
6621 type WhiteInfo = UniChar.Vector
6622 type CDataInfo = StartEnd * UniChar.Vector
6623 type DataInfo = StartEnd * UniChar.Vector * bool
6625 type CharRefInfo = StartEnd * UniChar.Char * UniChar.Vector
6626 type GenRefInfo = StartEnd * int * Base.GenEntity * bool
6627 type ParRefInfo = StartEnd * int * Base.ParEntity * bool
6628 type EntEndInfo = Errors.Position
6630 datatype MarkupDecl =
6631 DEC_ATTLIST of int * (int * Base.AttType * Base.AttDefault) list * bool
6632 | DEC_ELEMENT of int * Base.ContentSpec * bool
6633 | DEC_GEN_ENT of int * Base.GenEntity * bool
6634 | DEC_PAR_ENT of int * Base.ParEntity * bool
6635 | DEC_NOTATION of int * Base.ExternalId * bool
6636 type DeclInfo = StartEnd * MarkupDecl
6638 fun isExtDecl decl =
6640 of DEC_ATTLIST(_,_,ext) => ext
6641 | DEC_ELEMENT(_,_,ext) => ext
6642 | DEC_GEN_ENT(_,_,ext) => ext
6643 | DEC_PAR_ENT(_,_,ext) => ext
6644 | DEC_NOTATION(_,_,ext) => ext
6646 (* stop of ../../Parser/Params/hookData.sml *)
6647 (* start of ../../Parser/Params/hooks.sml *)
6656 val hookXml : AppData * HookData.XmlInfo -> AppData
6657 val hookFinish : AppData -> AppFinal
6659 val hookError : AppData * HookData.ErrorInfo -> AppData
6660 val hookWarning : AppData * HookData.WarningInfo -> AppData
6662 val hookProcInst : AppData * HookData.ProcInstInfo -> AppData
6663 val hookComment : AppData * HookData.CommentInfo -> AppData
6664 val hookWhite : AppData * HookData.WhiteInfo -> AppData
6665 val hookDecl : AppData * HookData.DeclInfo -> AppData
6667 val hookStartTag : AppData * HookData.StartTagInfo -> AppData
6668 val hookEndTag : AppData * HookData.EndTagInfo -> AppData
6669 val hookCData : AppData * HookData.CDataInfo -> AppData
6670 val hookData : AppData * HookData.DataInfo -> AppData
6672 val hookCharRef : AppData * HookData.CharRefInfo -> AppData
6673 val hookGenRef : AppData * HookData.GenRefInfo -> AppData
6674 val hookParRef : AppData * HookData.ParRefInfo -> AppData
6675 val hookEntEnd : AppData * HookData.EntEndInfo -> AppData
6677 val hookDocType : AppData * HookData.DtdInfo -> AppData
6678 val hookSubset : AppData * HookData.SubsetInfo -> AppData
6679 val hookExtSubset : AppData * HookData.ExtSubsetInfo -> AppData
6680 val hookEndDtd : AppData * HookData.EndDtdInfo -> AppData
6682 (* stop of ../../Parser/Params/hooks.sml *)
6683 (* start of ../../Parser/Params/resolve.sml *)
6690 val resolveExtId : Base.ExternalId -> Uri.Uri
6693 structure ResolveNull : Resolve =
6695 open Base Errors Uri
6697 fun resolveExtId (EXTID(_,sys)) =
6699 of NONE => raise NoSuchFile ("","Could not generate system identifier
")
6700 | SOME (base,file,_) => uriJoin(base,file)
6702 (* stop of ../../Parser/Params/resolve.sml *)
6703 (* start of ../../Parser/Dfa/dfaUtil.sml *)
6710 (*--------------------------------------------------------------------------*)
6711 (* Structure: DfaUtil *)
6717 (* Exceptions raised by functions in this structure: *)
6718 (* boundsFollow : none *)
6719 (* cmSymbols : none *)
6720 (* makeRow : none *)
6721 (* mergeFirst : ConflictFirst *)
6722 (* mergeFollow : ConflictFollow *)
6723 (*--------------------------------------------------------------------------*)
6726 val mergeFirst : bool -> DfaBase.First * DfaBase.First -> DfaBase.First
6727 val mergeFollow : bool -> DfaBase.Follow * DfaBase.Follow -> DfaBase.Follow
6728 val boundsFollow : DfaBase.Follow -> DfaBase.Sigma * DfaBase.Sigma
6729 val cmSymbols : DfaBase.ContentModel -> DfaBase.Sigma list
6730 val makeRow : DfaBase.Follow * bool -> DfaBase.Row
6733 structure DfaUtil : DfaUtil =
6735 open UtilInt DfaBase
6737 (*--------------------------------------------------------------------*)
6738 (* merge two First sets, raise ConflictFirst at conflict: there may *)
6739 (* not be two entries (q1,a) and (q2,a) in the same First set, if *)
6740 (* nondet is false. *)
6741 (*--------------------------------------------------------------------*)
6742 fun mergeFirst nondet ll =
6744 fun go_det (nil,l) = l
6745 | go_det (l,nil) = l
6746 | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) =
6747 case Int.compare(a1,a2)
6748 of LESS => x1::go_det(r1,l2)
6749 | GREATER => x2::go_det(l1,r2)
6750 | EQUAL => raise ConflictFirst(a1,q1,q2)
6752 fun go_nondet (nil,l) = l
6753 | go_nondet (l,nil) = l
6754 | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) =
6755 case Int.compare(a1,a2)
6756 of LESS => x1::go_nondet(r1,l2)
6757 | GREATER => x2::go_nondet(l1,r2)
6758 | EQUAL => case Int.compare(q1,q2)
6759 of LESS => x1::go_nondet(r1,l2)
6760 | GREATER => x2::go_nondet(l1,r2)
6761 | EQUAL => go_nondet(l1,r2)
6763 if nondet then go_nondet ll else go_det ll
6766 (*--------------------------------------------------------------------*)
6767 (* merge two Follow sets, raise ConflictFollow at conflict. there may *)
6768 (* not be two entries (q1,a) and (q2,a) with q1<>q2 in the same Follow*)
6769 (* set, if nondet is false. Note that, e.g. for (a+)+, Follow(a) = *)
6770 (* Follow(a+) U First(a+), so duplicate occurrences of the same (q,a) *)
6771 (* are possible (as opposed to First). *)
6772 (*--------------------------------------------------------------------*)
6773 fun mergeFollow nondet ll =
6775 fun go_det (nil,l) = l
6776 | go_det (l,nil) = l
6777 | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) =
6778 case Int.compare(a1,a2)
6779 of LESS => x1::go_det(r1,l2)
6780 | GREATER => x2::go_det(l1,r2)
6781 | EQUAL => if q1=q2 then go_det(l1,r2)
6782 else raise ConflictFollow(a1,q1,q2)
6784 fun go_nondet (nil,l) = l
6785 | go_nondet (l,nil) = l
6786 | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) =
6787 case Int.compare(a1,a2)
6788 of LESS => x1::go_nondet(r1,l2)
6789 | GREATER => x2::go_nondet(l1,r2)
6790 | EQUAL => case Int.compare(q1,q2)
6791 of LESS => x1::go_nondet(r1,l2)
6792 | GREATER => x2::go_nondet(l1,r2)
6793 | EQUAL => go_nondet(l1,r2)
6795 if nondet then go_nondet ll else go_det ll
6798 (*--------------------------------------------------------------------*)
6799 (* what are the least and largest symbol occurring in a Follow set? *)
6800 (*--------------------------------------------------------------------*)
6801 fun boundsFollow (nil:Follow) = (1,0)
6802 | boundsFollow [(q,a)] = (a,a)
6803 | boundsFollow ((q,a)::xs) = (a,#2(List.last xs))
6805 (*--------------------------------------------------------------------*)
6806 (* return the list of all symbols occurring in a content model. *)
6807 (*--------------------------------------------------------------------*)
6812 of CM_ELEM a => insertInt(a,yet)
6813 | CM_OPT cm => do_cm(cm,yet)
6814 | CM_REP cm => do_cm(cm,yet)
6815 | CM_PLUS cm => do_cm(cm,yet)
6816 | CM_ALT cms => foldr do_cm yet cms
6817 | CM_SEQ cms => foldr do_cm yet cms
6821 (*--------------------------------------------------------------------*)
6822 (* given the follow set and the final flag, make a row in the dfa. *)
6823 (*--------------------------------------------------------------------*)
6824 fun makeRow (flw,fin) =
6826 val (lo,hi) = boundsFollow flw
6827 val tab = Array.array(hi-lo+1,dfaError)
6828 val _ = app (fn (q,a) => Array.update (tab,a-lo,q)) flw
6830 (lo,hi,Array.extract (tab,0,NONE),fin)
6834 (* stop of ../../Parser/Dfa/dfaUtil.sml *)
6835 (* start of ../../Util/intSets.sml *)
6847 val emptyIntSet : IntSet
6848 val singleIntSet : int -> IntSet
6849 val fullIntSet : int -> IntSet
6851 val isEmptyIntSet : IntSet -> bool
6852 val inIntSet : int * IntSet -> bool
6854 val compareIntSets: IntSet * IntSet -> order
6855 val hashIntSet : IntSet -> word
6857 val addIntSet : int * IntSet -> IntSet
6858 val delIntSet : int * IntSet -> IntSet
6860 val cupIntSets : IntSet * IntSet -> IntSet
6861 val capIntSets : IntSet * IntSet -> IntSet
6862 val diffIntSets : IntSet * IntSet -> IntSet
6864 val IntSet2List : IntSet -> int list
6865 val IntList2Set : int list -> IntSet
6868 structure IntSets : IntSets =
6870 structure W = Word32
6871 val wordSize = W.wordSize
6873 type IntSet = W.word vector
6885 fun normalize (vec:IntSet) =
6886 let val max = Vector.foldli
6887 (fn (i,w,max) => if w=0wx0 then i else max) 0 (vec,0,NONE)
6888 in Vector.extract (vec,0,SOME max)
6891 val emptyIntSet = Vector.fromList nil : IntSet
6893 fun fullIntSet n = let val size = (n+wordSize-1) div wordSize
6894 val full = 0w0-0w1:W.word
6895 val bits = (n-1) mod wordSize+1
6896 val last = full >> (Word.fromInt (wordSize-bits))
6897 in Vector.tabulate(n div wordSize+1,
6898 fn i => if i<size-1 then full else last):IntSet
6901 fun singleIntSet n =
6903 val idx = n div wordSize
6904 val mask = 0w1 << (Word.fromInt (n mod wordSize))
6906 Vector.tabulate(idx+1,fn i => if i=idx then mask else 0w0):IntSet
6909 fun isEmptyIntSet vec = Vector.length vec=0
6911 fun inIntSet(n,vec) =
6912 let val idx = n div wordSize
6913 in if idx>=Vector.length vec then false
6914 else let val mask = 0w1 << (Word.fromInt (n mod wordSize))
6915 in Vector.sub(vec,idx) && mask <> 0w0
6919 fun addIntSet(n,vec) =
6921 val idx = n div wordSize
6922 val mask = 0w1 << (Word.fromInt (n mod wordSize))
6923 val size = Vector.length vec
6926 then Vector.mapi (fn (i,x) => if i=idx then x||mask else x) (vec,0,NONE)
6927 else Vector.tabulate
6928 (idx+1,fn i => if i<size then Vector.sub(vec,i) else if i=idx then mask else 0w0)
6931 fun delIntSet(n,vec) =
6933 val idx = n div wordSize
6934 val size = Vector.length vec
6935 val vec1 = if size<=idx then vec
6936 else let val mask = !! (0w1 << (Word.fromInt (n mod wordSize)))
6938 (fn (i,x) => if i=idx then x && mask else x) (vec,0,NONE)
6943 fun capIntSets(vec1,vec2) =
6945 val l12 = Int.min(Vector.length vec1,Vector.length vec2)
6946 val v12 = Vector.tabulate(l12,fn i => Vector.sub(vec1,i) && Vector.sub(vec2,i))
6951 fun cupIntSets(vec1,vec2) =
6953 val (l1,l2) = (Vector.length vec1,Vector.length vec2)
6954 val (shorter,longer,v) = if l1<=l2 then (l1,l2,vec2) else (l2,l1,vec1)
6956 Vector.tabulate (longer,fn i => if i>=shorter then Vector.sub(v,i)
6957 else Vector.sub(vec1,i) || Vector.sub(vec2,i))
6960 fun diffIntSets(vec1,vec2) =
6962 val (l1,l2) = (Vector.length vec1,Vector.length vec2)
6963 val vec1 = Vector.tabulate
6964 (l1,fn i => if i>=l2 then Vector.sub(vec1,i)
6965 else Vector.sub(vec1,i) && !!(Vector.sub(vec2,i)))
6969 fun IntList2Set l = List.foldl addIntSet emptyIntSet l
6971 fun IntSet2List vec =
6973 val size = Vector.length vec
6974 fun doOne (w,off,yet) =
6975 let fun doit (i,mask) =
6976 if i=wordSize then yet
6977 else if w&&mask=0w0 then doit(i+1,mask<<0wx1)
6978 else (off+i)::doit(i+1,mask<<0wx1)
6981 fun doAll i = if i>=size then nil
6982 else doOne(Vector.sub(vec,i),wordSize*i,(doAll (i+1)))
6986 fun compareIntSets (vec1,vec2:IntSet) =
6988 val (l1,l2) = (Vector.length vec1,Vector.length vec2)
6989 val (l12,ifEq) = case Int.compare(l1,l2)
6990 of LESS => (l1,LESS)
6991 | order => (l2,order)
6992 fun doit i = if i>=l12 then ifEq
6993 else case W.compare(Vector.sub(vec1,i),Vector.sub(vec2,i))
6994 of EQUAL => doit (i+1)
6999 val intShift = case Int.precision
7001 | SOME x => Word.fromInt(Int.max(wordSize-x+1,0))
7003 fun hashIntSet vec =
7004 case Vector.length vec
7006 | 1 => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0),intShift)))
7007 | l => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0)+Vector.sub(vec,l-1),intShift)))
7009 (* stop of ../../Util/intSets.sml *)
7010 (* start of ../../Util/SymDict/intSetDict.sml *)
7018 structure KeyIntSet : Key =
7020 open IntSets UtilString
7024 val null = emptyIntSet
7025 val hash = hashIntSet
7026 val compare = compareIntSets
7027 val toString = (List2xString ("{",",","}") Int2String) o IntSet2List
7030 structure IntSetDict = Dict (structure Key = KeyIntSet)
7031 structure IntSetSymTab = SymTable (structure Key = KeyIntSet)
7034 (* stop of ../../Util/SymDict/intSetDict.sml *)
7035 (* start of ../../Parser/Dfa/dfaPassThree.sml *)
7047 (*--------------------------------------------------------------------------*)
7048 (* Structure: DfaPassThree *)
7057 (* Exceptions raised by functions in this structure: *)
7058 (* passThree : TooLarge *)
7059 (*--------------------------------------------------------------------------*)
7060 signature DfaPassThree =
7062 val passThree: bool -> (DfaBase.Follow * bool) vector -> DfaBase.Dfa
7065 functor DfaPassThree (structure DfaOptions : DfaOptions) : DfaPassThree =
7068 IntSets IntSetDict DfaBase DfaOptions DfaUtil
7070 (*--------------------------------------------------------------------*)
7071 (* do the subset construction. *)
7072 (*--------------------------------------------------------------------*)
7073 (* given an automaton (Q,q0,F,delta), the subset automaton is *)
7074 (* (Q',q0',F',delta') with: *)
7077 (* - F' = {S | S cap F <> empty} *)
7078 (* - delta'(S,a) = {p | (q,a,p) in delta, q in S} *)
7079 (*--------------------------------------------------------------------*)
7082 (* the new start state is the singleton of the old start state *)
7083 val sNull = singleIntSet 0
7085 (* create a dictionary for the subsets, make sNull get index 0 *)
7086 val tau = makeDict("",!O_DFA_INITIAL_WIDTH,(nil:Follow,false))
7087 val pInitial = getIndex(tau,sNull)
7089 (* enter a new set state. raise DfaTooLarge if the new state *)
7090 (* would have a too large index *)
7092 let val (max,i) = (!O_DFA_MAX_STATES,getIndex(tau,s))
7093 in if max>i then i else raise DfaTooLarge max
7096 (* compute the follow set for a set state from the follow sets *)
7097 (* of its members *)
7098 fun makeFollow NONE nil = nil
7099 | makeFollow (SOME(s,a)) nil = [(makeState s,a)]
7100 | makeFollow NONE ((q,a)::qas) = makeFollow (SOME(singleIntSet q,a)) qas
7101 | makeFollow (SOME(s,a)) ((q,b)::qas) =
7102 if a=b then makeFollow (SOME(addIntSet(q,s),a)) qas
7103 else (makeState s,a)::makeFollow (SOME(singleIntSet q,b)) qas
7105 (* continue until all entries in the state dictionary are done -*)
7107 if i>=usedIndices tau then i
7108 else let val sI = getKey(tau,i)
7109 val lI = IntSet2List sI
7110 val ffs = map (fn j => Vector.sub(tab,j)) lI
7111 val (followJs,finI) = foldl
7112 (fn ((flwJ,finJ),(flw,fin)) => (mergeFollow true (flwJ,flw),
7115 val followI = makeFollow NONE followJs
7116 val _ = setByIndex(tau,i,(followI,finI))
7121 in (* finally create a vector holding the new follow/fin pairs *)
7122 Vector.tabulate (size,fn i => getByIndex(tau,i))
7125 (*--------------------------------------------------------------------*)
7126 (* given a vector of Follow and boolean final condition, make a dfa *)
7127 (* out of it. if the first arg is true, then the content model was *)
7128 (* ambiguous; in this case we must first apply a subset construction *)
7129 (* in order to obtain a deterministic finite machine. *)
7130 (*--------------------------------------------------------------------*)
7131 fun passThree nondet tab =
7133 val det = if nondet then makeDet tab else tab
7134 in Vector.map makeRow det
7137 (* stop of ../../Parser/Dfa/dfaPassThree.sml *)
7138 (* start of ../../Parser/Dfa/dfaError.sml *)
7141 (*--------------------------------------------------------------------------*)
7142 (* Structure: DfaError *)
7145 (* The function in this structure is for producing good error messages *)
7146 (* for ambiguous content models. It numbers the nodes of a cm exactly *)
7147 (* like passOne does, but counts the occurrences of symbol a in order to *)
7148 (* indicate which are in conflict. It is only executed in case of error. *)
7153 (* Exceptions raised by functions in this structure: *)
7154 (* countOccs : none *)
7155 (*--------------------------------------------------------------------------*)
7156 signature DfaError =
7158 val countOccs : DfaBase.Sigma * DfaBase.State * DfaBase.State
7159 -> DfaBase.ContentModel -> DfaBase.Sigma * int * int
7162 structure DfaError : DfaError =
7166 fun countOccs (a,q1,q2) cm =
7168 val (q1,q2) = if q1>q2 then (q2,q1) else (q1,q2)
7170 fun next a nil = (1,[(a,2)])
7171 | next a ((b,n)::rest) =
7172 if a=b then (n,(b,n+1)::rest)
7173 else if a<b then (1,(a,2)::(b,n)::rest)
7174 else let val (m,new) = next a rest
7178 fun insert a (q,yet,n1,n2) =
7179 let val (n,new) = next a yet
7180 in (q+1,new,if q=q1 then n else n1,if q=q2 then n else n2)
7185 of CM_ELEM a => insert a yet
7186 | CM_OPT cmi => doit (cmi,yet)
7187 | CM_REP cmi => doit (cmi,yet)
7188 | CM_PLUS cmi => doit (cmi,yet)
7189 | CM_ALT cmis => foldl doit yet cmis
7190 | CM_SEQ cmis => foldl doit yet cmis
7192 val (_,_,n1,n2) = doit (cm,(1,nil,0,0))
7197 (* stop of ../../Parser/Dfa/dfaError.sml *)
7198 (* start of ../../Parser/Dfa/dfaPassOne.sml *)
7202 (*--------------------------------------------------------------------------*)
7203 (* Structure: DfaPassOne *)
7209 (* Exceptions raised by functions in this structure: *)
7210 (* passOne : ConflictFirst *)
7211 (*--------------------------------------------------------------------------*)
7212 signature DfaPassOne =
7214 val passOne : bool -> DfaBase.ContentModel -> DfaBase.CM
7217 structure DfaPassOne : DfaPassOne =
7219 open DfaBase DfaUtil
7221 (*--------------------------------------------------------------------*)
7222 (* Given a content model, number the leafs, compute Empty and First *)
7223 (* for each node, and construct a corresponding CM annotated with *)
7224 (* these informations. *)
7227 (* The leafs are numbered in left-to-right, depth-first order, *)
7228 (* starting with 1 (0 will be the start state of the DFA). *)
7230 (* Empty a = false *)
7231 (* Empty e? = Empty e* = true *)
7232 (* Empty e+ = Empty e *)
7233 (* Empty e1|...|eN = Empty e1 \/ ... \/ Empty eN *)
7234 (* Empty e1,...,eN = Empty e1 /\ ... /\ Empty eN *)
7236 (* First a = {q,a}, where q is the number of this leaf. *)
7237 (* First e? = First e* = First e+ = First e *)
7238 (* First e1|...|eN = First e1 ++ ... ++ First eN *)
7239 (* First e1,...,eN = ++{First eI | Empty eJ=False forall j<i} *)
7241 (* F1++F2 = F1 U F2, if a2<>a1 forall (q1,a1) in F1, (q1,a1) in F1} *)
7242 (* error, if exist (q1,a) in F1, (q2,a) in F2 *)
7243 (* then raise ConflictFirst(a,q1,q2) *)
7244 (*--------------------------------------------------------------------*)
7245 fun passOne nondet cm =
7247 fun und(a,b) = a andalso b
7248 fun oder(a,b) = a orelse b
7250 fun op_fst_seq (fst,fsts,mt) = if mt then mergeFirst nondet (fst,fsts) else fst
7251 fun op_fst_or (fst,fsts,_) = mergeFirst nondet (fst,fsts)
7255 of CM_ELEM a => (ELEM a,(q+1,false,[(q+1,a)]))
7256 | CM_OPT cm => let val cmi as (_,(q1,_,fst)) = do_cm cm q
7257 in (OPT cmi,(q1,true,fst))
7259 | CM_REP cm => let val cmi as (_,(q1,_,fst)) = do_cm cm q
7260 in (REP cmi,(q1,true,fst))
7262 | CM_PLUS cm => let val cmi as (_,info1) = do_cm cm q
7265 | CM_ALT cms => do_cms (ALT,false,oder,op_fst_or) cms q
7266 | CM_SEQ cms => do_cms (SEQ,true,und,op_fst_seq) cms q
7268 and do_cms(con,null_mt,op_mt,op_fst) cms q =
7270 fun doit [] q = ([],(q,null_mt,[]))
7271 | doit (cm::cms) q =
7273 val cmi as (_,(q1,mt1,fst1)) = do_cm cm q
7274 val (cmis,(q2,mt2,fst2)) = doit cms q1
7275 in (cmi::cmis,(q2,op_mt(mt1,mt2),op_fst(fst1,fst2,mt1)))
7277 val (cmis,info1) = doit cms q
7284 (* stop of ../../Parser/Dfa/dfaPassOne.sml *)
7285 (* start of ../../Parser/Dfa/dfaPassTwo.sml *)
7291 (*--------------------------------------------------------------------------*)
7292 (* Structure: DfaPassTwo *)
7298 (* Exceptions raised by functions in this structure: *)
7299 (* passTwo : ConflictFollow *)
7300 (*--------------------------------------------------------------------------*)
7301 signature DfaPassTwo =
7303 val passTwo: bool -> DfaBase.CM -> (DfaBase.Follow * bool) vector
7306 structure DfaPassTwo : DfaPassTwo =
7308 open DfaBase DfaUtil
7310 (*--------------------------------------------------------------------*)
7311 (* Given a CM annotated with leaf numbers (states), Empty and First, *)
7312 (* compute Follow and Fin foreach node, and generate the transition *)
7313 (* row if node is a leaf. Follow and Fin are computed top-down: *)
7316 (* Follow e = {}, Fin e = true *)
7319 (* Follow e1 = Follow e, Fin e1 = Fin e *)
7321 (* (e=e1*, e=e1+) *)
7322 (* Follow e1 = Follow e1 ++ First e1, Fin e1 = Fin e *)
7324 (* (e=e1|...|eN) = *)
7325 (* Follow eI = Follow e, Fin eI = Fin e for i=0...n *)
7327 (* (e=e1,...,eN) = *)
7328 (* Follow eN = Follow e, Fin eN = Fin e *)
7329 (* Follow eI = First eI+1, if Empty eI+1 = false, i<n *)
7330 (* First eI+1 ++ Follow eI+1, if Empty eI+1 = true, i<n *)
7331 (* Fin eI = false, if Empty eI+1 = false, i<n *)
7332 (* Fin eI+1, if Empty eI+1 = true, i<n *)
7334 (* F1++F2 = F1 U F2, if a2<>a1 forall (q1,a1) in F1, (q1,a1) in F1} *)
7335 (* error, if exist (q1,a) in F1, (q2,a) in F2 *)
7336 (* then raise ConflictFirst(a,q1,q2) *)
7337 (*--------------------------------------------------------------------*)
7338 fun passTwo nondet (cmi as (_,(n,mt,fst))) =
7340 val table = Array.array(n+1,(nil,false))
7342 val _ = Array.update(table,0,(fst,mt))
7344 fun do_cm (ff as (flw,fin)) (cm,(q,mt,fst)) =
7346 of ELEM a => Array.update(table,q,ff)
7347 | OPT cmi => do_cm ff cmi
7348 | REP cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi
7349 | PLUS cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi
7350 | ALT cmis => app (do_cm ff) cmis
7351 | SEQ cmis => ignore (do_seq ff cmis)
7352 and do_seq ff cmis = foldr
7353 (fn (cmi as (_,(_,mt,fst)),ff as (flw,fin))
7355 if mt then (mergeFollow nondet (fst,flw),fin) else (fst,false)))
7358 val _ = do_cm (nil,true) cmi
7360 in Array.extract (table,0,NONE)
7363 (* stop of ../../Parser/Dfa/dfaPassTwo.sml *)
7364 (* start of ../../Parser/Dfa/dfa.sml *)
7375 (*--------------------------------------------------------------------------*)
7376 (* Structure: Dfa *)
7386 (* Exceptions raised by functions in this structure: *)
7387 (* ContentModel2String : none *)
7388 (* dfaFinal : none *)
7389 (* dfaTrans : none *)
7390 (* makeAmbiguous : DfaTooLarge *)
7391 (* makeChoiceDfa : none *)
7392 (* makeDfa : Ambiguous *)
7393 (* Dfa2String : none *)
7394 (*--------------------------------------------------------------------------*)
7399 val dfaError : DfaState
7400 val dfaInitial : DfaState
7402 exception DfaTooLarge of int
7403 exception Ambiguous of int * int * int
7405 val emptyDfa : DfaData.Dfa
7407 val makeDfa : DfaData.ContentModel -> DfaData.Dfa
7408 val makeAmbiguous : DfaData.ContentModel -> DfaData.Dfa
7409 val makeChoiceDfa : DfaData.ContentModel -> DfaData.Dfa
7411 val dfaFinal : DfaData.Dfa * DfaState -> bool
7412 val dfaTrans : DfaData.Dfa * DfaState * int -> DfaState
7415 functor Dfa (structure DfaOptions : DfaOptions) : Dfa =
7417 structure DfaPassThree = DfaPassThree (structure DfaOptions = DfaOptions)
7420 DfaBase DfaError DfaPassOne DfaPassTwo DfaString DfaUtil
7422 type DfaState = State
7424 (*--------------------------------------------------------------------*)
7425 (* Create a dfa for the content model (a1|...|aN)*, where a1,...,aN *)
7426 (* are the symbols occurring in the input dfa. *)
7427 (*--------------------------------------------------------------------*)
7428 fun makeChoiceDfa cm =
7430 val syms = cmSymbols cm
7431 val flw = map (fn a => (dfaInitial,a)) syms
7433 Vector.fromList [makeRow(flw,true)]
7436 (*--------------------------------------------------------------------*)
7437 (* create a dfa for an ambiguous content model. Raise DfaTooLarge if *)
7438 (* the subset construction yields too many states. *)
7439 (*--------------------------------------------------------------------*)
7440 fun makeAmbiguous cm =
7442 val cmi = DfaPassOne.passOne true cm
7443 val tab = DfaPassTwo.passTwo true cmi
7444 val dfa = DfaPassThree.passThree true tab
7448 (*--------------------------------------------------------------------*)
7449 (* generate a dfa for a content model. Raise Ambiguous if the content *)
7450 (* model is ambiguous. *)
7451 (*--------------------------------------------------------------------*)
7454 val cmi = DfaPassOne.passOne false cm
7455 val tab = DfaPassTwo.passTwo false cmi
7456 val dfa = DfaPassThree.passThree false tab
7459 handle ConflictFirst aqq => raise Ambiguous (countOccs aqq cm)
7460 | ConflictFollow aqq => raise Ambiguous (countOccs aqq cm)
7462 (*--------------------------------------------------------------------*)
7463 (* make one transitions in the dfa. *)
7464 (*--------------------------------------------------------------------*)
7465 fun dfaTrans(tab,q,a) =
7466 if q<0 then dfaDontCare
7467 else let val (lo,hi,tab,_) = Vector.sub(tab,q)
7468 in if a>=lo andalso a<=hi then Vector.sub(tab,a-lo) else dfaError
7471 (*--------------------------------------------------------------------*)
7472 (* check whether a dfa's state is an accepting state. *)
7473 (*--------------------------------------------------------------------*)
7474 fun dfaFinal (tab,q) =
7475 q<0 orelse #4(Vector.sub(tab,q):Row)
7477 (* stop of ../../Parser/Dfa/dfa.sml *)
7478 (* start of ../../Parser/entities.sml *)
7479 (*--------------------------------------------------------------------------*)
7480 (* Structure: Entities *)
7482 (* Exceptions raised by functions in this structure: *)
7483 (* closeAll : none *)
7484 (* getChar : none *)
7485 (* getEntId : none *)
7487 (* inInternalSubset : none *)
7488 (* isOpenEntity : none *)
7489 (* isSpecialEnd : none *)
7490 (* Position2String : none *)
7491 (* pushDummy : none *)
7492 (* pushExtern : NoSuchFile *)
7493 (* pushIntern : none *)
7494 (* pushSpecial : NoSuchFile *)
7495 (* statePos : none *)
7496 (*--------------------------------------------------------------------------*)
7497 (* This module maintains the entity stack. For each open entity it holds a *)
7498 (* buffer to read characters from. When the buffer is exceeded, it gets re- *)
7499 (* filled with new characters, depending on the entity's encoding. *)
7501 (* End-of-line handling as specified in 2.11 is performed: *)
7503 (* ... To simplify the tasks of applications, wherever an external parsed *)
7504 (* entity or the literal entity value of an internal parsed entity *)
7505 (* contains either the literal two-character sequence "#xD#xA
" or a *)
7506 (* standalone literal #xD, an XML processor must pass to the application *)
7507 (* the single character #xA. *)
7508 (* (This behavior can conveniently be produced by normalizing all line *)
7509 (* breaks to #xA on input, before parsing.) *)
7511 (* It also checks for illegal characters, cf. 2.2: *)
7513 (* [2] Char ::= #x9 | #xA | #xD /* any Unicode character, *)
7514 (* | [#x20-#xD7FF] excluding the surrogate *)
7515 (* | [#xE000-#xFFFD] blocks, FFFE, and FFFF. */ *)
7516 (* | [#x10000-#x10FFFF] *)
7518 (* More precisely, it assumes that all decoded characters are valid Unicode *)
7519 (* characters. It thus only checks for control characters other than #x9, *)
7521 (*--------------------------------------------------------------------------*)
7522 signature Entities =
7528 datatype Special = DOC_ENTITY | EXT_SUBSET
7530 exception CantOpenFile of (string * string) * AppData
7532 val pushIntern : State * int * bool * UniChar.Vector -> State
7533 val pushExtern : State * int * bool * Uri.Uri -> State * Encoding.Encoding
7534 val pushSpecial : Special * Uri.Uri option -> State * Encoding.Encoding
7536 val closeAll : State -> unit
7538 val commitAuto : AppData * State -> AppData * State
7539 val changeAuto : AppData * State * string -> AppData * State * Encoding.Encoding
7541 val getEntId : State -> EntId
7542 val getPos : State -> Errors.Position
7543 val getUri : State -> Uri.Uri
7545 val getChar : AppData * State -> UniChar.Char * AppData * State
7546 val ungetChars : State * UniChar.Data -> State
7548 val isOpen : int * bool * State -> bool
7549 val isSpecial : State -> bool
7550 val inDocEntity : State -> bool
7553 functor Entities (structure Hooks : Hooks) : Entities =
7556 UniChar Decode Decode.Error Errors Hooks Uri UtilError
7558 val THIS_MODULE = "Entities
"
7560 type CharBuffer = UniChar.Char array
7562 (*--------------------------------------------------------------------*)
7563 (* A special entity can not be popped from the stack by getChar, so *)
7564 (* it must be popped explicitly. This is for the document entity and *)
7565 (* the external subset. *)
7566 (*--------------------------------------------------------------------*)
7567 datatype Special = DOC_ENTITY | EXT_SUBSET
7568 (*--------------------------------------------------------------------*)
7569 (* In order to distinguish a general entity from a paramter entity, *)
7570 (* entity idxs are marked with this datatype. *)
7571 (*--------------------------------------------------------------------*)
7572 datatype EntId = GENERAL of int | PARAMETER of int
7574 (*--------------------------------------------------------------------*)
7575 (* Make an EntId from the entity's index. *)
7576 (*--------------------------------------------------------------------*)
7577 fun makeEntId(idx,isParam) =
7578 if isParam then PARAMETER idx else GENERAL idx
7580 (*--------------------------------------------------------------------*)
7581 (* A non-empty stack is: *)
7583 (* an internal entity INT(buf,size,idx,(id,other)): *)
7584 (* - (vec,idx,size) is a buffer,current index and its size; *)
7585 (* - id is the index of the entity's name in the entity table. *)
7586 (* - other contains the underlying entities (the rest of the stack). *)
7587 (* The components are nested according to access frequency. *)
7589 (* an external entity has three forms: *)
7590 (* EXT2(buf,size,idx,line,col,break,(dec,err,typ)) *)
7591 (* - (buf,size,idx) is a buffer, its size and current index; *)
7592 (* - (line,col) are the line and column; *)
7593 (* - break is a boolean indicating whether the last character was a *)
7594 (* carriage return (0xD) (then a succeeding line feed (0xA) must be *)
7596 (* - err is an option: if it is SOME(f,ee,err) then it indicates that *)
7597 (* the array was finished by a decoding error err, with the basic *)
7598 (* file f; f was at end of file if ee is true. Otherwise there was *)
7599 (* no error when loading the array. *)
7600 (* - dec describies the encoding of the entity and thus, how more *)
7601 (* data can be loaded; *)
7602 (* - typ is either of the form SPECIAL spec indicating a special *)
7603 (* entity; then this is the only entity on the stack. Otherwise it *)
7604 (* is NORMAL(id,other) for a normal external entity, with: *)
7605 (* + id is the index of the entity's name in the DTD; *)
7606 (* + other is the underlying stack. *)
7607 (* The components are nested according to access frequency. *)
7609 (* The second form of an external entity is *)
7610 (* EXT1(dec,line,col,break,typ). This is an unbuffered *)
7611 (* entity whose encoding declaration is being read. We may not load *)
7612 (* an array of characters as a whole because the encoding might still *)
7613 (* change. The components have the same meaning as for EXT2. *)
7615 (* A closed entity remains on the stack until the next getChar, for *)
7616 (* purposes of error printing. A closed external entity has the form *)
7617 (* CLOSED(dec,l,col,typ); components have the same meaning *)
7618 (* as for open external entities. A closed internal entity has the *)
7619 (* form ENDED(id,other) with components as above. *)
7621 (* Sometimes (for parsing xml/decl declarations) we need a lookahead. *)
7622 (* LOOKING(cs,q) is a state remembering all chars cs looked ahead up *)
7623 (* to state q, in reverse order. LOOKED(cs,q) is an undone lookahead, *)
7624 (* the looked-ahead chars now in the right order. *)
7625 (*--------------------------------------------------------------------*)
7626 datatype ExtType = SPECIAL of Special | NORMAL of EntId * State
7628 LOOKED of Data * State
7629 | ENDED of EntId * State
7630 | CLOSED of DecFile * int * int * ExtType
7631 | INT of Vector * int * int * (EntId * State)
7632 | EXT1 of DecFile * int * int * bool * ExtType
7633 | EXT2 of CharBuffer * int * int * int * int * bool
7634 * (DecFile * DecodeError option * ExtType)
7636 exception CantOpenFile of (string * string) * AppData
7638 (*--------------------------------------------------------------------*)
7639 (* Extract the unique number from a state. *)
7640 (*--------------------------------------------------------------------*)
7641 fun getExtEntId extType =
7643 of SPECIAL DOC_ENTITY => GENERAL 0
7644 | SPECIAL EXT_SUBSET => PARAMETER 0
7645 | NORMAL(id,_) => id
7648 of LOOKED (_,q) => getEntId q
7650 | CLOSED(_,_,_,extType) => getExtEntId extType
7651 | INT(_,_,_,(id,_)) => id
7652 | EXT1(_,_,_,_,extType) => getExtEntId extType
7653 | EXT2(_,_,_,_,_,_,(_,_,extType)) => getExtEntId extType
7655 (*--------------------------------------------------------------------*)
7656 (* Find the nearest enclosing external entity, and return its *)
7657 (* filename, line and column number. *)
7658 (*--------------------------------------------------------------------*)
7661 of ENDED(_,other) => getPos other
7662 | INT(_,_,_,(_,other)) => getPos other
7663 | CLOSED(dec,l,col,_) => (decName dec,l,col)
7664 | EXT1(dec,l,col,_,_) => (decName dec,l,col)
7665 | EXT2(_,_,_,l,col,_,(dec,_,_)) => (decName dec,l,col)
7666 | LOOKED (cs,q) => let val (f,l,c) = getPos q
7668 in if c>=k then (f,l,c-k) else (f,l,0)
7671 (*--------------------------------------------------------------------*)
7672 (* get the path of the nearest enclosing external entity. *)
7673 (*--------------------------------------------------------------------*)
7676 of LOOKED (_,q) => getUri q
7677 | ENDED(_,other) => getUri other
7678 | INT(_,_,_,(_,other)) => getUri other
7679 | CLOSED(dec,l,col,_) => decUri dec
7680 | EXT1(dec,l,col,_,_) => decUri dec
7681 | EXT2(_,_,_,l,col,_,(dec,_,_)) => decUri dec
7683 (*--------------------------------------------------------------------*)
7684 (* close all files, return nothing. *)
7685 (*--------------------------------------------------------------------*)
7688 of LOOKED(_,other) => closeAll other
7689 | ENDED(_,other) => closeAll other
7690 | CLOSED(_,_,_,SPECIAL _) => ()
7691 | CLOSED(_,_,_,NORMAL(_,other)) => closeAll other
7692 | INT(_,_,_,(_,other)) => closeAll other
7693 | EXT1(dec,_,_,_,SPECIAL _) => ignore(decClose dec)
7694 | EXT1(dec,_,_,_,NORMAL(_,other)) => (decClose dec; closeAll other)
7695 | EXT2(_,_,_,_,_,_,(dec,_,SPECIAL _)) => ignore(decClose dec)
7696 | EXT2(_,_,_,_,_,_,(dec,_,NORMAL(_,other))) => (decClose dec; closeAll other)
7698 (*--------------------------------------------------------------------*)
7699 (* is this entity already on the stack? *)
7700 (*--------------------------------------------------------------------*)
7701 fun isOpen (idx,isParam,q) =
7702 let val id = makeEntId(idx,isParam)
7705 of LOOKED (_,other) => doit other
7706 | ENDED(id',other) => id=id' orelse doit other
7707 | CLOSED(_,_,_,SPECIAL _) => false
7708 | CLOSED(_,_,_,NORMAL(id',other)) => id=id' orelse doit other
7709 | INT(_,_,_,(id',other)) => id=id' orelse doit other
7710 | EXT1(_,_,_,_,SPECIAL _) => false
7711 | EXT1(_,_,_,_,NORMAL(id',other)) => id=id' orelse doit other
7712 | EXT2(_,_,_,_,_,_,(_,_,SPECIAL _)) => false
7713 | EXT2(_,_,_,_,_,_,(_,_,NORMAL(id',other))) => id=id' orelse doit other
7717 (*--------------------------------------------------------------------*)
7718 (* are we in the internal subset, i.e., in the document entity? *)
7719 (* The internal subset can only be in the document entity, since no *)
7720 (* parameter entities are declared prior to it. The document entity *)
7721 (* is then the only entity on the stack. *)
7722 (*--------------------------------------------------------------------*)
7725 of LOOKED (_,q) => inDocEntity q
7726 | ENDED(_,other) => inDocEntity other
7727 | INT(_,_,_,(_,other)) => inDocEntity other
7728 | CLOSED(_,_,_,NORMAL _) => false
7729 | CLOSED(_,_,_,SPECIAL what) => what=DOC_ENTITY
7730 | EXT1(_,_,_,_,NORMAL _) => false
7731 | EXT1(_,_,_,_,SPECIAL what) => what=DOC_ENTITY
7732 | EXT2(_,_,_,_,_,_,(_,_,NORMAL _)) => false
7733 | EXT2(_,_,_,_,_,_,(_,_,SPECIAL what)) => what=DOC_ENTITY
7735 (*--------------------------------------------------------------------*)
7736 (* is this state the document end, i.e., are all entities closed? *)
7737 (*--------------------------------------------------------------------*)
7740 of LOOKED (_,q) => isSpecial q
7741 | CLOSED(_,_,_,SPECIAL _) => true
7742 | EXT1(_,_,_,_,SPECIAL _) => true
7743 | EXT2(_,_,_,_,_,_,(_,_,SPECIAL _)) => true
7746 (*--------------------------------------------------------------------*)
7747 (* Initialize and load a new buffer when opening an external entity. *)
7748 (*--------------------------------------------------------------------*)
7751 val arr = Array.array(BUFSIZE,0wx0)
7752 val (n,dec1,err) = decGetArray dec arr
7756 (*--------------------------------------------------------------------*)
7757 (* Open an external/internal entity. *)
7758 (*--------------------------------------------------------------------*)
7759 fun pushIntern(q,id,isParam,vec) =
7760 INT(vec,Vector.length vec,0,(makeEntId(id,isParam),q))
7761 fun pushExtern(q,id,isParam,uri) =
7763 val dec = decOpenXml (SOME uri)
7764 val auto = decEncoding dec
7765 val q1 = EXT1(dec,1,0,false,NORMAL(makeEntId(id,isParam),q))
7768 fun pushSpecial(what,uri) =
7770 val dec = decOpenXml uri
7771 val auto = decEncoding dec
7772 val q = EXT1(dec,1,0,false,SPECIAL what)
7776 (*--------------------------------------------------------------------*)
7777 (* confirm the autodetected encoding of an external entity. *)
7778 (*--------------------------------------------------------------------*)
7779 fun commitAuto(a,q) =
7781 of EXT1(dec,l,col,brk,typ) =>
7783 val a1 = a before decCommit dec
7784 handle DecError(_,_,err)
7785 => hookError(a,(getPos q,ERR_DECODE_ERROR err))
7786 val (arr,n,dec1,err) = initArray dec
7787 in (a1,EXT2(arr,n,0,l,col,brk,(dec1,err,typ)))
7790 in (a1,EXT1(dec,l,col,brk,typ))
7793 | LOOKED(cs,q1) => let val (a1,q2) = commitAuto (a,q1)
7794 in (a1,LOOKED(cs,q2))
7797 | _ => raise InternalError(THIS_MODULE,"commitAuto
",
7798 "entity is neither EXT1 nor CLOSED nor LOOKED
")
7800 (*--------------------------------------------------------------------*)
7801 (* change from the autodetected encoding to the declared one. *)
7802 (*--------------------------------------------------------------------*)
7803 fun changeAuto (a,q,decl) =
7805 of EXT1(dec,l,col,brk,typ) =>
7807 val dec1 = decSwitch(dec,decl)
7808 handle DecError(dec,_,err)
7809 => let val a1 = hookError(a,(getPos q,ERR_DECODE_ERROR err))
7810 val _ = decClose dec
7811 val uri = decName dec
7813 of ERR_UNSUPPORTED_ENC _ => "Unsupported encoding
"
7814 | _ => "Declared encoding incompatible
"
7815 ^"with auto
-detected encoding
"
7816 in raise CantOpenFile ((uri,msg),a1)
7818 val newEnc = decEncoding dec1
7819 val (arr,n,dec2,err) = initArray dec1
7820 in (a,EXT2(arr,n,0,l,col,brk,(dec2,err,typ)),newEnc)
7823 in (a,EXT1(dec1,l,col,brk,typ),newEnc)
7827 | LOOKED(cs,q1) => let val (a2,q2,enc2) = changeAuto(a,q1,decl)
7828 in (a2,LOOKED(cs,q2),enc2)
7830 | CLOSED(dec,_,_,_) => (a,q,decEncoding dec)
7831 | _ => raise InternalError(THIS_MODULE,"changeAuto
",
7832 "entity is neither EXT1 nor CLOSED nor LOOKED
")
7834 (*--------------------------------------------------------------------*)
7835 (* Get one character from the current entity. Possibly reload buffer. *)
7836 (* Return 0wx0 at entity end. Otherwise check whether the character *)
7837 (* is valid (cf. 2.2). If the last character was a carriage return *)
7838 (* (0xD) supress a line feed (0xA). *)
7839 (*--------------------------------------------------------------------*)
7842 of ENDED(_,other) => getChar(a,other)
7843 | CLOSED(_,_,_,typ) =>
7845 of SPECIAL _ => raise InternalError (THIS_MODULE,"getChar
",
7846 "attempt to read beyond special entity
end")
7847 | NORMAL(_,other) => getChar(a,other))
7848 | INT(vec,s,i,io) =>
7849 if i>=s then (0wx0,a,ENDED io)
7850 else (Vector.sub(vec,i),a,INT(vec,s,i+1,io))
7851 | EXT1(dec,l,col,br,typ) =>
7853 val (c,dec1) = decGetChar dec
7855 if (* c>=0wx20 orelse c=0wx09 *)
7858 orelse c>=0wxE000 andalso (c<=0wxFFFD
7859 orelse c>=0wx10000))
7861 then (c,a,EXT1(dec1,l,col+1,false,typ))
7863 then if br then getChar(a,EXT1(dec1,l,col,false,typ))
7864 else (c,a,EXT1(dec1,l+1,0,false,typ))
7865 else (if c=0wxD then (0wxA,a,EXT1(dec1,l+1,0,true,typ))
7866 else let val a1 = hookError(a,(getPos q,ERR_NON_XML_CHAR c))
7867 in getChar(a1,EXT1(dec1,l,col+1,false,typ))
7870 handle DecEof dec => (0wx0,a,CLOSED(dec,l,col,typ))
7871 | DecError(dec,eof,err) =>
7872 let val err = ERR_DECODE_ERROR err
7873 val a1 = hookError(a,(getPos q,err))
7874 in if eof then (0wx0,a,CLOSED(dec,l,col,typ))
7875 else getChar(a1,EXT1(dec,col,l,br,typ))
7877 | EXT2(arr,s,i,l,col,br,det) =>
7879 then let val c = Array.sub(arr,i)
7880 in if (* c>=0wx20 orelse c=0wx09 *)
7881 (* c>=0wx0020 andalso c<=0wxD7FF orelse c=0wx9 orelse *)
7882 (* c>=0wxE000 andalso c<=0wxFFFD orelse c>=0wx10000 *)
7885 orelse c>=0wxE000 andalso (c<=0wxFFFD
7886 orelse c>=0wx10000))
7888 then (c,a,EXT2(arr,s,i+1,l,col+1,false,det))
7890 then if br then getChar(a,EXT2(arr,s,i+1,l,col,false,det))
7891 else (c,a,EXT2(arr,s,i+1,l+1,0,false,det))
7892 else (if c=0wxD then (0wxA,a,EXT2(arr,s,i+1,l+1,0,true,det))
7893 else let val a1 = hookError(a,(getPos q,ERR_NON_XML_CHAR c))
7894 in getChar(a1,EXT2(arr,s,i+1,l,col+1,false,det))
7897 else let val (dec,err,typ) = det
7898 val (a1,(n,dec1,err1)) =
7900 of NONE => if s=BUFSIZE then (a,decGetArray dec arr)
7901 else (a,(0,dec,NONE))
7902 | SOME err => (hookError(a,(getPos q,ERR_DECODE_ERROR err)),
7903 decGetArray dec arr)
7904 in if n=0 andalso not (isSome err1)
7905 then (0wx0,a1,CLOSED(dec1,l,col,typ))
7906 else getChar(a1,EXT2(arr,n,0,l,col,br,(dec1,err1,typ)))
7908 | LOOKED(nil,q) => getChar(a,q)
7909 | LOOKED(c::cs,q) => (c,a,LOOKED(cs,q))
7911 (*--------------------------------------------------------------------*)
7912 (* unget a list of characters. *)
7913 (*--------------------------------------------------------------------*)
7914 fun ungetChars (q,cs) = LOOKED(cs,q)
7916 (* stop of ../../Parser/entities.sml *)
7917 (* start of ../../Parser/Dtd/dtdDeclare.sml *)
7918 (*--------------------------------------------------------------------------*)
7919 (* Structure: DtdDeclare *)
7921 (*--------------------------------------------------------------------------*)
7922 (* Functor: DtdDeclare *)
7923 (*--------------------------------------------------------------------------*)
7924 (* This module provides functions for adding declarations to the DTD tables *)
7925 (* and for doing checks on components of declarations. *)
7926 (*--------------------------------------------------------------------------*)
7927 functor DtdDeclare (structure Dtd : Dtd
7928 structure Entities : Entities
7929 structure ParserOptions : ParserOptions) =
7933 Base Dtd Errors Entities ParserOptions UniChar UniClasses
7935 (*--------------------------------------------------------------------*)
7936 (* check whether a sequence a chars is the b-adic representation of a *)
7937 (* character's code, terminated by ";". base will be 10 or 16, isBase *)
7938 (* will check for a character being a decimal/hexadecimal number. *)
7939 (*--------------------------------------------------------------------*)
7940 fun checkBasimal (base,baseValue) (ch:Char,cs) =
7941 let fun doit _ (nil:Data) = false
7942 | doit yet [0wx3B] = yet=ch
7943 | doit yet (c::cs) = case baseValue c
7945 | SOME v => doit (base*yet+v) cs
7948 val checkDecimal = checkBasimal (0w10,decValue)
7949 val checkHeximal = checkBasimal (0wx10,hexValue)
7951 (*--------------------------------------------------------------------*)
7952 (* check a character reference for identifying a character. *)
7953 (*--------------------------------------------------------------------*)
7954 fun checkRef (ch,0wx26::0wx23::0wx78::cs) (* "&#x
..." *) = checkHeximal(ch,cs)
7955 | checkRef (ch,0wx26::0wx23::cs) (* "&#
..." *) = checkDecimal(ch,cs)
7956 | checkRef _ = false
7958 (*--------------------------------------------------------------------*)
7959 (* check for a single character ch. *)
7960 (*--------------------------------------------------------------------*)
7961 fun checkSingle (ch,[c]) = c=ch
7962 | checkSingle _ = false
7964 (*--------------------------------------------------------------------*)
7965 (* check a predefined entity for being well defined. Note that both *)
7966 (* a single char and a char ref representation are allowed, except *)
7967 (* for 'amp' which must be escaped. *)
7968 (*--------------------------------------------------------------------*)
7969 fun checkPredef (idx,cs) =
7971 of 1 => checkRef(0wx26,cs)
7972 | 2 => checkSingle(0wx3C,cs) orelse checkRef(0wx3C,cs)
7973 | 3 => checkSingle(0wx3E,cs) orelse checkRef(0wx3E,cs)
7974 | 4 => checkSingle(0wx27,cs) orelse checkRef(0wx27,cs)
7975 | 5 => checkSingle(0wx22,cs) orelse checkRef(0wx22,cs)
7978 (*--------------------------------------------------------------------*)
7979 (* Given the declaration of an entity check whether it is predefined. *)
7980 (* If no return false. If yes, check whether is was already declared *)
7981 (* and whether it is correctly declared. See 4.6: *)
7983 (* All XML processors must recognize these entities whether they *)
7984 (* are declared or not. For interoperability, valid XML documents *)
7985 (* should declare these entities, like any others, before using *)
7986 (* them. If the entities in question are declared, they must be *)
7987 (* declared as internal entities whose replacement text is the *)
7988 (* single character being escaped or a character reference to that *)
7989 (* character, as shown below. *)
7991 (* <!ENTITY lt "&#
38;#
60;"> *)
7992 (* <!ENTITY gt "&#
62;"> *)
7993 (* <!ENTITY amp "&#
38;#
38;"> *)
7994 (* <!ENTITY apos "&#
39;"> *)
7995 (* <!ENTITY quot "&#
34;"> *)
7997 (* Note that the < and & characters in the declarations of "lt
" and *)
7998 (* "amp
" are doubly escaped to meet the requirement that entity *)
7999 (* replacement be well-formed. *)
8001 (* print an error if the entity was already declared. *)
8002 (* print an error if the declaration is not correct. *)
8003 (*--------------------------------------------------------------------*)
8004 fun checkPredefined dtd (a,q) (idx,ent) =
8005 if !O_VALIDATE andalso idx>=1 andalso idx<=5 then
8007 val a1 = if !O_WARN_MULT_ENT_DECL andalso isRedefined dtd idx
8008 then let val warn = WARN_MULT_DECL(IT_GEN_ENT,Index2GenEnt dtd idx)
8009 in hookWarning(a,(getPos q,warn))
8011 else a before setRedefined dtd idx
8013 if !O_CHECK_PREDEFINED then
8016 of GE_INTERN(_,rep) => checkPredef (idx,Vector2Data rep)
8018 in if correct then a1
8019 else let val err = ERR_DECL_PREDEF(Index2GenEnt dtd idx,validPredef idx)
8020 in hookError(a1,(getPos q,err))
8028 (*--------------------------------------------------------------------*)
8029 (* add an entity declaration to the DTD tables. 4.2 *)
8031 (* ... If the same entity is declared more than once, the first *)
8032 (* declaration encountered is binding; at user option, an XML *)
8033 (* processor may issue a warning if entities are declared multiple *)
8036 (* For general entities, check whether it is a predefined entity and *)
8037 (* if so, whether it is declared correctly. *)
8038 (*--------------------------------------------------------------------*)
8039 (* print a warning and ignore the declaration if the notation was *)
8040 (* declared previously. *)
8041 (*--------------------------------------------------------------------*)
8042 fun addGenEnt dtd (a,q) (idx,ent,ext) =
8043 case getGenEnt dtd idx
8044 of (GE_NULL,_) => a before setGenEnt dtd (idx,(ent,ext))
8045 | _ => let val (pre,a1) = checkPredefined dtd (a,q) (idx,ent)
8046 in if pre orelse not (!O_WARN_MULT_ENT_DECL) then a1
8047 else hookWarning(a1,(getPos q,WARN_MULT_DECL
8048 (IT_GEN_ENT,Index2GenEnt dtd idx)))
8051 fun addParEnt dtd (a,q) (idx,ent,ext) =
8052 case getParEnt dtd idx
8053 of (PE_NULL,_) => a before setParEnt dtd (idx,(ent,ext))
8054 | _ => if !O_WARN_MULT_ENT_DECL
8055 then hookWarning(a,(getPos q,WARN_MULT_DECL
8056 (IT_PAR_ENT,Index2ParEnt dtd idx)))
8059 (*--------------------------------------------------------------------*)
8060 (* at option print a warning if not all predefined entities have been *)
8061 (* declared. Cf. 4.1: *)
8063 (* For interoperability, valid documents should declare the *)
8064 (* entities amp, lt, gt, apos, quot, in the form specified in *)
8065 (* "4.6 Predefined Entities
". *)
8066 (*--------------------------------------------------------------------*)
8067 fun checkPreDefined dtd (a,q) =
8068 if !O_VALIDATE andalso !O_INTEROPERABILITY andalso
8069 !O_WARN_SHOULD_DECLARE andalso hasDtd dtd
8070 then case notRedefined dtd
8072 | ents => hookWarning(a,(getPos q,WARN_SHOULD_DECLARE ents))
8075 (*--------------------------------------------------------------------*)
8076 (* add a notation declaration to the DTD tables. *)
8078 (* though the rec. says nothing about repeated notation declarations, *)
8079 (* I assume that the intention is to treat them like entities, i.e. *)
8080 (* ignore repeated declarations with an optional warning. *)
8082 (* print a warning and ignore the declaration if the notation was *)
8083 (* declared previously. *)
8084 (*--------------------------------------------------------------------*)
8085 fun addNotation dtd (a,q) (idx,nt) =
8086 if hasNotation dtd idx
8087 then if !O_WARN_MULT_NOT_DECL
8088 then hookWarning(a,(getPos q,WARN_MULT_DECL
8089 (IT_NOTATION,Index2AttNot dtd idx)))
8091 else a before setNotation dtd (idx,nt)
8093 (*--------------------------------------------------------------------*)
8094 (* add an element declaration to the element table. Only the content *)
8095 (* part of the element info is updated. 3.2: *)
8097 (* Validity Constraint: Unique Element Type Declaration *)
8098 (* No element type may be declared more than once. *)
8100 (* print an error and ignore the declaration if the element was *)
8101 (* declared previously. *)
8102 (*--------------------------------------------------------------------*)
8103 fun addElement dtd (a,q) (idx,cont,ext) =
8104 let val {decl,atts,errAtts,...} = getElement dtd idx
8106 of NONE => a before setElement dtd (idx,{decl = SOME(cont,ext),
8109 | SOME _ => if !O_VALIDATE
8110 then hookError(a,(getPos q,ERR_REDEC_ELEM(Index2Element dtd idx)))
8114 (*--------------------------------------------------------------------*)
8115 (* at option, pretend an element is declared by adding a default *)
8116 (* declaration. Only the decl flag of the element info is updated. *)
8117 (*--------------------------------------------------------------------*)
8118 fun handleUndeclElement dtd idx =
8120 val {atts,errAtts,...} = getElement dtd idx
8121 val newInfo = {decl = SOME(CT_ANY,false),
8124 in newInfo before setElement dtd (idx,newInfo)
8127 (*--------------------------------------------------------------------*)
8128 (* check whether an element is declared and whether it already had an *)
8129 (* attribute list declaration. Cf. 3.3: *)
8131 (* At user option, an XML processor may issue a warning if *)
8132 (* attributes are declared for an element type not itself declared, *)
8133 (* but this is not an error. *)
8135 (* ... an XML processor may at user option issue a warning when *)
8136 (* more than one attribute-list declaration is provided for a given *)
8137 (* element type, ... *)
8139 (* print a warning if the element is not declared or already had an *)
8140 (* attribute list declaration. *)
8141 (*--------------------------------------------------------------------*)
8142 fun enterAttList dtd (a,q) idx =
8144 val {decl,atts,errAtts,...} = getElement dtd idx
8145 val a1 = if isSome decl orelse not (!O_WARN_ATT_NO_ELEM) then a
8146 else hookWarning(a,(getPos q,WARN_ATT_UNDEC_ELEM(Index2Element dtd idx)))
8149 of NONE => a1 before
8150 setElement dtd (idx,{decl=decl,atts=SOME(nil,false),errAtts=errAtts})
8151 | _ => if !O_INTEROPERABILITY andalso !O_WARN_MULT_ATT_DECL
8152 then hookWarning(a1,(getPos q,WARN_MULT_ATT_DECL(Index2Element dtd idx)))
8156 (*--------------------------------------------------------------------*)
8157 (* check whether attribute "xml
:space
" is declared correctly. 2.10: *)
8159 (* A special attribute named xml:space may be attached ... In valid *)
8160 (* documents, this attribute, like any other, must be declared if *)
8161 (* it is used. When declared, it must be given as an enumerated *)
8162 (* type whose only possible values are "default
" and "preserve
". *)
8163 (*--------------------------------------------------------------------*)
8164 fun checkAttDef (a,q) (aidx,attType,_,_) =
8165 if aidx<>xmlSpaceIdx orelse attType=xmlSpaceType then a
8166 else hookError(a,(getPos q,ERR_XML_SPACE))
8168 (*--------------------------------------------------------------------*)
8169 (* enter a definition of a single attribute to the element table. *)
8170 (* ignore the definition if the attribute is already defined for that *)
8171 (* element. Cf. 3.3: *)
8173 (* When more than one AttlistDecl is provided for a given element *)
8174 (* type, the contents of all those provided are merged. When more *)
8175 (* than one definition is provided for the same attribute of a *)
8176 (* given element type, the first declaration is binding and later *)
8177 (* declarations are ignored. For interoperability, an XML processor *)
8178 (* may at user option issue a warning when ... more than one *)
8179 (* attribute definition is provided for a given attribute, but this *)
8180 (* is not an error. *)
8182 (* If the attribute type is ID, check whether an element already has *)
8183 (* an attribute of that type. 3.3.1: *)
8185 (* Validity Constraint: One ID per Element Type *)
8186 (* No element type may have more than one ID attribute specified. *)
8187 (*--------------------------------------------------------------------*)
8188 (* print an error if the element already has an ID attribute. *)
8189 (* print a warning if the attr. is already defined for this element. *)
8190 (*--------------------------------------------------------------------*)
8191 (* return the new application data. *)
8192 (*--------------------------------------------------------------------*)
8193 fun addAttribute dtd (a,q) (eidx,attDef as (att,attType,attDefault,_)) =
8195 val a1 = checkAttDef (a,q) attDef
8197 fun doit nil = (false,[attDef],a)
8198 | doit (atts as (ad as (aidx,_,_,_))::rest) =
8200 then let val a1 = if !O_INTEROPERABILITY andalso !O_WARN_MULT_ATT_DEF
8201 then let val warn = WARN_MULT_ATT_DEF
8202 (Index2Element dtd eidx,Index2AttNot dtd att)
8203 in hookWarning(a,(getPos q,warn))
8208 else (if aidx<att then (false,attDef::atts,a)
8209 else let val (redefined,atts1,a1) = doit rest
8210 in (redefined,ad::atts1,a1)
8213 val {decl,atts,errAtts,...} = getElement dtd eidx
8214 val (defs,hadId) = getOpt(atts,(nil,false))
8215 val (redefined,defs1,a1) = doit defs
8216 val (newId,a1) = if isIdType attType
8217 then let val a1 = if hadId andalso (not redefined) andalso !O_VALIDATE
8218 then hookError(a,(getPos q,ERR_MULT_ID_ELEM
8219 (Index2Element dtd eidx)))
8224 val (_,defs1,a1) = doit defs
8225 val _ = setElement dtd (eidx,{decl = decl,
8226 atts = SOME(defs1,newId),
8231 (*--------------------------------------------------------------------*)
8232 (* check whether a name starts with (a case variant of) "xml
" and if *)
8233 (* yes, whether it is an allowed name from the spec. Cf. 3: *)
8235 (* This specification does not constrain ... names of the element *)
8236 (* types and attributes, except that names beginning with a match *)
8237 (* to (('X'|'x')('M'|'m')('L'|'l')) are reserved for standardization*)
8238 (* in this or future versions of this specification. *)
8240 (* and 2.10, 2.12: *)
8242 (* ... a special attribute named xml:space may be attached ... *)
8243 (* ... A special attribute named xml:lang may be inserted ... *)
8245 (* print an error if the name is reserved and not standardized. *)
8246 (*--------------------------------------------------------------------*)
8247 fun startsWithXml name =
8249 of c1::c2::c3::cs => (c1=0wx58 orelse c1=0wx78) andalso
8250 (c2=0wx4D orelse c2=0wx6D) andalso (c3=0wx4C orelse c3=0wx6C)
8252 fun checkAttName (a,q) name =
8253 if !O_CHECK_RESERVED andalso startsWithXml name then
8255 of [0wx78,0wx6d,0wx6c,0wx3a,0wx6c,0wx61,0wx6e,0wx67] (* ":lang
" *) => a
8256 | [0wx78,0wx6d,0wx6c,0wx3a,0wx73,0wx70,0wx61,0wx63,0wx65] (* ":space
" *) => a
8257 | _ => hookError(a,(getPos q,ERR_RESERVED(name,IT_ATT_NAME)))
8259 fun checkElemName (a,q) name =
8260 if !O_CHECK_RESERVED andalso startsWithXml name
8261 then hookError(a,(getPos q,ERR_RESERVED(name,IT_ELEM)))
8264 (*--------------------------------------------------------------------*)
8265 (* check for each element in the dtd, whether a name token occurs *)
8266 (* more than once in its enumerated attribute types. *)
8268 (* print a warning for each element where this is true. *)
8270 (* return nothing. *)
8271 (*--------------------------------------------------------------------*)
8272 fun checkMultEnum dtd (a,q) =
8273 if !O_INTEROPERABILITY andalso !O_WARN_MULT_ENUM then
8277 (*-----------------------------------------------------*)
8278 (* for each i, add i to yet if it not in that list. *)
8279 (* otherwise add it to dup. *)
8280 (*-----------------------------------------------------*)
8281 fun do_list yd nil = yd
8282 | do_list (yet,dup) (i::is) =
8283 let val yd' = case insertNewInt (i,yet)
8284 of NONE => (yet,insertInt (i,dup))
8285 | SOME new => (new,dup)
8288 (*-----------------------------------------------------*)
8289 (* For each enumerated attribute type call the appro- *)
8290 (* priate function. *)
8291 (*-----------------------------------------------------*)
8292 fun doit (yet,dup) nil = dup
8293 | doit (yet,dup) ((_,attType,_,_)::rest) =
8295 of AT_GROUP is => doit (do_list (yet,dup) is) rest
8296 | AT_NOTATION is => doit (do_list (yet,dup) is) rest
8297 | _ => doit (yet,dup) rest
8299 val defs = case #atts(getElement dtd idx)
8301 | SOME(defs,_) => defs
8302 val dup = doit (nil,nil) defs
8305 else hookWarning(a,(getPos q,WARN_ENUM_ATTS
8306 (Index2Element dtd idx,map (Index2AttNot dtd) dup)))
8308 (*-----------------------------------------------------------*)
8309 (* the highest used index is usedIndices-1. *)
8310 (*-----------------------------------------------------------*)
8311 val maxIdx = maxUsedElem dtd
8313 fun doit a i = if i>maxIdx then a else doit (doElem a i) (i+1)
8319 (*--------------------------------------------------------------------*)
8320 (* check for all id names refereneced by some IDREF attribute whether *)
8321 (* it was also declared by an ID attribute. *)
8323 (* print an error if a referenced ID name was not defined. *)
8325 (* return nothing. *)
8326 (*--------------------------------------------------------------------*)
8327 fun checkDefinedIds dtd (a,q) =
8330 val maxId = maxUsedId dtd
8332 fun doOne a i = let val (decl,refs) = getId dtd i
8333 in if decl orelse null refs then a
8334 else hookError(a,(hd refs,ERR_UNDECL_ID(Index2Id dtd i,tl refs)))
8336 fun doAll a i = if i>maxId then a else doAll (doOne a i) (i+1)
8342 (*--------------------------------------------------------------------*)
8343 (* check for all declared unparsed entities, whether their notations *)
8344 (* have been declared. *)
8346 (* print an error if a notation was not declared. *)
8348 (* return nothing. *)
8349 (*--------------------------------------------------------------------*)
8350 fun checkUnparsed dtd a =
8353 val maxGen = maxUsedGen dtd
8356 case getGenEnt dtd i
8357 of (GE_UNPARSED(_,nidx,pos),_) =>
8358 if hasNotation dtd nidx then a
8359 else hookError(a,(pos,ERR_UNDECLARED
8360 (IT_NOTATION,Index2AttNot dtd nidx,LOC_NONE)))
8362 fun doAll a i = if i>maxGen then a else doAll (doOne a i) (i+1)
8368 (* stop of ../../Parser/Dtd/dtdDeclare.sml *)
8369 (* start of ../../Parser/Dtd/dtdAttributes.sml *)
8370 (*--------------------------------------------------------------------------*)
8371 (* Structure: DtdAttributes *)
8373 (* Exceptions raised by functions in this structure: *)
8374 (* checkAttValue : AttValue InternalError *)
8375 (* checkDefinedIds : none *)
8376 (* genMissingAtts : none *)
8377 (* makeAttValue : AttValue InternalError *)
8378 (*--------------------------------------------------------------------------*)
8379 functor DtdAttributes (structure Dtd : Dtd
8380 structure Entities : Entities
8381 structure ParserOptions : ParserOptions) =
8383 structure DtdDeclare = DtdDeclare (structure Dtd = Dtd
8384 structure Entities = Entities
8385 structure ParserOptions = ParserOptions)
8387 UniChar UniClasses UtilList
8388 Base Dtd DtdDeclare Errors Entities HookData ParserOptions
8390 val THIS_MODULE = "DtdAttributes
"
8392 exception AttValue of AppData
8394 (*--------------------------------------------------------------------*)
8395 (* this is the list of language codes in ISO 639. *)
8396 (*--------------------------------------------------------------------*)
8399 ["AA
","AB
","AF
","AM
","AR
","AS
","AY
","AZ
",
8400 "BA
","BE
","BG
","BH
","BI
","BN
","BO
","BR
",
8401 "CA
","CO
","CS
","CY
",
8403 "EL
","EN
","EO
","ES
","ET
","EU
",
8404 "FA
","FI
","FJ
","FO
","FR
","FY
",
8405 "GA
","GD
","GL
","GN
","GU
",
8406 "HA
","HE
","HI
","HR
","HU
","HY
",
8407 "IA
","ID
","IE
","IK
","IN
","IS
","IT
","IU
","IW
",
8409 "KA
","KK
","KL
","KM
","KN
","KO
","KS
","KU
","KY
",
8410 "LA
","LN
","LO
","LT
","LV
",
8411 "MG
","MI
","MK
","ML
","MN
","MO
","MR
","MS
","MT
","MY
",
8412 "NA
","NE
","NL
","NO
",
8414 "PA
","PL
","PS
","PT
",
8416 "RM
","RN
","RO
","RU
","RW
",
8417 "SA
","SD
","SG
","SH
","SI
","SK
","SL
","SM
","SN
","SO
","SQ
","SR
","SS
","ST
","SU
","SV
","SW
",
8418 "TA
","TE
","TG
","TH
","TI
","TK
","TL
","TN
","TO
","TR
","TS
","TT
","TW
",
8419 "UG
","UK
","UR
","UZ
",
8426 (*--------------------------------------------------------------------*)
8427 (* a two-dimensional field [0..25][0..25] of booleans for ISO 639. *)
8428 (*--------------------------------------------------------------------*)
8431 val arr = Array.tabulate(26,fn _ => Array.array(26,false))
8433 (fn s => Array.update(Array.sub(arr,ord(String.sub(s,0))-65),
8434 ord(String.sub(s,1))-65,
8437 in Vector.tabulate(26,fn i => Array.extract (Array.sub(arr,i),0,NONE))
8440 (*--------------------------------------------------------------------*)
8441 (* for a letter, compute ord(toUpper c)-ord(#"A
"), for subscripting. *)
8442 (*--------------------------------------------------------------------*)
8443 val toUpperMask = Chars.notb(0wx20)
8444 fun cIndex c = Chars.toInt(Chars.andb(c,toUpperMask)-0wx41)
8446 (*--------------------------------------------------------------------*)
8447 (* are these two letters an ISO 639 code? *)
8448 (*--------------------------------------------------------------------*)
8449 fun isIso639 (c1,c2) =
8450 if !O_CHECK_ISO639 then
8451 Vector.sub(Vector.sub(iso639field,cIndex c1),cIndex c2)
8452 handle Subscript => false
8453 else isAsciiLetter c1 andalso isAsciiLetter c2
8455 (*--------------------------------------------------------------------*)
8456 (* does this match Subcode ('-' Subcode)* ? *)
8457 (* is this a sequence of ('-' Subcode) ? *)
8458 (* Iana codes and user codes also end on ([a-z] | [A-Z])+ *)
8459 (*--------------------------------------------------------------------*)
8460 fun isSubcode' nil = false
8461 | isSubcode' (c::cs) =
8462 let fun doit nil = true
8463 | doit (c::cs) = if c=0wx2D then isSubcode' cs
8464 else isAsciiLetter c andalso doit cs
8465 in isAsciiLetter c andalso doit cs
8467 fun isSubcode nil = true
8468 | isSubcode (c::cs) = c=0wx2D andalso isSubcode' cs
8469 val isIanaUser = isSubcode'
8471 (*--------------------------------------------------------------------*)
8472 (* Check whether a "xml
:lang
" attribute matches the LanguageID *)
8473 (* production. 2.12: *)
8475 (* [33] LanguageID ::= Langcode ('-' Subcode)* *)
8476 (* [34] Langcode ::= ISO639Code | IanaCode | UserCode *)
8477 (* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) *)
8478 (* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ *)
8479 (* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ *)
8480 (* [38] Subcode ::= ([a-z] | [A-Z])+ *)
8482 (* print an error and raise AttValue if the "xml
:lang
" attribute does *)
8483 (* not have a valid value. *)
8484 (*--------------------------------------------------------------------*)
8485 fun checkAttSpec (a,q) (aidx,cs) =
8486 if !O_CHECK_LANGID andalso aidx=xmlLangIdx
8487 then let val valid = case cs
8488 of c::0wx2D::cs' => (c=0wx49 orelse
8491 c=0wx78) andalso isIanaUser cs'
8492 | c1::c2::cs' => isIso639 (c1,c2) andalso isSubcode cs'
8496 else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(cs,IT_LANG_ID))))
8500 (*--------------------------------------------------------------------*)
8501 (* Normalize an attribute value of type other than CDATA, and split *)
8502 (* it into tokens at space characters. Cf. 3.3.3: *)
8504 (* ... If the declared value is not CDATA, then the XML processor *)
8505 (* must further process the normalized attribute value by dis- *)
8506 (* carding any leading and trailing space (#x20) characters, and by *)
8507 (* replacing sequences of space (#x20) characters by a single space *)
8508 (* (#x20) character. *)
8510 (* replacement of references is already done when parsing the literal,*)
8511 (* thus we need only do whitespace normalization. we don't need to *)
8512 (* take care of the 3rd rule since replacement of sequences of #x20 *)
8513 (* and then splitting subsumes its effect. *)
8515 (* return the list of tokens as character lists and the normalized *)
8516 (* value as a char vector. *)
8517 (*--------------------------------------------------------------------*)
8518 fun splitAttValue av =
8520 fun doOne nil = (nil,nil,nil)
8521 | doOne (c::cs) = if c=0wx20 then let val (toks,ys) = doAll true cs
8524 else let val (tok,toks,ys) = doOne cs
8525 in ((c::tok),toks,c::ys)
8527 and doAll addS nil = (nil,nil)
8528 | doAll addS (c::cs) = if c=0wx20 then doAll addS cs
8529 else let val (tok,toks,ys) = doOne cs
8531 if addS then 0wx20::c::ys else c::ys)
8534 val (tokens,normed) = doAll false av
8535 in (Data2Vector normed,tokens)
8537 (*--------------------------------------------------------------------*)
8538 (* normalize an attribute value other than CDATA according to 3.3.3. *)
8540 (* return the normalized att value as a Vector. *)
8541 (*--------------------------------------------------------------------*)
8542 fun normAttValue av =
8543 let fun doOne nil = nil
8544 | doOne (c::cs) = if c=0wx20 then doAll true cs
8546 and doAll addS nil = nil
8547 | doAll addS (c::cs) = if c=0wx20 then doAll addS cs
8548 else let val ys = doOne cs
8549 in if addS then 0wx20::c::ys else c::ys
8551 val normed = doAll false av
8552 in Data2Vector normed
8555 (*--------------------------------------------------------------------*)
8556 (* Check whether a sequence of chars forms a name (token). *)
8557 (*--------------------------------------------------------------------*)
8558 fun isNmToken cs = List.all isName cs
8559 fun isaName nil = false
8560 | isaName (c::cs) = isNms c andalso List.all isName cs
8562 (*--------------------------------------------------------------------*)
8563 (* Check whether a list of tokens is a single what fulfilling isWhat. *)
8564 (* print an error and raise AttValue if it is not. *)
8565 (*--------------------------------------------------------------------*)
8566 fun checkOne (isWhat,what,detail) (a,q) toks =
8568 of nil => raise AttValue (hookError(a,(getPos q,ERR_EXACTLY_ONE detail)))
8569 | [one] => if isWhat one then one
8570 else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(one,what))))
8571 | more => raise AttValue(hookError(a,(getPos q,ERR_AT_MOST_ONE detail)))
8572 (*--------------------------------------------------------------------*)
8573 (* Check whether a list of tokens is non-empty and all elements ful- *)
8575 (* print an error and raise AttValue if not. *)
8576 (*--------------------------------------------------------------------*)
8577 fun checkList (isWhat,what,detail) (a,q) toks =
8579 of nil => raise AttValue (hookError(a,(getPos q,ERR_AT_LEAST_ONE detail)))
8580 | _ => app (fn one => if isWhat one then ()
8581 else let val err = ERR_ATT_IS_NOT(one,what)
8582 in raise AttValue(hookError(a,(getPos q,err)))
8584 (*--------------------------------------------------------------------*)
8585 (* Convert a list of tokens into an ID att value. 3.3.1: *)
8587 (* Validity Constraint: ID *)
8588 (* Values of type ID must match the Name production. *)
8590 (* Validity Constraint: ID *)
8591 (* ... A name must not appear more than once in an XML document as *)
8592 (* a value of this type; i.e., ID values must uniquely identify the *)
8593 (* elements which bear them. *)
8595 (* mark the value as used, print an error and raise AttValue if it *)
8596 (* was already used. *)
8597 (* print an error and raise AttValue if it is not a name. *)
8598 (*--------------------------------------------------------------------*)
8599 fun takeId (dtd,inDtd) (a,q) toks =
8600 let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks
8601 val idx = Id2Index dtd one
8602 val _ = if inDtd then ()
8603 else let val (decl,refs) = getId dtd idx
8604 in if decl then let val err = ERR_REPEATED_ID one
8605 in raise AttValue (hookError(a,(getPos q,err)))
8607 else setId dtd (idx,(true,refs))
8609 in (SOME(AV_ID idx),a)
8612 (*--------------------------------------------------------------------*)
8613 (* Convert a list of tokens into an IDREF/IDREFS att value. 3.3.1: *)
8615 (* Validity Constraint: IDREF *)
8616 (* Values of type IDREF must match the Name production. *)
8618 (* print an error an raise AttValue if it is not a (list of) name(s). *)
8619 (*--------------------------------------------------------------------*)
8620 fun setIdRef (dtd,q) idx =
8621 let val (decl,refs) = getId dtd idx
8622 in setId dtd (idx,(decl,getPos q::refs))
8624 fun takeIdref (dtd,_) (a,q) toks =
8625 let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks
8626 val idx=Id2Index dtd one
8627 val _ = setIdRef (dtd,q) idx
8628 in (SOME(AV_IDREF idx),a)
8630 fun takeIdrefs (dtd,_) (a,q) toks =
8631 let val _ = checkList (isaName,IT_NAME,IT_ID_NAME) (a,q) toks
8632 val idxs = map (Id2Index dtd) toks
8633 val _ = app (setIdRef (dtd,q)) idxs
8634 in (SOME(AV_IDREFS idxs),a)
8637 (*--------------------------------------------------------------------*)
8638 (* Convert a list of tokens into an ENTITY/IES att value. 3.3.1: *)
8640 (* Validity Constraint: Entity Name *)
8641 (* Values of type ENTITY must match the Name production... *)
8642 (* must match the name of an unparsed entity declared in the DTD. *)
8644 (* print an error and raise AttValue if a token is not a name. *)
8645 (* print an error and raise AttValue if an entity is undeclared or a *)
8646 (* parsed entity. *)
8647 (*--------------------------------------------------------------------*)
8648 fun checkEntity (dtd,inDtd) (a,q) name =
8649 let val idx = GenEnt2Index dtd name
8650 val (ent,_) = getGenEnt dtd idx
8651 val _ = if inDtd then ()
8653 of GE_UNPARSED _ => ()
8654 | GE_NULL => let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE)
8655 in raise AttValue (hookError(a,(getPos q,err)))
8657 | _ => let val err = ERR_MUST_BE_UNPARSED(name,LOC_NONE)
8658 in raise AttValue (hookError(a,(getPos q,err)))
8662 fun takeEntity (dtd,inDtd) (aq as (a,_)) toks =
8663 let val one = checkOne (isaName,IT_NAME,IT_ENT_NAME) aq toks
8664 val idx = checkEntity (dtd,inDtd) aq one
8665 in (SOME(AV_ENTITY idx),a)
8667 fun takeEntities (dtd,inDtd) (aq as (a,_)) toks =
8668 let val _ = checkList (isaName,IT_NAME,IT_ENT_NAME) aq toks
8669 val idxs = map (checkEntity (dtd,inDtd) aq) toks
8670 in (SOME(AV_ENTITIES idxs),a)
8673 (*--------------------------------------------------------------------*)
8674 (* Convert a list of tokens into a NOTATION att value. 3.3.1: *)
8676 (* Validity Constraint: Notation Attributes *)
8677 (* Values of this type must match one of the notation names *)
8678 (* included in the declaration. *)
8680 (* print an error and raise AttValue if it is not a single name. *)
8681 (* print an error and raise AttValue if the notation's index is not *)
8682 (* in the list given as 1st arg. *)
8683 (*--------------------------------------------------------------------*)
8684 fun takeNotation is (dtd,inDtd) (aq as (a,q)) toks =
8685 let val one = checkOne (isaName,IT_NAME,IT_NOT_NAME) aq toks
8686 val idx = AttNot2Index dtd one
8687 val _ = if member idx is then ()
8688 else let val nots = map (Index2AttNot dtd) is
8689 val err = ERR_MUST_BE_AMONG(IT_NOT_NAME,one,nots)
8690 in raise AttValue (hookError(a,(getPos q,err)))
8692 in (SOME(AV_NOTATION(is,idx)),a)
8695 (*--------------------------------------------------------------------*)
8696 (* Convert a list of tokens into an enumerated att value. 3.3.1: *)
8698 (* Validity Constraint: Enumeration *)
8699 (* Values of this type must match one of the Nmtoken tokens in *)
8700 (* the declaration. *)
8702 (* print an error and raise AttValue if it is not a single name token.*)
8703 (* print an error and raise AttValue if the token's index is not *)
8704 (* in the list given as 1st arg. *)
8705 (*--------------------------------------------------------------------*)
8706 fun takeGroup is (dtd,_) (aq as (a,q)) toks =
8707 let val one = checkOne (isNmToken,IT_NMTOKEN,IT_NMTOKEN) aq toks
8708 val idx = AttNot2Index dtd one
8709 val _ = if member idx is then ()
8710 else let val toks = map (Index2AttNot dtd) is
8711 val err = ERR_MUST_BE_AMONG(IT_NMTOKEN,one,toks)
8712 in raise AttValue (hookError(a,(getPos q,err)))
8714 in (SOME(AV_GROUP(is,idx)),a)
8717 (*--------------------------------------------------------------------*)
8718 (* Given an attribute type and a list of characters, construct the *)
8719 (* corresponding AttValue. *)
8721 (* print an error (and possibly raise AttValue) if the attribute *)
8722 (* is ill-formed. *)
8723 (*--------------------------------------------------------------------*)
8724 fun makeAttValue dtd (a,q) (aidx,attType,ext,inDtd,cs) =
8726 then let val cv = Data2Vector cs
8727 in if !O_VALIDATE andalso hasDtd dtd
8728 then (cv,(SOME(AV_CDATA cv),checkAttSpec (a,q) (aidx,cs)))
8732 if !O_VALIDATE andalso hasDtd dtd then
8734 val a1 = checkAttSpec (a,q) (aidx,cs)
8735 val (cv,toks) = splitAttValue cs
8737 if ext andalso standsAlone dtd
8738 then let val cdata = Data2Vector cs
8739 in if cdata=cv then a1
8740 else let val err = ERR_STANDALONE_NORM(Index2AttNot dtd aidx)
8741 val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE))
8742 in hookError(a1,(getPos q,err))
8747 of AT_NMTOKEN => (cv,(SOME(AV_NMTOKEN(checkOne(isNmToken,IT_NMTOKEN,
8748 IT_NMTOKEN) (a2,q) toks)),a2))
8749 | AT_NMTOKENS => (cv,(SOME(AV_NMTOKENS toks),a2)) before
8750 checkList(isNmToken,IT_NMTOKEN,IT_NMTOKEN) (a2,q) toks
8751 | AT_ID => (cv,takeId (dtd,inDtd) (a2,q) toks)
8752 | AT_IDREF => (cv,takeIdref (dtd,inDtd) (a2,q) toks)
8753 | AT_IDREFS => (cv,takeIdrefs (dtd,inDtd) (a2,q) toks)
8754 | AT_ENTITY => (cv,takeEntity (dtd,inDtd) (a2,q) toks)
8755 | AT_ENTITIES => (cv,takeEntities (dtd,inDtd) (a2,q) toks)
8756 | AT_GROUP is => (cv,takeGroup is (dtd,inDtd) (a2,q) toks)
8757 | AT_NOTATION is => (cv,takeNotation is (dtd,inDtd) (a2,q) toks)
8758 | AT_CDATA => raise InternalError(THIS_MODULE,"makeAttValue
",
8759 "AT_CDATA
in the innermost
case")
8761 else (normAttValue cs,(NONE,a))
8763 (*--------------------------------------------------------------------*)
8764 (* given an attribute value literal and the attribute type, generate *)
8765 (* the AttValue, and check whether it complies with its default value.*)
8766 (* If yes, make an AttPresent value out of it. *)
8769 (* Validity Constraint: Fixed Attribute Default *)
8770 (* If an attribute has a default value declared with the #FIXED *)
8771 (* keyword, instances of that attribute must match the default *)
8774 (* print an error and raise AttValue if the attribute value doesn't *)
8777 (* return the value as a AttPresent value. *)
8778 (*--------------------------------------------------------------------*)
8779 fun checkAttValue dtd (a,q) ((aidx,attType,defVal,ext),literal,cs) =
8780 let val (cv,(av,a1)) = makeAttValue dtd (a,q) (aidx,attType,ext,false,cs)
8781 in if !O_VALIDATE andalso hasDtd dtd then
8783 of AD_FIXED((def,cv',_),_) =>
8784 if cv=cv' then (AP_PRESENT(literal,cv,av),a1)
8786 (hookError(a1,(getPos q,ERR_FIXED_VALUE(Index2AttNot dtd aidx,cv,cv'))))
8787 | _ => (AP_PRESENT(literal,cv,av),a1)
8788 else (AP_PRESENT(literal,cv,av),a1)
8791 (*--------------------------------------------------------------------*)
8792 (* check a defaulted attribute value for validity. *)
8794 (* since the lexical constraints are checked when the default is *)
8795 (* declared we only need to check whether notations are declared and *)
8796 (* entities are declared and unparsed. An ID attribute cannot be *)
8797 (* defaulted, so no need to check for duplicate ID attributes. *)
8798 (*--------------------------------------------------------------------*)
8799 fun checkDefaultValue dtd (a,q,pos) av =
8801 fun checkEntity (idx,a) =
8802 let val (ent,_) = getGenEnt dtd idx
8804 of GE_UNPARSED _ => a
8805 | GE_NULL => hookError(a,(getPos q,ERR_UNDECLARED
8806 (IT_GEN_ENT,Index2GenEnt dtd idx,
8807 LOC_ATT_DEFAULT pos)))
8808 | _ => hookError(a,(getPos q,ERR_MUST_BE_UNPARSED
8809 (Index2GenEnt dtd idx,LOC_ATT_DEFAULT pos)))
8812 fun checkNotation (idx,a) =
8813 if hasNotation dtd idx then a
8814 else hookError(a,(getPos q,ERR_UNDECLARED
8815 (IT_NOTATION,Index2AttNot dtd idx,LOC_ATT_DEFAULT pos)))
8818 of SOME(AV_ENTITY i) => checkEntity (i,a)
8819 | SOME(AV_ENTITIES is) => foldl checkEntity a is
8820 | SOME(AV_NOTATION(_,i)) => checkNotation(i,a)
8824 (*--------------------------------------------------------------------*)
8825 (* Generate the attributes not specified in a start-tag, the defs of *)
8826 (* these atts and the specified atts given as argument. 3.3.2: *)
8828 (* If the declaration is neither #REQUIRED nor #IMPLIED, then the *)
8829 (* AttValue value contains the declared default value; ... If a *)
8830 (* default value is declared, when an XML processor encounters an *)
8831 (* omitted attribute, it is to behave as though the attribute were *)
8832 (* present with the declared default value. *)
8834 (* Validity Constraint: Required Attribute *)
8835 (* If the default declaration is the keyword #REQUIRED, then the *)
8836 (* attribute must be specified for all elements of the type in the *)
8837 (* attribute-list declaration. *)
8839 (* print an error if a required attribute was omitted. *)
8841 (* return the AttSpecList of all attributes for this tag. *)
8842 (*--------------------------------------------------------------------*)
8843 fun genMissingAtts dtd (a,q) (defs,specd) =
8845 fun default a (idx,(v as (_,_,av),(pos,checked)),ext) =
8846 let val a1 = if ext andalso !O_VALIDATE andalso standsAlone dtd
8847 then let val err = ERR_STANDALONE_DEF(Index2AttNot dtd idx)
8848 val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE))
8849 in hookError(a,(getPos q,err))
8852 val a2 = if !O_VALIDATE andalso not (!checked andalso !O_ERROR_MINIMIZE)
8853 then checkDefaultValue dtd (a1,q,pos) av before checked := true
8855 in (AP_DEFAULT v,a1)
8857 fun doit a nil = (specd,a)
8858 | doit a ((idx,_,dv,ext)::rest) =
8859 let val (value,a1) =
8861 of AD_DEFAULT v => default a (idx,v,ext)
8862 | AD_FIXED v => default a (idx,v,ext)
8863 | AD_IMPLIED => (AP_IMPLIED,a)
8865 let val a1 = if not (!O_VALIDATE) then a
8866 else hookError(a,(getPos q,
8867 ERR_MISSING_ATT(Index2AttNot dtd idx)))
8870 val (other,a2) = doit a1 rest
8871 in ((idx,value,NONE)::other,a2)
8876 (*--------------------------------------------------------------------*)
8877 (* process an undeclared attribute in a start-tag. *)
8878 (* At option, an error message is generated only once for the same *)
8879 (* attribute and element. *)
8881 (* possibly print an error. *)
8883 (* return nothing. *)
8884 (*--------------------------------------------------------------------*)
8885 fun handleUndeclAtt dtd (a,q) (aidx,att,eidx,elem) =
8886 if !O_ERROR_MINIMIZE then
8887 let val {decl,atts,errAtts} = getElement dtd eidx
8888 in if member aidx errAtts then a
8889 else let val a1 = if !O_VALIDATE andalso hasDtd dtd
8890 then let val err = ERR_UNDECL_ATT(att,elem)
8891 in hookError(a,(getPos q,err))
8894 val a2 = checkAttName (a1,q) att
8895 val _ = setElement dtd (eidx,{decl = decl,
8897 errAtts = aidx::errAtts})
8901 else let val a1 = if !O_VALIDATE andalso hasDtd dtd
8902 then hookError(a,(getPos q,ERR_UNDECL_ATT(att,elem)))
8904 in checkAttName (a1,q) att
8908 (* stop of ../../Parser/Dtd/dtdAttributes.sml *)
8909 (* start of ../../Parser/Dtd/dtdManager.sml *)
8910 (*--------------------------------------------------------------------------*)
8911 (* Structure: Dtd *)
8921 (* Exceptions raised by functions in this structure: *)
8922 (* initDtdTables : none *)
8923 (* AttIdx2String : NoSuchSymbol *)
8924 (* ElemIdx2String : NoSuchIndex *)
8925 (* GenEntIdx2String : NoSuchIndex *)
8926 (* IdIdx2String : NoSuchIndex *)
8927 (* NotIdx2String : NoSuchIndex *)
8928 (* GenEntity2String : NoSuchIndex *)
8929 (* ElemInfo2String : NoSuchIndex NoSuchSymbol *)
8930 (* printGenEntTable : NoSuchIndex *)
8931 (* printElementTable : NoSuchIndex NoSuchSymbol *)
8932 (* printDtdTables : NoSuchIndex NoSuchSymbol *)
8933 (*--------------------------------------------------------------------------*)
8934 signature DtdManager =
8939 exception AttValue of AppData
8941 val makeAttValue : Dtd -> AppData * State
8942 -> int * Base.AttType * bool * bool * UniChar.Data
8943 -> UniChar.Vector * (Base.AttValue option * AppData)
8944 val checkAttValue : Dtd -> AppData * State
8945 -> Base.AttDef * UniChar.Vector * UniChar.Data
8946 -> HookData.AttPresent * AppData
8947 val genMissingAtts : Dtd -> AppData * State
8948 -> Base.AttDefList * HookData.AttSpecList -> HookData.AttSpecList * AppData
8949 val handleUndeclAtt : Dtd -> AppData * State
8950 -> int * UniChar.Data * int * UniChar.Data -> AppData
8951 val handleUndeclElement : Dtd -> int -> Base.ElemInfo
8953 val checkAttName : AppData * State -> UniChar.Data -> AppData
8954 val checkElemName : AppData * State -> UniChar.Data -> AppData
8955 val checkDefinedIds : Dtd -> AppData * State -> AppData
8956 val checkMultEnum : Dtd -> AppData * State -> AppData
8957 val checkPreDefined : Dtd -> AppData * State -> AppData
8958 val checkUnparsed : Dtd -> AppData -> AppData
8960 val enterAttList : Dtd -> AppData * State -> int -> AppData
8962 val addAttribute : Dtd -> AppData * State -> int * Base.AttDef -> AppData
8963 val addElement : Dtd -> AppData * State -> int * Base.ContentSpec * bool -> AppData
8964 val addGenEnt : Dtd -> AppData * State -> int * Base.GenEntity * bool -> AppData
8965 val addNotation : Dtd -> AppData * State -> int * Base.ExternalId -> AppData
8966 val addParEnt : Dtd -> AppData * State -> int * Base.ParEntity * bool -> AppData
8969 functor DtdManager (structure Dtd : Dtd
8970 structure Hooks : Hooks
8971 structure ParserOptions : ParserOptions) : DtdManager =
8973 structure Entities = Entities (structure Hooks = Hooks)
8974 structure DtdAttributes = DtdAttributes (structure Dtd = Dtd
8975 structure Entities = Entities
8976 structure ParserOptions = ParserOptions)
8981 (* stop of ../../Parser/Dtd/dtdManager.sml *)
8982 (* start of ../../Parser/Parse/parseBase.sml *)
8983 signature ParseBase =
8985 include Dfa DtdManager Resolve DfaOptions ParserOptions
8987 exception NoSuchChar of AppData * State
8988 exception NoSuchEntity of AppData * State
8989 exception NotFound of UniChar.Char * AppData * State
8990 exception SyntaxError of UniChar.Char * AppData * State
8992 val expectedOrEnded : Errors.Expected * Errors.Location -> UniChar.Char -> Errors.Error
8994 val recoverXml : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
8995 val recoverETag : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
8996 val recoverSTag : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State)
8997 val recoverDecl : bool -> UniChar.Char * AppData * State -> (UniChar.Char * AppData * State)
8999 val useParamEnts : unit -> bool
9002 (*--------------------------------------------------------------------------*)
9003 (* Structure: ParseBase *)
9004 (*--------------------------------------------------------------------------*)
9005 (* This structure provides exceptions for the Parse functions, and strings *)
9006 (* for error generation (these strings don't really need to reside in their *)
9007 (* own structure, but like this the code is more easier to read). *)
9008 (*--------------------------------------------------------------------------*)
9009 functor ParseBase (structure Dtd : Dtd
9010 structure Hooks : Hooks
9011 structure Resolve : Resolve
9012 structure ParserOptions : ParserOptions) : ParseBase =
9014 structure DfaOptions = ParserOptions.DfaOptions
9015 structure Dfa = Dfa (structure DfaOptions = DfaOptions)
9016 structure DtdManager = DtdManager (structure Dtd = Dtd
9017 structure Hooks = Hooks
9018 structure ParserOptions = ParserOptions)
9020 Base DtdManager DfaOptions Dfa Errors ParserOptions Resolve UniChar
9022 exception NoSuchChar of AppData * State
9023 exception NoSuchEntity of AppData * State
9024 exception NotFound of UniChar.Char * AppData * State
9025 exception SyntaxError of UniChar.Char * AppData * State
9027 fun expectedOrEnded (exp,ended) c =
9028 if c=0wx00 then ERR_ENDED_BY_EE ended
9029 else ERR_EXPECTED(exp,[c])
9031 (*--------------------------------------------------------------------*)
9032 (* Besides "?
>" also recognize ">" as end delimiter, because the typo *)
9033 (* might be an omitted "?
". Also stop on "<"; then the entire "?
>" *)
9034 (* was omitted; the "<" may not be consumed then. *)
9035 (* Within literals dont recognize ">" and "<", but only "?
>"; then *)
9036 (* the typo is an omitted quote character. *)
9037 (*--------------------------------------------------------------------*)
9038 fun recoverXml caq =
9040 fun do_lit ch (c,a,q) =
9043 | 0wx3F (* #"?
" *) =>
9044 let val (c1,a1,q1) = getChar (a,q)
9045 in if c1=0wx3E (* #">" *) then (c1,a1,q1)
9046 else do_lit ch (c1,a1,q1)
9048 | _ => if c=ch then (getChar (a,q))
9049 else do_lit ch (getChar (a,q))
9053 | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q)))
9054 | 0wx25 (* #"%" *) => (c,a,q)
9055 | 0wx26 (* #"&" *) => (c,a,q)
9056 | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q)))
9057 | 0wx3C (* #"<" *) => (c,a,q)
9058 | 0wx3E (* #">" *) => (getChar (a,q))
9059 | _ => doit (getChar (a,q))
9064 fun recoverETag caq =
9066 fun do_lit ch (c,a,q) =
9069 | _ => if c=ch then (getChar (a,q))
9070 else do_lit ch (getChar (a,q))
9074 | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q)))
9075 | 0wx26 (* #"&" *) => (c,a,q)
9076 | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q)))
9077 | 0wx3E (* #">" *) => (getChar (a,q))
9078 | 0wx3C (* #"<" *) => (c,a,q)
9079 | _ => doit (getChar (a,q))
9084 fun recoverSTag caq =
9086 fun do_lit ch (c,a,q) =
9089 | _ => if c=ch then (getChar (a,q))
9090 else do_lit ch (getChar (a,q))
9093 of 0wx00 => (false,(c,a,q))
9094 | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q)))
9095 | 0wx26 (* #"&" *) => (false,(c,a,q))
9096 | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q)))
9097 | 0wx2F (* #"/" *) => let val (c1,a1,q1) = getChar (a,q)
9098 in if c1=0wx3E (* #">" *) then (true,(c1,a1,q1))
9099 else doit (c1,a1,q1)
9101 | 0wx3E (* #">" *) => (false,getChar (a,q))
9102 | 0wx3C (* #"<" *) => (false,(c,a,q))
9103 | _ => doit (getChar (a,q))
9108 fun recoverDecl hasSubset caq =
9110 fun do_lit ch (c,a,q) =
9111 if c=0wx00 then (c,a,q)
9112 else if c=ch then getChar (a,q)
9113 else do_lit ch (getChar(a,q))
9114 fun do_decl (c,a,q) =
9117 | 0wx22 (* #"\""*) => do_decl (do_lit c (getChar (a,q)))
9118 | 0wx27 (* #"'" *) => do_decl (do_lit c (getChar (a,q)))
9119 | 0wx3E (* #">" *) => getChar (a,q)
9120 | _ => do_decl (getChar (a,q))
9121 fun do_subset (c,a,q) =
9124 | 0wx3C (* #"<" *) => do_subset (do_decl (getChar (a,q)))
9125 | 0wx5D (* #"]" *) => getChar (a,q)
9126 | _ => do_subset (getChar (a,q))
9129 of 0wx00 => if isSpecial q then (c,a,q) else doit (getChar (a,q))
9130 | 0wx22 (* #"\""*) => doit (do_lit c (getChar (a,q)))
9131 | 0wx25 (* #"%" *) => if hasSubset then (c,a,q) else doit (getChar (a,q))
9132 | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q)))
9133 | 0wx3C (* #"<" *) => (c,a,q)
9134 | 0wx3E (* #">" *) => getChar (a,q)
9135 | 0wx5B (* #"[" *) => if hasSubset then doit (do_subset (getChar (a,q)))
9136 else doit (getChar (a,q))
9137 | _ => doit (getChar (a,q))
9141 fun useParamEnts() = !O_VALIDATE orelse !O_INCLUDE_PARAM_ENTS
9143 (* stop of ../../Parser/Parse/parseBase.sml *)
9144 (* start of ../../Parser/Parse/parseNames.sml *)
9152 signature ParseNames =
9156 val parseName : UniChar.Char * AppData * State
9157 -> UniChar.Data * (UniChar.Char * AppData * State)
9158 val parseNmtoken : UniChar.Char * AppData * State
9159 -> UniChar.Data * (UniChar.Char * AppData * State)
9161 val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State
9162 -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9163 val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State
9164 -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9167 (*--------------------------------------------------------------------------*)
9168 (* Structure: ParseNames *)
9170 (* Exceptions raised by functions in this structure: *)
9171 (* parseEntName : none *)
9172 (* parseName : NotFound *)
9173 (* parseNmtoken : NotFound *)
9174 (*--------------------------------------------------------------------------*)
9175 functor ParseNames (structure ParseBase : ParseBase)
9179 Errors ParseBase UniClasses
9181 (*--------------------------------------------------------------------*)
9182 (* parse (the remainder of) a name or nmtoken. *)
9184 (* [5] Name ::= (Letter | '_' | ':') (NameChar)* *)
9186 (* raise NotFound if no name/name start character comes first. *)
9188 (* return the name as a list of characters, together with the next *)
9189 (* character and the remaining state. *)
9190 (*--------------------------------------------------------------------*)
9191 fun parseName' (c,a,q) =
9193 then let val (cs,caq1) = parseName'(getChar(a,q))
9197 fun parseName (c,a,q) =
9199 then let val (cs,caq1) = parseName'(getChar(a,q))
9202 else raise NotFound(c,a,q)
9203 fun parseNmtoken (c,a,q) =
9205 then let val (cs,caq1) = parseName'(getChar(a,q))
9208 else raise NotFound(c,a,q)
9210 (*--------------------------------------------------------------------*)
9211 (* parse a name, additionally accumulating its characters in reverse *)
9212 (* order to the first argument. *)
9214 (* raise NotFound if no name/name start character comes first. *)
9215 (*--------------------------------------------------------------------*)
9216 fun parseNameLit cs (c,a,q) =
9217 let fun doit (cs,ns) (c,a,q) =
9218 if isName c then doit (c::cs,c::ns) (getChar(a,q))
9219 else (cs,rev ns,(c,a,q))
9221 if isNms c then doit (c::cs,[c]) (getChar(a,q))
9222 else raise NotFound(c,a,q)
9224 (*--------------------------------------------------------------------*)
9225 (* parse a name, accumulating its reverse in the first arg text. This *)
9226 (* is useful for parsing of entity values, where entity references *)
9227 (* are parsed but bypassed, and must thus be accumulated together *)
9228 (* the other literal text. *)
9230 (* print an error if no name/name start character comes first. *)
9232 (* return a boolean indicating whether a name was found, the reverse *)
9233 (* name as a list of characters, concatenated with the text in the *)
9234 (* first arg, together with the next character and remaining state. *)
9235 (*--------------------------------------------------------------------*)
9236 fun parseEntName (lit,text) (c,a,q) =
9238 fun doit (lit,text) (c,a,q) =
9239 if isName c then doit (c::lit,c::text) (getChar (a,q))
9240 else (true,lit,text,(c,a,q))
9242 if isNms c then doit (c::lit,c::text) (getChar (a,q))
9243 else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expAnEntName,[c])))
9244 in (false,lit,text,(c,a1,q))
9250 (* stop of ../../Parser/Parse/parseNames.sml *)
9251 (* start of ../../Parser/Parse/parseMisc.sml *)
9262 signature ParseMisc =
9264 (*----------------------------------------------------------------------
9267 val parseName : UniChar.Char * AppData * State
9268 -> UniChar.Data * (UniChar.Char * AppData * State)
9269 val parseNmtoken : UniChar.Char * AppData * State
9270 -> UniChar.Data * (UniChar.Char * AppData * State)
9271 val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State
9272 -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9273 val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State
9274 -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9275 ----------------------------------------------------------------------*)
9278 val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
9279 val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
9280 val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State)
9282 val parseSopt : UniChar.Data -> UniChar.Char * AppData * State
9283 -> UniChar.Data * (UniChar.Char * AppData * State)
9284 val parseSmay : UniChar.Data -> UniChar.Char * AppData * State
9285 -> bool * (UniChar.Data * (UniChar.Char * AppData * State))
9287 val skipEq : UniChar.Char * AppData * State
9288 -> UniChar.Char * AppData * State
9289 val parseEq : UniChar.Char * AppData * State
9290 -> UniChar.Data * (UniChar.Char * AppData * State)
9292 val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
9293 val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
9296 (*--------------------------------------------------------------------------*)
9297 (* Structure: ParseMisc *)
9299 (* Exceptions raised by functions in this structure: *)
9301 (* skipSopt : none *)
9302 (* skipSmay : none *)
9303 (* skipEq : SyntaxError *)
9304 (* skipComment : none *)
9305 (* parseComment : none *)
9306 (* parseProcInstr : none *)
9307 (*--------------------------------------------------------------------------*)
9308 functor ParseMisc (structure ParseBase : ParseBase)
9311 structure ParseNames = ParseNames (structure ParseBase = ParseBase)
9314 UniChar Errors ParseNames
9316 (*--------------------------------------------------------------------*)
9317 (* parse a sequence of white space. 2.3: *)
9319 (* [3] S ::= (#x20 | #x9 | #xD | #xA)+ *)
9320 (*--------------------------------------------------------------------*)
9321 (* parse optional white space. *)
9322 (*--------------------------------------------------------------------*)
9323 (* Return type: Char * AppData * State *)
9324 (*--------------------------------------------------------------------*)
9325 fun skipSopt (c,a,q) =
9327 of 0wx09 => skipSopt (getChar (a,q))
9328 | 0wx0A => skipSopt (getChar (a,q))
9329 | 0wx20 => skipSopt (getChar (a,q))
9331 fun parseSopt cs (c,a,q) =
9333 of 0wx09 => parseSopt (c::cs) (getChar (a,q))
9334 | 0wx0A => parseSopt (c::cs) (getChar (a,q))
9335 | 0wx20 => parseSopt (c::cs) (getChar (a,q))
9337 (*--------------------------------------------------------------------*)
9338 (* parse optional white space. *)
9339 (*--------------------------------------------------------------------*)
9340 (* Return type: bool * (Char * AppData * State) *)
9341 (* the bool indicates whether white space was found or not. *)
9342 (*--------------------------------------------------------------------*)
9343 fun skipSmay (c,a,q) =
9345 of 0wx09 => (true,skipSopt (getChar (a,q)))
9346 | 0wx0A => (true,skipSopt (getChar (a,q)))
9347 | 0wx20 => (true,skipSopt (getChar (a,q)))
9348 | _ => (false,(c,a,q))
9349 fun parseSmay cs (c,a,q) =
9351 of 0wx09 => (true,parseSopt (c::cs) (getChar (a,q)))
9352 | 0wx0A => (true,parseSopt (c::cs) (getChar (a,q)))
9353 | 0wx20 => (true,parseSopt (c::cs) (getChar (a,q)))
9354 | _ => (false,(cs,(c,a,q)))
9355 (*--------------------------------------------------------------------*)
9356 (* parse required white space. *)
9357 (*--------------------------------------------------------------------*)
9358 (* print an error if no white space character is found. *)
9359 (*--------------------------------------------------------------------*)
9360 (* Return type: Char * AppData * State *)
9361 (*--------------------------------------------------------------------*)
9364 of 0wx09 => skipSopt (getChar (a,q))
9365 | 0wx0A => skipSopt (getChar (a,q))
9366 | 0wx20 => skipSopt (getChar (a,q))
9367 | _ => (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q)
9369 (*--------------------------------------------------------------------*)
9370 (* parse a "=" together with surrounding white space. Cf. 28: *)
9372 (* [25] Eq ::= S? '=' S? *)
9373 (*--------------------------------------------------------------------*)
9375 (* SyntaxError if no "=" is found. *)
9376 (*--------------------------------------------------------------------*)
9377 (* Return type: Char * AppData * State *)
9378 (*--------------------------------------------------------------------*)
9380 let val (c1,a1,q1) = skipSopt caq
9381 in if c1=0wx3D then skipSopt (getChar (a1,q1))
9382 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expEq,[c1])))
9383 in raise SyntaxError(c1,a2,q1)
9387 let val (cs1,(c1,a1,q1)) = parseSopt nil caq
9389 then let val (cs2,caq2)= parseSopt (c1::cs1) (getChar (a1,q1))
9392 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expEq,[c1])))
9393 in raise SyntaxError(c1,a2,q1)
9397 (*--------------------------------------------------------------------*)
9398 (* parse a comment, the initial "<--" already consumed. cf. 2.5: *)
9400 (* They are not part of the document's character data; an XML *)
9401 (* processor may, but need not, make it possible for an application *)
9402 (* to retrieve the text of comments. For compatibility, the string *)
9403 (* "--" (double-hyphen) must not occur within comments. *)
9405 (* [15] Comment ::= '<!--' ( (Char - '-') *)
9406 (* | ('-' (Char - '-')))* '-->' *)
9407 (*--------------------------------------------------------------------*)
9408 (* print an error and end the comment if an entity end is found. *)
9409 (* print an error if the comment contains "--". *)
9410 (*--------------------------------------------------------------------*)
9411 (* add the comment to the user data. *)
9412 (*--------------------------------------------------------------------*)
9413 (* Return type: Char * AppData * State *)
9414 (*--------------------------------------------------------------------*)
9415 fun parseComment startPos aq =
9417 fun check_end yet (a0,q0) =
9418 let val (c,a,q) = getChar (a0,q0)
9419 in if c=0wx2D (* #"-" *)
9420 then let val (c1,a1,q1) = getChar (a,q)
9421 in if c1=0wx3E (* #">" *)
9422 then let val cs = Data2Vector(rev yet)
9423 val a2 = hookComment(a1,((startPos,getPos q1),cs))
9426 else let val a2 = if not (!O_COMPATIBILITY) then a1
9427 else hookError(a1,(getPos q0,ERR_FORBIDDEN_HERE
9428 (IT_DATA [c,c],LOC_COMMENT)))
9429 in doit (c::c::yet) (c1,a2,q1)
9432 else doit (0wx2D::yet) (c,a,q)
9434 and doit yet (c,a,q) =
9435 if c=0wx2D (* #"-" *) then check_end yet (a,q)
9436 else if c<>0wx00 then doit (c::yet) (getChar (a,q))
9437 else let val err = ERR_ENDED_BY_EE LOC_COMMENT
9438 val a1 = hookError(a,(getPos q,err))
9439 val cs = Data2Vector(rev yet)
9440 val a2 = hookComment(a1,((startPos,getPos q),cs))
9443 in doit nil (getChar aq)
9446 (*--------------------------------------------------------------------*)
9447 (* check whether a name matches "xml
", disregarding case, cf. 2.6: *)
9449 (* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) *)
9451 (* The target names "XML
", "xml
", and so on are reserved for *)
9452 (* standardization in this or future versions of this specification.*)
9453 (*--------------------------------------------------------------------*)
9454 (* print an error if it does match. *)
9455 (*--------------------------------------------------------------------*)
9456 (* Return type: AppData *)
9457 (*--------------------------------------------------------------------*)
9458 fun checkPiTarget (a,q) name =
9460 of [c1,c2,c3] => if ((c1=0wx58 orelse c1=0wx78) andalso
9461 (c2=0wx4D orelse c2=0wx6D) andalso
9462 (c3=0wx4C orelse c3=0wx6C))
9463 then hookError(a,(getPos q,ERR_RESERVED(name,IT_TARGET)))
9466 (*--------------------------------------------------------------------*)
9467 (* parse a processing instruction, the initial "<?
" and target *)
9468 (* already consumed. cf. 2.5: *)
9470 (* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char* )))? '?>'*)
9472 (* The first arg consists of the target and the (reversed) list of *)
9473 (* leading characters of the text that have been looked ahead. *)
9474 (*--------------------------------------------------------------------*)
9475 (* print an error and end the proc. instr. if an entity end is found. *)
9476 (*--------------------------------------------------------------------*)
9477 (* add the processing instruction to the user data. *)
9478 (*--------------------------------------------------------------------*)
9479 (* Return type: Char * AppData * State *)
9480 (*--------------------------------------------------------------------*)
9481 fun parseProcInstr' (startPos,target,txtPos,yetText) caq =
9483 fun doit text (c1,a1,q1) =
9485 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PROC))
9486 in (text,getPos q1,(c1,a2,q1))
9488 | 0wx3F => (* #"?
" *)
9489 let val (c2,a2,q2) = getChar (a1,q1)
9491 of 0wx3E => (* #">" *) (text,getPos q2,getChar(a2,q2))
9492 | _ => doit (c1::text) (c2,a2,q2)
9494 | _ => doit (c1::text) (getChar (a1,q1))
9496 val (cs,endPos,(c2,a2,q2)) = doit yetText caq
9497 val text = Data2Vector(rev cs)
9498 val a3 = hookProcInst(a2,((startPos,endPos),target,txtPos,text))
9502 (*--------------------------------------------------------------------*)
9503 (* parse a processing instruction, the initial "<?
" already read. *)
9505 (* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char* )))? '?>'*)
9506 (*--------------------------------------------------------------------*)
9507 (* print an error and end the proc. instr. if an entity end is found. *)
9508 (* print an error if no target name is found. *)
9509 (* print an error if no whitespace follows the target. *)
9510 (*--------------------------------------------------------------------*)
9511 (* add the processing instruction to the user data. *)
9512 (*--------------------------------------------------------------------*)
9513 (* Return type: Char * AppData * State *)
9514 (*--------------------------------------------------------------------*)
9515 fun parseProcInstr startPos (a,q) =
9517 (* NotFound is handled after the 'in .. end' *)
9518 val (target,(c1,a1,q1)) = parseName (getChar(a,q))
9519 val a1 = checkPiTarget (a1,q) target
9524 val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PROC))
9525 val a3 = hookProcInst(a2,((startPos,getPos q1),target,getPos q1,nullVector))
9528 | 0wx3F => (* #"?
" *)
9529 let val (c2,a2,q2) = getChar (a1,q1)
9531 of 0wx3E => (* #">" *)
9532 let val a3 = hookProcInst(a2,((startPos,getPos q2),target,
9533 getPos q1,nullVector))
9536 | _ => let val a3 = hookError(a2,(getPos q1,ERR_MISSING_WHITE))
9537 in parseProcInstr' (startPos,target,getPos q1,[c1]) (c2,a3,q2)
9540 | _ => let val (hadS,(c2,a2,q2)) = skipSmay (c1,a1,q1)
9541 val a3 = if hadS then a2
9542 else hookError(a2,(getPos q2,ERR_MISSING_WHITE))
9543 in parseProcInstr' (startPos,target,getPos q2,nil) (c2,a3,q2)
9546 handle NotFound(c,a,q) =>
9547 let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expATarget,[c])))
9548 in parseProcInstr' (startPos,nullData,getPos q,nil) (c,a1,q)
9551 (* stop of ../../Parser/Parse/parseMisc.sml *)
9552 (* start of ../../Parser/Parse/parseXml.sml *)
9553 signature ParseXml =
9555 (*----------------------------------------------------------------------
9558 val parseName : UniChar.Char * AppData * State
9559 -> UniChar.Data * (UniChar.Char * AppData * State)
9560 val parseNmtoken : UniChar.Char * AppData * State
9561 -> UniChar.Data * (UniChar.Char * AppData * State)
9562 val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State
9563 -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9564 val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State
9565 -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
9567 val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
9568 val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
9569 val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
9570 val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
9571 val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State)
9572 val parseSopt : UniChar.Data -> UniChar.Char * AppData * State
9573 -> UniChar.Data * (UniChar.Char * AppData * State)
9574 val parseSmay : UniChar.Data -> UniChar.Char * AppData * State
9575 -> bool * (UniChar.Data * (UniChar.Char * AppData * State))
9576 val parseEq : UniChar.Char * AppData * State
9577 -> UniChar.Data * (UniChar.Char * AppData * State)
9578 ----------------------------------------------------------------------*)
9581 val openDocument : Uri.Uri option -> AppData
9582 -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State)
9583 val openSubset : Uri.Uri -> AppData
9584 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
9585 val openExtern : int * bool * Uri.Uri -> AppData * State
9586 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
9589 (*--------------------------------------------------------------------------*)
9590 (* Structure: ParseXml *)
9592 (* Exceptions raised by functions in this structure: *)
9593 (* openDocument : NoSuchFile *)
9594 (* openExtern : none *)
9595 (* openSubset : NoSuchFile *)
9596 (*--------------------------------------------------------------------------*)
9597 functor ParseXml (structure ParseBase : ParseBase)
9600 structure ParseMisc = ParseMisc (structure ParseBase = ParseBase)
9603 Errors UniChar UniClasses UtilString
9606 fun checkVersionNum (a,q) version =
9607 if not (!O_CHECK_VERSION) orelse version="1.0" then a
9608 else hookError(a,(getPos q,ERR_VERSION version))
9610 (*--------------------------------------------------------------------*)
9611 (* parse a version number, the quote character ("'" or '"') passed
as *)
9612 (* first argument
. cf
. 2.8: *)
9614 (* [24] VersionInfo
::= S
'version
' Eq (' VersionNum
' *)
9615 (* |
" VersionNum ") *)
9616 (* [26] VersionNum
::= ([a
-zA
-Z0
-9_
.:] |
'-')+ *)
9618 (* print an error
and end the literal
if an entity
end is found
. *)
9619 (* print an error
if a disallowed character is found
. *)
9621 (* return the version number
as a
string option
, together
with the
*)
9622 (* next character
and state
. *)
9623 (*--------------------------------------------------------------------*)
9624 (* might
raise: none
*)
9625 (*--------------------------------------------------------------------*)
9626 fun parseVersionNum quote aq
=
9628 fun doit
text (c
,a
,q
) =
9629 if c
=quote
then (text
,getChar (a
,q
))
9630 else if isVers c
then doit (c
::text
) (getChar (a
,q
))
9632 then let val a1
= hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_VERSION
))
9635 else let val err
= ERR_FORBIDDEN_HERE(IT_CHAR c
,LOC_VERSION
)
9636 val a1
= hookError(a
,(getPos q
,err
))
9637 in doit
text (getChar (a1
,q
))
9640 val (c1
,a1
,q1
) = getChar aq
9642 val (text
,(c2
,a2
,q2
)) =
9643 if isVers c1
then doit
[c1
] (getChar (a1
,q1
))
9645 then let val a2
= hookError(a1
,(getPos q1
,ERR_EMPTY LOC_VERSION
))
9646 in (nil
,getChar (a2
,q1
))
9649 then let val a2
= hookError(a1
,(getPos q1
,ERR_ENDED_BY_EE LOC_VERSION
))
9650 val a3
= hookError(a2
,(getPos q1
,ERR_EMPTY LOC_VERSION
))
9653 else let val err
= ERR_FORBIDDEN_HERE(IT_CHAR c1
,LOC_VERSION
)
9654 val a2
= hookError(a1
,(getPos q1
,err
))
9655 in doit
nil (getChar (a2
,q1
))
9657 val version
= Latin2String (rev text
)
9658 val a3
= checkVersionNum (a2
,q1
) version
9660 (SOME version
,(c2
,a3
,q2
))
9662 (*--------------------------------------------------------------------*)
9663 (* parse a version info starting after
'version
'. Cf
. 2.8: *)
9665 (* [24] VersionInfo
::= S
'version
' Eq (' VersionNum
' *)
9666 (* |
" VersionNum ") *)
9668 (* print an error
and raise SyntaxState
if no
'=' is found
. *)
9669 (* print an error
and raise SyntaxState
if no quote sign is found
. *)
9671 (* return the version number
as a
string option
, together
with the
*)
9672 (* next char
and the remaining state
. *)
9673 (*--------------------------------------------------------------------*)
9674 (* might
raise: SyntaxState
*)
9675 (*--------------------------------------------------------------------*)
9676 fun parseVersionInfo caq
=
9677 let val (c1
,a1
,q1
) = skipEq caq
9679 of 0wx22 (* '""' *) => parseVersionNum
c1 (a1
,q1
)
9680 |
0wx27 (* "'" *) => parseVersionNum
c1 (a1
,q1
)
9681 | _
=> let val a2
= hookError(a1
,(getPos q1
,ERR_EXPECTED(expLitQuote
,[c1
])))
9682 in raise SyntaxError(c1
,a2
,q1
)
9686 (*--------------------------------------------------------------------*)
9687 (* parse an encoding name
, the quote
character ("'" or
'"') passed as *)
9688 (* first argument. cf. 4.3.3: *)
9690 (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName
'"' *)
9691 (* |"'" EncName "'") *)
9693 (* [81] EncName ::= [A-Za-z] /* Encoding name *)
9694 (* ([A-Za-z0-9._] | '-')* contains only Latin *)
9697 (* print an error and end the literal if an entity end is found. *)
9698 (* print an error if a disallowed character is found. *)
9700 (* return the encoding name as a string option, together with the *)
9701 (* next character and state. *)
9702 (*--------------------------------------------------------------------*)
9703 (* might raise: none *)
9704 (*--------------------------------------------------------------------*)
9705 fun parseEncName quote aq =
9707 fun doit text (c,a,q) =
9708 if c=quote then (text,getChar (a,q))
9709 else if isEnc c then doit (c::text) (getChar (a,q))
9711 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_ENCODING))
9714 else let val err = ERR_FORBIDDEN_HERE(IT_CHAR c,LOC_ENCODING)
9715 val a1 = hookError(a,(getPos q,err))
9716 in doit text (getChar (a,q))
9719 val (c1,a1,q1) = getChar aq
9722 if isEncS c1 then doit [c1] (getChar (a1,q1))
9724 then let val a2 = hookError(a1,(getPos q1,ERR_EMPTY LOC_ENCODING))
9725 in (nil,getChar (a2,q1))
9728 then let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_ENCODING))
9729 val a3 = hookError(a2,(getPos q1,ERR_EMPTY LOC_ENCODING))
9732 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expStartEnc,[c1])))
9733 in doit nil (getChar (a2,q1))
9736 val enc = toUpperString (Latin2String (rev text))
9740 (*--------------------------------------------------------------------*)
9741 (* parse an encoding decl starting after 'encoding'. Cf. 4.3.3: *)
9744 (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName
'"' *)
9745 (* |"'" EncName "'") *)
9747 (* print an error and raise SyntaxState if no '=' is found. *)
9748 (* print an error and raise SyntaxState if no quote sign is found. *)
9750 (* return the encoding name as a string option, together with the *)
9751 (* next char and the remaining state. *)
9752 (*--------------------------------------------------------------------*)
9753 (* might raise: SyntaxState *)
9754 (*--------------------------------------------------------------------*)
9755 fun parseEncodingDecl caq =
9756 let val (c1,a1,q1) = skipEq caq
9758 of 0wx22 (* '""' *) => parseEncName c1 (a1,q1)
9759 | 0wx27 (* "'" *) => parseEncName c1 (a1,q1)
9760 | _ => let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expLitQuote,[c1])))
9761 in raise SyntaxError(c1,a2,q1)
9765 (*--------------------------------------------------------------------*)
9766 (* parse a standalone declaration starting after 'standalone'. *)
9769 (* [32] SDDecl ::= S 'standalone' Eq [ VC: Standalone *)
9770 (* ( ("'" ('yes' | 'no') "'") Document *)
9771 (* | ('"' ('yes
' |
'no
') '"')) Declaration ] *)
9773 (* print an error and raise SyntaxState if no '=' is found. *)
9774 (* print an error and raise SyntaxState if no literal is found. *)
9775 (* print an error and end the literal if an entity end is found. *)
9776 (* print an error if the literal is neither 'yes' nor 'no'. *)
9778 (* return the standalone status as a boolean option, together with *)
9779 (* the next character and the remaining state. *)
9780 (*--------------------------------------------------------------------*)
9781 (* might raise: SyntaxState *)
9782 (*--------------------------------------------------------------------*)
9783 fun parseStandaloneDecl caq0 =
9785 val (quote,a,q) = skipEq caq0
9787 fun doit text (c,a,q) =
9788 if c=quote then (text,getChar (a,q))
9789 else if c<>0wx0 then doit (c::text) (getChar (a,q))
9790 else let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_LITERAL))
9794 val caq1 as (_,_,q1) =
9796 of 0wx22 (* '""' *) => (getChar (a,q))
9797 | 0wx27 (* "'" *) => (getChar (a,q))
9798 | _ => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expLitQuote,[quote])))
9799 in raise SyntaxError(quote,a1,q)
9801 val (text,caq2) = doit nil caq1
9804 of [0wx73,0wx65,0wx79] (* reversed "yes
" *) => (SOME true,caq2)
9805 | [0wx6f,0wx6e] (* reversed "no
" *) => (SOME false,caq2)
9806 | revd => let val (c2,a2,q2) = caq2
9807 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expNoYes,revd)))
9808 in (NONE,(c2,a3,q2))
9812 (*--------------------------------------------------------------------*)
9813 (* parse an xml declaration starting after 'xml ' (i.e. the first *)
9814 (* white space character is already consumed). Cf. 2.8: *)
9816 (* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S?'?>'*)
9818 (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *)
9819 (* | " VersionNum
") *)
9821 (* [32] SDDecl ::= S 'standalone' Eq [ VC: Standalone *)
9822 (* ( ("'" ('yes' | 'no') "'") Document *)
9823 (* | ('"' ('yes
' |
'no
') '"')) Declaration ] *)
9825 (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName
'"' *)
9826 (* |"'" EncName "'") *)
9828 (* default version, encoding and standalone status to NONE. *)
9830 (* print an error if no leading white space is found. *)
9831 (* print an error whenever a wrong name is encountered. *)
9832 (* print an Error if no VersionInfo is found. *)
9833 (* print an Error if no '?>' is found at the end. *)
9834 (* print an error and raise SyntaxState if no '=' or no literal is *)
9835 (* found in VersionInfo, EncodingDecl or SDDecl. *)
9836 (* print an error if a literal does not have a correct value. *)
9838 (* return the corresponding XmlDecl option and the next char & state. *)
9839 (*--------------------------------------------------------------------*)
9840 (* might raise: SyntaxState *)
9841 (*--------------------------------------------------------------------*)
9842 fun parseXmlDecl auto caq =
9844 (*-----------------------------------------------------------------*)
9845 (* skip the '?>' at the end of the xml declaration. *)
9847 (* print an error and raise SyntaxState if no '?>' is found. *)
9849 (* return the info passed as first arg, and the next char & state. *)
9850 (*-----------------------------------------------------------------*)
9851 (* might raise: SyntaxState *)
9852 (*-----------------------------------------------------------------*)
9853 fun skipXmlDeclEnd enc res (c,a,q) =
9854 if c=0wx3F (* "#?
" *)
9855 then let val (c1,a1,q1) = getChar (a,q)
9856 in if c1=0wx3E (* #">" *) then (enc,SOME res,getChar (a1,q1))
9857 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expGt,[c1])))
9858 in raise SyntaxError (c1,a2,q1)
9861 else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expProcEnd,[c])))
9862 in raise SyntaxError (c,a1,q)
9864 (*-----------------------------------------------------------------*)
9865 (* parse the remainder after the keyword 'standalone', the version *)
9866 (* and encoding already parsed and given in the first arg. *)
9868 (* pass the version,encoding and sd status to skipXmlDeclEnd *)
9869 (*-----------------------------------------------------------------*)
9870 (* might raise: SyntaxState *)
9871 (*-----------------------------------------------------------------*)
9872 fun parseXmlDeclAfterS enc (v,e) caq =
9874 val (alone,caq1) = parseStandaloneDecl caq
9875 val caq2 = skipSopt caq1
9876 in skipXmlDeclEnd enc (v,e,alone) caq2
9878 (*-----------------------------------------------------------------*)
9879 (* parse the remainder after the encoding declaration, the version *)
9880 (* and encoding already parsed and given in the first arg. *)
9882 (* print an error if a name other than 'standalone' is found. *)
9884 (* pass the version and encoding to parseXmlDeclAfterS. *)
9885 (*-----------------------------------------------------------------*)
9886 (* might raise: SyntaxState *)
9887 (*-----------------------------------------------------------------*)
9888 fun parseXmlDeclBeforeS enc (v,e) caq =
9890 val (hadS,caq1 as (_,_,q1)) = skipSmay caq
9891 val (name,(c2,a2,q2)) = parseName caq1 (* NotFound handled below *)
9892 val a3 = if hadS then a2
9893 else hookError(a2,(getPos q1,ERR_MISSING_WHITE))
9895 of [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] =>
9896 (* "standalone
" *) parseXmlDeclAfterS enc (v,e) (c2,a3,q2)
9897 | _ => let val a4 = hookError(a3,(getPos q1,ERR_EXPECTED(expStandOpt,name)))
9898 in parseXmlDeclAfterS enc (v,e) (c2,a4,q2)
9901 handle NotFound caq => (* exception raised by parseName *)
9902 skipXmlDeclEnd enc (v,e,NONE) caq
9903 (*-----------------------------------------------------------------*)
9904 (* parse the remainder after the keyword 'encoding', the version *)
9905 (* already parsed and given in the first arg. *)
9907 (* pass the version and encoding and to parseXmlDeclBeforeS *)
9908 (*-----------------------------------------------------------------*)
9909 (* might raise: SyntaxState *)
9910 (*-----------------------------------------------------------------*)
9911 fun parseXmlDeclAfterE ver caq =
9913 val (enc,(c1,a1,q1)) = parseEncodingDecl caq
9914 val (a2,q2,enc1) = changeAuto(a1,q1,enc)
9916 parseXmlDeclBeforeS enc1 (ver,SOME enc) (c1,a2,q2)
9918 (*-----------------------------------------------------------------*)
9919 (* parse the remainder after the version info, the version already *)
9920 (* parsed and given in the first arg. *)
9922 (* print an error if a name other than 'encoding' or 'standalone' *)
9925 (* pass obtained/default values to parseXmlDeclAfter[E|S] or to *)
9926 (* skipXmlDeclEnd. *)
9927 (*-----------------------------------------------------------------*)
9928 (* might raise: SyntaxState *)
9929 (*-----------------------------------------------------------------*)
9930 fun parseXmlDeclBeforeE ver caq =
9932 val (hadS,caq1 as (_,_,q1)) = skipSmay caq
9933 val (name,(c2,a2,q2)) = parseName caq1 (* NotFound handled below *)
9934 val a3 = if hadS then a2
9935 else hookError(a2,(getPos q1,ERR_MISSING_WHITE))
9938 of [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] =>
9939 (* "encoding
" *) parseXmlDeclAfterE ver (c2,a3,q2)
9940 | [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] =>
9941 (* "standalone
" *) parseXmlDeclAfterS auto (ver,NONE) (c2,a3,q2)
9942 | _ => let val a4 = hookError(a3,(getPos q1,ERR_EXPECTED(expEncStand,name)))
9943 in parseXmlDeclAfterE ver (c2,a4,q2)
9946 handle NotFound caq => (* exception raised by parseName *)
9947 skipXmlDeclEnd auto (ver,NONE,NONE) caq
9949 (*-----------------------------------------------------------------*)
9950 (* do the main work. if the first name is not 'version' then it *)
9951 (* might be 'encoding' or 'standalone'. Then take the default *)
9952 (* NONE for version and - if needed - encoding and call the *)
9953 (* appropriate function. otherwise assume a typo and parse the *)
9954 (* version number, then call parseXmlDeclBeforeE. if no name is *)
9955 (* found at all, proceed with skipXmlDeclEnd. *)
9957 (* print an error and raise SyntaxState if an entity end is found. *)
9958 (* print an error and raise SyntaxState if appropriate. *)
9959 (* print an error if a name other than 'version' is found. *)
9960 (*-----------------------------------------------------------------*)
9961 (* might raise: SyntaxState *)
9962 (*-----------------------------------------------------------------*)
9963 val caq1 as (_,_,q1) = skipSopt caq
9964 val (name,(caq2 as (c2,a2,q2))) = parseName caq1
9965 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expVersion,[c])
9966 val a1 = hookError(a,(getPos q,err))
9967 in raise SyntaxError (c,a1,q)
9970 if name=[0wx76,0wx65,0wx72,0wx73,0wx69,0wx6f,0wx6e] (* "version
" *)
9971 then let val (ver,caq3) = parseVersionInfo caq2
9972 in parseXmlDeclBeforeE ver caq3
9974 else let val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expVersion,name)))
9976 of [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] =>
9977 (* "encoding
" *) parseXmlDeclAfterE NONE (c2,a3,q2)
9978 | [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] =>
9979 (* "standalone
" *) parseXmlDeclAfterS auto (NONE,NONE) (c2,a3,q2)
9980 | _ => let val (ver,caq3) = parseVersionInfo (c2,a3,q2)
9981 in parseXmlDeclBeforeE ver caq3
9985 (*----------------------------------------------------------------*)
9986 (* catch entity end exceptions raised by subfunctions, print an *)
9987 (* error and re-raise the exception. *)
9988 (*----------------------------------------------------------------*)
9989 handle SyntaxError(c,a,q) =>
9990 let val err = if c=0wx0 then ERR_ENDED_BY_EE LOC_XML_DECL
9991 else ERR_CANT_PARSE LOC_XML_DECL
9992 val a1 = hookError(a,(getPos q,err))
9993 in (auto,NONE,recoverXml(c,a1,q))
9996 (*--------------------------------------------------------------------*)
9997 (* parse a text declaration starting after 'xml ' (i.e. the first *)
9998 (* white space character is already consumed). Cf. 2.8: *)
10000 (* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' *)
10002 (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *)
10003 (* | " VersionNum
") *)
10005 (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName
'"' *)
10006 (* |"'" EncName "'") *)
10008 (* default version and encoding to NONE. *)
10010 (* print an error if no leading white space is found. *)
10011 (* print an error whenever a wrong name is encountered. *)
10012 (* print an Error if no EncodingDecl is found. *)
10013 (* print an Error if '?>' is found at the end. *)
10014 (* print an error and raise SyntaxState if no '=' or no literal is *)
10015 (* found in VersionInfo or EncodingDecl. *)
10016 (* print an error if a literal does not have a correct value. *)
10018 (* return the corresponding TextDecl option and the next char & state.*)
10019 (*--------------------------------------------------------------------*)
10020 (* might raise: SyntaxState *)
10021 (*--------------------------------------------------------------------*)
10022 fun parseTextDecl auto caq =
10024 (*-----------------------------------------------------------------*)
10025 (* skip the '?>' at the end of the text declaration. *)
10027 (* print an error and raise SyntaxState if no '?>' is found. *)
10029 (* return the info passed as first arg, and the next char & state. *)
10030 (*-----------------------------------------------------------------*)
10031 (* might raise: SyntaxState *)
10032 (*-----------------------------------------------------------------*)
10033 fun skipTextDeclEnd enc res (c,a,q) =
10034 if c=0wx3F (* "#?
" *)
10035 then let val (c1,a1,q1) = getChar (a,q)
10036 in if c1=0wx3E (* #">" *) then (enc,SOME res,getChar (a1,q1))
10037 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expGt,[c1])))
10038 in raise SyntaxError(c1,a2,q1)
10041 else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expProcEnd,[c])))
10042 in raise SyntaxError(c,a1,q)
10044 (*-----------------------------------------------------------------*)
10045 (* parse the remainder after the keyword 'encoding', the version *)
10046 (* already parsed and given in the first arg. *)
10048 (* pass the version and encoding and to skipTextDeclEnd. *)
10049 (*-----------------------------------------------------------------*)
10050 (* might raise: SyntaxState *)
10051 (*-----------------------------------------------------------------*)
10052 fun parseTextDeclAfterE ver caq =
10054 val (enc,(c1,a1,q1)) = parseEncodingDecl caq
10055 val (a2,q2,enc1) = changeAuto(a1,q1,enc)
10056 val caq3 = skipSopt (c1,a2,q2)
10057 in skipTextDeclEnd enc1 (ver,SOME enc) caq3
10059 (*-----------------------------------------------------------------*)
10060 (* parse the remainder after the version info, the version given *)
10061 (* as first argument. *)
10063 (* print an error and raise SyntaxState is no name is found. *)
10064 (* print an error if a name other than 'encoding' is found. *)
10066 (* pass obtained/default values to parseTextDeclAfterE. *)
10067 (*-----------------------------------------------------------------*)
10068 (* might raise: SyntaxState *)
10069 (*-----------------------------------------------------------------*)
10070 fun parseTextDeclBeforeE ver caq =
10072 val caq1 as (_,_,q1) = skipS caq
10073 val (name,caq2) = parseName caq1
10074 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expEncoding,[c])
10075 val a1 = hookError(a,(getPos q,err))
10076 in raise SyntaxError (c,a1,q)
10079 if name=[0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] (* "encoding
" *)
10080 then parseTextDeclAfterE ver caq2
10081 else let val (c2,a2,q2) = caq2
10082 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expEncoding,name)))
10083 in parseTextDeclAfterE ver (c2,a3,q2)
10086 (*-----------------------------------------------------------------*)
10087 (* do the main work. if the first name is neither 'version' nor *)
10088 (* 'encoding' then assume typo of 'version'. Then parse the *)
10089 (* version number, call parseTextDeclBeforeE. if no name is found *)
10090 (* at all, proceed with skipTextDeclEnd. *)
10092 (* print an error and raise SyntaxState if appropriate. *)
10093 (* print an error if a name other than 'version' or 'encoding' is *)
10095 (*-----------------------------------------------------------------*)
10096 (* might raise: SyntaxState *)
10097 (*-----------------------------------------------------------------*)
10098 val caq1 as (_,_,q1) = skipSopt caq
10099 val (name,caq2) = parseName caq1
10100 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expEncVers,[c])
10101 val a1 = hookError(a,(getPos q,err))
10102 in raise SyntaxError(c,a1,q)
10105 of [0wx76,0wx65,0wx72,0wx73,0wx69,0wx6f,0wx6e] => (* "version
" *)
10106 let val (ver,caq3) = parseVersionInfo caq2
10107 in parseTextDeclBeforeE ver caq3
10109 | [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] => (* "encoding
" *)
10110 parseTextDeclAfterE NONE caq2
10111 | _ => let val (c2,a2,q2) = caq2
10112 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expEncVers,name)))
10113 val (ver,caq3) = parseVersionInfo (c2,a3,q2)
10114 in parseTextDeclBeforeE ver caq3
10117 (*----------------------------------------------------------------*)
10118 (* catch entity end exceptions raised by subfunctions, print an *)
10119 (* error and re-raise the exception. *)
10120 (*----------------------------------------------------------------*)
10121 handle SyntaxError(c,a,q) =>
10122 let val err = if c=0wx0 then ERR_ENDED_BY_EE LOC_TEXT_DECL
10123 else ERR_CANT_PARSE LOC_TEXT_DECL
10124 val a1 = hookError(a,(getPos q,err))
10125 in (auto,NONE,recoverXml(c,a1,q))
10128 (*--------------------------------------------------------------------*)
10129 (* check for the string "<?xml
" followed by a white space. The first *)
10130 (* paramter seen is a prefix of that string already consued. If the *)
10131 (* complete string is not found, unget all characters seen, including *)
10132 (* those from parameter seen. *)
10134 (* return a boolean indicating wheher the string was found, together *)
10135 (* with the remaining app data and state. *)
10136 (*--------------------------------------------------------------------*)
10137 (* might raise: none *)
10138 (*--------------------------------------------------------------------*)
10139 fun checkForXml aq =
10141 val unseen = [0wx3c,0wx3f,0wx78,0wx6d,0wx6c]
10142 fun doit (seen,unseen) (a,q) =
10143 let val (c1,a1,q1) = getChar (a,q)
10145 of nil => if isS c1 then (true,(a1,q1))
10146 else (false,(a1,ungetChars(q1,rev(c1::seen))))
10147 | c::cs => if c1=c then doit (c1::seen,cs) (a1,q1)
10148 else (false,(a1,ungetChars(q1,rev(c1::seen))))
10150 in doit (nil,unseen) aq
10153 (*--------------------------------------------------------------------*)
10154 (* consume the text/xml declaration. The first parameter is a pair of *)
10155 (* the function that parses the declaration and a boolean indicating *)
10156 (* whether a warning should we produced if the declaration is missing.*)
10157 (* The second parameter is a pair (seen,auto), where auto is the *)
10158 (* auto-detected encoding, and seen is SOME cs, if auto-detection *)
10159 (* found some initial characters cs of the string "<?xml
", otherwise *)
10160 (* NONE. A text/xml declaration can only be present if seen is SOME. *)
10162 (* Check whether the declaration is present with checkForXml. If yes, *)
10163 (* parse it, if no, possibly print a warning. *)
10165 (* Return the encoding of the entity, the optional declaration and *)
10166 (* the next char, app data and state. *)
10167 (*--------------------------------------------------------------------*)
10168 fun findTextDecl (parseDecl,warn) auto aq =
10169 let val (hasXml,aq1) = checkForXml aq
10170 in if hasXml then parseDecl auto (getChar aq1)
10171 else let val (a1,q1) = aq1
10172 val (a2,q2) = commitAuto(a1,q1)
10173 val a3 = if warn then hookWarning(a2,(getPos q2,WARN_NO_XML_DECL)) else a2
10174 in (auto,NONE,getChar(a3,q2))
10178 (*--------------------------------------------------------------------*)
10179 (* open an external entity; consume its text declaration if present. *)
10182 (* [78] extParsedEnt ::= TextDecl? content *)
10183 (* [79] extPE ::= TextDecl? extSubsetDecl *)
10185 (* handle NoSuchFile by printing an error and opening an empty dummy *)
10186 (* entity (some functions might rely on the entity's entity end). *)
10188 (* return the optional text declaration and the resulting first char *)
10189 (* together with the new state. *)
10190 (*--------------------------------------------------------------------*)
10191 (* might raise: none *)
10192 (*--------------------------------------------------------------------*)
10193 fun openExtern (id,isParam,uri) (a,q) =
10194 let val (q1,auto) = pushExtern (q,id,isParam,uri)
10195 in findTextDecl (parseTextDecl,false) auto (a,q1)
10197 handle NoSuchFile fmsg => raise CantOpenFile(fmsg,a)
10199 (*--------------------------------------------------------------------*)
10200 (* open the external subset; consume its text declaration if present. *)
10203 (* [30] extSubset ::= TextDecl? extSubsetDecl *)
10205 (* return the optional text declaration and the first char and state. *)
10206 (*--------------------------------------------------------------------*)
10207 (* might raise: NoSuchFile *)
10208 (*--------------------------------------------------------------------*)
10209 fun openSubset uri a =
10210 let val (q,auto) = pushSpecial (EXT_SUBSET,SOME uri)
10211 in findTextDecl (parseTextDecl,false) auto (a,q)
10213 handle NoSuchFile fmsg => raise CantOpenFile(fmsg,a)
10215 (*--------------------------------------------------------------------*)
10216 (* open the document entity; consume its xml declaration if present. *)
10219 (* [1] document ::= prolog element Misc* *)
10220 (* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc* )? *)
10222 (* return the optional xml declaration and the first char and state. *)
10223 (*--------------------------------------------------------------------*)
10224 (* might raise: NoSuchFile *)
10225 (*--------------------------------------------------------------------*)
10226 fun openDocument uri a =
10227 let val (q,auto) = pushSpecial (DOC_ENTITY,uri)
10228 in findTextDecl (parseXmlDecl,!O_WARN_XML_DECL) auto (a,q)
10230 handle NoSuchFile fmsg => raise CantOpenFile(fmsg,a)
10232 (* stop of ../../Parser/Parse/parseXml.sml *)
10233 (* start of ../../Parser/Parse/parseRefs.sml *)
10234 signature ParseRefs =
10236 (*----------------------------------------------------------------------
10239 val parseName : UniChar.Char * AppData * State
10240 -> UniChar.Data * (UniChar.Char * AppData * State)
10241 val parseNmtoken : UniChar.Char * AppData * State
10242 -> UniChar.Data * (UniChar.Char * AppData * State)
10243 val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State
10244 -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State)
10246 val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
10247 val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
10248 val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
10249 val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
10250 val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State)
10251 val parseSopt : UniChar.Data -> UniChar.Char * AppData * State
10252 -> UniChar.Data * (UniChar.Char * AppData * State)
10253 val parseSmay : UniChar.Data -> UniChar.Char * AppData * State
10254 -> bool * (UniChar.Data * (UniChar.Char * AppData * State))
10255 val parseEq : UniChar.Char * AppData * State
10256 -> UniChar.Data * (UniChar.Char * AppData * State)
10258 val openExtern : int * Uri.Uri -> AppData * State
10259 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
10260 val openDocument : Uri.Uri option -> AppData
10261 -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State)
10262 val openSubset : Uri.Uri -> AppData
10263 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
10264 ----------------------------------------------------------------------*)
10267 val parseCharRef : AppData * State -> UniChar.Char * AppData * State
10268 val parseGenRef : Dtd -> UniChar.Char * AppData * State
10269 -> (int * Base.GenEntity) * (AppData * State)
10270 val parseParRef : Dtd -> UniChar.Char * AppData * State
10271 -> (int * Base.ParEntity) * (AppData * State)
10273 val parseCharRefLit : UniChar.Data -> AppData * State
10274 -> UniChar.Data * (UniChar.Char * AppData * State)
10275 val parseGenRefLit : Dtd -> UniChar.Data -> UniChar.Char * AppData * State
10276 -> UniChar.Data * ((int * Base.GenEntity) * (AppData * State))
10277 val parseParRefLit : Dtd -> UniChar.Data -> UniChar.Char * AppData * State
10278 -> UniChar.Data * ((int * Base.ParEntity) * (AppData * State))
10280 val skipCharRef : AppData * State -> (UniChar.Char * AppData * State)
10281 val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State)
10283 val skipPS : Dtd -> UniChar.Char * AppData * State
10284 -> UniChar.Char * AppData * State
10285 val skipPSopt : Dtd -> UniChar.Char * AppData * State
10286 -> UniChar.Char * AppData * State
10287 val skipPSmay : Dtd -> UniChar.Char * AppData * State
10288 -> bool * (UniChar.Char * AppData * State)
10289 val skipPSdec : Dtd -> UniChar.Char * AppData * State
10290 -> bool * (UniChar.Char * AppData * State)
10293 (*--------------------------------------------------------------------------*)
10294 (* Structure: ParseRefs *)
10296 (* Exceptions raised by functions in this structure: *)
10297 (* parseCharRef : NoSuchChar SyntaxError *)
10298 (* parseGenRef : NoSuchEntity SyntaxState *)
10299 (* parseParRef : NoSuchEntity SyntaxState *)
10300 (* skipCharRef : none *)
10301 (* skipPS : none *)
10302 (* skipPSdec : none *)
10303 (* skipPSmay : none *)
10304 (* skipPSopt : none *)
10305 (* skipReference : none *)
10306 (*--------------------------------------------------------------------------*)
10307 functor ParseRefs (structure ParseBase : ParseBase)
10310 structure ParseXml = ParseXml (structure ParseBase = ParseBase)
10313 Base Errors UniClasses
10316 (*--------------------------------------------------------------------*)
10317 (* parse a character reference, the "&#
" already read. See 4.1: *)
10319 (* [66] CharRef ::= '&#' [0-9]+ ';' *)
10320 (* | '&#x' [0-9a-fA-F]+ ';' [ WFC: Legal Character ] *)
10322 (* Well-Formedness Constraint: Legal Character *)
10323 (* Characters referred to using character references must match the *)
10324 (* production for Char. *)
10326 (* If the character reference begins with "&#x
", the digits and *)
10327 (* letters up to the terminating ; provide a hexadecimal *)
10328 (* representation of the character's code point in ISO/IEC 10646. *)
10329 (* If it begins just with "&#
", the digits up to the terminating ; *)
10330 (* provide a decimal representation of the character's code point. *)
10332 (* raise SyntaxError if no number or x plus hexnum is found, or if no *)
10333 (* semicolon follows it. *)
10334 (* raise NoSuchChar if the reference is to a non-XML character. *)
10336 (* return the character referred to, and the remaining state. *)
10337 (*--------------------------------------------------------------------*)
10338 fun parseCharRef aq =
10340 (*--------------------------------------------------------------*)
10341 (* parse a (hexa)decimal number, accumulating the value in the *)
10342 (* first parameter. *)
10344 (* return the numbers value as a Char. *)
10345 (*--------------------------------------------------------------*)
10346 fun do_hex_n yet (c,a,q) =
10348 of NONE => (yet,(c,a,q))
10349 | SOME v => do_hex_n (0wx10*yet+v) (getChar (a,q))
10350 fun do_dec_n yet (c,a,q) =
10352 of NONE => (yet,(c,a,q))
10353 | SOME v => do_dec_n (0wx0A*yet+v) (getChar (a,q))
10354 (*--------------------------------------------------------------*)
10355 (* Parse a (hexa)decimal number of at least one digit. *)
10357 (* raise SyntaxError if no hexdigit is found first. *)
10359 (* return the numbers value as a Char. *)
10360 (*--------------------------------------------------------------*)
10361 fun do_hex_1 (c,a,q) =
10363 of SOME v => do_hex_n v (getChar (a,q))
10364 | NONE => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expHexDigit,[c])))
10365 in raise SyntaxError(c,a1,q)
10367 (*--------------------------------------------------------------*)
10368 (* Parse a decimal number of at least one digit, or a hexnumber *)
10369 (* if the first character is 'x'. *)
10371 (* raise SyntaxError if neither 'x' nor digit is found first. *)
10373 (* return the number's value as a Char. *)
10374 (*--------------------------------------------------------------*)
10375 fun do_dec_1 (c,a,q) =
10377 of SOME v => do_dec_n v (getChar (a,q))
10378 | NONE => if c=0wx78 (* #"x
" *)
10379 then do_hex_1 (getChar (a,q))
10380 else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expDigitX,[c])))
10381 in raise SyntaxError(c,a1,q)
10384 val (ch,(c1,a1,q1)) = do_dec_1 (getChar aq)
10386 val _ = if c1=0wx3B then ()
10387 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1])))
10388 in raise SyntaxError(c1,a2,q1)
10391 val _ = if isXml ch then ()
10392 else let val a2 = hookError(a1,(getPos q1,ERR_NON_XML_CHARREF ch))
10393 in raise NoSuchChar (a2,q1)
10397 fun parseCharRefLit cs aq =
10399 (*--------------------------------------------------------------*)
10400 (* parse a (hexa)decimal number, accumulating the value in the *)
10401 (* first parameter. *)
10403 (* return the numbers value as a Char. *)
10404 (*--------------------------------------------------------------*)
10405 fun do_hex_n (cs,yet) (c,a,q) =
10407 of NONE => (cs,yet,(c,a,q))
10408 | SOME v => do_hex_n (c::cs,0wx10*yet+v) (getChar (a,q))
10409 fun do_dec_n (cs,yet) (c,a,q) =
10411 of NONE => (cs,yet,(c,a,q))
10412 | SOME v => do_dec_n (c::cs,0wx0A*yet+v) (getChar (a,q))
10413 (*--------------------------------------------------------------*)
10414 (* Parse a (hexa)decimal number of at least one digit. *)
10416 (* raise SyntaxError if no hexdigit is found first. *)
10418 (* return the numbers value as a Char. *)
10419 (*--------------------------------------------------------------*)
10420 fun do_hex_1 cs (c,a,q) =
10422 of SOME v => do_hex_n (c::cs,v) (getChar (a,q))
10423 | NONE => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expHexDigit,[c])))
10424 in raise SyntaxError(c,a1,q)
10426 (*--------------------------------------------------------------*)
10427 (* Parse a decimal number of at least one digit, or a hexnumber *)
10428 (* if the first character is 'x'. *)
10430 (* raise SyntaxError if neither 'x' nor digit is found first. *)
10432 (* return the number's value as a Char. *)
10433 (*--------------------------------------------------------------*)
10434 fun do_dec_1 cs (c,a,q) =
10436 of SOME v => do_dec_n (c::cs,v) (getChar (a,q))
10437 | NONE => if c=0wx78 (* #"x
" *)
10438 then do_hex_1 (c::cs) (getChar (a,q))
10439 else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expDigitX,[c])))
10440 in raise SyntaxError(c,a1,q)
10443 val (cs1,ch,(c1,a1,q1)) = do_dec_1 cs (getChar aq)
10445 val _ = if c1=0wx3B then ()
10446 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1])))
10447 in raise SyntaxError(c1,a2,q1)
10450 val _ = if isXml ch then ()
10451 else let val a2 = hookError(a1,(getPos q1,ERR_NON_XML_CHARREF ch))
10452 in raise NoSuchChar (a2,q1)
10454 in (c1::cs1,(ch,a1,q1))
10457 (*--------------------------------------------------------------------*)
10458 (* parse a general entity reference, the "&" already read. See 4.1: *)
10460 (* [68] EntityRef ::= '&' Name ';' [ WFC: Entity Declared ] *)
10461 (* [ VC: Entity Declared ] *)
10462 (* [ WFC: Parsed Entity ] *)
10463 (* [ WFC: No Recursion ] *)
10465 (* Well-Formedness Constraint: Entity Declared *)
10466 (* In a document without any DTD, a document with only an internal *)
10467 (* DTD subset which contains no parameter entity references, or a *)
10468 (* document with "standalone
='yes
'", the Name given in the entity *)
10469 (* reference must match that in an entity declaration, ... *)
10470 (* ... the declaration of a general entity must precede any *)
10471 (* reference to it which appears in a default value in an *)
10472 (* attribute-list declaration. *)
10474 (* Validity Constraint: Entity Declared *)
10475 (* In a document with an external subset or external parameter *)
10476 (* entities with "standalone
='no
'", the Name given in the entity *)
10477 (* reference must match that in an entity declaration. ... *)
10478 (* ... the declaration of a general entity must precede any *)
10479 (* reference to it which appears in a default value in an *)
10480 (* attribute-list declaration. *)
10482 (* Thus: in both cases it is an error if the entity is not declared. *)
10483 (* The only difference is the impact on well-formednes/validity. *)
10485 (* There are three contexts in which a general entity reference can *)
10486 (* appear: in content, in attribute value, in entity value. This *)
10487 (* passage states that it need not be declared prior to a reference *)
10488 (* in an entity value. But in this context, it is bypassed and not *)
10489 (* included, i.e., it need not be recognized. *)
10491 (* Well-Formedness Constraint: Parsed Entity *)
10492 (* An entity reference must not contain the name of an unparsed *)
10493 (* entity. Unparsed entities may be referred to only in attribute *)
10496 (* Well-Formedness Constraint: No Recursion *)
10497 (* A parsed entity must not contain a recursive reference to *)
10498 (* itself, either directly or indirectly. *)
10500 (* print an error and raise SyntaxState if no name is found, or if no *)
10501 (* semicolon follows it. *)
10502 (* print an error and return GE_NULL if the reference is to an *)
10503 (* undeclared, unparsed or open entity. *)
10505 (* return the entity referred to, and the remaining state. *)
10506 (*--------------------------------------------------------------------*)
10507 fun parseGenRef dtd (caq as (_,_,q)) =
10509 val (name,(c1,a1,q1)) = parseName caq
10510 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c])
10511 val a1 = hookError(a,(getPos q,err))
10512 in raise SyntaxError(c,a1,q)
10514 val _ = if c1=0wx3B then ()
10515 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1])))
10516 in raise SyntaxError(c1,a2,q1)
10519 val idx = GenEnt2Index dtd name
10520 val (ent,ext) = getGenEnt dtd idx
10522 val _ = (* check whether entity is undeclared/unparsed/open *)
10525 if entitiesWellformed dtd
10526 then let val err = ERR_UNDEC_ENTITY(ENT_GENERAL,name)
10527 val a2 = hookError(a1,(getPos q,err))
10528 in raise NoSuchEntity (a2,q1)
10530 else if useParamEnts()
10531 then let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE)
10532 val a2 = hookError(a1,(getPos q,err))
10533 in raise NoSuchEntity (a2,q1)
10536 | GE_UNPARSED _ => let val err = ERR_ILLEGAL_ENTITY(ENT_UNPARSED,name,LOC_NONE)
10537 val a2 = hookError(a1,(getPos q,err))
10538 in raise NoSuchEntity (a2,q1)
10540 | _ => if isOpen(idx,false,q1)
10541 then let val err = ERR_RECURSIVE_ENTITY(ENT_GENERAL,name)
10542 val a2 = hookError(a1,(getPos q,err))
10543 in raise NoSuchEntity (a2,q1)
10548 if ext andalso !O_VALIDATE andalso standsAlone dtd andalso inDocEntity q1
10549 then let val _ = if !O_ERROR_MINIMIZE then setStandAlone dtd false else ()
10550 in hookError(a1,(getPos q,ERR_STANDALONE_ENT(ENT_GENERAL,name)))
10554 in ((idx,ent),(a2,q1))
10556 fun parseGenRefLit dtd cs (caq as (_,_,q)) =
10558 val (cs1,name,(c1,a1,q1)) = parseNameLit cs caq
10559 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c])
10560 val a1 = hookError(a,(getPos q,err))
10561 in raise SyntaxError(c,a1,q)
10563 val _ = if c1=0wx3B then ()
10564 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1])))
10565 in raise SyntaxError(c1,a2,q1)
10568 val idx = GenEnt2Index dtd name
10569 val (ent,ext) = getGenEnt dtd idx
10571 val _ = (* check whether entity is undeclared/unparsed/open *)
10574 if entitiesWellformed dtd
10575 then let val err = ERR_UNDEC_ENTITY(ENT_GENERAL,name)
10576 val a2 = hookError(a1,(getPos q,err))
10577 in raise NoSuchEntity (a2,q1)
10579 else if useParamEnts()
10580 then let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE)
10581 val a2 = hookError(a1,(getPos q,err))
10582 in raise NoSuchEntity (a2,q1)
10585 | GE_UNPARSED _ => let val err = ERR_ILLEGAL_ENTITY(ENT_UNPARSED,name,LOC_NONE)
10586 val a2 = hookError(a1,(getPos q,err))
10587 in raise NoSuchEntity (a2,q1)
10589 | _ => if isOpen(idx,false,q1)
10590 then let val err = ERR_RECURSIVE_ENTITY(ENT_GENERAL,name)
10591 val a2 = hookError(a1,(getPos q,err))
10592 in raise NoSuchEntity (a2,q1)
10597 if ext andalso !O_VALIDATE andalso standsAlone dtd andalso inDocEntity q1
10598 then let val _ = if !O_ERROR_MINIMIZE then setStandAlone dtd false else ()
10599 in hookError(a1,(getPos q,ERR_STANDALONE_ENT(ENT_GENERAL,name)))
10603 in (c1::cs1,((idx,ent),(a2,q1)))
10606 (*--------------------------------------------------------------------*)
10607 (* parse a parameter entity reference, the "%" already read. See 4.1: *)
10609 (* [69] PEReference ::= '%' Name ';' [ VC: Entity Declared ] *)
10610 (* [ WFC: No Recursion ] *)
10611 (* [ WFC: In DTD ] *)
10613 (* Well-Formedness Constraint: Entity Declared *)
10614 (* In a document without any DTD, a document with only an internal *)
10615 (* DTD subset which contains no parameter entity references, or a *)
10616 (* document with "standalone
='yes
'", the Name given in the entity *)
10617 (* reference must match that in an entity declaration, ... *)
10618 (* The declaration of a parameter entity must precede any reference *)
10621 (* Validity Constraint: Entity Declared *)
10622 (* In a document with an external subset or external parameter *)
10623 (* entities with "standalone
='no
'", the Name given in the entity *)
10624 (* reference must match that in an entity declaration. ... *)
10625 (* The declaration of a parameter entity must precede any reference *)
10628 (* Thus: in both cases it is an error if the entity is not declared. *)
10629 (* The only difference is the impact on well-formednes/validity. *)
10630 (* Because the thing to be parsed is a parameter entity reference, *)
10631 (* this DTD has references, and thus an undeclared entity is probably *)
10632 (* a validity and not a well-formedness error. Thus setExternal must *)
10633 (* be called before determining a possible error! *)
10635 (* Well-Formedness Constraint: No Recursion *)
10636 (* A parsed entity must not contain a recursive reference to *)
10637 (* itself, either directly or indirectly. *)
10639 (* print an error and raise SyntaxError if no name is found, or if no *)
10640 (* semicolon follows it. *)
10641 (* print an error and return PE_NULL if the reference is to an *)
10642 (* undeclared or open entity. *)
10644 (* return the entity referred to, and the remaining state. *)
10645 (*--------------------------------------------------------------------*)
10646 fun parseParRef dtd (caq as (_,_,q)) =
10648 val (name,(c1,a1,q1)) = parseName caq
10649 handle NotFound(c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c])
10650 val a1 = hookError(a,(getPos q,err))
10651 in raise SyntaxError(c,a1,q)
10654 val _ = if c1=0wx3B then ()
10655 else let val err = ERR_EXPECTED(expSemi,[c1])
10656 val a2 = hookError(a1,(getPos q1,err))
10657 in raise SyntaxError(c1,a2,q1)
10660 val _ = setExternal dtd;
10661 val idx = ParEnt2Index dtd name
10662 val (ent,ext) = getParEnt dtd idx
10664 val _ = (* check whether entity is declared *)
10667 if entitiesWellformed dtd
10668 then let val err = ERR_UNDEC_ENTITY(ENT_PARAMETER,name)
10669 val a2 = hookError(a1,(getPos q,err))
10670 in raise NoSuchEntity (a2,q1)
10672 else if useParamEnts()
10673 then let val err = ERR_UNDECLARED(IT_PAR_ENT,name,LOC_NONE)
10674 val a2 = hookError(a1,(getPos q,err))
10675 in raise NoSuchEntity (a2,q1)
10678 (* check whether the entity is already open *)
10679 | _ => if isOpen(idx,true,q1)
10680 then let val err = ERR_RECURSIVE_ENTITY(ENT_PARAMETER,name)
10681 val a2 = hookError(a1,(getPos q,err))
10682 in raise NoSuchEntity (a2,q1)
10685 in ((idx,ent),(a1,q1))
10687 fun parseParRefLit dtd cs (caq as (_,_,q)) =
10689 val (cs1,name,(c1,a1,q1)) = parseNameLit cs caq
10690 handle NotFound(c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c])
10691 val a1 = hookError(a,(getPos q,err))
10692 in raise SyntaxError(c,a1,q)
10695 val _ = if c1=0wx3B then ()
10696 else let val err = ERR_EXPECTED(expSemi,[c1])
10697 val a2 = hookError(a1,(getPos q1,err))
10698 in raise SyntaxError(c1,a2,q1)
10701 val _ = setExternal dtd;
10702 val idx = ParEnt2Index dtd name
10703 val (ent,ext) = getParEnt dtd idx
10705 val _ = (* check whether entity is declared *)
10708 if entitiesWellformed dtd
10709 then let val err = ERR_UNDEC_ENTITY(ENT_PARAMETER,name)
10710 val a2 = hookError(a1,(getPos q,err))
10711 in raise NoSuchEntity (a2,q1)
10713 else if useParamEnts()
10714 then let val err = ERR_UNDECLARED(IT_PAR_ENT,name,LOC_NONE)
10715 val a2 = hookError(a1,(getPos q,err))
10716 in raise NoSuchEntity (a2,q1)
10719 (* check whether the entity is already open *)
10720 | _ => if isOpen(idx,true,q1)
10721 then let val err = ERR_RECURSIVE_ENTITY(ENT_PARAMETER,name)
10722 val a2 = hookError(a1,(getPos q,err))
10723 in raise NoSuchEntity (a2,q1)
10726 in (c1::cs1,((idx,ent),(a1,q1)))
10729 (*--------------------------------------------------------------------*)
10730 (* skip a general/parameter entity reference, the "&/%" already read. *)
10732 (* print an error if no name is found, or if no semicolon follows it. *)
10734 (* handle any SyntaxState by returning its char and state. *)
10736 (* return the remaining state. *)
10737 (*--------------------------------------------------------------------*)
10738 fun skipReference caq =
10739 let val (_,(c1,a1,q1)) = parseName caq
10740 in if c1=0wx3B then getChar (a1,q1)
10741 else let val err = ERR_EXPECTED(expSemi,[c1])
10742 val a2 = hookError(a1,(getPos q1,err))
10746 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c])
10747 val a1 = hookError(a,(getPos q,err))
10751 (*--------------------------------------------------------------------*)
10752 (* skip a character reference, the "&#
" already read. See 4.1: *)
10754 (* print an error if no number or x plus hexnum is found, or if no *)
10755 (* semicolon follows it. *)
10757 (* handle any SyntaxState by returning its char and state. *)
10759 (* return the remaining char and state. *)
10760 (*--------------------------------------------------------------------*)
10761 fun skipCharRef aq =
10763 (*--------------------------------------------------------------*)
10764 (* skip a (hexa)decimal number. *)
10765 (*--------------------------------------------------------------*)
10766 fun skip_ximal isX (c,a,q) =
10767 if isX c then skip_ximal isX (getChar (a,q)) else (c,a,q)
10769 val (c1,a1,q1) = getChar aq
10771 if isDec c1 then skip_ximal isDec (getChar (a1,q1))
10772 else if c1=0wx78 (* #"x
" *)
10773 then let val (c2,a2,q2) = getChar (a1,q1)
10774 in if isHex c2 then skip_ximal isHex (getChar (a2,q2))
10775 else let val err = ERR_EXPECTED(expHexDigit,[c2])
10776 val a3 = hookError(a2,(getPos q2,err))
10777 in raise SyntaxError(c2,a3,q2)
10780 else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expDigitX,[c1])))
10781 in raise SyntaxError (c1,a2,q1)
10784 in if c2=0wx3B then getChar (a2,q2)
10785 else (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expSemi,[c2]))),q2)
10787 handle SyntaxError caq => caq
10789 (*--------------------------------------------------------------------*)
10790 (* parse a sequence of white space in markup declarations. Cf. 2.3: *)
10792 (* [3] S ::= (#x20 | #x9 | #xD | #xA)+ *)
10794 (* and 2.8 states: *)
10796 (* The markup declarations may be made up in whole or in part of *)
10797 (* the replacement text of parameter entities. The productions *)
10798 (* later in this specification for individual nonterminals *)
10799 (* (elementdecl, AttlistDecl, and so on) describe the declarations *)
10800 (* after all the parameter entities have been included. *)
10802 (* in markup declarations, we thus have to include entity references *)
10803 (* and skip entity ends, except for the document end. *)
10805 (* Well-Formedness Constraint: PEs in Internal Subset *)
10806 (* In the internal DTD subset, parameter-entity references can *)
10807 (* occur only where markup declarations can occur, not within *)
10808 (* markup declarations. (This does not apply to references that *)
10809 (* occur in external parameter entities or to the external subset.) *)
10811 (* we therefore always check whether we are in the internal subset *)
10812 (* before including a parameter entity. *)
10813 (*--------------------------------------------------------------------*)
10814 (* handle a parameter entity reference *)
10815 (*--------------------------------------------------------------------*)
10816 fun doParRef dtd (caq as (c,a,q)) =
10818 then let val err = ERR_FORBIDDEN_HERE(IT_PAR_REF,LOC_INT_DECL)
10819 val a1 = hookError(a,(getPos q,err))
10820 in skipReference (c,a1,q)
10822 else let val ((id,ent),(a1,q1)) = parseParRef dtd caq
10824 of PE_NULL => getChar (a1,q1)
10825 | PE_INTERN (_,rep) => getChar(a1,(pushIntern(q1,id,true,rep)))
10826 | PE_EXTERN extId => #3(openExtern(id,true,resolveExtId extId) (a1,q1))
10827 handle CantOpenFile(fmsg,a)
10828 => let val err = ERR_NO_SUCH_FILE fmsg
10829 val a1 = hookError(a,(getPos q1,err))
10830 in (getChar(a1,q1))
10833 handle SyntaxError caq => caq
10834 | NoSuchEntity aq => getChar aq
10835 (*--------------------------------------------------------------------*)
10836 (* parse optional white space. *)
10838 (* catch SyntaxState exceptions from parameter refs. *)
10840 (* print an error if a parameter entity reference or an entity end is *)
10841 (* found inside the internal subset. *)
10843 (* return the following character and the remaining state. *)
10844 (*--------------------------------------------------------------------*)
10845 fun skipPSopt dtd caq =
10846 let fun doit (c,a,q) =
10849 if isSpecial q then (c,a,q)
10850 else let val a1 = if !O_VALIDATE andalso inDocEntity q
10851 then hookError(a,(getPos q,ERR_EE_INT_SUBSET))
10853 in doit (getChar (a1,q))
10855 | 0wx09 => doit (getChar (a,q))
10856 | 0wx0A => doit (getChar (a,q))
10857 | 0wx20 => doit (getChar (a,q))
10858 | 0wx25 (* #"%" *) => doit (doParRef dtd (getChar (a,q)))
10862 (*--------------------------------------------------------------------*)
10863 (* parse optional white space. *)
10865 (* catch SyntaxState exceptions from parameter refs. *)
10867 (* print an error if a parameter entity reference or an entity end is *)
10868 (* found inside the internal subset. *)
10870 (* return a boolean whether white space was actually found, and the *)
10871 (* following character with the remaining state. *)
10872 (*--------------------------------------------------------------------*)
10873 fun skipPSmay dtd (c,a,q) =
10876 if isSpecial q then (false,(c,a,q))
10877 else let val a1 = if !O_VALIDATE andalso inDocEntity q
10878 then hookError(a,(getPos q,ERR_EE_INT_SUBSET))
10880 in (true,skipPSopt dtd (getChar (a1,q)))
10882 | 0wx09 => (true,skipPSopt dtd (getChar (a,q)))
10883 | 0wx0A => (true,skipPSopt dtd (getChar (a,q)))
10884 | 0wx20 => (true,skipPSopt dtd (getChar (a,q)))
10885 | 0wx25 (* #"%" *) => (true,skipPSopt dtd (doParRef dtd (getChar (a,q))))
10886 | _ => (false,(c,a,q))
10887 (*--------------------------------------------------------------------*)
10888 (* parse required white space. *)
10890 (* catch SyntaxState exceptions from parameter refs. *)
10892 (* print an error and return if no white space character is found. *)
10893 (* print an error if a parameter entity reference or an entity end is *)
10894 (* found inside the internal subset. *)
10896 (* return the following character and the remaining state. *)
10897 (*--------------------------------------------------------------------*)
10898 fun skipPS dtd (c,a,q) =
10901 if isSpecial q then (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q)
10902 else let val a1 = if !O_VALIDATE andalso inDocEntity q
10903 then hookError(a,(getPos q,ERR_EE_INT_SUBSET))
10905 in skipPSopt dtd (getChar (a1,q))
10907 | 0wx09 => skipPSopt dtd (getChar (a,q))
10908 | 0wx0A => skipPSopt dtd (getChar (a,q))
10909 | 0wx20 => skipPSopt dtd (getChar (a,q))
10910 | 0wx25 (* #"%" *) => skipPSopt dtd (doParRef dtd (getChar (a,q)))
10911 | _ => (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q)
10912 (*--------------------------------------------------------------------*)
10913 (* parse required white space, taking care of a single '%' character. *)
10914 (* this is only needed before the entity name in an entity decl. *)
10916 (* catch SyntaxState exceptions from parameter refs. *)
10918 (* print an error if no white space character is found. *)
10919 (* print an error if a parameter entity reference or an entity end is *)
10920 (* found inside the internal subset. *)
10922 (* return a boolean whether a '%' was found, the following character *)
10923 (* and the remaining state. *)
10924 (*--------------------------------------------------------------------*)
10925 fun skipPSdec dtd caq =
10926 let fun doit req (c,a,q) =
10929 if isSpecial q then (false,(c,a,q))
10930 else let val a1 = if !O_VALIDATE andalso inDocEntity q
10931 then hookError(a,(getPos q,ERR_EE_INT_SUBSET))
10933 in doit false (getChar (a1,q))
10935 | 0wx09 => doit false (getChar (a,q))
10936 | 0wx0A => doit false (getChar (a,q))
10937 | 0wx20 => doit false (getChar (a,q))
10938 | 0wx25 => (* #"%" *)
10939 let val (c1,a1,q1) = getChar (a,q)
10940 in if isNms c1 then doit false (doParRef dtd (c1,a1,q1))
10941 else let val a2 = if req then hookError(a1,(getPos q,ERR_MISSING_WHITE))
10943 in (true,(c1,a2,q1))
10946 | _ => let val a1 = if req then hookError(a,(getPos q,ERR_MISSING_WHITE))
10948 in (false,(c,a1,q))
10954 (* stop of ../../Parser/Parse/parseRefs.sml *)
10955 (* start of ../../Parser/Parse/parseLiterals.sml *)
10956 signature ParseLiterals =
10958 (*----------------------------------------------------------------------
10961 val parseName : UniChar.Char * AppData * State
10962 -> UniChar.Data * (UniChar.Char * AppData * State)
10963 val parseNmtoken : UniChar.Char * AppData * State
10964 -> UniChar.Data * (UniChar.Char * AppData * State)
10966 val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
10967 val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State)
10968 val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
10969 val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State
10970 val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State)
10971 val parseSopt : UniChar.Data -> UniChar.Char * AppData * State
10972 -> UniChar.Data * (UniChar.Char * AppData * State)
10973 val parseSmay : UniChar.Data -> UniChar.Char * AppData * State
10974 -> bool * (UniChar.Data * (UniChar.Char * AppData * State))
10975 val parseEq : UniChar.Char * AppData * State
10976 -> UniChar.Data * (UniChar.Char * AppData * State)
10978 val openExtern : int * Uri.Uri -> AppData * State
10979 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
10980 val openDocument : Uri.Uri option -> AppData
10981 -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State)
10982 val openSubset : Uri.Uri -> AppData
10983 -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State)
10985 val skipCharRef : AppData * State -> (UniChar.Char * AppData * State)
10986 val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State)
10987 val parseGenRef : Dtd -> UniChar.Char * AppData * State
10988 -> (int * Base.GenEntity) * (AppData * State)
10989 val parseParRef : Dtd -> UniChar.Char * AppData * State
10990 -> (int * Base.ParEntity) * (AppData * State)
10991 val parseCharRefLit : UniChar.Data -> AppData * State
10992 -> UniChar.Data * (UniChar.Char * AppData * State)
10993 val skipPS : Dtd -> UniChar.Char * AppData * State
10994 -> UniChar.Char * AppData * State
10995 val skipPSopt : Dtd -> UniChar.Char * AppData * State
10996 -> UniChar.Char * AppData * State
10997 val skipPSmay : Dtd -> UniChar.Char * AppData * State
10998 -> bool * (UniChar.Char * AppData * State)
10999 val skipPSdec : Dtd -> UniChar.Char * AppData * State
11000 -> bool * (UniChar.Char * AppData * State)
11001 ----------------------------------------------------------------------*)
11004 val parseSystemLiteral : UniChar.Char * AppData * State
11005 -> Uri.Uri * UniChar.Char * (UniChar.Char * AppData * State)
11006 val parsePubidLiteral : UniChar.Char * AppData * State
11007 -> string * UniChar.Char * (UniChar.Char * AppData * State)
11009 val parseAttValue : Dtd -> UniChar.Char * AppData * State
11010 -> UniChar.Vector * UniChar.Data * (UniChar.Char * AppData * State)
11011 val parseEntityValue : Dtd -> (UniChar.Vector * UniChar.Vector -> 'a)
11012 -> UniChar.Char * AppData * State
11013 -> 'a * (UniChar.Char * AppData * State)
11016 (*--------------------------------------------------------------------------*)
11017 (* Structure: ParseLiterals *)
11019 (* Exceptions raised by functions in this structure: *)
11020 (* parseSystemLiteral : NotFound *)
11021 (* parsePubidLiteral : NotFound *)
11022 (* parseAttValue : NotFound *)
11023 (* parseEntityValue : NotFound *)
11024 (*--------------------------------------------------------------------------*)
11025 functor ParseLiterals (structure ParseBase : ParseBase)
11028 structure ParseRefs = ParseRefs (structure ParseBase = ParseBase)
11031 Base UniChar Errors UniClasses Uri
11034 val THIS_MODULE = "ParseLiterals
"
11036 (*--------------------------------------------------------------------*)
11037 (* parse a system literal, the quote character ("'" or '"') already
--*)
11038 (* read
and passed
as first argument
. cf
. 2.3: *)
11040 (* ... Note that a SystemLiteral can be parsed without scanning
*)
11043 (* [11] SystemLiteral
::= ('"' [^"]* '"') | ("'" [^']* "'") *)
11045 (* print an error and end the literal if an entity end is found. *)
11047 (* return the literal as a string together with the next character *)
11048 (* and remaining state. *)
11049 (*--------------------------------------------------------------------*)
11050 (* might raise: none *)
11051 (*--------------------------------------------------------------------*)
11052 fun parseSystemLiteral' quote aq =
11054 fun doit text (c,a,q) =
11055 if c=quote then (text,getChar (a,q))
11057 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_SYS_LIT))
11060 else if c>0wx7F andalso !O_WARN_NON_ASCII_URI
11061 then let val a1 = hookWarning(a,(getPos q,WARN_NON_ASCII_URI c))
11062 in doit (c::text) (getChar(a1,q))
11064 else doit (c::text) (getChar(a,q))
11066 val (text,caq1) = doit nil (getChar aq)
11068 (Data2Uri(rev text),quote,caq1)
11070 (*--------------------------------------------------------------------*)
11071 (* parse a system literal. *)
11073 (* [11] SystemLiteral ::= ('"' [^
"]* '"') |
("'" [^
']* "'") *)
11075 (* raise NotFound
if neither
'"' nor "'" comes first. *)
11077 (* return the literal as a string together with the next character *)
11078 (* and remaining state. *)
11079 (*--------------------------------------------------------------------*)
11080 (* might raise: NotFound *)
11081 (*--------------------------------------------------------------------*)
11082 fun parseSystemLiteral (c,a,q) =
11083 if c=0wx22 (* "'" *) orelse
11085 then parseSystemLiteral
' c (a
,q
)
11086 else raise NotFound (c
,a
,q
)
11088 (*--------------------------------------------------------------------*)
11089 (* parse a pubid literal
, the quote
character ("'" or
'"') already ---*)
11090 (* read and passed as first argument. cf. 2.3: *)
11092 (* [12] PubidLiteral ::= '"' PubidChar
* '"' *)
11093 (* | "'" (PubidChar - "'")* "'" *)
11095 (* print an error and end the literal if an entity end is found. *)
11096 (* print an error if a non-pubid character is found. *)
11098 (* return the literal as a string together with the next character *)
11099 (* and remaining state. *)
11100 (*--------------------------------------------------------------------*)
11101 (* might raise: none *)
11102 (*--------------------------------------------------------------------*)
11103 fun parsePubidLiteral' quote aq =
11105 fun doit (hadSpace,atStart,text) aq =
11106 let val (c1,a1,q1) = getChar aq
11108 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PUB_LIT))
11109 in (text,(c1,a2,q1))
11111 | 0wx0A => doit (true,atStart,text) (a1,q1)
11112 | 0wx20 => doit (true,atStart,text) (a1,q1)
11114 if c1=quote then (text,getChar (a1,q1))
11115 else if not (isPubid c1)
11116 then let val err = ERR_FORBIDDEN_HERE(IT_CHAR c1,LOC_PUB_LIT)
11117 val a2 = hookError(a1,(getPos q1,err))
11118 in doit (hadSpace,atStart,text) (a2,q1)
11120 else if hadSpace andalso not atStart
11121 then doit (false,false,c1::0wx20::text) (a1,q1)
11122 else doit (false,false,c1::text) (a1,q1)
11124 val (text,caq1) = doit (false,true,nil) aq
11126 (Latin2String(rev text),quote,caq1)
11128 (*--------------------------------------------------------------------*)
11129 (* parse a pubid literal. *)
11131 (* [12] PubidLiteral ::= '"' PubidChar
* '"' *)
11132 (* | "'" (PubidChar - "'")* "'" *)
11134 (* raise NotFound if neither '"' nor
"'" comes first
. *)
11136 (* return the literal
as a
string together
with the next character
*)
11137 (* and remaining state
. *)
11138 (*--------------------------------------------------------------------*)
11139 (* might
raise: NotFound
*)
11140 (*--------------------------------------------------------------------*)
11141 fun parsePubidLiteral (c
,a
,q
) =
11142 if c
=0wx22 (* "'" *) orelse
11144 then parsePubidLiteral' c (a,q)
11145 else raise NotFound (c,a,q)
11147 (*--------------------------------------------------------------------*)
11148 (* parse an entity value and the quote character ("'" or '"') passed
*)
11149 (* as first argument
. Cf
. 2.3: *)
11151 (* [9] EntityValue
::= '"' ([^%&"] | PEReference | Reference
)* '"'*)
11152 (* | "'" ([^%&'] | PEReference | Reference)* "'"*)
11153 (* See also 4.4.5: *)
11155 (* When ... a parameter entity reference appears in a literal *)
11156 (* entity value, its replacement text is processed in place of the *)
11157 (* reference itself as though it were part of the document at the *)
11158 (* location the reference was recognized, except that a single or *)
11159 (* double quote character in the replacement text is always treated *)
11160 (* as a normal data character and will not terminate the literal. *)
11164 (* When a general entity reference appears in the EntityValue in an *)
11165 (* entity declaration, it is bypassed and left as is. *)
11167 (* A bypassed entity ref must, however, be checked for syntactic *)
11168 (* validity, as opposed to SGML, where it is not even recognized. *)
11170 (* print an error and end the literal if an entity end is found at *)
11171 (* the toplevel. *)
11172 (* print an error if a general entity reference is ill-formed. *)
11174 (* handle any errors in references by ignoring them syntactically. *)
11176 (* return argument con applied to the entity value as a char buffer, *)
11177 (* and the remaining char and state. *)
11178 (*--------------------------------------------------------------------*)
11179 (* might raise: none *)
11180 (*--------------------------------------------------------------------*)
11181 fun parseEntityValue' dtd (quote,con) aq =
11182 let fun doit (level,hadCr,lit,text) (c1,a1,q1) =
11184 of 0wx00 => if level=0 then let val err = ERR_ENDED_BY_EE LOC_ENT_VALUE
11185 val a2 = hookError(a1,(getPos q1,err))
11186 in (lit,text,(c1,a2,q1))
11188 else doit (level-1,false,lit,text) (getChar (a1,q1))
11189 | 0wx25 => (* #"%" *)
11190 let val (level1,lit1,caq2) =
11192 then let val err = ERR_FORBIDDEN_HERE(IT_PAR_REF,LOC_INT_DECL)
11193 val a2 = hookError(a1,(getPos q1,err))
11194 in (level,lit,skipReference (getChar(a2,q1)))
11197 let val (lit1,((id,ent),(a2,q2))) =
11198 if level=0 then parseParRefLit dtd (c1::lit) (getChar(a1,q1))
11199 else (lit,parseParRef dtd (getChar(a1,q1)))
11201 of PE_NULL => (level,lit1,getChar(a2,q2))
11202 | PE_INTERN(_,rep) =>
11203 let val q3 = pushIntern(q2,id,true,rep)
11204 in (level+1,lit1,getChar(a2,q3))
11206 | PE_EXTERN extId =>
11208 val fname = resolveExtId extId
11209 val caq3 = #3(openExtern (id,true,fname) (a2,q2))
11210 in (level+1,lit1,caq3)
11211 end handle CantOpenFile(fmsg,a)
11212 => let val err = ERR_NO_SUCH_FILE fmsg
11213 val a1 = hookError(a,(getPos q1,err))
11214 in (level,lit1,getChar(a1,q1))
11216 end (* ignore syntax errors in references *)
11217 handle SyntaxError caq => (level,lit,caq)
11218 | NoSuchEntity aq => (level,lit,getChar aq)
11219 in doit (level1,false,lit1,text) caq2
11221 | 0wx26 => (* #"&" *)
11222 let val (c2,a2,q2) = getChar (a1,q1)
11223 in (if c2=0wx23 (* #"#
" *)
11224 (*--------------------------------------------------*)
11225 (* it's a character reference. *)
11226 (*--------------------------------------------------*)
11229 let val (lit3,(ch,a3,q3)) =
11230 parseCharRefLit (c2::c1::lit) (a2,q2)
11231 in doit (level,false,lit3,ch::text) (getChar(a3,q3))
11233 else let val (ch,a3,q3) = parseCharRef (a2,q2)
11234 in doit (level,false,lit,ch::text) (getChar(a3,q3))
11236 (* ignore errors in char references *)
11237 handle SyntaxError caq => doit (level,false,lit,text) caq
11238 | NoSuchChar aq => doit (level,false,lit,text) (getChar aq)
11239 (*-----------------------------------------------------*)
11240 (* it's a general entity reference. *)
11241 (*-----------------------------------------------------*)
11243 val (fnd,lit3,text3,(c3,a3,q3)) =
11244 parseEntName (c1::lit,c1::text) (c2,a2,q2)
11245 val (lit4,text4,caq4) =
11246 if not fnd then (lit,text,(c3,a3,q3))
11247 else if c3=0wx3B (* #";" *)
11248 then (c3::lit3,c3::text3,(getChar(a3,q3)))
11249 else let val err = ERR_EXPECTED(expSemi,[c3])
11250 val a4 = hookError(a3,(getPos q3,err))
11251 in (lit,text,(c3,a4,q3))
11253 in doit (level,false,lit4,text4) caq4
11257 | 0wx0A => doit (level,false,if level=0 then c1::lit else lit,
11258 if hadCr then text else c1::text) (getChar (a1,q1))
11259 | 0wx0D => doit (level,true,if level=0 then c1::lit else lit,0wx0A::text)
11261 | _ => if c1=quote andalso level=0 then (lit,text,getChar(a1,q1))
11262 else doit (level,false,if level=0 then c1::lit else lit,c1::text)
11265 val (lit,text,caq1) = doit (0,false,nil,nil) (getChar aq)
11266 val literal = Data2Vector(quote::rev(quote::lit))
11267 val repText = Data2Vector(rev text)
11269 (con(literal,repText),caq1)
11271 (*--------------------------------------------------------------------*)
11272 (* parse an entity value. *)
11274 (* [9] EntityValue ::= '"' ([^
%&"] | PEReference | Reference)* '"'*)
11275 (* |
"'" ([^
%&'] | PEReference | Reference
)* "'"*)
11277 (* raise NotFound
if neither
'"' nor "'" comes first. *)
11279 (* return the entity value as a char buffer, and the remaining char *)
11281 (*--------------------------------------------------------------------*)
11282 (* might raise: NotFound *)
11283 (*--------------------------------------------------------------------*)
11284 fun parseEntityValue dtd con (c,a,q) =
11285 if c=0wx22 (* "'" *) orelse
11287 then parseEntityValue
' dtd (c
,con
) (a
,q
)
11288 else raise NotFound (c
,a
,q
)
11290 (*--------------------------------------------------------------------*)
11291 (* parse
and normalize an attribute value
, consume the final quote
*)
11292 (* character ("'" or
'""') passed
in the argument
. Cf
. 2.3: *)
11294 (* [10] AttValue
::= '"' ([^<&""] | Reference)* '"' *)
11295 (* |
"'" ([^
<&'] | Reference
)* "'" *)
11296 (* See also
4.4.5: *)
11298 (* When an entity reference appears
in an attribute value
..., *)
11299 (* its replacement text is processed
in place
of the reference
*)
11300 (* itself
as though it were part
of the document at the location
*)
11301 (* the reference was recognized
, except that a single or double
*)
11302 (* quote character
in the replacement text is always treated
as a
*)
11303 (* normal data character
and will not terminate the literal
. *)
11307 (* Before the value
of an attribute is passed to the application
*)
11308 (* or checked for validity
, the XML processor must normalize it
as *)
11311 (* * a character reference is processed by appending the referenced
*)
11312 (* character to the attribute value
*)
11313 (* * an entity reference is processed by recursively processing the
*)
11314 (* replacement text
of the entity
*)
11315 (* * a whitespace
character (#x20
, #xD
, #xA
, #x9
) is processed by
*)
11316 (* appending #x20 to the normalized value
, except that only a
*)
11317 (* single #x20 is appended for a
"#xD#xA" sequence that is part
*)
11318 (* of an external parsed entity or the literal entity value
of *)
11319 (* an internal parsed entity
*)
11320 (* * other characters are processed by appending them to the
*)
11321 (* normalized value
*)
11323 (* since #xD#xA are normalized by the
parseEntityValue (internal
) and *)
11324 (* getChar (external entities
), we don
't need to care about that
. *)
11325 (*--------------------------------------------------------------------*)
11326 (* print an error
and end the literal
if an entity
end is found
. *)
11327 (* print an error
if a general entity reference is ill
-formed
. *)
11328 (* print an error
if a reference to an external or unparsed entity is
*)
11330 (* print an error
if character
'<' appears literally
. *)
11332 (* handle any errors
in references by ignoring them syntactically
. *)
11333 (* raise NotFound
if neither
'"' nor "'" comes first. *)
11335 (* return the list of chars in the value, and the next char and state *)
11336 (*--------------------------------------------------------------------*)
11337 (* might raise: NotFound *)
11338 (*--------------------------------------------------------------------*)
11339 fun parseAttValue dtd (quote,a,q) =
11340 let fun doit (lhlt as (level,lit,text)) (c1,a1,q1) =
11342 of 0wx00 => if level=0 then let val err = ERR_ENDED_BY_EE LOC_ATT_VALUE
11343 val a2 = hookError(a1,(getPos q1,err))
11344 in (lit,text,(c1,a2,q1))
11346 else doit (level-1,lit,text) (getChar (a1,q1))
11347 | 0wx26 => (* #"&" *)
11349 val (c2,a2,q2) = getChar (a1,q1)
11350 val ((level1,lit1,text1),caq3) =
11351 (if c2=0wx23 (* #"#
" *)
11352 (*--------------------------------------------------*)
11353 (* it's a character reference. *)
11354 (*--------------------------------------------------*)
11357 let val (lit3,(ch,a3,q3)) =
11358 parseCharRefLit (c2::c1::lit) (a2,q2)
11359 in ((level,lit3,ch::text),getChar(a3,q3))
11361 else let val (ch,a3,q3) = parseCharRef (a2,q2)
11362 in ((level,lit,ch::text),getChar (a3,q3))
11364 (*-----------------------------------------------------*)
11365 (* it's a general entity reference. *)
11366 (*-----------------------------------------------------*)
11368 let val (lit3,((id,ent),(a3,q3))) =
11369 if level=0 then parseGenRefLit dtd (c1::lit) (c2,a2,q2)
11370 else (nil,parseGenRef dtd (c2,a2,q2))
11372 of GE_NULL => ((level,lit3,text),getChar(a3,q3))
11373 | GE_INTERN(_,rep) =>
11374 let val q4 = pushIntern(q3,id,false,rep)
11375 in ((level+1,lit3,text),getChar (a3,q4))
11378 let val err = ERR_ILLEGAL_ENTITY
11379 (ENT_EXTERNAL,Index2GenEnt dtd id,LOC_ATT_VALUE)
11380 val a4 = hookError(a3,(getPos q2,err))
11381 in ((level,lit,text),getChar (a4,q3))
11383 | GE_UNPARSED _ => raise InternalError
11384 (THIS_MODULE,"parseAttValue
'",
11385 "parseGenRef returned GE_UNPARSED
")
11387 (*------------------------------------------------------*)
11388 (* handle any errors in references by ignoring them. *)
11389 (*------------------------------------------------------*)
11390 handle SyntaxError caq => ((level,lit,text),caq)
11391 | NoSuchEntity aq => ((level,lit,text),getChar aq)
11392 | NoSuchChar aq => ((level,lit,text),getChar aq)
11393 in doit (level1,lit1,text1) caq3
11395 | 0wx3C => let val err = ERR_FORBIDDEN_HERE(IT_CHAR c1,LOC_ATT_VALUE)
11396 val a2 = hookError(a1,(getPos q1,err))
11397 val lit1 = if level=0 then c1::lit else lit
11398 in doit (level,lit1,c1::text) (getChar (a2,q1))
11400 | _ => if isS c1 then doit (level,if level=0 then c1::lit else lit,0wx20::text)
11402 else (if c1=quote andalso level=0 then (lit,text,getChar (a1,q1))
11403 else doit (level,if level=0 then c1::lit else lit,c1::text)
11407 val _ = if quote=0wx22 orelse quote=0wx27 (* "'",'"' *) then ()
11408 else raise NotFound (quote
,a
,q
)
11409 val (lit
,text
,caq1
) = doit (0,nil
,nil
) (getChar(a
,q
))
11411 (Data2Vector(quote
::rev(quote
::lit
)),rev text
,caq1
)
11414 (* stop
of ../../Parser
/Parse
/parseLiterals
.sml
*)
11415 (* start
of ../../Parser
/Parse
/parseTags
.sml
*)
11416 signature ParseTags
=
11418 (*----------------------------------------------------------------------
11421 val parseName
: UniChar
.Char * AppData
* State
11422 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11423 val parseNmtoken
: UniChar
.Char * AppData
* State
11424 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11426 val parseComment
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11427 val parseProcInstr
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11428 val skipS
: UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11429 val skipSopt
: UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11430 val skipSmay
: UniChar
.Char * AppData
* State
-> bool * (UniChar
.Char * AppData
* State
)
11432 val openExtern
: int * Uri
.Uri
-> AppData
* State
11433 -> Encoding
.Encoding
* HookData
.TextDecl option
* (UniChar
.Char * AppData
* State
)
11434 val openDocument
: Uri
.Uri option
-> AppData
11435 -> Encoding
.Encoding
* HookData
.XmlDecl option
* (UniChar
.Char * AppData
* State
)
11436 val openSubset
: Uri
.Uri
-> AppData
11437 -> Encoding
.Encoding
* HookData
.TextDecl option
* (UniChar
.Char * AppData
* State
)
11439 val skipCharRef
: AppData
* State
-> (UniChar
.Char * AppData
* State
)
11440 val skipReference
: UniChar
.Char * AppData
* State
-> (UniChar
.Char * AppData
* State
)
11441 val parseGenRef
: Dtd
-> UniChar
.Char * AppData
* State
11442 -> (int * Base
.GenEntity
) * (AppData
* State
)
11443 val parseParRef
: Dtd
-> UniChar
.Char * AppData
* State
11444 -> (int * Base
.ParEntity
) * (AppData
* State
)
11445 val parseCharRefLit
: UniChar
.Data
-> AppData
* State
11446 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11447 val skipPS
: Dtd
-> UniChar
.Char * AppData
* State
11448 -> UniChar
.Char * AppData
* State
11449 val skipPSopt
: Dtd
-> UniChar
.Char * AppData
* State
11450 -> UniChar
.Char * AppData
* State
11451 val skipPSmay
: Dtd
-> UniChar
.Char * AppData
* State
11452 -> bool * (UniChar
.Char * AppData
* State
)
11453 val skipPSdec
: Dtd
-> UniChar
.Char * AppData
* State
11454 -> bool * (UniChar
.Char * AppData
* State
)
11456 val parseSystemLiteral
: UniChar
.Char * AppData
* State
11457 -> Uri
.Uri
* UniChar
.Char * (UniChar
.Char * AppData
* State
)
11458 val parsePubidLiteral
: UniChar
.Char * AppData
* State
11459 -> string * UniChar
.Char * (UniChar
.Char * AppData
* State
)
11460 val parseAttValue
: Dtd
-> UniChar
.Char * AppData
* State
11461 -> UniChar
.Vector * UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11462 val parseEntityValue
: Dtd
-> (UniChar
.Vector * UniChar
.Vector -> 'a
)
11463 -> UniChar
.Char * AppData
* State
11464 -> 'a
* (UniChar
.Char * AppData
* State
)
11465 ----------------------------------------------------------------------*)
11466 include ParseLiterals
11468 val skipTag
: Errors
.Location
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11470 val parseETag
: Dtd
-> AppData
* State
11471 -> int * UniChar
.Data
* Errors
.Position
* (UniChar
.Char * AppData
* State
)
11472 val parseSTag
: Dtd
-> Errors
.Position
-> UniChar
.Char * AppData
* State
11473 -> (HookData
.StartTagInfo
* Base
.ElemInfo
) * (UniChar
.Char * AppData
* State
)
11476 (*--------------------------------------------------------------------------*)
11477 (* Structure
: ParseTags
*)
11479 (* Exceptions raised by functions
in this
structure: *)
11480 (* skipTag
: none
*)
11481 (* parseETag
: SyntaxState
*)
11482 (* parseSTag
: SyntaxState
*)
11483 (*--------------------------------------------------------------------------*)
11484 functor ParseTags (structure ParseBase
: ParseBase
)
11487 structure ParseLiterals
= ParseLiterals (structure ParseBase
= ParseBase
)
11491 Base Errors UniClasses
11494 (*--------------------------------------------------------------------*)
11495 (* parse an
end-tag
, the
"</" already read
. 3.1: *)
11497 (* [42] ETag
::= '</' Name S?
'>' *)
11499 (* and 3. states
: *)
11501 (* Validity Constraint
: Element Valid
*)
11502 (* An element is valid
if there is a declaration matching elementdecl
*)
11503 (* where the Name matches the element
type, and ... *)
11505 (* print an error
, recover
and raise SyntaxState
if no name is found
. *)
11506 (* print an error
and recover
if no
">" is found
. *)
11507 (* print an error
if the element is not declared
. *)
11509 (* return the index
of the element
, and the next char
and state
. *)
11510 (*--------------------------------------------------------------------*)
11511 (* might
raise: SyntaxState
*)
11512 (*--------------------------------------------------------------------*)
11513 fun parseETag dtd aq
=
11515 val caq0
as (_
,_
,q0
) = getChar aq
11516 val (elem
,(c1
,a1
,q1
)) = parseName caq0
11517 handle NotFound (c
,a
,q
) => let val err
= expectedOrEnded (expAName
,LOC_ETAG
) c
11518 val a1
= hookError(a
,(getPos q
,err
))
11519 val caq1
= recoverETag (c
,a1
,q
)
11520 in raise SyntaxError caq1
11522 val idx
= Element2Index dtd elem
11523 val elemInfo
as {decl
,...} = getElement dtd idx
11524 val a1
' = if isSome decl
then a1
11525 else let val a2
= if not (!O_VALIDATE
andalso hasDtd dtd
) then a1
11526 else let val err
= ERR_UNDECLARED(IT_ELEM
,elem
,LOC_ETAG
)
11527 val a1
' = hookError(a1
,(getPos q0
,err
))
11528 val _
= if not (!O_ERROR_MINIMIZE
) then ()
11529 else ignore (handleUndeclElement dtd idx
)
11532 in checkElemName (a2
,q0
) elem
11535 val (cs
,(c2
,a2
,q2
)) = parseSopt
nil (c1
,a1
',q1
)
11538 if c2
=0wx3E (* #
">" *) then (idx
,space
,getPos q2
,getChar(a2
,q2
))
11539 else let val err
= expectedOrEnded (expGt
,LOC_ETAG
) c2
11540 val a3
= hookError(a2
,(getPos q2
,err
))
11541 val caq3
= recoverETag(c2
,a3
,q2
)
11542 in (idx
,space
,getPos q2
,caq3
)
11546 (*--------------------------------------------------------------------*)
11547 (* parse a start
-tag or an empty
-element
-tag
, the
"<" already read
. *)
11550 (* [40] STag
::= '<' Name (S Attribute
)* S?
'>' *)
11551 (* [ WFC
: Unique Att Spec
] *)
11552 (* [41] Attribute
::= Name Eq AttValue
[ VC
: Attribute Value Type
] *)
11554 (* Well
-Formedness Constraint
: Unique Att Spec
*)
11555 (* No attribute name may appear more than once
in the same
*)
11556 (* start
-tag or empty
-element tag
. *)
11558 (* Validity Constraint
: Attribute Value Type
*)
11559 (* The attribute must have been declared
; the value must be
of the
*)
11560 (* type declared for it
. *)
11562 (* [44] EmptyElemTag
::= '<' Name (S Attribute
)* S?
'/>' *)
11563 (* [ WFC
: Unique Att Spec
] *)
11565 (* and 3. states
: *)
11567 (* Validity Constraint
: Element Valid
*)
11568 (* An element is valid
if there is a declaration matching elementdecl
*)
11569 (* where the Name matches the element
type, and ... *)
11571 (* catch entity
end exceptions
in subfunctions by printing an error
*)
11572 (* and re
-raising the
exception. *)
11574 (* print an error
, recover
and raise SyntaxState
if no element name
*)
11576 (* print an error
and recover
if no
">" or
"/>" is found
. *)
11577 (* print an error
and continue
if no
"=" is found after an att name
. *)
11578 (* print an error
and recover
if no literal is found after the
"=". *)
11579 (* print an error
if white space is missing
. *)
11580 (* print an error
if the element is not declared
. *)
11581 (* print an error
and ignore the attribute
if an attribute is
*)
11582 (* specified twice
. *)
11583 (* print an error
if an attribute is not declared
. *)
11585 (* return the index
of the element
, its ElemInfo
, the list
of *)
11586 (* AttSpecs (specified
and omitted atts
) and a boolean whether it was
*)
11587 (* an empty
-element
-tag
, together
with the next char
and state
. *)
11588 (*--------------------------------------------------------------------*)
11589 (* might
raise: SyntaxState
*)
11590 (*--------------------------------------------------------------------*)
11591 fun parseSTag dtd
startPos (caq
as (_
,_
,q
)) =
11593 val (elem
,(c1
,a1
,q1
)) = parseName caq
11594 handle NotFound (c
,a
,q
) => let val err
= expectedOrEnded (expAName
,LOC_STAG
) c
11595 val a1
= hookError(a
,(getPos q
,err
))
11596 val (_
,caq1
) = recoverSTag (c
,a1
,q
)
11597 in raise SyntaxError (c
,a1
,q
)
11599 val eidx
= Element2Index dtd elem
11600 val elemInfo
as {atts
,decl
,...} = getElement dtd eidx
11601 val defs
= case atts
11603 |
SOME (defs
,_
) => defs
11604 val (a1
',elemInfo
) =
11605 if isSome decl
then (a1
,elemInfo
)
11607 let val (a2
,newInfo
) =
11608 if not (!O_VALIDATE
andalso hasDtd dtd
) then (a1
,elemInfo
)
11609 else let val err
= ERR_UNDECLARED(IT_ELEM
,elem
,LOC_STAG
)
11610 val a1
' = hookError(a1
,(getPos q
,err
))
11611 val newInfo
= if not (!O_ERROR_MINIMIZE
) then elemInfo
11612 else handleUndeclElement dtd eidx
11615 in (checkElemName (a2
,q
) elem
,newInfo
)
11618 val hscaq2
= parseSmay
nil (c1
,a1
',q1
)
11620 (*--------------------------------------------------------------*)
11621 (* yet are the indices
of attributes encountered yet
, old are
*)
11622 (* the valid attributes specified yet
, and todo are the defs
of *)
11623 (* attributes yet to be specified
. hadS indicates whether white
*)
11624 (* space preceded
. *)
11625 (*--------------------------------------------------------------*)
11626 fun doit (yet
,old
,todo
) (hadS
,(sp
,(c
,a
,q
))) =
11628 of 0wx3E (* #
">" *) => (old
,todo
,sp
,false,q
,getChar(a
,q
))
11629 |
0wx2F (* #
"/" *) =>
11630 let val (c1
,a1
,q1
) = getChar(a
,q
)
11631 in if c1
=0wx3E (* #
">" *) then (old
,todo
,sp
,true,q1
,getChar(a1
,q1
))
11632 else let val err
= expectedOrEnded (expGt
,LOC_STAG
) c1
11633 val a2
= hookError(a1
,(getPos q1
,err
))
11634 val (mt
,caq2
) = recoverSTag (c1
,a2
,q1
)
11635 in (old
,todo
,sp
,mt
,q
,caq2
)
11640 then let val err
= expectedOrEnded (expAttSTagEnd
,LOC_STAG
) c
11641 val a1
= hookError(a
,(getPos q
,err
))
11642 val (mt
,caq1
) = recoverSTag (c
,a1
,q
)
11643 in (old
,todo
,sp
,mt
,q
,caq1
)
11646 let(* first parse the name
of the attribute
*)
11647 val (att
,(c1
,a1
,q1
)) = parseName (c
,a
,q
)
11648 val a2
= if hadS
then a1
11649 else hookError(a1
,(getPos q
,ERR_MISSING_WHITE
))
11651 (* now get its index
, check whether it already
*)
11652 (* occurred
and get its definition
. *)
11653 val aidx
= AttNot2Index dtd att
11656 then (true,hookError(a2
,(getPos q
,ERR_MULT_ATT_SPEC att
)))
11659 val (def
,rest
) = findAndDelete (fn (i
,_
,_
,_
) => i
=aidx
) todo
11660 val a4
= if isSome def
orelse hadIt
then a3
11661 else handleUndeclAtt
dtd (a3
,q
) (aidx
,att
,eidx
,elem
)
11663 (* consume the
" = ", ignore errors
*)
11664 val (eq
,caq5
as (_
,_
,q5
)) = parseEq (c1
,a4
,q1
)
11665 handle SyntaxError caq
=> ([0wx3D
],caq
)
11667 (* now parse the attribute value
*)
11668 val (literal
,value
,(c6
,a6
,q6
)) = parseAttValue dtd caq5
11670 (* possibly make a new AttSpec
*)
11673 if hadIt
then (old
,a6
)
11676 if !O_VALIDATE
andalso hasDtd dtd
then (old
,a6
)
11677 else (let val (attVal
,a7
) = checkAttValue
dtd (a6
,q5
)
11678 (defaultAttDef aidx
,literal
,value
)
11679 in ((aidx
,attVal
,SOME(space
,eq
))::old
,a7
)
11681 handle AttValue a
=> (old
,a
))
11683 let val (attVal
,a7
) = checkAttValue
dtd (a6
,q5
)
11685 in ((aidx
,attVal
,SOME(space
,eq
))::old
,a7
)
11687 handle AttValue a
=> (old
,a
)
11688 val hscaq8
= parseSmay
nil (c6
,a7
,q6
)
11690 doit (aidx
::yet
,new
,rest
) hscaq8
11692 handle NotFound (c
,a
,q
) (* raised by parseAttValue above
*)
11693 => let val err
= expectedOrEnded (expLitQuote
,LOC_STAG
) c
11694 val a1
= hookError(a
,(getPos q
,err
))
11695 val (mt
,caq1
) = recoverSTag (c
,a1
,q
)
11696 in (old
,todo
,sp
,mt
,q
,caq1
)
11699 val (specd
,todo
,sp
,empty
,qe
,(c3
,a3
,q3
)) = doit (nil
,nil
,defs
) hscaq2
11702 (* generate the defaults for unspecified attributes
*)
11703 val (all
,a4
) = genMissingAtts
dtd (a3
,qe
) (todo
,rev specd
)
11705 ((((startPos
,getPos q3
),eidx
,all
,space
,empty
),elemInfo
),(c3
,a4
,q3
))
11708 (*--------------------------------------------------------------------*)
11709 (* skip a tag
, the initial
"<" or
"</" already read
, the first arg
*)
11710 (* being a
string describing the tag
. *)
11711 (* don
't care about whether it is a start
- or
end-tag
. Ignore
">" and *)
11712 (* "/>" if within a literal
. *)
11714 (* print an error
and finish
if an entity
end is found
. *)
11716 (* return the remaining char
and state
. *)
11717 (*--------------------------------------------------------------------*)
11718 (* might
raise: none
*)
11719 (*--------------------------------------------------------------------*)
11720 fun skipTag loc aq
=
11722 fun do_lit
ch (c
,a
,q
) =
11723 if c
=0wx00
then let val a1
= hookError(a
,(getPos q
,ERR_ENDED_BY_EE loc
))
11726 else if c
=ch
then doit (getChar(a
,q
))
11727 else do_lit
ch (getChar(a
,q
))
11731 of 0wx00
=> let val a1
= hookError(a
,(getPos q
,ERR_ENDED_BY_EE loc
))
11734 |
0wx22 (* #
"\""*) => do_lit
c (getChar(a
,q
))
11735 |
0wx27 (* #
"'" *) => do_lit
c (getChar(a
,q
))
11736 |
0wx2F (* #
"/" *) => (case getChar(a
,q
)
11737 of (0wx3E
,a1
,q1
) (* #
">" *) => getChar(a1
,q1
)
11738 | caq1
=> doit caq1
)
11739 |
0wx3E (* #
">" *) => getChar(a
,q
)
11740 | _
=> doit(getChar(a
,q
))
11741 in doit (getChar aq
)
11745 (* stop
of ../../Parser
/Parse
/parseTags
.sml
*)
11746 (* start
of ../../Parser
/Parse
/parseDecl
.sml
*)
11747 signature ParseDecl
=
11749 (*----------------------------------------------------------------------
11752 val parseName
: UniChar
.Char * AppData
* State
11753 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11755 val parseComment
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11756 val parseProcInstr
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11757 val skipS
: UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11758 val skipSopt
: UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11759 val skipSmay
: UniChar
.Char * AppData
* State
-> bool * (UniChar
.Char * AppData
* State
)
11761 val openExtern
: int * Uri
.Uri
-> AppData
* State
11762 -> Encoding
.Encoding
* HookData
.TextDecl option
* (UniChar
.Char * AppData
* State
)
11763 val openDocument
: Uri
.Uri option
-> AppData
11764 -> Encoding
.Encoding
* HookData
.XmlDecl option
* (UniChar
.Char * AppData
* State
)
11765 val openSubset
: Uri
.Uri
-> AppData
11766 -> Encoding
.Encoding
* HookData
.TextDecl option
* (UniChar
.Char * AppData
* State
)
11768 val skipCharRef
: AppData
* State
-> (UniChar
.Char * AppData
* State
)
11769 val skipReference
: UniChar
.Char * AppData
* State
-> (UniChar
.Char * AppData
* State
)
11770 val parseGenRef
: Dtd
-> UniChar
.Char * AppData
* State
11771 -> (int * Base
.GenEntity
) * (AppData
* State
)
11772 val parseParRef
: Dtd
-> UniChar
.Char * AppData
* State
11773 -> (int * Base
.ParEntity
) * (AppData
* State
)
11774 val parseCharRefLit
: UniChar
.Data
-> AppData
* State
11775 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
11776 val skipPSopt
: Dtd
-> UniChar
.Char * AppData
* State
11777 -> UniChar
.Char * AppData
* State
11779 val skipTag
: Errors
.Location
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
11780 val parseETag
: Dtd
-> AppData
* State
11781 -> int * UniChar
.Data
* Errors
.Position
* (UniChar
.Char * AppData
* State
)
11782 val parseSTag
: Dtd
-> Errors
.Position
-> UniChar
.Char * AppData
* State
11783 -> (HookData
.StartTagInfo
* Base
.ElemInfo
) * (UniChar
.Char * AppData
* State
)
11784 ----------------------------------------------------------------------*)
11787 val skipDecl
: bool -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11789 val parseExtIdSub
: Dtd
-> UniChar
.Char * AppData
* State
11790 -> Base
.ExternalId
* bool * (UniChar
.Char * AppData
* State
)
11792 val parseEntityDecl
: Dtd
-> EntId
* Errors
.Position
* bool
11793 -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11794 val parseElementDecl
: Dtd
-> EntId
* Errors
.Position
* bool
11795 -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11796 val parseNotationDecl
: Dtd
-> EntId
* Errors
.Position
* bool
11797 -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11798 val parseAttListDecl
: Dtd
-> EntId
* Errors
.Position
* bool
11799 -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
11802 (*--------------------------------------------------------------------------*)
11803 (* Structure
: ParseDecl
*)
11805 (* Exceptions raised by functions
in this
structure: *)
11806 (* skipDecl
: none
*)
11807 (* parseExtIdSub
: NotFound SyntaxError
*)
11808 (* parseEntityDecl
: none
*)
11809 (* parseElementDecl
: none
*)
11810 (* parseNotationDecl
: none
*)
11811 (* parseAttListDecl
: none
*)
11812 (*--------------------------------------------------------------------------*)
11813 functor ParseDecl (structure ParseBase
: ParseBase
)
11816 structure ParseTags
= ParseTags (structure ParseBase
= ParseBase
)
11820 Base Errors HookData
11823 (*--------------------------------------------------------------------*)
11824 (* skip a markup declaration
, the initial
"<!" and name already read
. *)
11825 (* ignore
">" if within a literal
. yake care
of internal subset
if *)
11826 (* the first arg is
true. *)
11828 (* print an error
and finish
if an entity
end is found
. *)
11830 (* return the remaining char
and state
. *)
11831 (*--------------------------------------------------------------------*)
11832 (* might
raise: none
*)
11833 (*--------------------------------------------------------------------*)
11834 fun skipDecl hasSubset caq
=
11836 fun do_lit
ch (c
,a
,q
) =
11837 if c
=0wx00
then (c
,a
,q
)
11838 else if c
=ch
then getChar (a
,q
)
11839 else do_lit
ch (getChar(a
,q
))
11840 fun do_decl (c
,a
,q
) =
11842 of 0wx00
=> (c
,a
,q
)
11843 |
0wx22 (* #
"\""" *) => do_decl (do_lit c (getChar(a,q)))
11844 | 0wx27 (* #"'" *) => do_decl (do_lit c (getChar(a,q)))
11845 | 0wx3E (* #">" *) => getChar(a,q)
11846 | _ => do_decl (getChar(a,q))
11847 fun do_subset (c,a,q) =
11849 of 0wx00 => (c,a,q)
11850 | 0wx3C (* #"<" *) => do_subset (do_decl (getChar(a,q)))
11851 | 0wx5D (* #"]" *) => getChar(a,q)
11852 | _ => do_subset (getChar(a,q))
11855 of 0wx00 => (c,hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_DECL)),q)
11856 | 0wx22 (* #"\"""*) => doit (do_lit
c (getChar(a
,q
)))
11857 |
0wx27 (* #
"'" *) => doit (do_lit
c (getChar(a
,q
)))
11858 |
0wx3E (* #
">" *) => getChar(a
,q
)
11859 |
0wx5B (* #
"[" *) => if hasSubset
then doit (do_subset (getChar(a
,q
)))
11860 else doit (getChar(a
,q
))
11861 | _
=> doit (getChar(a
,q
))
11865 (*--------------------------------------------------------------------*)
11866 (* parse an external id
, or a public id
if the first arg is
true. *)
11867 (* Cf
. 4.2.2 and 4.7: *)
11869 (* [75] ExternalID
::= 'SYSTEM
' S SystemLiteral
*)
11870 (* |
'PUBLIC
' S PubidLiteral S SystemLiteral
*)
11872 (* [83] PublicID
::= 'PUBLIC
' S PubidLiteral
*)
11874 (* raise NotFound
if no name is found first
. *)
11875 (* print an error
if white space is missing
. *)
11876 (* print an error
and raise SyntaxState
if a wrong name is found
. *)
11877 (* print an Error
and raise SyntaxState
if a required literal is not
*)
11878 (* found (depends on optSys
). *)
11880 (* return the public
and system identifiers
as string options
, *)
11881 (* a boolean
, whether whit space followed the external id
, *)
11882 (* and the next character
and the remaining state
. *)
11883 (*--------------------------------------------------------------------*)
11884 (* might
raise: NotFound SyntaxState
*)
11885 (*--------------------------------------------------------------------*)
11886 fun parseExternalId dtd
optSys (caq
as (_
,_
,q
))=
11888 (* do not
handle NotFound
: in this
case no extId was found
*)
11889 val (name
,caq1
) = parseName caq
11890 val caq2
as (_
,_
,q2
)= skipPS dtd caq1
11893 of [0wx50
,0wx55
,0wx42
,0wx4c
,0wx49
,0wx43
] => (* "PUBLIC" *)
11895 val (pub
,pquote
,caq3
) = parsePubidLiteral caq2
11896 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expLitQuote
,[c
])
11897 val a1
= hookError(a
,(getPos q
,err
))
11898 in raise SyntaxError (c
,a1
,q
)
11900 val (hadS
,caq4
as (_
,_
,q4
)) = skipPSmay dtd caq3
11902 val (sys
,squote
,(c5
,a5
,q5
)) = parseSystemLiteral caq4
11903 val base
= getUri q4
11904 val a6
= if hadS
then a5
else hookError(a5
,(getPos q4
,ERR_MISSING_WHITE
))
11905 val (hadS6
,caq6
) = skipPSmay
dtd (c5
,a6
,q5
)
11907 (EXTID(SOME(pub
,pquote
),SOME(base
,sys
,squote
)),hadS6
,caq6
)
11909 handle NotFound (c
,a
,q
) => (* no system id
*)
11910 if optSys
then (EXTID(SOME(pub
,pquote
),NONE
),hadS
,(c
,a
,q
))
11911 else let val a1
= hookError(a
,(getPos q
,ERR_EXPECTED(expLitQuote
,[c
])))
11912 in raise SyntaxError (c
,a1
,q
)
11916 |
[0wx53
,0wx59
,0wx53
,0wx54
,0wx45
,0wx4d
] => (* "SYSTEM" *)
11918 val (sys
,squote
,caq3
) = parseSystemLiteral caq2
11919 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expLitQuote
,[c
])
11920 val a1
= hookError(a
,(getPos q
,err
))
11921 in raise SyntaxError (c
,a1
,q
)
11923 val base
= getUri q2
11924 val (hadS
,caq4
) = skipPSmay dtd caq3
11926 (EXTID(NONE
,SOME(base
,sys
,squote
)),hadS
,caq4
)
11929 | _
=> let val (c2
,a2
,q2
) = caq2
11930 val a3
= hookError(a2
,(getPos q
,ERR_EXPECTED(expExtId
,name
)))
11931 in raise SyntaxError (c2
,a3
,q2
)
11934 (*--------------------------------------------------------------------*)
11935 (* parse an external id
in an entity definition
. Cf
. 4.2.2: *)
11937 (* print an Error
and raise SyntaxState
if no external id is found
. *)
11938 (*--------------------------------------------------------------------*)
11939 (* might
raise: SyntaxState
*)
11940 (*--------------------------------------------------------------------*)
11941 fun parseExtIdEnt dtd caq
= parseExternalId dtd
false caq
11942 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expLitQuotExt
,[c
])
11943 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
11945 (*--------------------------------------------------------------------*)
11946 (* parse an external or public id
in a notation declaration
. *)
11948 (* print an Error
and raise SyntaxState
if neither external nor
*)
11949 (* public id is found
. *)
11950 (*--------------------------------------------------------------------*)
11951 (* might
raise: SyntaxState
*)
11952 (*--------------------------------------------------------------------*)
11953 fun parseExtIdNot dtd caq
= parseExternalId dtd
true caq
11954 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expExtId
,[c
])
11955 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
11957 (*--------------------------------------------------------------------*)
11958 (* parse an external id for the external subset
. *)
11960 (* raise NotFound
if no external id is found
. *)
11961 (*--------------------------------------------------------------------*)
11962 (* might
raise: NotFound SyntaxState
*)
11963 (*--------------------------------------------------------------------*)
11964 fun parseExtIdSub dtd caq
= parseExternalId dtd
false caq
11966 (*--------------------------------------------------------------------*)
11967 (* parse a parameter entity declaration
, starting after the
'%'. The
*)
11968 (* unique entity id
of the initial
'<' is given
as first arg
. 4.2: *)
11970 (* [72] PEDecl
::= '<!ENTITY
' S
'%' S Name S PEDef S?
'>' *)
11971 (* [74] PEDef
::= EntityValue | ExternalID
*)
11973 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
11975 (* print an error
if white space is missing
. *)
11976 (* print an error
and raise SyntaxState
if neither entity value nor
*)
11977 (* external identifier is found
. *)
11978 (* print an error
and raise SyntaxState
if the closing
'>' is missing
.*)
11979 (* print an error
if the
'>' is not
in the same entity
as the
'<!'. *)
11981 (* enter the declared entity into the entity table
. *)
11982 (* return the remaining char
and state
. *)
11983 (*--------------------------------------------------------------------*)
11984 (* might
raise: SyntaxState
*)
11985 (*--------------------------------------------------------------------*)
11986 fun parseParEntDecl
dtd (startEnt
,startPos
,ext
) caq
=
11988 val caq1
as (_
,_
,q1
) = skipPS dtd caq
11990 val (name
,caq2
) = parseName caq1
11991 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAnEntName
,[c
])
11992 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
11994 val idx
= ParEnt2Index dtd name
11995 val caq3
= skipPS dtd caq2
11997 val (ent
,(c4
,a4
,q4
)) =
11998 let val (ent
,caq4
) = parseEntityValue dtd PE_INTERN caq3
11999 val caq5
= skipPSopt dtd caq4
12002 handle NotFound caq
=>
12003 let val (extId
,_
,caq1
) = parseExtIdEnt dtd caq
12004 in (PE_EXTERN extId
,caq1
)
12007 val a5
= if useParamEnts() orelse not ext
then addParEnt
dtd (a4
,q1
) (idx
,ent
,ext
) else a4
12008 val a6
= hookDecl(a5
,((startPos
,getPos q4
),DEC_PAR_ENT(idx
,ent
,ext
)))
12010 if c4
<>0wx3E (* #
">" *)
12011 then let val a7
= hookError(a6
,(getPos q4
,ERR_EXPECTED(expGt
,[c4
])))
12012 in raise SyntaxError(c4
,a7
,q4
)
12014 else let val a7
= if not (!O_VALIDATE
) orelse getEntId q4
=startEnt
then a6
12015 else hookError(a6
,(getPos q4
,ERR_DECL_ENT_NESTING LOC_ENT_DECL
))
12020 (*--------------------------------------------------------------------*)
12021 (* parse a general entity declaration
, starting
with the name
. The
*)
12022 (* unique entity id
of the initial
'<' is given
as first arg
. 4.2: *)
12024 (* [71] GEDecl
::= '<!ENTITY
' S Name S EntityDef S?
'>' *)
12025 (* [73] EntityDef
::= EntityValue |
(ExternalID NDataDecl?
) *)
12027 (* [76] NDataDecl
::= S
'NDATA
' S Name
[ VC
: Notation
*)
12030 (* If the NDataDecl is present
, this is a general unparsed entity
; *)
12031 (* otherwise it is a parsed entity
. *)
12033 (* Validity Constraint
: Notation Declared
*)
12034 (* The Name must match the declared name
of a notation
. *)
12036 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
12038 (* print an error
if white space is missing
. *)
12039 (* print an error
and raise SyntaxState
if neither entity value nor
*)
12040 (* external identifier is found
. *)
12041 (* print an error
if name other
then 'NDATA
' is found after ext
. id
. *)
12042 (* print an error
and raise SyntaxState
if no name is found after the
*)
12044 (* print an error
if the notation is not declared
. *)
12045 (* print an error
and raise SyntaxState
if the closing
'>' is missing
.*)
12046 (* print an error
if the
'>' is not
in the same entity
as the
'<!'. *)
12048 (* enter the declared entity into the entity table
. *)
12049 (* return the remaining char
and state
. *)
12050 (*--------------------------------------------------------------------*)
12051 (* might
raise: SyntaxState
*)
12052 (*--------------------------------------------------------------------*)
12053 fun parseGenEntDecl
dtd (startEnt
,startPos
,ext
) (caq
as (_
,_
,q
)) =
12055 val (name
,caq1
) = parseName caq
12056 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expEntNamePero
,[c
])
12057 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
12059 val idx
= GenEnt2Index dtd name
12060 val caq2
= skipPS dtd caq1
12062 val (ent
,expEnd
,(c3
,a3
,q3
)) =
12063 (*-----------------------------------------------------------*)
12064 (* Try for an internal entity
. Then
'>' must follow
. *)
12065 (*-----------------------------------------------------------*)
12067 val (ent
,caq3
) = parseEntityValue dtd GE_INTERN caq2
12068 val caq4
= skipPSopt dtd caq3
12072 handle NotFound cq
=> (* raised by parseEntityValue
*)
12073 (*-----------------------------------------------------------*)
12074 (* Must be external
. First parse the external identifier
. *)
12075 (*-----------------------------------------------------------*)
12077 val (extId
,hadS
,caq1
as (_
,_
,q1
)) = parseExtIdEnt dtd caq2
12079 (*-----------------------------------------------------*)
12080 (* Does a name follow? Then is must be
'NDATA
' and the
*)
12081 (* notation name follows
. Thus the entity is unparsed
. *)
12082 (* Also
, only
'>' may come next
. *)
12083 (* NotFound is handled at the
end of the
let. *)
12084 (*-----------------------------------------------------*)
12085 val (key
,(c2
,a2
,q2
)) = parseName caq1
12086 val a3
= if hadS
then a2
else hookError(a2
,(getPos q1
,ERR_MISSING_WHITE
))
12087 val a4
= if key
= [0wx4e
,0wx44
,0wx41
,0wx54
,0wx41
] (* "NDATA" *) then a3
12088 else hookError(a3
,(getPos q1
,ERR_EXPECTED(expGtNdata
,key
)))
12090 val caq5
as (_
,_
,q5
) = skipPS
dtd (c2
,a4
,q2
)
12092 val (not
,caq6
) = parseName caq5
12093 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expANotName
,[c
])
12094 val a1
= hookError(a
,(getPos q
,err
))
12095 in raise SyntaxError (c
,a1
,q
)
12097 val notIdx
= AttNot2Index dtd not
12098 val caq7
= skipPSopt dtd caq6
12100 (GE_UNPARSED(extId
,notIdx
,getPos q5
),expGt
,caq7
)
12102 handle NotFound caq
=>
12103 (*--------------------------------------------------------*)
12104 (* No
'NDATA
' present
, so it
's parsed external entity
. *)
12105 (* A
'NDATA
' might have followed
. *)
12106 (*--------------------------------------------------------*)
12107 (GE_EXTERN extId
,expGtNdata
,caq
)
12110 val a4
= if useParamEnts() orelse not ext
then addGenEnt
dtd (a3
,q
) (idx
,ent
,ext
) else a3
12111 val a5
= hookDecl(a4
,((startPos
,getPos q3
),DEC_GEN_ENT(idx
,ent
,ext
)))
12113 if c3
<>0wx3E (* #
">" *)
12114 then let val a6
= hookError(a5
,(getPos q3
,ERR_EXPECTED(expGt
,[c3
])))
12115 in raise SyntaxError(c3
,a6
,q3
)
12117 else let val a6
= if not (!O_VALIDATE
) orelse getEntId q3
=startEnt
then a5
12118 else hookError(a5
,(getPos q3
,ERR_DECL_ENT_NESTING LOC_ENT_DECL
))
12123 (*--------------------------------------------------------------------*)
12124 (* parse an entity declaration
, the initial
'<!ENTITY
' already read
. *)
12125 (* The unique entity id
of the initial
'<' is given
as 1st arg
. 4.2: *)
12127 (* [70] EntityDecl
::= GEDecl | PEDecl
*)
12128 (* [71] GEDecl
::= '<!ENTITY
' S Name S EntityDef S?
'>' *)
12129 (* [72] PEDecl
::= '<!ENTITY
' S
'%' S Name S PEDef S?
'>' *)
12131 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
12133 (* raise SyntaxState
in case of a syntax error
. *)
12134 (* print an error
if white space is missing
. *)
12136 (* print an error for entity
end exceptions
in subfunctions
. *)
12137 (* catch syntax errors by recovering to the next possible state
. *)
12139 (* pass control to parseParEntDecl or parseGenEntDecl
, depending on
*)
12140 (* whether the S is followed by a
'%'. *)
12141 (* return the remaining char
and state
. *)
12142 (*--------------------------------------------------------------------*)
12143 (* might
raise: none
*)
12144 (*--------------------------------------------------------------------*)
12145 fun parseEntityDecl dtd pars caq
=
12147 val (hadPero
,caq1
) = skipPSdec dtd caq
12149 if hadPero
then parseParEntDecl dtd pars caq1
12150 else parseGenEntDecl dtd pars caq1
12152 handle exn
as SyntaxError (c
,a
,q
) =>
12153 let val a1
= if c
=0wx00
then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_ENT_DECL
))
12155 in recoverDecl
false (c
,a1
,q
)
12158 (*--------------------------------------------------------------------*)
12159 (* parse a notation declaration
, the initial
'<!NOTATION
' already
*)
12160 (* read
. The unique entity id
of the
'<!' is given
as first arg
. 4.7: *)
12162 (* [82] NotationDecl
::= '<!NOTATION
' S Name S
*)
12163 (* (ExternalID | PublicID
) S?
'>' *)
12165 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
12167 (* print an error
and raise SyntaxState
if no notation name
, no
*)
12168 (* external
/public identifier or no final
'>' is found
. *)
12169 (* print an error
if the
'>' is not
in the same entity
as the
'<!'. *)
12170 (* print an error
if white space is missing
. *)
12172 (* print an error for entity
end exceptions
in subfunctions
. *)
12173 (* catch syntax errors by recovering to the next possible state
. *)
12175 (* enter the declared notation into the notation table
. *)
12176 (* return the remaining char
and state
. *)
12177 (*--------------------------------------------------------------------*)
12178 (* might
raise: none
*)
12179 (*--------------------------------------------------------------------*)
12180 fun parseNotationDecl
dtd (startEnt
,startPos
,ext
) caq
=
12182 val caq1
as (_
,_
,q1
) = skipPS dtd caq
12183 val (name
,caq2
) = parseName caq1
12184 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expANotName
,[c
])
12185 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
12187 val idx
= AttNot2Index dtd name
12188 val caq3
= skipPS dtd caq2
12190 val (extId
,_
,(c4
,a4
,q4
)) = parseExtIdNot dtd caq3
12192 val a5
= if useParamEnts() orelse not ext
then addNotation
dtd (a4
,q1
) (idx
,extId
) else a4
12193 val a6
= hookDecl(a5
,((startPos
,getPos q4
),DEC_NOTATION(idx
,extId
,ext
)))
12195 if c4
<>0wx3E (* #
">" *)
12196 then let val a7
= hookError(a6
,(getPos q4
,ERR_EXPECTED(expGt
,[c4
])))
12197 in raise SyntaxError (c4
,a7
,q4
)
12199 else let val a7
= if not (!O_VALIDATE
) orelse getEntId q4
=startEnt
then a6
12200 else hookError(a6
,(getPos q4
,ERR_DECL_ENT_NESTING LOC_NOT_DECL
))
12204 handle exn
as SyntaxError(c
,a
,q
) =>
12205 let val a1
= if c
=0wx00
then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_NOT_DECL
))
12207 in recoverDecl
false (c
,a1
,q
)
12210 (*--------------------------------------------------------------------*)
12211 (* parse a mixed
-content specification
, the initial
'(', S?
and '#
' *)
12212 (* already read
. The unique id
of the openening paren
's entity is
*)
12213 (* given
as first arg
. Cf
. 3.2.1/2: *)
12215 (* Validity Constraint
: Proper Group
/PE Nesting
*)
12216 (* Parameter
-entity replacement text must be properly nested
with *)
12217 (* parenthetized groups
. That is to say
, if either
of the opening
*)
12218 (* or closing parentheses
in a choice
, seq
, or Mixed construct is
*)
12219 (* contained
in the replacement text for a parameter entity
, both
*)
12220 (* must be contained
in the same replacement text
. *)
12222 (* [51] Mixed
::= '(' S?
'#PCDATA
' [ VC
: Proper Group
/PE
*)
12223 (* (S?
'|
' S? Name
)* S?
')*' Nesting
] *)
12224 (* |
'(' S?
'#PCDATA
' S?
')' [ VC
: No Duplicate
*)
12227 (* print an error
and raise SyntaxState
if no name is found first
. *)
12228 (* print an error
if a name other than
'PCDATA
' is found
. *)
12229 (* is found
in the first place
. *)
12230 (* print an error
if element names are specified but no
'*' follows
. *)
12231 (* print an error
if an element name is specified more than once
. *)
12232 (* print an error
and raise SyntaxState
if neither
'|
' nor
')' is
*)
12233 (* found after the
'PCDATA
' or after an element name
. *)
12234 (* print an error
if the closing parenthesis is not
in the same
*)
12235 (* as the opening one
. *)
12237 (* return the mixed
-content specification
, togther
with the next
*)
12238 (* character
and state
. *)
12239 (*--------------------------------------------------------------------*)
12240 (* might
raise: SyntaxState
*)
12241 (*--------------------------------------------------------------------*)
12242 fun parseMixed dtd
lparEnt (caq
as (_
,_
,q
)) =
12244 fun doit
is (c
,a
,q
) =
12246 of 0wx29 (* #
")" *) =>
12247 let val a1
= if not (!O_VALIDATE
) orelse getEntId q
=lparEnt
then a
12248 else hookError(a
,(getPos q
,ERR_GROUP_ENT_NESTING LOC_MIXED
))
12249 in (rev is
,getChar(a1
,q
))
12251 |
0wx7C (* #
"|" *) =>
12253 val caq1
as (_
,_
,q1
) = skipPSopt
dtd (getChar(a
,q
))
12255 val (name
,(c2
,a2
,q2
)) = parseName caq1
12256 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAName
,[c
])
12257 val a1
= hookError(a
,(getPos q
,err
))
12258 in raise SyntaxError (c
,a1
,q
)
12260 val i
= Element2Index dtd name
12262 if not (member i is
) then (i
::is
,a2
)
12263 else let val a3
= if !O_VALIDATE
12264 then hookError(a2
,(getPos q1
,ERR_MULT_MIXED name
))
12268 val caq3
= skipPSopt
dtd (c2
,a3
,q2
)
12271 | _
=> let val a1
= hookError(a
,(getPos q
,ERR_EXPECTED(expBarRpar
,[c
])))
12272 in raise SyntaxError (c
,a1
,q
)
12275 val (name
,(c1
,a1
,q1
)) = parseName caq
12276 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expPcdata
,[c
])
12277 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
12280 of [0wx50
,0wx43
,0wx44
,0wx41
,0wx54
,0wx41
] (* "PCDATA" *) => a1
12281 | _
=> hookError(a1
,(getPos q
,ERR_EXPECTED(expPcdata
,name
)))
12283 val caq2
= skipPSopt
dtd (c1
,a2
,q1
)
12284 val (is
,(c3
,a3
,q3
)) = doit nil caq2
12286 val caq4
= if c3
=0wx2A (* #
"*" *) then getChar(a3
,q3
)
12287 else let val a4
= if null is
then a3
12288 else hookError(a3
,(getPos q3
,ERR_EXPECTED(expRep
,[c3
])))
12295 (*--------------------------------------------------------------------*)
12296 (* parse an optional occurrence indicator afer a content particle or
*)
12297 (* a content model
, given
as first argument
. Cf
. 3.2.1: *)
12299 (* [47] children
::= (choice | seq
) ('?
' |
'*' |
'+')?
*)
12300 (* [48] cp
::= (Name | choice | seq
) ('?
' |
'*' |
'+')?
*)
12302 (* return
the (possibly modified
) content particle
, together
with the
*)
12303 (* next char
and state
. *)
12304 (*--------------------------------------------------------------------*)
12305 (* might
raise: none
*)
12306 (*--------------------------------------------------------------------*)
12307 fun parseOcc
cm (c
,a
,q
) =
12309 of 0wx3F (* #
"?" *) => (CM_OPT cm
,getChar(a
,q
))
12310 |
0wx2A (* #
"*" *) => (CM_REP cm
,getChar(a
,q
))
12311 |
0wx2B (* #
"+" *) => (CM_PLUS cm
,getChar(a
,q
))
12312 | _
=> (cm
,(c
,a
,q
))
12314 (*--------------------------------------------------------------------*)
12315 (* parse a content particle
. Cf
. 3.2.1: *)
12317 (* Validity Constraint
: Proper Group
/PE Nesting
*)
12318 (* Parameter
-entity replacement text must be properly nested
with *)
12319 (* parenthetized groups
. ... *)
12321 (* (see also parseMixed
) *)
12323 (* [48] cp
::= (Name | choice | seq
) ('?
' |
'*' |
'+')?
*)
12324 (* [49] choice
::= '(' S? cp
[ VC
: Proper Group
/ *)
12325 (* ( S?
'|
' S? cp
)* S?
')' PE Nesting
] *)
12326 (* [50] seq
::= '(' S? cp
[ VC
: Proper Group
/ *)
12327 (* ( S?
',' S? cp
)* S?
')' PE Nesting
] *)
12329 (* print an error
and raise SyntaxState
if no element name or
"(" is
*)
12330 (* found
in the first place
. *)
12332 (* return the content particle together
with the next char
and state
. *)
12333 (*--------------------------------------------------------------------*)
12334 (* might
raise: SyntaxState
*)
12335 (*--------------------------------------------------------------------*)
12336 fun parseCP
dtd (c
,a
,q
) =
12338 of 0wx28 (* #
"(" *) =>
12340 val lparEnt
= getEntId q
12341 val caq1
= skipPSopt
dtd (getChar (a
,q
))
12342 in parseGroup dtd lparEnt caq1
12344 | _
=> (* must be an element name
*)
12346 val (name
,caq1
) = parseName (c
,a
,q
)
12347 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expElemLpar
,[c
])
12348 val a1
= hookError(a
,(getPos q
,err
))
12349 in raise SyntaxError (c
,a1
,q
)
12351 val idx
= Element2Index dtd name
12353 parseOcc (CM_ELEM idx
) caq1
12356 (*--------------------------------------------------------------------*)
12357 (* parse a seq
/choice
, the first content particle
and the connector
*)
12358 (* already parsed
; the connector
, the
type of group
and the entity id
*)
12359 (* of the opening parenthesis are given
in first arg
. Cf
. 3.2.1: *)
12361 (* Validity Constraint
: Proper Group
/PE Nesting
*)
12362 (* Parameter
-entity replacement text must be properly nested
with *)
12363 (* parenthetized groups
. ... *)
12365 (* (see also parseMixed
) *)
12367 (* [49] choice
::= '(' S? cp
[ VC
: Proper Group
/ *)
12368 (* ( S?
'|
' S? cp
)* S?
')' PE Nesting
] *)
12369 (* [50] seq
::= '(' S? cp
[ VC
: Proper Group
/ *)
12370 (* ( S?
',' S? cp
)* S?
')' PE Nesting
] *)
12372 (* print an error
and raise SyntaxState
if something other than the
*)
12373 (* connector or
')' is found after a content particle
. *)
12374 (* print an error
if the closing parenthesis
of a group is not
in the
*)
12375 (* same entity
as the opening one
. *)
12377 (* return the list
of content particles parsed
, together
with the
*)
12378 (* remaining character
and state
. *)
12379 (*--------------------------------------------------------------------*)
12380 (* might
raise: SyntaxState
*)
12381 (*--------------------------------------------------------------------*)
12382 and parseGroup
' dtd (con
,loc
,lparEnt
) caq
=
12385 val caq1
= skipPSopt dtd caq
12386 val (cp
,caq2
) = parseCP dtd caq1
12387 val (c3
,a3
,q3
) = skipPSopt dtd caq2
12389 if c3
=0wx29 (* #
")" ( *)
12390 then let val a4
= if not (!O_VALIDATE
) orelse getEntId q3
=lparEnt
then a3
12391 else hookError(a3
,(getPos q3
,ERR_GROUP_ENT_NESTING loc
))
12392 in ([cp
],getChar(a4
,q3
))
12394 else (if c3
=con
then let val (cps
,caq4
) = doit (getChar(a3
,q3
))
12397 else let val err
= ERR_EXPECTED(expConCRpar con
,[c3
])
12398 in raise SyntaxError (c3
,hookError(a3
,(getPos q3
,err
)),q3
)
12405 (*--------------------------------------------------------------------*)
12406 (* parse a seq
/choice
, the first content particle parsed
; the entity
*)
12407 (* id
of the opening parenthesis are given
in first arg
. Cf
. 3.2.1: *)
12409 (* (see also parseMixed
) *)
12411 (* [49] choice
::= '(' S? cp
[ VC
: Proper Group
/ *)
12412 (* ( S?
'|
' S? cp
)* S?
')' PE Nesting
] *)
12413 (* [50] seq
::= '(' S? cp
[ VC
: Proper Group
/ *)
12414 (* ( S?
',' S? cp
)* S?
')' PE Nesting
] *)
12416 (* print an error
and raise SyntaxState
if neither
'|
' nor
',' nor
*)
12417 (* ')' follows the first content particle
in a seq
/choice
. *)
12419 (* return the list
of as a ContentModel
, together
with the remaining
*)
12420 (* character
and state
. *)
12421 (*--------------------------------------------------------------------*)
12422 (* might
raise: SyntaxState
*)
12423 (*--------------------------------------------------------------------*)
12424 and parseGroup dtd lparEnt caq
=
12426 val (cp
,caq1
) = parseCP dtd caq
12427 val (c2
,a2
,q2
) = skipPSopt dtd caq1
12430 of 0wx29 (* #
")" *) =>
12431 let val a3
= if not (!O_VALIDATE
) orelse getEntId q2
=lparEnt
then a2
12432 else hookError(a2
,(getPos q2
,ERR_GROUP_ENT_NESTING LOC_SEQ
))
12433 in (CM_SEQ
[cp
],getChar(a3
,q2
))
12435 |
0wx2C (* #
"," *) =>
12436 let val (cps
,caq3
) = parseGroup
' dtd (c2
,LOC_SEQ
,lparEnt
) (getChar(a2
,q2
))
12437 in (CM_SEQ(cp
::cps
),caq3
)
12439 |
0wx7C (* #
"|" *) =>
12440 let val (cps
,caq3
) = parseGroup
' dtd (c2
,LOC_CHOICE
,lparEnt
) (getChar(a2
,q2
))
12441 in (CM_ALT(cp
::cps
),caq3
)
12443 | _
=> let val a3
= hookError(a2
,(getPos q2
,ERR_EXPECTED(expConRpar
,[c2
])))
12444 in raise SyntaxError (c2
,a3
,q2
)
12446 in parseOcc group caq3
12449 (*--------------------------------------------------------------------*)
12450 (* parse a content specification
. Cf
. 3.2/3.2.1: *)
12452 (* Validity Constraint
: Proper Group
/PE Nesting
*)
12453 (* Parameter
-entity replacement text must be properly nested
with *)
12454 (* parenthetized groups
. That is to say
, if either
of the opening
*)
12455 (* or closing parentheses
in a choice
, seq
, or Mixed construct is
*)
12456 (* contained
in the replacement text for a parameter entity
, both
*)
12457 (* must be contained
in the same replacement text
. *)
12459 (* [46] contentspec
::= 'EMPTY
' |
'ANY
' | Mixed | children
*)
12461 (* [47] children
::= (choice | seq
) ('?
' |
'*' |
'+')?
*)
12463 (* [49] choice
::= '(' S?
cp ( S?
'|
' S? cp
)* S?
')' [ VC
:Proper
*)
12464 (* [50] seq
::= '(' S?
cp ( S?
',' S? cp
)* S?
')' Group
/PE
*)
12467 (* [51] Mixed
::= '(' S?
'#PCDATA
' [ VC
: Proper Group
/PE
*)
12468 (* (S?
'|
' S? Name
)* S?
')*' Nesting
] *)
12469 (* |
'(' S?
'#PCDATA
' S?
')' [ VC
: No Duplicate
*)
12472 (* print an error
and raise SyntaxState
if no children
, Mixed
, or
*)
12473 (* name is found
. *)
12474 (* print an error
and assume ANY
if an ambiguous content model is
*)
12476 (* print an error
and assume ANY
if a name other than EMPTY or ANY
*)
12478 (* print an error
if the closing parenthesis
of a Mixed is not
in the
*)
12479 (* same entity
as the opening one
. *)
12481 (* return the parsed content specification
, togther
with the next
*)
12482 (* character
and state
. *)
12483 (*--------------------------------------------------------------------*)
12484 (* might
raise: SyntaxState
*)
12485 (*--------------------------------------------------------------------*)
12486 fun parseContentSpec dtd
curr (c
,a
,q
) =
12488 of 0wx28 (* #
"(" *) =>
12490 val (c1
,a1
,q1
) = skipPSopt
dtd (getChar(a
,q
))
12491 val lparEnt
= getEntId q
12493 if c1
=0wx23 (* #
"#" *)
12494 then parseMixed dtd
lparEnt (getChar(a1
,q1
))
12495 else let val (cm
,(c2
,a2
,q2
)) = parseGroup dtd
lparEnt (c1
,a1
,q1
)
12496 val (dfa
,a3
) = (makeDfa cm
,a2
) handle Ambiguous(a
,n1
,n2
)
12497 => if !O_COMPATIBILITY
12498 then let val err
= ERR_AMBIGUOUS(Index2Element dtd a
,n1
,n2
)
12499 val a3
= hookError(a2
,(getPos q
,err
))
12500 val dfa
= makeChoiceDfa cm
12503 else (makeAmbiguous cm
,a2
) handle DfaTooLarge max
12504 => let val a3
= if !O_DFA_WARN_TOO_LARGE
12506 (a2
,(getPos q
,WARN_DFA_TOO_LARGE(curr
,max
)))
12508 val dfa
= makeChoiceDfa cm
12511 in (CT_ELEMENT(cm
,dfa
),(c2
,a3
,q2
))
12514 | _
=> (* must be ANY or EMPTY
*)
12516 val (name
,caq1
as (c1
,a1
,q1
)) = parseName (c
,a
,q
)
12517 handle NotFound (c
,a
,q
) =>
12518 let val err
= ERR_EXPECTED(expContSpec
,[c
])
12519 in raise SyntaxError(c
,hookError(a
,(getPos q
,err
)),q
)
12522 of [0wx41
,0wx4e
,0wx59
] (* "ANY" *) => (CT_ANY
,caq1
)
12523 |
[0wx45
,0wx4d
,0wx50
,0wx54
,0wx59
] (* "EMPTY" *) => (CT_EMPTY
,caq1
)
12524 | _
=> let val a2
= hookError(a1
,(getPos q
,ERR_EXPECTED(expContSpec
,name
)))
12525 in (CT_ANY
,(c1
,a2
,q1
))
12529 (*--------------------------------------------------------------------*)
12530 (* parse an element declaration
, the initial
'<!ELEMENT
' already
*)
12531 (* read
. The unique entity id
of the
'<!' is given
as first arg
. 3.2: *)
12533 (* [45] elementdecl
::= '<!ELEMENT
' S Name
[ VC
: Unique
*)
12534 (* S contentspec S?
'>' Element Type
*)
12535 (* Declaration
] *)
12537 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
12539 (* print an error
and raise SyntaxState
if no element name
, no
*)
12540 (* content specification
, or no final
'>' is found
. *)
12541 (* print an error
if the
'>' is not
in the same entity
as the
'<!'. *)
12542 (* print an error
if white space is missing
. *)
12544 (* print an error for entity
end exceptions
in subfunctions
. *)
12545 (* catch syntax errors by recovering to the next possible state
. *)
12547 (* enter the declared element into the notation table
. *)
12548 (* return the remaining char
and state
. *)
12549 (*--------------------------------------------------------------------*)
12550 (* might
raise: none
*)
12551 (*--------------------------------------------------------------------*)
12552 fun parseElementDecl
dtd (startEnt
,startPos
,ext
) caq
=
12554 val (caq1
as (_
,_
,q1
))= skipPS dtd caq
12555 val (name
,(c2
,a2
,q2
)) = parseName caq1
12556 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAnElemName
,[c
])
12557 in raise SyntaxError(c
,hookError(a
,(getPos q
,err
)),q
)
12559 val a3
= checkElemName (a2
,q1
) name
12560 val idx
= Element2Index dtd name
12561 val caq3
= skipPS
dtd (c2
,a3
,q2
)
12563 val (contSpec
,(c4
,a4
,q4
)) = parseContentSpec dtd name caq3
12565 val a5
= if useParamEnts() orelse not ext
then addElement
dtd (a4
,q1
) (idx
,contSpec
,ext
)
12567 val a5
' = hookDecl(a5
,((startPos
,getPos q4
),DEC_ELEMENT(idx
,contSpec
,ext
)))
12569 val (c6
,a6
,q6
) = skipPSopt
dtd (c4
,a5
',q4
)
12571 if c6
<>0wx3E (* #
">" *)
12572 then let val a7
= hookError(a6
,(getPos q6
,ERR_EXPECTED(expGt
,[c6
])))
12573 in raise SyntaxError(c6
,a7
,q6
)
12575 else let val a7
= if not (!O_VALIDATE
) orelse getEntId q6
=startEnt
then a6
12576 else hookError(a6
,(getPos q6
,ERR_DECL_ENT_NESTING LOC_ELEM_DECL
))
12580 handle exn
as SyntaxError (c
,a
,q
) =>
12581 let val a1
= if c
=0wx00
then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_ELEM_DECL
))
12583 in recoverDecl
false (c
,a1
,q
)
12586 (*--------------------------------------------------------------------*)
12587 (* parse an enumerated attribute
type, the
'(' already consumed
. the
*)
12588 (* 1st arg is a
string describing the
attribute (nmtoken or notation
),*)
12589 (* the
2nd arg is a function that parses a single token
, the
3rd arg
*)
12590 (* a function for converting the token to its index
. 3.3.1: *)
12592 (* [58] NotationType
::= 'NOTATION
' S
*)
12593 (* '(' S?
Name (S?
'|
' S? Name
)* S?
')' *)
12594 (* [59] Enumeration
::= '(' S?
Nmtoken (S?
'|
' S? Nmtoken
)* S?
')' *)
12596 (* print an error
and raise SyntaxState
if no token is found after a
*)
12597 (* '(' or
'|
', or
if neither
'|
' nor
')' follows a token
. *)
12599 (* return
the (sorted
) list
of indices
of the parsed tokens
. *)
12600 (*--------------------------------------------------------------------*)
12601 (* might
raise: SyntaxState
*)
12602 (*--------------------------------------------------------------------*)
12603 fun parseEnumerated
dtd (expWhat
,parseToken
,Token2Index
) caq
=
12604 let fun doit idxs caq
=
12606 val caq1
as (_
,_
,q1
) = skipPSopt dtd caq
12607 val (nt
,(c2
,a2
,q2
)) = parseToken caq1
12608 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expWhat
,[c
])
12609 in raise SyntaxError(c
,hookError(a
,(getPos q
,err
)),q
)
12611 val (idx
,a3
) = Token2Index
dtd (a2
,q1
) nt
12612 val (c4
,a4
,q4
) = skipPSopt
dtd (c2
,a3
,q2
)
12613 val newIdxs
= insertInt(idx
,idxs
)
12615 of 0wx7C (* #
"|" *) => doit
newIdxs (getChar(a4
,q4
))
12616 |
0wx29 (* #
")" *) => (newIdxs
,getChar(a4
,q4
))
12617 | _
=> let val a5
= hookError(a4
,(getPos q4
,ERR_EXPECTED(expBarRpar
,[c4
])))
12618 in raise SyntaxError (c4
,a5
,q4
)
12624 (*--------------------------------------------------------------------*)
12625 (* Convert
a (name
) token to its index
as an enumerated attribute
. *)
12628 (* Validity Constraint
: Notation Attributes
*)
12629 (* ... all notation names
in the declaration must be declared
. *)
12631 (* print an error
if a notation is not declared
. *)
12632 (*--------------------------------------------------------------------*)
12633 (* might
raise: SyntaxState
*)
12634 (*--------------------------------------------------------------------*)
12635 fun Token2NmtokenIndex
dtd (a
,_
) token
= (AttNot2Index dtd token
,a
)
12636 fun Token2NotationIndex
dtd (a
,q
) token
=
12638 val idx
= AttNot2Index dtd token
12639 val a1
= if not (!O_VALIDATE
) orelse hasNotation dtd idx
then a
12640 else hookError(a
,(getPos q
,ERR_UNDECLARED(IT_NOTATION
,token
,LOC_NONE
)))
12644 (*--------------------------------------------------------------------*)
12645 (* parse an attribute
type, the
1st arg being the element this decl
. *)
12646 (* refers to
. 3.3.1: *)
12648 (* [54] AttType
::= StringType | TokenizedType | EnumeratedType
*)
12650 (* [55] StringType
::= 'CDATA
' *)
12651 (* [56] TokenizedType
::= 'ID
' [VC
: One ID per Element Type
] *)
12659 (* Validity Constraint
: One ID per Element Type
*)
12660 (* No element
type may have more than one ID attribute specified
. *)
12662 (* Enumerated Attribute Types
*)
12664 (* [57] EnumeratedType
::= NotationType | Enumeration
*)
12665 (* [58] NotationType
::= 'NOTATION
' S
'(' ... *)
12666 (* [59] Enumeration
::= '(' ... *)
12668 (* print an error
and raise SyntaxState
if no
'(', or name is found
*)
12669 (* in the first place
, or the name does not start an attribute
type, *)
12670 (* or
if no
'(' follows a
'NOTATION
'. *)
12671 (* print an error
and assume NMTOKEN instead
of ID
if the element
*)
12672 (* already has an ID attribute
. *)
12674 (* return the attribute
type together
with the next char
and state
. *)
12675 (*--------------------------------------------------------------------*)
12676 (* might
raise: SyntaxState
*)
12677 (*--------------------------------------------------------------------*)
12678 fun parseAttType dtd
elem (c
,a
,q
) =
12679 if c
=0wx28 (* #
"(" *) then
12680 let val (idxs
,caq1
) = parseEnumerated dtd
12681 (expANameToken
,parseNmtoken
,Token2NmtokenIndex
) (getChar(a
,q
))
12682 in (AT_GROUP idxs
,caq1
)
12684 else let val (name
,caq1
as (c1
,a1
,q1
)) = parseName (c
,a
,q
)
12685 handle NotFound cq
=> let val err
= ERR_EXPECTED(expAttType
,[c
])
12686 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
12689 of [0wx43
,0wx44
,0wx41
,0wx54
,0wx41
] (* "CDATA" *) =>
12691 |
[0wx49
,0wx44
] (* "ID" *) =>
12693 |
[0wx49
,0wx44
,0wx52
,0wx45
,0wx46
] (* "IDREF" *) =>
12695 |
[0wx49
,0wx44
,0wx52
,0wx45
,0wx46
,0wx53
] (* "IDREFS" *) =>
12697 |
[0wx45
,0wx4e
,0wx54
,0wx49
,0wx54
,0wx59
] (* "ENTITY" *) =>
12699 |
[0wx45
,0wx4e
,0wx54
,0wx49
,0wx54
,0wx49
,0wx45
,0wx53
] (* "ENTITIES" *) =>
12701 |
[0wx4e
,0wx4d
,0wx54
,0wx4f
,0wx4b
,0wx45
,0wx4e
] (* "NMTOKEN" *) =>
12703 |
[0wx4e
,0wx4d
,0wx54
,0wx4f
,0wx4b
,0wx45
,0wx4e
,0wx53
] (* "NMTOKEN" *) =>
12705 |
[0wx4e
,0wx4f
,0wx54
,0wx41
,0wx54
,0wx49
,0wx4f
,0wx4e
] (* "NOTATION" *) =>
12706 let val (c2
,a2
,q2
) = skipPSopt dtd caq1
12708 of 0wx28 (* #
"(" *) =>
12709 let val (idxs
,caq3
) = parseEnumerated dtd
12710 (expANotName
,parseName
,Token2NotationIndex
) (getChar(a2
,q2
))
12711 in (AT_NOTATION idxs
,caq3
)
12713 | _
=> let val err
= ERR_EXPECTED(expLpar
,[c2
])
12714 in raise SyntaxError(c2
,hookError(a2
,(getPos q2
,err
)),q2
)
12717 | _
=> let val a2
= hookError(a1
,(getPos q
,ERR_EXPECTED(expAttType
,name
)))
12718 in raise SyntaxError (c1
,a2
,q1
)
12722 (*--------------------------------------------------------------------*)
12723 (* parse an attribute default
, for an attribute whose
type is given
*)
12724 (* the
1st argument
. Cf
. 3.3.2: *)
12726 (* [60] DefaultDecl
::= '#REQUIRED
' |
'#IMPLIED
' *)
12727 (* |
(('#FIXED
' S
)? AttValue
) *)
12729 (* Validity Constraint
: Attribute Default Legal
*)
12730 (* The declared default value must meet the lexical constraints
of *)
12731 (* the declared attribute
type. *)
12735 (* Validity Constraint
: ID Attribute Default
*)
12736 (* An ID attribute must have a declared default
of #IMPLIED or
*)
12739 (* print an error
and raise SyntaxState
if no
'#
' or literal is found
*)
12740 (* in the first place
, or no name or a wrong name is found after the
*)
12741 (* '#
', or
if no literal follows the
'FIXED
'. *)
12742 (* print an error
if white space is missing
. *)
12743 (* print an error
and assume IMPLIED
if the default for an ID attrib
. *)
12744 (* is not IMPLIED or REQUIRED
. *)
12746 (* return the default together
with the remaining char
and state
. *)
12747 (*--------------------------------------------------------------------*)
12748 (* might
raise: SyntaxState
*)
12749 (*--------------------------------------------------------------------*)
12750 fun parseDefaultDecl
dtd (aidx
,attType
) (c
,a
,q
) =
12751 if c
=0wx23 (* #
"#" *) then
12753 val caq0
as (_
,_
,q0
) = (getChar(a
,q
))
12754 val (name
,caq1
) = parseName caq0
12755 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAttDefKey
,[c
])
12756 in raise SyntaxError(c
,hookError(a
,(getPos q
,err
)),q
)
12759 of [0wx46
,0wx49
,0wx58
,0wx45
,0wx44
] (* "FIXED" *) =>
12761 val caq2
as (_
,_
,q2
) = skipPS dtd caq1
12762 val (lit
,text
,(c3
,a3
,q3
)) = parseAttValue dtd caq2
12763 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expLitQuote
,[c
])
12764 val a1
= hookError(a
,(getPos q
,err
))
12765 in raise SyntaxError (c
,a1
,q
)
12768 if !O_VALIDATE
andalso isIdType attType
12769 then let val a4
= hookError(a3
,(getPos q
,ERR_ID_DEFAULT
))
12770 in (AD_IMPLIED
,(c3
,a4
,q3
))
12773 let val (cv
,(av
,a4
)) = makeAttValue
dtd (a3
,q2
)
12774 (aidx
,attType
,false,true,text
)
12775 in (AD_FIXED((lit
,cv
,av
),(getPos q2
,ref
false)),(c3
,a4
,q3
))
12777 handle AttValue a
=> (AD_IMPLIED
,(c3
,a
,q3
))
12780 |
[0wx49
,0wx4d
,0wx50
,0wx4c
,0wx49
,0wx45
,0wx44
] (* "IMPLIED" *) =>
12782 |
[0wx52
,0wx45
,0wx51
,0wx55
,0wx49
,0wx52
,0wx45
,0wx44
] (* "REQUIRED" *) =>
12784 | _
=> let val (c1
,a1
,q1
) = caq1
12785 val a2
= hookError(a1
,(getPos q0
,ERR_EXPECTED(expAttDefKey
,name
)))
12786 in raise SyntaxError (c1
,a2
,q1
)
12790 val (lit
,text
,(c1
,a1
,q1
)) = parseAttValue
dtd (c
,a
,q
)
12791 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expQuoteRni
,[c
])
12792 val a1
= hookError(a
,(getPos q
,err
))
12793 in raise SyntaxError(c
,a1
,q
)
12796 if !O_VALIDATE
andalso isIdType attType
12797 then let val a2
= hookError(a1
,(getPos q
,ERR_ID_DEFAULT
))
12798 in (AD_IMPLIED
,(c1
,a2
,q1
))
12800 else let val (cv
,(av
,a2
)) = makeAttValue
dtd (a1
,q
) (aidx
,attType
,false,true,text
)
12801 in (AD_DEFAULT((lit
,cv
,av
),(getPos q
,ref
false)),(c1
,a2
,q1
))
12803 handle AttValue a
=> (AD_IMPLIED
,(c1
,a
,q1
))
12806 (*--------------------------------------------------------------------*)
12807 (* parse an attribute definition
, the referred element given
as 1st
*)
12808 (* argument
. 3.3: *)
12810 (* [53] AttDef
::= S Name S AttType S DefaultDecl
*)
12812 (* raise NotFound
if no name is
found (and thus no attribute def
.) *)
12813 (* print an error
if white space is missing
. *)
12815 (* enter the attribute definition into the element table
. *)
12816 (* return the next character
and the remaining state
. *)
12817 (*--------------------------------------------------------------------*)
12818 (* might
raise: NotFound SyntaxState
*)
12819 (*--------------------------------------------------------------------*)
12820 fun parseAttDef
dtd (elem
,ext
) caq
=
12822 val (hadS
,caq1
as (_
,_
,q1
)) = skipPSmay dtd caq
12824 val (name
,(c2
,a2
,q2
)) = parseName
caq1 (* NotFound falls through to the next level
*)
12825 val a3
= if hadS
then a2
else hookError(a2
,(getPos q1
,ERR_MISSING_WHITE
))
12826 val a4
= checkAttName (a3
,q1
) name
12827 val idx
= AttNot2Index dtd name
12829 val caq5
= skipPS
dtd (c2
,a4
,q2
)
12830 val (attType
,caq6
) = parseAttType dtd elem caq5
12831 val caq7
= skipPS dtd caq6
12833 val (attDef
,(c8
,a8
,q8
)) = parseDefaultDecl
dtd (idx
,attType
) caq7
12835 val a9
= if useParamEnts() orelse not ext
12836 then addAttribute
dtd (a8
,q1
) (elem
,(idx
,attType
,attDef
,ext
)) else a8
12838 ((idx
,attType
,attDef
),(c8
,a9
,q8
))
12841 (*--------------------------------------------------------------------*)
12842 (* parse an attribute
-list declaration
, the initial
'<!ATTLIST
' *)
12843 (* already read
. The unique entity id
of the
'<!' is given
as first
*)
12844 (* arg
. Cf
. 3.3: *)
12846 (* [52] AttlistDecl
::= '<!ATTLIST
' S Name AttDef
* S?
'>' *)
12848 (* (see also the comments for ParseDtd
.parseMarkupDecl
). *)
12850 (* check whether the element already had an attlist declaration
. (cf
. *)
12851 (* DtdElements
.enterAttDecl
) *)
12853 (* print an error
and raise SyntaxState
if no element name
, or no
*)
12854 (* final
'>' is found
. *)
12855 (* print an error
if the
'>' is not
in the same entity
as the
'<!'. *)
12856 (* print an error
if white space is missing
. *)
12858 (* print an error for entity
end exceptions
in subfunctions
. *)
12859 (* catch syntax errors by recovering to the next possible state
. *)
12861 (* enter the declared attributes into the element table
. *)
12862 (* return the remaining char
and state
. *)
12863 (*--------------------------------------------------------------------*)
12864 (* might
raise: none
*)
12865 (*--------------------------------------------------------------------*)
12866 fun parseAttListDecl
dtd (startEnt
,startPos
,ext
) caq
=
12868 val caq1
as (_
,_
,q1
) = skipPS dtd caq
12869 val (name
,(c2
,a2
,q2
)) = parseName caq1
12870 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAnElemName
,[c
])
12871 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
12873 val a3
= checkElemName (a2
,q1
) name
12874 val idx
= Element2Index dtd name
12876 val a4
= if !O_VALIDATE
orelse not ext
then enterAttList
dtd (a3
,q1
) idx
else a3
12878 fun doit attDefs caq
=
12879 let val (attDef
,caq1
) = parseAttDef
dtd (idx
,ext
) caq
12880 handle NotFound (c
,a
,q
) => raise NotFound
12881 (c
,hookDecl(a
,((startPos
,getPos q
),DEC_ATTLIST(idx
,rev attDefs
,ext
))),q
)
12882 |
SyntaxError (c
,a
,q
) => raise SyntaxError
12883 (c
,hookDecl(a
,((startPos
,getPos q
),DEC_ATTLIST(idx
,rev attDefs
,ext
))),q
)
12884 in doit (attDef
::attDefs
) caq1
12887 val (c5
,a5
,q5
) = doit
nil (c2
,a4
,q2
) handle NotFound caq
=> caq
12889 if c5
<> 0wx3E (* #
">" *)
12890 then let val a6
= hookError(a5
,(getPos q5
,ERR_EXPECTED(expAttNameGt
,[c5
])))
12891 in raise SyntaxError (c5
,a6
,q5
)
12893 else let val a6
= if not (!O_VALIDATE
) orelse getEntId q5
=startEnt
then a5
12894 else hookError(a5
,(getPos q5
,ERR_DECL_ENT_NESTING LOC_ATT_DECL
))
12898 handle exn
as SyntaxError (c
,a
,q
) =>
12899 let val a1
= if c
=0wx00
then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_ATT_DECL
))
12901 in recoverDecl
false (c
,a
,q
)
12904 (* stop
of ../../Parser
/Parse
/parseDecl
.sml
*)
12905 (* start
of ../../Parser
/Parse
/parseDtd
.sml
*)
12906 signature ParseDtd
=
12908 (*----------------------------------------------------------------------
12911 val parseName
: UniChar
.Char * AppData
* State
12912 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
12914 val openExtern
: int * Uri
.Uri
-> AppData
* State
12915 -> Encoding
.Encoding
* HookData
.TextDecl option
* (UniChar
.Char * AppData
* State
)
12916 val openDocument
: Uri
.Uri option
-> AppData
12917 -> Encoding
.Encoding
* HookData
.XmlDecl option
* (UniChar
.Char * AppData
* State
)
12919 val skipCharRef
: AppData
* State
-> (UniChar
.Char * AppData
* State
)
12920 val skipReference
: UniChar
.Char * AppData
* State
-> (UniChar
.Char * AppData
* State
)
12921 val parseGenRef
: Dtd
-> UniChar
.Char * AppData
* State
12922 -> (int * Base
.GenEntity
) * (AppData
* State
)
12923 val parseCharRefLit
: UniChar
.Data
-> AppData
* State
12924 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
12926 val parseComment
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
12927 val parseProcInstr
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
12929 val skipTag
: Errors
.Location
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
12930 val parseETag
: Dtd
-> AppData
* State
12931 -> int * UniChar
.Data
* Errors
.Position
* (UniChar
.Char * AppData
* State
)
12932 val parseSTag
: Dtd
-> Errors
.Position
-> UniChar
.Char * AppData
* State
12933 -> (HookData
.StartTagInfo
* Base
.ElemInfo
) * (UniChar
.Char * AppData
* State
)
12935 val skipDecl
: bool -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
12936 ----------------------------------------------------------------------*)
12939 val parseDocTypeDecl
: Dtd
-> (UniChar
.Char * AppData
* State
)
12940 -> int option
* (UniChar
.Char * AppData
* State
)
12943 (*--------------------------------------------------------------------------*)
12944 (* Structure
: ParseDtd
*)
12946 (* Exceptions raised by functions
in this
structure: *)
12947 (* parseDocTypeDecl
: none
*)
12948 (*--------------------------------------------------------------------------*)
12949 functor ParseDtd (structure ParseBase
: ParseBase
)
12952 structure ParseDecl
= ParseDecl (structure ParseBase
= ParseBase
)
12955 Base UniChar Errors
12958 (*--------------------------------------------------------------------*)
12959 (* parse a markup declaration other than a processing instruction
, *)
12960 (* "<!" already consumed
. The unique entity id
of the initial
'<!' is
*)
12961 (* given
as first arg
. Cf
. 2.8: *)
12963 (* [29] markupdecl
::= elementdecl | AttlistDecl | EntityDecl
*)
12964 (* | NotationDecl | PI | Comment
*)
12966 (* Validity Constraint
: Proper Declaration
/PE Nesting
*)
12967 (* Parameter
-entity replacement text must be properly nested
with *)
12968 (* markup declarations
. That is to say
, if either the first
*)
12969 (* character or the last character
of a markup declaration
*)
12970 (* (markupdecl above
) is contained
in the replacement text for a
*)
12971 (* parameter
-entity reference
, both must be contained
in the same
*)
12972 (* replacement text
. *)
12974 (* and 3.2,3.3,4.2,4.7: *)
12976 (* [45] elementdecl
::= '<!ELEMENT
' ... *)
12977 (* [52] AttlistDecl
::= '<!ATTLIST
' ... *)
12978 (* [70] EntityDecl
::= GEDecl | PEDecl
*)
12979 (* [71] GEDecl
::= '<!ENTITY
' ... *)
12980 (* [72] PEDecl
::= '<!ENTITY
' ... *)
12981 (* [82] NotationDecl
::= '<!NOTATION
' ... *)
12983 (* print an error an recover
if something other than
"--", "ELEMENT", *)
12984 (* "ENTITY", "ATTLIST", or
"NOTATION" is found
. *)
12986 (* return the remaining character
and state
. *)
12987 (*--------------------------------------------------------------------*)
12988 (* might
raise: none
*)
12989 (*--------------------------------------------------------------------*)
12990 fun parseMarkupDecl
dtd (startEnt
,startPos
) (c
,a
,q
) =
12992 of 0wx2D
=> (* #
"-" *)
12993 let val (c1
,a1
,q1
) = getChar (a
,q
)
12994 in if c1
<>0wx2D (* #
"-" *)
12995 then let val a2
= hookError(a1
,(getPos q1
,ERR_EXPECTED(expDash
,[c1
])))
12996 in recoverDecl
false (c1
,a2
,q1
)
12998 else parseComment
startPos (a1
,q1
)
13001 val (name
,caq1
) = parseName (c
,a
,q
)
13002 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expStartMarkup
,[c
])
13003 val a1
= hookError(a
,(getPos q
,err
))
13004 in raise SyntaxError (c
,a1
,q
)
13006 val ext
= hasExternal dtd
13008 of [0wx45
,0wx4c
,0wx45
,0wx4d
,0wx45
,0wx4e
,0wx54
] (* "ELEMENT" *) =>
13009 parseElementDecl
dtd (startEnt
,startPos
,ext
) caq1
13010 |
[0wx41
,0wx54
,0wx54
,0wx4c
,0wx49
,0wx53
,0wx54
] (* "ATTLIST" *) =>
13011 parseAttListDecl
dtd (startEnt
,startPos
,ext
) caq1
13012 |
[0wx4e
,0wx4f
,0wx54
,0wx41
,0wx54
,0wx49
,0wx4f
,0wx4e
] (* "NOTATION" *) =>
13013 parseNotationDecl
dtd (startEnt
,startPos
,ext
) caq1
13014 |
[0wx45
,0wx4e
,0wx54
,0wx49
,0wx54
,0wx59
] (* "ENTITY" *) =>
13015 parseEntityDecl
dtd (startEnt
,startPos
,ext
) caq1
13016 | _
=> let val (c1
,a1
,q1
) = caq1
13017 val err
= ERR_EXPECTED(expStartMarkup
,name
)
13018 val a2
= hookError(a1
,(getPos q
,err
))
13019 in recoverDecl
false (c1
,a2
,q1
)
13023 (*--------------------------------------------------------------------*)
13024 (* skip an ignored section
, starting after the
'<![IGNORE
[', consume
*)
13025 (* the finishing
"]]>". 3.4: *)
13027 (* [63] ignoreSect
::= '<![' S?
'IGNORE
' S?
'[' *)
13028 (* ignoreSectContents
* ']]>' *)
13029 (* [64] ignoreSectContents
::= Ignore ('<![' ignoreSectContents
*)
13030 (* ']]>' Ignore
)* *)
13031 (* [65] Ignore
::= Char* - (Char* ('<!['|
']]>') Char* ) *)
13033 (* ... If the keyword
of the conditional section is IGNORE
, then *)
13034 (* the contents
of the conditional section are not logically part
*)
13035 (* of the DTD
. Note that for reliable parsing
, the contents
of even
*)
13036 (* ignored conditional sections must be read
in order to detect
*)
13037 (* nested conditional sections
and ensure that the
end of the
*)
13038 (* outermost (ignored
) conditional section is properly detected
. *)
13039 (* If a conditional section
with a keyword
of INCLUDE occurs within
*)
13040 (* a larger conditional section
with a keyword
of IGNORE
, both the
*)
13041 (* outer
and the inner conditional sections are ignored
. *)
13043 (* print an error an finish
if an entity
end is encountered
. *)
13045 (* return the next char
and state
. *)
13046 (*--------------------------------------------------------------------*)
13047 (* might
raise: none
*)
13048 (*--------------------------------------------------------------------*)
13049 fun skipIgnored caq
=
13051 (*--------------------------------------------------------------*)
13052 (* level counts the nesting
of conditional sections
. *)
13053 (* if the second char after a
"<" ("]") is not a
"[" ("]"), it
*)
13054 (* can nevertheless start another delimiter
and is therefore
*)
13055 (* fed into a recursive call
of doit
. *)
13056 (*--------------------------------------------------------------*)
13057 fun doit
level (c
,a
,q
) =
13059 of 0wx00
=> (c
,hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_IGNORED
)),q
)
13060 |
0wx3C (* #
"<" *) =>
13061 let val (c1
,a1
,q1
) = getChar (a
,q
)
13062 in if c1
=0wx21 (* #
"!" *)
13063 then let val (c2
,a2
,q2
) = (getChar(a1
,q1
))
13064 in if c2
=0wx5B (* #
"[" *) then doit (level
+1) (getChar(a2
,q2
))
13065 else doit
level (c2
,a2
,q2
)
13067 else doit
level (c1
,a1
,q1
)
13069 |
0wx5D (* #
"]" *) =>
13070 let val (c1
,a1
,q1
) = getChar (a
,q
)
13071 in if c1
=0wx5D (* #
"]" *) then doit
' level (getChar (a1
,q1
))
13072 else doit
level (c1
,a1
,q1
)
13074 | _
=> doit
level (getChar (a
,q
))
13075 (*--------------------------------------------------------------*)
13076 (* if the second
"]" is followed by a
"]", then this might be
*)
13077 (* the
real second
"]". Therefore doit
' loops
as long
as it
*)
13079 (*--------------------------------------------------------------*)
13080 and doit
' level (c
,a
,q
) =
13082 of 0wx3E (* #
">" *) => if level
>0 then doit (level
-1) (getChar (a
,q
))
13084 |
0wx5D (* #
"]" *) => doit
' level (getChar (a
,q
))
13085 | _
=> doit
level (c
,a
,q
)
13090 (*--------------------------------------------------------------------*)
13091 (* parse the internal or external subset
of the dtd
. handle included
*)
13092 (* sections by counting their nesting level
. Cf
2.8: *)
13094 (* Validity Constraint
: Proper Declaration
/PE Nesting
*)
13095 (* Parameter
-entity replacement text must be properly nested
with *)
13096 (* markup declarations
. That is to say
, if either the first
*)
13097 (* character or the last character
of a markup declaration
*)
13098 (* (markupdecl above
) is contained
in the replacement text for a
*)
13099 (* parameter
-entity reference
, both must be contained
in the same
*)
13100 (* replacement text
. *)
13102 (* [28] doctypedecl
::= '<!DOCTYPE
'[Image
] S
Name (S ExternalID
)?
*)
13103 (* S?
('[' (markupdecl | PEReference | S
)* ']' S?
)?
'>' *)
13104 (* [29] markupdecl
::= elementdecl | AttlistDecl | EntityDecl
*)
13105 (* | NotationDecl | PI | Comment
*)
13106 (* [30] extSubset
::= TextDecl? extSubsetDecl
*)
13107 (* [31] extSubsetDecl
::= ( markupdecl | conditionalSect
*)
13108 (* | PEReference | S
)* *)
13111 (* [61] conditionalSect
::= includeSect | ignoreSect
*)
13112 (* [62] includeSect
::= '<![' S?
'INCLUDE
' S?
*)
13113 (* '[' extSubsetDecl
']]>' *)
13114 (* [63] ignoreSect
::= '<![' S?
'IGNORE
' S?
*)
13115 (* '[' ignoreSectContents
* ']]>' *)
13117 (* print an error
and finish
if the
end of document is encountered
in *)
13118 (* the internal subset
. *)
13119 (* print an error
and raise SyntaxState
if a
"<" is not followed by a
*)
13120 (* "!" or a
"?". *)
13121 (* print an error
and raise SyntaxState
if a
"]" is not followed by
*)
13123 (* print an error
if a
"<![" is found
in the internal subset
. *)
13124 (* print an error
if a
"]" is found outside the internal subset
. *)
13125 (* print an error
if a
"]]>" is found outside an included section
. *)
13126 (* print an error an
raise SyntaxState
if something other than a
*)
13127 (* markup declaration
, parameter entity reference
, white space or
*)
13128 (* a conditional section is encountered
. *)
13129 (* print an error
and raise SyntaxState
if a
"<![" is not followed by
*)
13130 (* "INCLUDE" or
"IGNORE", or
if the second
"[" is missing
. *)
13132 (* catch entity
end exceptions
in subfunctions by printing an error
*)
13133 (* and recovering
. *)
13135 (* return the remaining state
and char
. *)
13136 (*--------------------------------------------------------------------*)
13137 (* might
raise: none
*)
13138 (*--------------------------------------------------------------------*)
13139 fun parseSubset dtd caq
=
13141 datatype CondStatus
= IGNORE | INCLUDE
13144 let fun doit hadError
ws (c
,a
,q
) =
13146 of 0wx00
=> (ws
,(c
,a
,q
))
13147 |
0wx09
=> doit
false (c
::ws
) (getChar(a
,q
))
13148 |
0wx0A
=> doit
false (c
::ws
) (getChar(a
,q
))
13149 |
0wx20
=> doit
false (c
::ws
) (getChar(a
,q
))
13150 |
0wx25
=> (ws
,(c
,a
,q
))
13151 |
0wx3C
=> (ws
,(c
,a
,q
))
13152 |
0wx5D
=> (ws
,(c
,a
,q
))
13153 | _
=> if hadError
then doit
true ws (getChar(a
,q
))
13154 else let val err
= ERR_FORBIDDEN_HERE(IT_DATA nil
,LOC_SUBSET
)
13155 val a1
= hookError (a
,(getPos q
,err
))
13156 in doit
true ws (getChar(a1
,q
))
13159 val (ws
,(c1
,a1
,q1
)) = doit
false nil caq
13160 val a2
= if null ws
then a1
13161 else hookWhite(a1
,Data2Vector (rev ws
))
13165 fun doit
cond (c
,a
,q
) =
13169 (*---------------------------------------------------*)
13170 (* the external subset ends at
and of special entity
.*)
13171 (* so does the internal subset
, but
with error
. *)
13172 (*---------------------------------------------------*)
13176 then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_INT_SUBSET
))
13177 else if cond
=0 then a
13178 else hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_INCLUDED
))
13181 else let val a1
= hookEntEnd (a
,getPos q
)
13182 in doit
cond (getChar(a1
,q
))
13185 (* ignore errors
in parameter references
-----------------*)
13186 |
0wx25 (* #
"%" *) =>
13189 let val ((id
,ent
),(a1
,q1
)) = parseParRef
dtd (getChar(a
,q
))
13190 in if !O_VALIDATE
orelse !O_INCLUDE_PARAM_ENTS
then
13192 of PE_NULL
=> getChar(a1
,q1
)
13193 |
PE_INTERN(_
,rep
) =>
13195 val q2
= pushIntern(q1
,id
,true,rep
)
13196 val a2
= hookParRef(a1
,((getPos q
,getPos q1
),id
,ent
,true))
13199 | PE_EXTERN extId
=>
13201 val a2
= hookParRef(a1
,((getPos q
,getPos q1
),id
,ent
,true))
13203 #
3(openExtern (id
,true,resolveExtId extId
) (a2
,q1
))
13204 handle CantOpenFile(fmsg
,a
)
13205 => let val err
= ERR_NO_SUCH_FILE fmsg
13206 val a1
= hookError(a
,(getPos q1
,err
))
13207 val a2
= hookEntEnd (a1
,getPos q1
)
13208 in (getChar(a2
,q1
))
13212 (* changed
080600: setExternal is already called by parseParRef
*)
13213 else let val a2
= hookParRef(a1
,((getPos q
,getPos q1
),id
,ent
,false))
13217 handle SyntaxError caq
=> caq
13218 | NoSuchEntity aq
=> getChar aq
13222 |
0wx3C (* #
"<" *) =>
13223 let val (c1
,a1
,q1
) = getChar(a
,q
)
13225 of 0wx3F
=> (* #
"?" *)
13226 let val caq2
= parseProcInstr (getPos q
) (a1
,q1
)
13229 |
0wx21
=> (* #
"!" *)
13230 let val (c2
,a2
,q2
) = (getChar(a1
,q1
))
13231 in if c2
=0wx5B (* #
"[" *)
13232 then do_cond cond
q (a2
,q2
)
13234 let val caq3
= parseMarkupDecl dtd
13235 (getEntId q
,getPos q
) (c2
,a2
,q2
)
13239 | _
=> let val err
= ERR_EXPECTED(expExclQuest
,[c1
])
13240 val a2
= hookError(a1
,(getPos q1
,err
))
13241 val caq3
= recoverDecl
false (c1
,a2
,q1
)
13246 |
0wx5D (* #
"]" *) => do_brack cond
q (getChar(a
,q
))
13247 | _
=> let val caq1
= do_data (c
,a
,q
)
13251 and do_brack cond
q0 (c
,a
,q
) =
13252 if inDocEntity q
then (c
,a
,q
)
13253 else if c
=0wx5D (* #
"]" *)
13254 then let val (c1
,a1
,q1
) = getChar(a
,q
)
13255 in if c1
=0wx3E (* #
">" *)
13256 (* ignore wrong
"]]>"'s
------------------*)
13258 then let val err
= ERR_FORBIDDEN_HERE(IT_DATA
[c
,c
,c1
],
13260 val a2
= hookError(a1
,(getPos q0
,err
))
13261 in doit
cond (getChar(a2
,q1
))
13263 else doit (cond
-1) (getChar(a1
,q1
))
13264 (* the second
"]" may start another
"]]>" ---*)
13265 else let val a2
= hookError(a1
,(getPos q1
,ERR_EXPECTED(expGt
,[c1
])))
13266 in do_brack cond
q (c1
,a2
,q1
)
13269 else let val a1
= hookError(a
,(getPos q
,ERR_EXPECTED(expRbrack
,[c
])))
13270 in doit
cond (c
,a1
,q
)
13273 and do_cond cond
q0 (a
,q
) =
13275 (* marked sections are forbidden
in the internal subset
. -*)
13276 val inInt
= inDocEntity q
13277 val a1
= if inInt
then hookError (a
,(getPos q0
,ERR_FORBIDDEN_HERE
13278 (IT_COND
,LOC_INT_SUBSET
)))
13281 val caq2
as (_
,_
,q2
) = skipPSopt
dtd (getChar(a1
,q
))
13283 val (status
,caq3
) =
13285 val (name
,(c3
,a3
,q3
)) = parseName caq2
13286 (* ignore sections
with bad status keyword
---------*)
13289 of [0wx49
,0wx47
,0wx4e
,0wx4f
,0wx52
,0wx45
] => (IGNORE
,a3
)
13290 |
[0wx49
,0wx4e
,0wx43
,0wx4c
,0wx55
,0wx44
,0wx45
] => (INCLUDE
,a3
)
13291 | _
=> let val err
= ERR_EXPECTED(expCondStatus
,name
)
13292 val a4
= hookError(a3
,(getPos q2
,err
))
13295 val (c5
,a5
,q5
) = skipPSopt
dtd (c3
,a4
,q3
)
13296 in (* ignore sections without
"[" after keyword
-------*)
13297 if c5
=0wx5B
then (status
,getChar(a5
,q5
))
13298 else let val a6
= hookError(a5
,(getPos q5
,ERR_EXPECTED(expLbrack
,[c5
])))
13299 in (IGNORE
,(c5
,a6
,q5
))
13302 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expCondStatus
,[c
])
13303 val a1
= hookError(a
,(getPos q
,err
))
13304 in (IGNORE
,(c
,a1
,q
))
13307 (* ignore sections
in the internal subset
----------------*)
13308 case (status
,inInt
)
13309 of (INCLUDE
,_
) => doit (cond
+1) caq3
13310 |
(_
,_
) => doit
cond (skipIgnored caq3
)
13316 (*--------------------------------------------------------------------*)
13317 (* parse the internal subset
of the dtd
. Cf
2.8: *)
13319 (* return the remaining character
and state
. *)
13320 (*--------------------------------------------------------------------*)
13321 (* might
raise: none
*)
13322 (*--------------------------------------------------------------------*)
13323 fun parseInternalSubset
dtd (a
,q
) =
13324 let val a1
= hookSubset (a
,getPos q
)
13325 in parseSubset
dtd (getChar(a1
,q
))
13328 (*--------------------------------------------------------------------*)
13329 (* parse the external subset
of the dtd
, the filename given
as first
*)
13330 (* argument
. handle included sections by counting their nesting level
.*)
13331 (* the file is opened on its own stack
, and closed at the
end. *)
13334 (* print an error
and do nothing
if the file cannot be opened
. *)
13336 (* return nothing
. *)
13337 (*--------------------------------------------------------------------*)
13338 (* might
raise: none
*)
13339 (*--------------------------------------------------------------------*)
13340 fun parseExternalSubset
dtd (a
,q
) extId
=
13342 val uri
= resolveExtId extId
13343 val (enc
,textDecl
,(c1
,a1
,q1
)) = openSubset uri a
13344 val a2
= hookExtSubset (a1
,(uri
,enc
,textDecl
))
13345 val (_
,a3
,q3
) = parseSubset
dtd (c1
,a2
,q1
)
13346 val _
= closeAll q3
13349 handle CantOpenFile(fmsg
,a
) => hookError(a
,(getPos q
,ERR_NO_SUCH_FILE fmsg
))
13351 (*--------------------------------------------------------------------*)
13352 (* Parse the document
type declaration
, the
<!DOCTYPE already read
. *)
13355 (* [28] doctypedecl
::= '<!DOCTYPE
'[Image
] S
Name (S ExternalID
)?
*)
13356 (* S?
('[' (markupdecl | PEReference | S
)* ']' S?
)?
'>' *)
13358 (* print an error
and raise SyntaxState
if no name is found
. *)
13359 (* print an error
and raise SyntaxState
if no final
">" is found
. *)
13360 (* external identifier is found
. *)
13361 (* print an error
if white space is missing
. *)
13363 (* return nothing
. *)
13364 (*--------------------------------------------------------------------*)
13365 (* might
raise: none
*)
13366 (*--------------------------------------------------------------------*)
13367 fun parseDocTypeDecl dtd caq
=
13369 val _
= setHasDtd dtd
13370 val caq1
= skipS caq
13372 val (doc
,caq2
) = parseName caq1
13373 handle NotFound (c
,a
,q
) => let val err
= ERR_EXPECTED(expAName
,[c
])
13374 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
13376 val idx
= Element2Index dtd doc
13378 val (hadS
,caq3
as (_
,_
,q3
)) = skipSmay caq2
13379 val (ext
,(c4
,a4
,q4
)) = let val (extId
,_
,(c4
,a4
,q4
)) = parseExtIdSub dtd caq3
13380 val a5
= if hadS
then a4
13381 else hookError(a4
,(getPos q3
,ERR_MISSING_WHITE
))
13382 in (SOME extId
,(c4
,a5
,q4
))
13384 handle NotFound caq
=> (NONE
,caq
)
13386 val a4
' = hookDocType(a4
,(idx
,ext
))
13387 val (c5
,a5
,q5
) = case c4
13388 of 0wx5B (* #
"[" *) =>
13389 let val caq5
= parseInternalSubset
dtd (a4
',q4
)
13396 | SOME extId
=> let val _
= setExternal dtd
13397 in if !O_VALIDATE
orelse !O_INCLUDE_PARAM_ENTS
13398 then parseExternalSubset
dtd (a5
,q5
) extId
13402 val a7
= checkMultEnum
dtd (a6
,q5
)
13403 val a7
'= checkPreDefined
dtd (a7
,q5
)
13404 val a8
= checkUnparsed dtd a7
'
13406 val (c9
,a9
,q9
) = if c5
=0wx3E (* #
">" *) then getChar(a8
,q5
)
13407 else let val err
= expectedOrEnded(expGt
,LOC_DOC_DECL
) c5
13408 val a9
= hookError(a8
,(getPos q5
,err
))
13409 in recoverDecl
false (c5
,a9
,q5
)
13412 (SOME idx
,(c9
,hookEndDtd(a9
,getPos q9
),q9
))
13414 handle exn
as SyntaxError(c
,a
,q
) =>
13415 let val a1
= if c
=0wx00
then hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_DOC_DECL
))
13417 val (c2
,a2
,q2
) = recoverDecl
true (c
,a1
,q
)
13418 in (NONE
,(c2
,hookEndDtd(a2
,getPos q2
),q2
))
13421 (* stop
of ../../Parser
/Parse
/parseDtd
.sml
*)
13422 (* start
of ../../Parser
/Parse
/parseContent
.sml
*)
13423 signature ParseContent
=
13425 (*----------------------------------------------------------------------
13428 val parseName
: UniChar
.Char * AppData
* State
13429 -> UniChar
.Data
* (UniChar
.Char * AppData
* State
)
13431 val openDocument
: Uri
.Uri option
-> AppData
13432 -> Encoding
.Encoding
* HookData
.XmlDecl option
* (UniChar
.Char * AppData
* State
)
13434 val skipCharRef
: AppData
* State
-> (UniChar
.Char * AppData
* State
)
13435 val skipReference
: UniChar
.Char * AppData
* State
-> (UniChar
.Char * AppData
* State
)
13437 val parseComment
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
13438 val parseProcInstr
: Errors
.Position
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
13440 val skipTag
: Errors
.Location
-> AppData
* State
-> (UniChar
.Char * AppData
* State
)
13441 val parseSTag
: Dtd
-> Errors
.Position
-> UniChar
.Char * AppData
* State
13442 -> (HookData
.StartTagInfo
* Base
.ElemInfo
) * (UniChar
.Char * AppData
* State
)
13444 val skipDecl
: bool -> UniChar
.Char * AppData
* State
-> UniChar
.Char * AppData
* State
13446 val parseDocTypeDecl
: Dtd
-> (UniChar
.Char * AppData
* State
)
13447 -> int option
* (UniChar
.Char * AppData
* State
)
13448 ----------------------------------------------------------------------*)
13451 val skipBadSection
: UniChar
.Char * AppData
* State
-> (UniChar
.Char * AppData
* State
)
13453 val parseElement
: Dtd
* int list
* State
* (HookData
.StartTagInfo
* Base
.ElemInfo
)
13454 * (UniChar
.Char * AppData
* State
)
13455 -> (int * UniChar
.Data
* Errors
.Position
* Errors
.Position
) option
13456 * (UniChar
.Char * AppData
* State
)
13459 (*--------------------------------------------------------------------------*)
13460 (* Structure
: ParseContent
*)
13462 (* Exceptions raised by functions
in this
structure: *)
13463 (* skipBadSection
: none
*)
13464 (* parseElement
: none
*)
13465 (*--------------------------------------------------------------------------*)
13466 functor ParseContent (structure ParseBase
: ParseBase
)
13469 structure ParseDtd
= ParseDtd (structure ParseBase
= ParseBase
)
13472 Base Errors UniChar UniClasses UtilList
13475 val THIS_MODULE
= "ParseContent"
13476 val DATA_BUFSIZE
= 1024
13477 val dataBuffer
= Array
.array(DATA_BUFSIZE
,0w0
:UniChar
.Char)
13479 (*--------------------------------------------------------------------*)
13480 (* skip a cdata section
, the initial
"<![" already consumed
. The first
*)
13481 (* arg is the
type of section to be skipped
. cf
. 2.5: *)
13483 (* [18] CDSect
::= CDStart CData CDEnd
*)
13484 (* [19] CDStart
::= '<![CDATA
[' *)
13485 (* [20] CData
::= (Char* - (Char* ']]>' Char* )) [[ *)
13486 (* [21] CDEnd
::= ']]>' *)
13488 (* don
't care abeout whether
"CDATA[" is present
. just skip until the
*)
13489 (* next
"]]>" or entity
end. *)
13491 (* return the remaining char
and state
. *)
13492 (*--------------------------------------------------------------------*)
13493 (* might
raise: none
*)
13494 (*--------------------------------------------------------------------*)
13495 fun skipBadSection caq
=
13496 let(*--------------------------------------------------------------*)
13497 (* for a sequence
of "]"s
, check whether the last two are
*)
13498 (* followed by a
">" *)
13499 (*--------------------------------------------------------------*)
13501 let val (c1
,a1
,q1
) = getChar aq
13503 of 0wx3E (* #
">" *) => getChar(a1
,q1
)
13504 |
0wx5D (* #
"]" *) => checkEnd(a1
,q1
)
13505 | _
=> doit(c1
,a1
,q1
)
13509 of 0wx00
=> let val a1
= hookError(a
,(getPos q
,ERR_ENDED_BY_EE LOC_CDATA
))
13512 |
0wx5D (* #
"]" *) => let val (c1
,a1
,q1
) = getChar(a
,q
)
13513 in if c1
=0wx5D (* #
"]" *) then checkEnd(a1
,q1
)
13514 else doit (c1
,a1
,q1
)
13516 | _
=> doit (getChar(a
,q
))
13520 (*--------------------------------------------------------------------*)
13521 (* parse a cdata section
, the initial
"<![CDATA[" already consumed
. *)
13524 (* [18] CDSect
::= CDStart CData CDEnd
*)
13525 (* [19] CDStart
::= '<![CDATA
[' *)
13526 (* [20] CData
::= (Char* - (Char* ']]>' Char* )) [[ *)
13527 (* [21] CDEnd
::= ']]>' *)
13529 (* print an error
and finish
if an entity
end is found
. *)
13531 (* return the data
as a
Vector option
and the next char
& state
. *)
13532 (*--------------------------------------------------------------------*)
13533 (* might
raise: none
*)
13534 (*--------------------------------------------------------------------*)
13535 fun parseCDataSection
' (aq
as (_
,q
)) =
13537 (*--------------------------------------------------------------*)
13538 (* for a sequence
of "]"s
, check whether the last two are
*)
13539 (* followed by a
">" *)
13540 (*--------------------------------------------------------------*)
13541 fun doEnd (text
,q0
,q1
) (a2
,q2
) =
13542 let val (c3
,a3
,q3
) = getChar (a2
,q2
)
13545 let val a4
= hookError(a3
,(getPos q3
,ERR_ENDED_BY_EE LOC_CDATA
))
13546 in (0wx5D
::text
,getPos q2
,(c3
,a4
,q3
))
13548 |
0wx3E
=> (* #
">" *) (text
,getPos q0
,getChar(a3
,q3
))
13549 |
0wx5D
=> doEnd (0wx5D
::text
,q1
,q2
) (a3
,q3
)
13550 | _
=> doit (c3
::0wx5D
::0wx5D
::text
) (a3
,q3
)
13552 and doBrack (text
,q0
) (a1
,q1
) =
13553 let val (c2
,a2
,q2
) = getChar(a1
,q1
)
13556 let val a3
= hookError(a2
,(getPos q2
,ERR_ENDED_BY_EE LOC_CDATA
))
13557 in (0wx5D
::text
,getPos q1
,(c2
,a3
,q2
))
13559 |
0wx5D (* #
"]" *) => doEnd (text
,q0
,q1
) (a2
,q2
)
13560 | _
=> doit (c2
::0wx5D
::text
) (a2
,q2
)
13562 and doit
text (a
,q
) =
13563 let val (c1
,a1
,q1
) = getChar(a
,q
)
13566 let val a2
= hookError(a1
,(getPos q1
,ERR_ENDED_BY_EE LOC_CDATA
))
13567 in (text
,getPos q
,(c1
,a2
,q1
))
13569 |
0wx5D (* #
"]" *) => doBrack (text
,q
) (a1
,q1
)
13570 | _
=> doit (c1
::text
) (a1
,q1
)
13572 val (c1
,a1
,q1
) = getChar aq
13573 val startPos
= getPos q1
13574 val (cs
,endPos
,(c2
,a2
,q2
)) =
13577 let val a2
= hookError(a1
,(getPos q1
,ERR_ENDED_BY_EE LOC_CDATA
))
13578 in (nil
,getPos q
,(c1
,a2
,q1
))
13580 |
0wx5D (* #
"]" *) => doBrack (nil
,q
) (a1
,q1
)
13581 | _
=> doit
[c1
] (a1
,q1
)
13582 val text
= Data2Vector(rev cs
)
13583 val a3
= hookCData(a1
,((startPos
,endPos
),text
))
13586 (*--------------------------------------------------------------------*)
13587 (* parse a cdata section
, the initial
"<![" already consumed
. *)
13590 (* [18] CDSect
::= CDStart CData CDEnd
*)
13591 (* [19] CDStart
::= '<![CDATA
[' *)
13592 (* [20] CData
::= (Char* - (Char* ']]>' Char* )) [[ *)
13593 (* [21] CDEnd
::= ']]>' *)
13595 (* print an error
and skip the section
if no name or a name other
*)
13596 (* than CDATA comes first
, or no
'[' follows the name
. *)
13598 (* return the text
of the section together
with the remaining state
. *)
13599 (*--------------------------------------------------------------------*)
13600 (* might
raise: none
*)
13601 (*--------------------------------------------------------------------*)
13602 fun parseCDataSection startPos aq
=
13604 val caq0
as (_
,_
,q0
) = (getChar aq
)
13605 val (name
,(c1
,a1
,q1
)) = parseName caq0
13606 handle NotFound (c
,a
,q
) => let val err
= expectedOrEnded(expCdata
,LOC_CDATA
) c
13607 in raise SyntaxError(c
,hookError(a
,(getPos q
,err
)),q
)
13610 val _
= if name
= [0wx43
,0wx44
,0wx41
,0wx54
,0wx41
] (* "CDATA" *) then ()
13611 else let val err
= ERR_EXPECTED(expCdata
,name
)
13612 in raise SyntaxError(c1
,hookError(a1
,(getPos q0
,err
)),q1
)
13615 val _
= if c1
=0wx5B (* #
"[" *) then ()
13616 else let val err
= expectedOrEnded(expLbrack
,LOC_CDATA
) c1
13617 in raise SyntaxError(c1
,hookError(a1
,(getPos q1
,err
)),q1
)
13620 parseCDataSection
'(a1
,q1
)
13622 handle SyntaxError caq
=> skipBadSection caq
13624 (*--------------------------------------------------------------------*)
13625 (* parse element or empty content
. The second arg holds the unique
*)
13626 (* number
of the element
's first characters
's entity
, the index
of *)
13627 (* the current element
, and the dfa for its content
. Cf
. 3: *)
13629 (* [39] element
::= EmptyElemTag
*)
13630 (* | STag content ETag
*)
13632 (* Well
-Formedness Constraint
: Element Type Match
*)
13633 (* The Name
in an element
's
end-tag must match the element
type in *)
13634 (* the start
-tag
. *)
13636 (* Validity Constraint
: Element Valid
*)
13637 (* An element is valid
if there is a declaration matching
*)
13638 (* elementdecl
where the Name matches the element
type, and one
of *)
13639 (* the following holds
: *)
13641 (* 1. The declaration matches EMPTY
and the element has no content
. *)
13642 (* 2. The declaration matches children
and the sequence
of child
*)
13643 (* elements belongs to the language generated by the regular
*)
13644 (* expression
in the content model
, with optional white space
*)
13645 (* (characters matching the nonterminal S
) between each pair
of *)
13646 (* child elements
. *)
13650 (* [43] content
::= (element | CharData | Reference | CDSect | PI
*)
13653 (* The ampersand
character (&) and the left angle
bracket (<) may
*)
13654 (* appear
in their literal form only when used
as markup delimiters
,*)
13655 (* or within a comment
, a processing instruction
, or a CDATA
*)
13656 (* section
... If they are needed elsewhere
, they must be escaped
*)
13657 (* using either numeric character references or the strings
"&" *)
13658 (* and "<" respectively
... *)
13660 (* consume the content
of the element
, accumulating it via the user
*)
13661 (* data
functions (parameter a
in subfunctions
). trace the content
*)
13662 (* model
of the element
with a dfa transitions on a dfa
state (para
- *)
13663 (* meter p
in subfunctions
). finish at the first
end-tag
, whether
*)
13664 (* matching or not
, or at the document
end. *)
13666 (* handle all syntax
and other recoverable errors from subfunctions
*)
13667 (* and try to continue
. *)
13669 (* return the accumulated user data
and the next char
and state
. *)
13670 (*--------------------------------------------------------------------*)
13671 (* might
raise: none
*)
13672 (*--------------------------------------------------------------------*)
13673 fun parseElementContent
dtd (openElems
,startEnt
,curr
,dfa
,ext
,mt
) caq
=
13675 (*--------------------------------------------------------------*)
13676 (* check whether the dfa allows a transition
/an
end tag here
. *)
13677 (* print an error
if not
. After a transition return the new
*)
13679 (*--------------------------------------------------------------*)
13680 fun fin_elem (a
,pos
,dfa
,p
) =
13681 if dfaFinal(dfa
,p
) then a
13682 else hookError(a
,(pos
,ERR_ENDED_EARLY(Index2Element dtd curr
)))
13683 fun trans_elem (a
,q
,dfa
,p
,el
) =
13684 let val p1
= dfaTrans(dfa
,p
,el
)
13685 in if p1
<>dfaError
then (p1
,a
)
13686 else let val err
= ERR_BAD_ELEM(Index2Element dtd curr
,Index2Element dtd el
)
13687 in (p1
,hookError(a
,(getPos q
,err
)))
13691 (*--------------------------------------------------------------*)
13692 (* consume all white space
and skip all data until the next
"<" *)
13693 (* or
"&". print an error for each sequence
of data encountered
.*)
13695 (* add the white space
as data to the user data
. *)
13696 (* return the next char
and state
. *)
13697 (*--------------------------------------------------------------*)
13698 fun do_char_elem (c0
,a0
,q0
) =
13700 (*--------------------------------------------------------------*)
13701 (* read data characters until the next
"<", "&" or entity
end. *)
13702 (* add the data to the user data when an error occurs or no
*)
13703 (* more data follows
. *)
13705 (* return the modified user data
with the next char
and state
. *)
13706 (*--------------------------------------------------------------*)
13707 fun data_hook(a
,q
,cs
) =
13709 else hookData(a
,((getPos q0
,getPos q
),Data2Vector(rev cs
),true))
13710 fun after_error (caq
as (c
,a
,q
)) =
13713 |
0wx26 (* #
"&" *) => caq
13714 |
0wx3C (* #
"<" *) => caq
13715 | _
=> after_error(getChar(a
,q
))
13716 fun do_data (yet
,aq
as (_
,q
)) =
13717 let val (c1
,a1
,q1
) = getChar aq
13719 of 0wx00
=> (c1
,data_hook(a1
,q
,yet
),q1
)
13720 |
0wx26 (* #
"&" *) => (c1
,data_hook(a1
,q
,yet
),q1
)
13721 |
0wx3C (* #
"<" *) => (c1
,data_hook(a1
,q
,yet
),q1
)
13723 if isS c1
then do_data (c1
::yet
,(a1
,q1
))
13724 else let val a2
= data_hook(a1
,q
,yet
)
13725 val err
= ERR_ELEM_CONTENT(IT_DATA nil
)
13726 val a3
= hookError(a2
,(getPos q1
,err
))
13727 in after_error (getChar(a3
,q1
))
13732 let val a1
= if not (ext
andalso standsAlone dtd
) then a0
13733 else let val err
= ERR_STANDALONE_ELEM(Index2Element dtd curr
)
13734 val _
= setStandAlone
dtd (not (!O_ERROR_MINIMIZE
))
13735 in hookError(a0
,(getPos q0
,err
))
13737 in do_data ([c0
],(a1
,q0
))
13739 else let val a1
= hookError(a0
,(getPos q0
,ERR_ELEM_CONTENT(IT_DATA nil
)))
13740 in after_error(getChar(a1
,q0
))
13743 (*--------------------------------------------------------------*)
13744 (* consume a reference
, handling errors by ignoring them
. *)
13745 (*--------------------------------------------------------------*)
13746 fun do_ref (q
,(c1
,a1
,q1
)) =
13747 if c1
=0wx23 (* #
"#" *)
13748 (*------------------------------------------------------*)
13749 (* it
's a character reference
. *)
13750 (*------------------------------------------------------*)
13751 then let val err
= ERR_ELEM_CONTENT IT_CHAR_REF
13752 val a2
= hookError(a1
,(getPos q
,err
))
13753 in skipCharRef(a2
,q1
)
13755 (*---------------------------------------------------------*)
13756 (* it
's a general entity reference
. *)
13757 (*---------------------------------------------------------*)
13758 else let val ((id
,ent
),(a2
,q2
)) = parseGenRef
dtd (c1
,a1
,q1
)
13761 let val a3
= hookGenRef(a2
,((getPos q
,getPos q2
),id
,ent
,false))
13762 in (getChar(a3
,q2
))
13764 |
GE_INTERN(_
,rep
) =>
13766 val q3
= pushIntern(q2
,id
,false,rep
)
13767 val a3
= hookGenRef(a2
,((getPos q
,getPos q2
),id
,ent
,true))
13768 in (getChar(a3
,q3
))
13771 if !O_VALIDATE
orelse !O_INCLUDE_EXT_PARSED
13774 val a3
= hookGenRef(a2
,((getPos q
,getPos q2
),id
,ent
,true))
13775 val caq4
= #
3(openExtern (id
,false,resolveExtId ext
) (a3
,q2
))
13776 handle CantOpenFile(fmsg
,a
)
13777 => let val err
= ERR_NO_SUCH_FILE fmsg
13778 val a2
= hookError(a
,(getPos q2
,err
))
13779 val a3
= hookEntEnd(a2
,getPos q2
)
13780 in (getChar(a3
,q2
))
13784 else let val a3
= hookGenRef(a2
,((getPos q
,getPos q2
),id
,ent
,false))
13788 raise InternalError
13789 (THIS_MODULE
,"parseElementContent",
13790 "parseGenRef returned GE_UNPARSED")
13792 (*-------------------------------------------------------*)
13793 (* handle any errors
in references by ignoring them
. *)
13794 (*-------------------------------------------------------*)
13795 handle SyntaxError caq
=> caq
13796 | NoSuchEntity aq
=> getChar aq
13798 (*--------------------------------------------------------------*)
13799 (* handle an
end-tag
. finish the element
in the user data
and *)
13802 (* print an error
if the element
's content is not yet finished
. *)
13803 (* print an error
if the
end-tag is for another element
. *)
13804 (* print an error
if the element
's first character was not
in *)
13805 (* the same entity
. *)
13806 (*--------------------------------------------------------------*)
13807 and do_etag (p
,etag
as (elem
,space
,startPos
,endPos
),(c
,a
,q
)) =
13809 fun checkNesting a
=
13810 if getEntId q
=startEnt
then a
13811 else hookError(a
,(startPos
,ERR_ELEM_ENT_NESTING(Index2Element dtd curr
)))
13813 if elem
=curr
then let val a1
= fin_elem (a
,startPos
,dfa
,p
)
13814 val a2
= checkNesting a1
13815 val a3
= hookEndTag
13816 (a2
,((startPos
,endPos
),curr
,SOME(elem
,space
)))
13819 else if member elem openElems
13820 then let val err
= ERR_OMITTED_END_TAG(Index2Element dtd curr
)
13821 val a1
= hookError(a
,(startPos
,err
))
13822 val a2
= fin_elem (a1
,startPos
,dfa
,p
)
13823 val a3
= hookEndTag(a2
,((startPos
,endPos
),curr
,NONE
))
13824 in (SOME etag
,(c
,a3
,q
))
13826 else if dfaFinal(dfa
,p
)
13827 then let val err
= ERR_ELEM_TYPE_MATCH(Index2Element dtd curr
,
13828 Index2Element dtd elem
)
13829 val a1
= hookError(a
,(startPos
,err
))
13830 val a2
= checkNesting a1
13831 val a3
= hookEndTag(a2
,((startPos
,endPos
),curr
,SOME(elem
,space
)))
13834 else let val err
= ERR_IGNORED_END_TAG(Index2Element dtd curr
,
13835 Index2Element dtd elem
)
13836 val a1
= hookError(a
,(startPos
,err
))
13837 in do_elem(p
,(c
,a1
,q
))
13841 (*--------------------------------------------------------------*)
13842 (* handle a declaration
, proc
. instr or tag
. *)
13843 (*--------------------------------------------------------------*)
13844 and do_lt (p
,q
,(c1
,a1
,q1
)) =
13846 of 0wx21 (* #
"!" *) =>
13847 (*------------------------------------------------------*)
13848 (* its a declaration
, cdata section or comment
. *)
13849 (* Only comments are valid
. *)
13850 (*------------------------------------------------------*)
13851 let val (c2
,a2
,q2
) = getChar(a1
,q1
)
13854 of 0wx2D (* #
"-" *) =>
13855 let val (c3
,a3
,q3
) = getChar(a2
,q2
)
13856 in if c3
=0wx2D
then parseComment (getPos q
) (a3
,q3
)
13857 else let val err
= ERR_EXPECTED(expDash
,[c3
])
13858 val a4
= hookError(a3
,(getPos q3
,err
))
13859 in recoverDecl
false (c3
,a4
,q3
)
13862 |
0wx5B (* #
"[" *) =>
13863 let val a3
= hookError(a2
,(getPos q2
,ERR_ELEM_CONTENT IT_CDATA
))
13864 in skipBadSection (getChar(a3
,q2
))
13866 | _
=> (c2
,hookError(a2
,(getPos q2
,ERR_EXPECTED(expDash
,[c2
]))),q2
)
13869 |
0wx2F (* #
"/" *) =>
13870 (let val (elem
,space
,endPos
,caq2
) = parseETag
dtd (a1
,q1
)
13871 in do_etag (p
,(elem
,space
,getPos q
,endPos
),caq2
)
13873 handle SyntaxError caq
=> do_elem(p
,caq
))
13874 |
0wx3F (* #
"?" *) => do_elem (p
,parseProcInstr (getPos q
) (a1
,q1
))
13876 (*------------------------------------------------------*)
13877 (* it
's a start tag
. the recursive call to parseElement
*)
13878 (* might return an
end-tag that has to be consumed
. *)
13879 (*------------------------------------------------------*)
13881 let val (p1
,(opt
,caq2
)) =
13882 (let val (stag
as ((_
,elem
,_
,_
,_
),_
),(c2
,a2
,q2
)) =
13883 parseSTag
dtd (getPos q
) (c1
,a1
,q1
)
13884 val (p1
,a3
) = trans_elem (a2
,q1
,dfa
,p
,elem
)
13885 in (p1
,parseElement (dtd
,curr
::openElems
,q
,stag
,(c2
,a3
,q2
)))
13887 handle SyntaxError caq
=> (p
,(NONE
,caq
))
13889 of NONE
=> do_elem (p1
,caq2
)
13890 | SOME etag
=> do_etag (p1
,etag
,caq2
)
13892 else let val err
= ERR_FORBIDDEN_HERE(IT_CHAR
0wx3C
,LOC_CONTENT
)
13893 val a2
= hookError(a1
,(getPos q
,err
))
13894 in do_elem (p
,(c1
,a2
,q1
))
13897 (*--------------------------------------------------------------*)
13898 (* do element content
. handle the document
end by printing an
*)
13899 (* error
and finishing like
with an
end-tag
. *)
13900 (*--------------------------------------------------------------*)
13901 and do_elem (p
,(c
,a
,q
)) =
13903 of 0wx00
=> if isSpecial q
13904 then let val err
= ERR_OMITTED_END_TAG(Index2Element dtd curr
)
13905 val a1
= hookError(a
,(getPos q
,err
))
13907 val a2
= fin_elem (a1
,pos
,dfa
,p
)
13908 val a3
= hookEndTag(a2
,((pos
,pos
),curr
,NONE
))
13911 else let val a1
= hookEntEnd(a
,getPos q
)
13912 in do_elem (p
,getChar(a1
,q
))
13914 |
0wx26 (* #
"&" *) => do_elem (p
,do_ref (q
,getChar(a
,q
)))
13915 |
0wx3C (* #
"<" *) => do_lt (p
,q
,getChar(a
,q
))
13916 | _
=> do_elem (p
,do_char_elem (c
,a
,q
))
13918 (*--------------------------------------------------------------*)
13919 (* do empty content
. if the first thing to come is the current
*)
13920 (* element
's
end-tag
, finish it
. Otherwise print an error
and *)
13921 (* continue
as for element content
. *)
13922 (*--------------------------------------------------------------*)
13923 and do_empty (c
,a
,q
) =
13924 if c
<>0wx3C (* #
"<" *)
13925 then let val a1
= hookError(a
,(getPos q
,ERR_NONEMPTY(Index2Element dtd curr
)))
13926 in do_elem (dfaInitial
,(c
,a1
,q
))
13929 let val (c1
,a1
,q1
) = getChar(a
,q
)
13930 in if c1
<>0wx2F (* #
"/" *)
13931 then let val err
= ERR_NONEMPTY(Index2Element dtd curr
)
13932 val a2
= hookError(a1
,(getPos q
,err
))
13933 in do_lt (dfaInitial
,q
,(c1
,a2
,q1
))
13935 else let val (elem
,space
,endPos
,caq2
) = parseETag
dtd (a1
,q1
)
13936 in do_etag (dfaInitial
,(elem
,space
,getPos q
,endPos
),caq2
)
13938 handle SyntaxError caq
=> do_elem (dfaInitial
,caq
)
13941 in if mt
then do_empty caq
13942 else do_elem (dfaInitial
,caq
)
13945 (*--------------------------------------------------------------------*)
13946 (* parse mixed or any content
. The second arg holds the unique number
*)
13947 (* of the element
's first characters
's entity
, the idx
of the current
*)
13948 (* element
, and a function for validating child elements
. Cf
. 3: *)
13950 (* [39] element
::= EmptyElemTag
*)
13951 (* | STag content ETag
*)
13953 (* Well
-Formedness Constraint
: Element Type Match
*)
13954 (* The Name
in an element
's
end-tag must match the element
type in *)
13955 (* the start
-tag
. *)
13957 (* Validity Constraint
: Element Valid
*)
13958 (* An element is valid
if there is a declaration matching
*)
13959 (* elementdecl
where the Name matches the element
type, and one
of *)
13960 (* the following holds
: *)
13962 (* 3. The declaration matches Mixed
and the content consists
of *)
13963 (* character data
and child elements whose types match names
in *)
13964 (* the content model
. *)
13965 (* 4. The declaration matches ANY
, and the types
of any child
*)
13966 (* elements have been declared
. *)
13970 (* [43] content
::= (element | CharData | Reference | CDSect | PI
*)
13973 (* The ampersand
character (&) and the left angle
bracket (<) may
*)
13974 (* appear
in their literal form only when used
as markup delimiters
,*)
13975 (* or within a comment
, a processing instruction
, or a CDATA
*)
13976 (* section
... If they are needed elsewhere
, they must be escaped
*)
13977 (* using either numeric character references or the strings
"&" *)
13978 (* and "<" respectively
. The right angle
bracket (>) may be
*)
13979 (* represented using the
string ">", and must
, for compatibility
,*)
13980 (* be escaped using
">" or a character reference when it appears
*)
13981 (* in the
string "]]>" in content
, when that
string is not marking
*)
13982 (* the
end of a CDATA section
. *)
13984 (* consume the content
of the element
, accumulating it via the user
*)
13985 (* data
functions (parameter a
in subfunctions
). for each child
, *)
13986 (* check whether it was specified
in the element
's Mixed content
*)
13987 (* specification (validate
). finish at the first
end-tag
, whether
*)
13988 (* matching or not
, or at the document
end. *)
13990 (* handle all syntax
and other recoverable errors from subfunctions
*)
13991 (* and try to continue
. *)
13993 (* return the accumulated user data
and the next char
and state
. *)
13994 (*--------------------------------------------------------------------*)
13995 (* might
raise: none
*)
13996 (*--------------------------------------------------------------------*)
13997 and parseMixedContent
dtd (openElems
,startEnt
,curr
,validate
) caq
=
13999 (*--------------------------------------------------------------*)
14000 (* read data characters until the next
"<", "&" or entity
end. *)
14001 (* add the data to the user data when an error occurs or no
*)
14002 (* more data follows
. *)
14004 (* return the modified user data
with the next char
and state
. *)
14005 (*--------------------------------------------------------------*)
14006 fun do_data (br
,(c0
,a0
,q0
)) =
14008 val pos0
= ref (getPos q0
)
14009 val _
= Array
.update(dataBuffer
,0,c0
)
14011 fun data_hook (i
,(a
,q
)) =
14012 hookData(a
,((!pos0
,getPos q
),Array
.extract(dataBuffer
,0,SOME i
),false))
14013 fun takeOne (c
,qE
,i
,aq
as (a
,q
)) =
14014 if i
<DATA_BUFSIZE
then (i
+1,aq
) before Array
.update(dataBuffer
,i
,c
)
14015 else let val a1
= data_hook(i
,(a
,qE
))
14016 val _
= pos0
:= getPos q
14017 val _
= Array
.update(dataBuffer
,0,c
)
14020 fun do_br (n
,(i
,aq
as (_
,q
))) =
14021 let val (c1
,a1
,q1
) = getChar aq
14023 of 0wx00
=> (c1
,data_hook(i
,(a1
,q
)),q1
)
14024 |
0wx26 (* #
"&" *) => (c1
,data_hook(i
,(a1
,q
)),q1
)
14025 |
0wx3C (* #
"<" *) => (c1
,data_hook(i
,(a1
,q
)),q1
)
14026 |
0wx5D (* #
"]" *) => do_br (n
+1,takeOne(c1
,q
,i
,(a1
,q1
)))
14027 |
0wx3E (* #
">" *) =>
14028 let val a2
= if n
=1 then a1
14029 else hookError(a1
,(getPos q1
,ERR_MUST_ESCAPE c1
))
14030 in doit (takeOne(c1
,q
,i
,(a2
,q1
)))
14032 | _
=> doit (takeOne(c1
,q
,i
,(a1
,q1
)))
14034 and doit (i
,aq
as (_
,q
)) =
14035 let val (c1
,a1
,q1
) = getChar aq
14037 of 0wx00
=> (c1
,data_hook(i
,(a1
,q
)),q1
)
14038 |
0wx26 (* #
"&" *) => (c1
,data_hook(i
,(a1
,q
)),q1
)
14039 |
0wx3C (* #
"<" *) => (c1
,data_hook(i
,(a1
,q
)),q1
)
14040 |
0wx5D (* #
"]" *) => if !O_COMPATIBILITY
14041 then do_br (1,takeOne(c1
,q
,i
,(a1
,q1
)))
14042 else doit (takeOne(c1
,q
,i
,(a1
,q1
)))
14043 | _
=> doit (takeOne(c1
,q
,i
,(a1
,q1
)))
14046 if br
then do_br (1,(1,(a0
,q0
)))
14047 else doit (1,(a0
,q0
))
14050 fun do_data (br
,(c0
,a0
,q0
)) =
14052 fun data_hook (yet
,(a
,q
)) =
14053 hookData(a
,((getPos q0
,getPos q
),Data2Vector(rev yet
),false))
14054 fun do_br (n
,yet
,aq
as (_
,q
)) =
14055 let val (c1
,a1
,q1
) = getChar aq
14057 of 0wx00
=> (c1
,data_hook(yet
,(a1
,q
)),q1
)
14058 |
0wx26 (* #
"&" *) => (c1
,data_hook(yet
,(a1
,q
)),q1
)
14059 |
0wx3C (* #
"<" *) => (c1
,data_hook(yet
,(a1
,q
)),q1
)
14060 |
0wx5D (* #
"]" *) => do_br (n
+1,c1
::yet
,(a1
,q1
))
14061 |
0wx3E (* #
">" *) =>
14062 let val a2
= if n
=1 then a1
14063 else hookError(a1
,(getPos q1
,ERR_MUST_ESCAPE c1
))
14064 in doit (c1
::yet
,(a2
,q1
))
14066 | _
=> doit (c1
::yet
,(a1
,q1
))
14068 and doit (yet
,aq
as (_
,q
)) =
14069 let val (c1
,a1
,q1
) = getChar aq
14071 of 0wx00
=> (c1
,data_hook(yet
,(a1
,q
)),q1
)
14072 |
0wx26 (* #
"&" *) => (c1
,data_hook(yet
,(a1
,q
)),q1
)
14073 |
0wx3C (* #
"<" *) => (c1
,data_hook(yet
,(a1
,q
)),q1
)
14074 |
0wx5D (* #
"]" *) => if !O_COMPATIBILITY
14075 then do_br (1,c1
::yet
,(a1
,q1
))
14076 else doit (c1
::yet
,(a1
,q1
))
14077 | _
=> doit (c1
::yet
,(a1
,q1
))
14080 if br
then do_br (1,[0wx5D
],(a0
,q0
))
14081 else doit ([c0
],(a0
,q0
))
14085 (*--------------------------------------------------------------*)
14086 (* consume a reference
, handling errors by ignoring them
. *)
14087 (*--------------------------------------------------------------*)
14088 fun do_ref (q0
,(c
,a
,q
)) =
14089 if c
=0wx23 (* #
"#" *)
14090 (*------------------------------------------------------*)
14091 (* it
's a character reference
. *)
14092 (*------------------------------------------------------*)
14093 then let val (cs
,(ch
,a1
,q1
)) = parseCharRefLit
[0wx23
,0wx26
] (a
,q
)
14094 val cv
= Data2Vector(rev cs
)
14095 val a2
= hookCharRef(a1
,((getPos q0
,getPos q1
),ch
,cv
))
14098 handle SyntaxError caq
=> caq
14099 | NoSuchChar aq
=> getChar aq
14100 (*---------------------------------------------------------*)
14101 (* it
's a general entity reference
. *)
14102 (*---------------------------------------------------------*)
14103 else let val ((id
,ent
),(a1
,q1
)) = parseGenRef
dtd (c
,a
,q
)
14106 let val a2
= hookGenRef(a1
,((getPos q0
,getPos q1
),id
,ent
,false))
14109 |
GE_INTERN(_
,rep
) =>
14111 val q2
= pushIntern(q1
,id
,false,rep
)
14112 val a2
= hookGenRef(a1
,((getPos q0
,getPos q1
),id
,ent
,true))
14116 if !O_VALIDATE
orelse !O_INCLUDE_EXT_PARSED
14119 val a2
= hookGenRef(a1
,((getPos q0
,getPos q1
),id
,ent
,true))
14120 val caq3
= #
3(openExtern (id
,false,resolveExtId ext
) (a2
,q1
))
14121 handle CantOpenFile(fmsg
,a
)
14122 => let val err
= ERR_NO_SUCH_FILE fmsg
14123 val a1
= hookError(a
,(getPos q1
,err
))
14124 val a2
= hookEntEnd(a1
,getPos q1
)
14125 in (getChar(a2
,q1
))
14129 else let val a2
= hookGenRef(a1
,((getPos q0
,getPos q1
),id
,ent
,false))
14133 raise InternalError
14134 ("THIS_MODULE","parseMixedContent",
14135 "parseGenRef returned GE_UNPARSED")
14137 (*-------------------------------------------------------*)
14138 (* handle any errors
in references by ignoring them
. *)
14139 (*-------------------------------------------------------*)
14140 handle SyntaxError caq
=> caq
14141 | NoSuchEntity aq
=> getChar aq
14143 (*--------------------------------------------------------------*)
14144 (* handle an
end-tag
. finish the element
in the user data
and *)
14147 (* print an error
if the element
's content is not yet finished
. *)
14148 (* print an error
if the
end-tag is for another element
. *)
14149 (* print an error
if the element
's first character was not
in *)
14150 (* the same entity
. *)
14151 (*--------------------------------------------------------------*)
14152 and do_etag (etag
as (elem
,space
,startPos
,endPos
),(c
,a
,q
)) =
14154 fun checkNesting a
=
14155 if getEntId q
=startEnt
then a
14156 else hookError(a
,(startPos
,ERR_ELEM_ENT_NESTING(Index2Element dtd curr
)))
14158 if elem
=curr
then let val a1
= checkNesting a
14159 val a2
= hookEndTag
14160 (a1
,((startPos
,endPos
),curr
,SOME(elem
,space
)))
14163 else if member elem openElems
14164 then let val err
= ERR_OMITTED_END_TAG(Index2Element dtd curr
)
14165 val a1
= hookError(a
,(startPos
,err
))
14166 val a2
= hookEndTag(a1
,((startPos
,endPos
),curr
,NONE
))
14167 in (SOME etag
,(c
,a2
,q
))
14169 else let val err
= ERR_ELEM_TYPE_MATCH(Index2Element dtd curr
,
14170 Index2Element dtd elem
)
14171 val a1
= hookError(a
,(startPos
,err
))
14172 val a2
= checkNesting a1
14173 val a3
= hookEndTag(a2
,((startPos
,endPos
),curr
,SOME(elem
,space
)))
14178 (*--------------------------------------------------------------*)
14179 (* handle a declaration
, proc
. instr or tag
. If it is an
end- *)
14180 (* tag
, finish the element
in the user data
and return
. *)
14182 (* print an error
if the element
's content is not yet finished
. *)
14183 (* print an error
if the
end-tag is for another element
. *)
14184 (* print an error
if the element
's first character was not
in *)
14185 (* the same entity
. *)
14186 (*--------------------------------------------------------------*)
14187 and do_lt (q
,(c1
,a1
,q1
)) =
14189 of 0wx21 (* #
"!" *) =>
14190 (*------------------------------------------------------*)
14191 (* its a declaration
, cdata section or comment
. *)
14192 (* Only comments
and cdata sections are valid
. *)
14193 (*------------------------------------------------------*)
14194 let val (c2
,a2
,q2
) = getChar(a1
,q1
)
14197 of 0wx2D (* #
"-" *) =>
14198 let val (c3
,a3
,q3
) = getChar(a2
,q2
)
14199 in if c3
=0wx2D
then parseComment (getPos q
) (a3
,q3
)
14200 else let val err
= ERR_EXPECTED(expDash
,[c3
])
14201 val a4
= hookError(a3
,(getPos q3
,err
))
14202 in recoverDecl
false (c3
,a4
,q3
)
14205 |
0wx5B (* #
"[" *) => parseCDataSection (getPos q
) (a2
,q2
)
14207 (c2
,hookError(a2
,(getPos q2
,ERR_EXPECTED(expDashLbrack
,[c2
]))),q2
)
14210 |
0wx2F (* #
"/" *) =>
14211 (let val (elem
,space
,endPos
,caq2
) = parseETag
dtd (a1
,q1
)
14212 in do_etag ((elem
,space
,getPos q
,endPos
),caq2
)
14214 handle SyntaxError caq
=> do_mixed caq
)
14215 |
0wx3F (* #
"?" *) => do_mixed (parseProcInstr (getPos q
) (a1
,q1
))
14217 (*------------------------------------------------------*)
14218 (* it
's a start tag
. the recursive call to parseElement
*)
14219 (* might return an
end-tag that has to be consumed
. *)
14220 (*------------------------------------------------------*)
14222 let val (opt
,caq2
) =
14223 (let val (stag
as ((_
,elem
,_
,_
,_
),_
),(c2
,a2
,q2
)) =
14224 parseSTag
dtd (getPos q
) (c1
,a1
,q1
)
14225 val a3
= validate (a2
,q1
) elem
14226 in parseElement (dtd
,curr
::openElems
,q
,stag
,(c2
,a3
,q2
))
14228 handle SyntaxError caq
=> (NONE
,caq
))
14230 of NONE
=> do_mixed caq2
14231 | SOME etag
=> do_etag (etag
,caq2
)
14233 else let val err
= ERR_FORBIDDEN_HERE(IT_CHAR
0wx3C
,LOC_CONTENT
)
14234 val a2
= hookError(a1
,(getPos q
,err
))
14235 in do_mixed (c1
,a2
,q1
)
14238 (*--------------------------------------------------------------*)
14239 (* do mixed content
. handle the document
end by printing an
*)
14240 (* error
and finishing like
with an
end-tag
. *)
14241 (*--------------------------------------------------------------*)
14242 and do_mixed (c
,a
,q
) =
14244 of 0wx00
=> if isSpecial q
14245 then let val err
= ERR_OMITTED_END_TAG(Index2Element dtd curr
)
14246 val a1
= hookError(a
,(getPos q
,err
))
14248 val a2
= hookEndTag(a1
,((pos
,pos
),curr
,NONE
))
14251 else let val a1
= hookEntEnd(a
,getPos q
)
14252 in do_mixed (getChar(a1
,q
))
14254 |
0wx26 (* #
"&" *) => do_mixed (do_ref (q
,getChar(a
,q
)))
14255 |
0wx3C (* #
"<" *) => do_lt (q
,getChar(a
,q
))
14256 |
0wx5D
=> do_mixed (do_data (!O_COMPATIBILITY
,(c
,a
,q
)))
14257 | _
=> do_mixed (do_data (false,(c
,a
,q
)))
14262 (*--------------------------------------------------------------------*)
14263 (* parse an element
, the start tag already read
. the second arg holds
*)
14264 (* the number
of the entity
of the start
-tag
's first char
, and the
*)
14265 (* start
-tag information
. The
1st arg is the start value for the user
*)
14268 (* [39] element
::= EmptyElemTag
*)
14269 (* | STag content ETag
*)
14272 (* Empty
-element tags may be used for any element which has no
*)
14273 (* content
, whether or not it is declared using the keyword EMPTY
. *)
14274 (* For interoperability
, the empty
-element tag must be used
, and *)
14275 (* can only be used
, for elements which are declared EMPTY
. *)
14276 (*--------------------------------------------------------------------*)
14277 and parseElement (dtd
,openElems
,q0
,(stag
as (_
,curr
,_
,_
,mt
),elemInfo
),(c
,a
,q
)) =
14279 (*--------------------------------------------------------------*)
14280 (* validate whether an element is allowed
in mixed
/any content
. *)
14281 (*--------------------------------------------------------------*)
14282 fun trans_any (a
,_
) _
= a
14283 fun trans_mixed
is (a
,q
) i
=
14284 if member i is
then a
14285 else let val err
= ERR_BAD_ELEM(Index2Element dtd curr
,Index2Element dtd i
)
14286 in hookError(a
,(getPos q
,err
))
14289 (*-----------------------------------------------------------*)
14290 (* For empty
-element tags
, verify that the element
's declar
. *)
14291 (* allows empty content
. *)
14292 (*-----------------------------------------------------------*)
14295 if not (!O_VALIDATE
andalso hasDtd dtd
) then a
14297 case #decl elemInfo
14298 of (SOME(CT_EMPTY
,_
)) => a
14299 |
(SOME(CT_ELEMENT(_
,dfa
),_
)) =>
14300 if not (dfaFinal(dfa
,dfaInitial
))
14301 then hookError(a
,(getPos q0
,ERR_EMPTY_TAG(Index2Element dtd curr
)))
14302 else if not (!O_INTEROPERABILITY
) then a
14304 (a
,(getPos q0
,ERR_EMPTY_TAG_INTER (Index2Element dtd curr
)))
14305 | _
=> if not (!O_INTEROPERABILITY
) then a
14306 else hookError(a
,(getPos q0
,ERR_EMPTY_TAG_INTER
14307 (Index2Element dtd curr
)))
14308 in (NONE
,(c
,hookStartTag(a1
,stag
),q
))
14310 (*-----------------------------------------------------------*)
14311 (* for normal start
-tags
, check whether the element
's decl
. *)
14312 (* requires an empty
-element tag
, or empty content
, then *)
14313 (* call the appropriate function that parses the content
. *)
14314 (*-----------------------------------------------------------*)
14316 let val startEnt
= getEntId q0
14317 in if !O_VALIDATE
then
14318 case getOpt(#decl elemInfo
,(CT_ANY
,false))
14319 of (CT_ANY
,_
) => parseMixedContent dtd
14320 (openElems
,startEnt
,curr
,trans_any
) (c
,hookStartTag(a
,stag
),q
)
14321 |
(CT_MIXED is
,_
) => parseMixedContent dtd
14322 (openElems
,startEnt
,curr
,trans_mixed is
) (c
,hookStartTag(a
,stag
),q
)
14323 |
(CT_ELEMENT(_
,dfa
),ext
) => parseElementContent dtd
14324 (openElems
,startEnt
,curr
,dfa
,ext
,false)
14325 (c
,hookStartTag(a
,stag
),q
)
14327 let val a1
= if not (!O_INTEROPERABILITY
) then a
14328 else let val err
= ERR_MUST_BE_EMPTY(Index2Element dtd curr
)
14329 in hookError(a
,(getPos q0
,err
))
14331 val a2
= hookStartTag(a1
,stag
)
14332 in parseElementContent dtd
14333 (openElems
,startEnt
,curr
,emptyDfa
,false,true) (c
,a2
,q
)
14335 else parseMixedContent dtd
14336 (openElems
,startEnt
,curr
,trans_any
) (c
,hookStartTag(a
,stag
),q
)
14340 (* stop
of ../../Parser
/Parse
/parseContent
.sml
*)
14341 (* start
of ../../Parser
/Parse
/parseDocument
.sml
*)
14342 (*--------------------------------------------------------------------------*)
14343 (* Structure
: ParseDocument
*)
14345 (* Exceptions raised by functions
in this
structure: *)
14346 (* parseDocTypeDecl
: none
*)
14347 (*--------------------------------------------------------------------------*)
14349 (structure Dtd
: Dtd
14350 structure Hooks
: Hooks
14351 structure Resolve
: Resolve
14352 structure ParserOptions
: ParserOptions
) :
14354 val parseDocument
: Uri
.Uri option
-> Dtd
.Dtd option
-> Hooks
.AppData
-> Hooks
.AppFinal
14358 structure ParseBase
= ParseBase (structure Dtd
= Dtd
14359 structure Hooks
= Hooks
14360 structure Resolve
= Resolve
14361 structure ParserOptions
= ParserOptions
)
14363 structure ParseContent
= ParseContent (structure ParseBase
= ParseBase
)
14366 Base UniChar Errors UniClasses Uri
14369 val THIS_MODULE
= "ParseContent"
14374 | INSTANCE
of int option
14378 of PROLOG
=> LOC_PROLOG
14379 | INSTANCE _
=> LOC_PROLOG
14380 | EPILOG
=> LOC_EPILOG
14382 fun checkRoot
dtd (a
,q
) (doc
,stag
as ((_
,elem
,_
,_
,_
),_
)) =
14388 else let val err
= ERR_ROOT_ELEM(Index2Element dtd doc
,
14389 Index2Element dtd elem
)
14390 in hookError(a
,(getPos q
,err
))
14394 fun parseDoc dtd caq
=
14396 fun do_data wher caq
=
14397 let fun doit hadError
ws (c
,a
,q
) =
14399 of 0wx00
=> (ws
,(c
,a
,q
))
14400 |
0wx26 (* #
"&" *) => (ws
,(c
,a
,q
))
14401 |
0wx3C (* #
"<" *) => (ws
,(c
,a
,q
))
14402 |
0wx09 (* #
"\t"*) => doit
hadError (c
::ws
) (getChar(a
,q
))
14403 |
0wx0A (* #
"\n"*) => doit
hadError (c
::ws
) (getChar(a
,q
))
14404 |
0wx20 (* #
" " *) => doit
hadError (c
::ws
) (getChar(a
,q
))
14405 | _
=> let val a1
= if hadError
then a
14406 else hookError(a
,(getPos q
,ERR_FORBIDDEN_HERE
14407 (IT_DATA nil
,locOf wher
)))
14408 in doit
true ws (getChar(a1
,q
))
14411 val (ws
,(c1
,a1
,q1
)) = doit
false nil caq
14412 val a2
= if null ws
then a1
14413 else hookWhite(a1
,Data2Vector (rev ws
))
14417 fun do_decl wher
q0 (c
,a
,q
) =
14419 of 0wx2D (* #
"-" *) =>
14420 let val (c1
,a1
,q1
) = getChar(a
,q
)
14421 in if c1
=0wx2D
then (wher
,parseComment (getPos q0
) (a1
,q1
))
14422 else let val err
= ERR_EXPECTED(expDash
,[c1
])
14423 val a2
= hookError(a1
,(getPos q1
,err
))
14424 val caq2
= recoverDecl
false (c1
,a2
,q1
)
14428 |
0wx5B (* #
"[" *) =>
14430 val err
= ERR_FORBIDDEN_HERE (IT_CDATA
,locOf wher
)
14431 val a1
= hookError(a
,(getPos q0
,err
))
14432 val caq2
= skipBadSection (getChar(a1
,q
))
14438 (let val (name
,(c1
,a1
,q1
)) = parseName (c
,a
,q
)
14439 handle NotFound (c
,a
,q
) =>
14440 let val err
= expectedOrEnded(expDashDocLbrk
,LOC_DECL
) c
14441 in raise SyntaxError (c
,hookError(a
,(getPos q
,err
)),q
)
14444 val _
= if name
=[0wx44
,0wx4f
,0wx43
,0wx54
,0wx59
,0wx50
,0wx45
]
14445 (* "DOCTYPE" *) then ()
14446 else let val err
= ERR_EXPECTED(expDashDocLbrk
,name
)
14447 val a2
= hookError(a1
,(getPos q
,err
))
14448 in raise SyntaxError (c1
,a2
,q1
)
14451 val (doc
,caq2
) = parseDocTypeDecl
dtd (c1
,a1
,q1
)
14452 in (INSTANCE doc
,caq2
)
14454 handle SyntaxError caq
=> (PROLOG
,recoverDecl
true caq
))
14456 | _
=> let val loc
= if wher
=EPILOG
then LOC_EPILOG
else LOC_AFTER_DTD
14457 val err
= ERR_FORBIDDEN_HERE (IT_DECL
,loc
)
14458 val a1
= hookError(a
,(getPos q0
,err
))
14459 val caq2
= skipDecl
true (c
,a1
,q
)
14463 and doit
wher (c
,a
,q
) =
14465 of 0wx00
=> if isSpecial q
then (wher
,(a
,q
))
14466 else doit
wher (getChar(a
,q
))
14467 (*--------------------------------------------------------------*)
14468 (* References are forbidden outside the document element
*)
14469 (*--------------------------------------------------------------*)
14470 |
0wx26 (* #
"&" *) =>
14472 val (c1
,a1
,q1
) = getChar(a
,q
)
14474 if c1
=0wx23 (* #
"#" *)
14475 then let val err
= ERR_FORBIDDEN_HERE(IT_CHAR_REF
,locOf wher
)
14476 val a2
= hookError(a1
,(getPos q
,err
))
14477 in skipCharRef (a2
,q1
)
14479 else let val err
= ERR_FORBIDDEN_HERE(IT_REF
,locOf wher
)
14480 val a2
= hookError(a1
,(getPos q
,err
))
14481 in skipReference (c1
,a2
,q1
)
14485 |
0wx3C (* #
"<" *) =>
14486 let val (c1
,a1
,q1
) = getChar (a
,q
)
14488 of 0wx21 (* #
"!" *) =>
14489 let val (wher1
,caq2
) = do_decl wher
q (getChar(a1
,q1
))
14492 |
0wx2F (* #
"/" *) =>
14494 val err
= ERR_FORBIDDEN_HERE(IT_ETAG
,locOf wher
)
14495 val a2
= hookError(a1
,(getPos q
,err
))
14496 val caq3
= skipTag
LOC_ETAG (a2
,q1
)
14499 |
0wx3F (* #
"?" *) => doit
wher (parseProcInstr (getPos q
) (a1
,q1
))
14504 of PROLOG
=> INSTANCE NONE
14508 raise InternalError(THIS_MODULE
,"parseDoc.doit","")
14511 val err
= ERR_FORBIDDEN_HERE(IT_STAG
,LOC_EPILOG
)
14512 val a2
= hookError(a1
,(getPos q
,err
))
14513 val caq3
= skipTag
LOC_STAG (a2
,q1
)
14514 in doit EPILOG caq3
14519 if not (!O_VALIDATE
) orelse isSome doc
then a1
14520 else hookError(a1
,(getPos q
,ERR_NO_DTD
))
14521 val (stag
,(c3
,a3
,q3
)) = parseSTag
14522 dtd (getPos q
) (c1
,a2
,q1
)
14523 val a4
= checkRoot
dtd (a3
,q1
) (doc
,stag
)
14524 val (opt
,(c5
,a5
,q5
)) = parseElement
14525 (dtd
,nil
,q
,stag
,(c3
,a4
,q3
))
14526 val a6
= checkDefinedIds
dtd (a5
,q5
)
14528 of NONE
=> doit
EPILOG (c5
,a6
,q5
)
14529 |
SOME (_
,_
,startPos
,_
) =>
14531 val err
= ERR_FORBIDDEN_HERE(IT_ETAG
,LOC_EPILOG
)
14532 val a7
= hookError(a6
,(startPos
,err
))
14533 in doit
EPILOG (c5
,a7
,q5
)
14536 handle SyntaxError caq
=> doit wher1 caq
)
14538 else let val err
= ERR_FORBIDDEN_HERE(IT_CHAR
0wx3C
,locOf wher
)
14539 val a2
= hookError(a1
,(getPos q
,err
))
14540 in doit
wher (c1
,a2
,q1
)
14543 | _
=> let val caq1
= do_data
wher (c
,a
,q
)
14550 (* to
false. (cf
. 2.9) *)
14552 (* ... If
... there is no standalone document declaration
, the
*)
14553 (* value
"no" is assumed
. *)
14554 fun parseDocument uriOpt dtdOpt a
=
14556 val dtd
= case dtdOpt
14557 of NONE
=> initDtdTables ()
14559 val (enc
,xmlDecl
,(c1
,a1
,q1
)) = openDocument uriOpt a
14560 val uri
= getUri q1
14561 val alone
= case xmlDecl
14562 of (SOME(_
,_
,SOME sa
)) => sa
14564 val _
= if alone
then setStandAlone dtd
true else ()
14565 val a2
= hookXml(a1
,(uri
,enc
,xmlDecl
))
14566 val (wher
,(a3
,q3
)) = parseDoc
dtd (c1
,a2
,q1
)
14567 val _
= closeAll q3
14570 | _
=> hookError(a3
,(getPos q3
,ERR_ENDED_IN_PROLOG
))
14573 handle CantOpenFile(fmsg
,a
) =>
14574 let val a1
= hookError(a
,(nullPosition
,ERR_NO_SUCH_FILE fmsg
))
14578 (* stop
of ../../Parser
/Parse
/parseDocument
.sml
*)
14579 (* start
of ../../Catalog
/catError
.sml
*)
14590 signature CatError
=
14593 val nullPosition
: Position
14594 val Position2String
: Position
-> string
14596 datatype Location
=
14603 datatype Expected
=
14607 datatype CatError
=
14608 ERR_DECODE_ERROR
of Decode
.Error
.DecodeError
14609 | ERR_NO_SUCH_FILE
of string * string
14610 | ERR_ILLEGAL_HERE
of UniChar
.Char * Location
14611 | ERR_MISSING_WHITE
14612 | ERR_EOF
of Location
14613 | ERR_EXPECTED
of Expected
* UniChar
.Char
14614 | ERR_XML
of Errors
.Error
14615 | ERR_MISSING_ATT
of UniChar
.Data
* UniChar
.Data
14616 | ERR_NON_PUBID
of UniChar
.Data
* UniChar
.Data
14618 val catMessage
: CatError
-> string list
14621 structure CatError
: CatError
=
14623 open Errors UtilError UtilString
14625 type Position
= string * int * int
14626 val nullPosition
= ("",0,0)
14628 fun Position2String (fname
,l
,c
) =
14629 if fname
="" then ""
14630 else String.concat
["[",fname
,":",Int2String l
,".",Int2String c
,"]"]
14632 datatype Location
=
14639 fun Location2String loc
=
14641 of LOC_CATALOG
=> "catalog file"
14642 | LOC_COMMENT
=> "comment"
14643 | LOC_NOCOMMENT
=> "something other than a comment"
14644 | LOC_PUBID
=> "public identifier"
14645 | LOC_SYSID
=> "system identifier"
14647 fun InLocation2String loc
=
14649 of LOC_CATALOG
=> "in a catalog file"
14650 | LOC_COMMENT
=> "in a comment"
14651 | LOC_NOCOMMENT
=> "outside of comments"
14652 | LOC_PUBID
=> "in a public identifier"
14653 | LOC_SYSID
=> "in a system identifier"
14655 datatype Expected
=
14659 fun Expected2String exp
=
14661 of EXP_NAME
=> "a name"
14662 | EXP_LITERAL
=> "a literal"
14664 datatype CatError
=
14665 ERR_DECODE_ERROR
of Decode
.Error
.DecodeError
14666 | ERR_NO_SUCH_FILE
of string * string
14667 | ERR_ILLEGAL_HERE
of UniChar
.Char * Location
14668 | ERR_MISSING_WHITE
14669 | ERR_EOF
of Location
14670 | ERR_EXPECTED
of Expected
* UniChar
.Char
14672 | ERR_MISSING_ATT
of UniChar
.Data
* UniChar
.Data
14673 | ERR_NON_PUBID
of UniChar
.Data
* UniChar
.Data
14675 fun catMessage err
=
14677 of ERR_DECODE_ERROR err
=> Decode
.Error
.decodeMessage err
14678 |
ERR_NO_SUCH_FILE(f
,msg
) => ["Could not open file",quoteErrorString f
,"("^msg^
")"]
14680 |
ERR_ILLEGAL_HERE (c
,loc
) =>
14681 ["Character",quoteErrorChar c
,"is not allowed",InLocation2String loc
]
14683 | ERR_MISSING_WHITE
=> ["Missing white space"]
14684 | ERR_EOF loc
=> [toUpperFirst (Location2String loc
),"ended by end of file"]
14685 |
ERR_EXPECTED (exp
,c
) =>
14686 ["Expected",Expected2String exp
,"but found",quoteErrorChar c
]
14688 | ERR_XML err
=> errorMessage err
14689 |
ERR_MISSING_ATT(elem
,att
) =>
14690 ["Element",quoteErrorData elem
,"has no",quoteErrorData att
,"attribute"]
14691 |
ERR_NON_PUBID(att
,cs
) =>
14692 ["Value specified for attribute",quoteErrorData att
,"contains non-PublicId",
14694 of [c
] => "character"^quoteErrorChar c
14695 | cs
=> List2xString ("characters ",", ","") quoteErrorChar cs
]
14697 (* stop
of ../../Catalog
/catError
.sml
*)
14698 (* start
of ../../Catalog
/catParams
.sml
*)
14704 signature CatParams
=
14706 val O_CATALOG_FILES
: Uri
.Uri list ref
14707 val O_PREFER_SOCAT
: bool ref
14708 val O_PREFER_SYSID
: bool ref
14709 val O_PREFER_CATALOG
: bool ref
14710 val O_SUPPORT_REMAP
: bool ref
14711 val O_CATALOG_ENC
: Encoding
.Encoding ref
14713 val catError
: CatError
.Position
* CatError
.CatError
-> unit
14716 (* stop
of ../../Catalog
/catParams
.sml
*)
14717 (* start
of ../../Unicode
/Uri
/uriDict
.sml
*)
14725 structure KeyUri
: Key
=
14729 val null
= Uri
.emptyUri
14730 val compare
= Uri
.compareUri
14731 val toString
= Uri
.Uri2String
14732 val hash
= Uri
.hashUri
14735 structure UriDict
= Dict (structure Key
= KeyUri
)
14736 (* stop
of ../../Unicode
/Uri
/uriDict
.sml
*)
14737 (* start
of ../../Catalog
/catData
.sml
*)
14740 structure CatData
=
14742 datatype CatEntry
=
14744 | E_DELEGATE
of string * Uri
.Uri
14745 | E_EXTEND
of Uri
.Uri
14746 | E_MAP
of string * Uri
.Uri
14747 | E_REMAP
of Uri
.Uri
* Uri
.Uri
14749 type Catalog
= Uri
.Uri
* CatEntry list
14751 (* stop
of ../../Catalog
/catData
.sml
*)
14752 (* start
of ../../Catalog
/catFile
.sml
*)
14762 signature CatFile
=
14767 val catOpenFile
: Uri
.Uri
-> CatFile
14768 val catCloseFile
: CatFile
-> unit
14769 val catGetChar
: CatFile
-> UniChar
.Char * CatFile
14770 val catPos
: CatFile
-> CatError
.Position
14773 functor CatFile ( structure Params
: CatParams
) : CatFile
=
14775 open UniChar CatError Decode Params Uri UtilError
14777 (* column
, line
, break
*)
14778 type PosInfo
= int * int * bool
14779 val startPos
= (0,1,false)
14782 NOFILE
of string * PosInfo
14783 | DIRECT
of DecFile
* PosInfo
14787 of NOFILE (uri
,(col
,line
,_
)) => (uri
,line
,col
)
14788 |
DIRECT (dec
,(col
,line
,_
)) => (decName dec
,line
,col
)
14790 fun catOpenFile uri
=
14791 let val dec
= decOpenUni(SOME uri
,!O_CATALOG_ENC
)
14792 in DIRECT(dec
,startPos
)
14794 handle NoSuchFile fmsg
=> let val _
= catError(nullPosition
,ERR_NO_SUCH_FILE fmsg
)
14795 in NOFILE(Uri2String uri
,startPos
)
14798 fun catCloseFile cf
=
14801 |
DIRECT(dec
,_
) => ignore (decClose dec
)
14803 fun catGetChar cf
=
14805 of NOFILE _
=> (0wx00
,cf
)
14806 |
DIRECT(dec
,(col
,line
,brk
)) =>
14807 (let val (c
,dec1
) = decGetChar dec
14809 of 0wx09
=> (c
,DIRECT(dec1
,(col
+1,line
,false)))
14810 |
0wx0A
=> if brk
then catGetChar(DIRECT(dec1
,(col
,line
,false)))
14811 else (c
,DIRECT(dec1
,(0,line
+1,false)))
14812 |
0wx0D
=> (0wx0A
,DIRECT(dec1
,(0,line
+1,true)))
14813 | _
=> if c
>=0wx20
then (c
,DIRECT(dec1
,(col
+1,line
,false)))
14814 else let val err
= ERR_ILLEGAL_HERE(c
,LOC_CATALOG
)
14815 val _
= catError(catPos cf
,err
)
14816 in catGetChar(DIRECT(dec1
,(col
+1,line
,false)))
14819 handle DecEof dec
=> (0wx00
,NOFILE(decName dec
,(col
,line
,brk
)))
14820 |
DecError(dec
,_
,err
) =>
14821 let val _
= catError(catPos cf
,ERR_DECODE_ERROR err
)
14822 in catGetChar(DIRECT(dec
,(col
,line
,false)))
14827 (* stop
of ../../Catalog
/catFile
.sml
*)
14828 (* start
of ../../Catalog
/socatParse
.sml
*)
14838 signature SocatParse
=
14840 val parseSoCat
: Uri
.Uri
-> CatData
.Catalog
14843 functor SocatParse ( structure Params
: CatParams
) : SocatParse
=
14845 structure CatFile
= CatFile ( structure Params
= Params
)
14847 open CatData CatError CatFile Params UniChar UniClasses Uri
14849 exception SyntaxError
of UniChar
.Char * CatFile
.CatFile
14850 exception NotFound
of UniChar
.Char * CatFile
.CatFile
14852 val getChar
= catGetChar
14854 fun parseName
' (c
,f
) =
14855 if isName c
then let val (cs
,cf1
) = parseName
' (getChar f
)
14859 fun parseName (c
,f
) =
14860 if isNms c
then let val (cs
,cf1
) = parseName
' (getChar f
)
14863 else raise NotFound (c
,f
)
14871 | KW_OTHER
of UniChar
.Data
14873 fun parseKeyword cf
=
14875 val (name
,cf1
) = parseName cf
14877 of [0wx42
,0wx41
,0wx53
,0wx45
] => KW_BASE
14878 |
[0wx43
,0wx41
,0wx54
,0wx41
,0wx4c
,0wx4f
,0wx47
] => KW_CATALOG
14879 |
[0wx44
,0wx45
,0wx4c
,0wx45
,0wx47
,0wx41
,0wx54
,0wx45
] => KW_DELEGATE
14880 |
[0wx50
,0wx55
,0wx42
,0wx4c
,0wx49
,0wx43
] => KW_PUBLIC
14881 |
[0wx53
,0wx59
,0wx53
,0wx54
,0wx45
,0wx4d
] => KW_SYSTEM
14882 | _
=> KW_OTHER name
14886 fun parseSysLit
' quote f
=
14888 fun doit
text (c
,f
) =
14889 if c
=quote
then (text
,getChar f
)
14890 else if c
<>0wx0
then doit (c
::text
) (getChar f
)
14891 else let val _
= catError(catPos f
,ERR_EOF LOC_SYSID
)
14894 val (text
,cf1
) = doit
nil (getChar f
)
14895 in (Data2Uri(rev text
),cf1
)
14897 fun parseSysLit
req (c
,f
) =
14898 if c
=0wx22
orelse c
=0wx27
then parseSysLit
' c f
14899 else if req
then let val _
= catError(catPos f
,ERR_EXPECTED(EXP_LITERAL
,c
))
14900 in raise SyntaxError (c
,f
)
14902 else raise NotFound (c
,f
)
14904 fun parsePubLit
' quote f
=
14906 fun doit (hadSpace
,atStart
,text
) (c
,f
) =
14908 of 0wx0
=> let val _
= catError(catPos f
,ERR_EOF LOC_PUBID
)
14911 |
0wx0A
=> doit (true,atStart
,text
) (getChar f
)
14912 |
0wx20
=> doit (true,atStart
,text
) (getChar f
)
14914 if c
=quote
then (text
,getChar f
)
14916 then if hadSpace
andalso not atStart
14917 then doit (false,false,c
::0wx20
::text
) (getChar f
)
14918 else doit (false,false,c
::text
) (getChar f
)
14919 else let val _
= catError(catPos f
,ERR_ILLEGAL_HERE(c
,LOC_PUBID
))
14920 in doit (hadSpace
,atStart
,text
) (getChar f
)
14922 val (text
,cf1
) = doit (false,true,nil
) (getChar f
)
14923 in (Latin2String(rev text
),cf1
)
14925 fun parsePubLit (c
,f
) =
14926 if c
=0wx22
orelse c
=0wx27
then parsePubLit
' c f
14927 else let val _
= catError(catPos f
,ERR_EXPECTED(EXP_LITERAL
,c
))
14928 in raise SyntaxError (c
,f
)
14931 fun skipComment (c
,f
) =
14933 of 0wx00
=> let val _
= catError(catPos f
,ERR_EOF LOC_COMMENT
)
14936 |
0wx2D
=> let val (c1
,f1
) = getChar f
14937 in if c1
= 0wx2D
then (getChar f1
) else skipComment (c1
,f1
)
14939 | _
=> skipComment (getChar f
)
14940 fun skipCopt (c
,f
) =
14943 |
0wx2D
=> let val (c1
,f1
) = getChar f
14944 in if c1
=0wx2D
then skipComment (getChar f1
)
14945 else let val _
= catError(catPos f
,ERR_ILLEGAL_HERE(c
,LOC_NOCOMMENT
))
14951 fun skipScomm req0 cf
=
14953 fun endit
req (c
,f
) =
14954 if req
andalso c
<>0wx00
14955 then let val _
= catError(catPos f
,ERR_MISSING_WHITE
)
14959 fun doit
req (c
,f
) =
14961 of 0wx00
=> endit
req (c
,f
)
14962 |
0wx09
=> doit
false (getChar f
)
14963 |
0wx0A
=> doit
false (getChar f
)
14964 |
0wx20
=> doit
false (getChar f
)
14965 |
0wx22
=> endit
req (c
,f
)
14966 |
0wx27
=> endit
req (c
,f
)
14968 let val (c1
,f1
) = getChar f
14970 then let val _
= if not req
then ()
14971 else catError(catPos f1
,ERR_MISSING_WHITE
)
14972 val cf1
= skipComment (getChar f1
)
14975 else let val _
= catError(catPos f
,ERR_ILLEGAL_HERE(c
,LOC_NOCOMMENT
))
14976 in doit
req (c1
,f1
)
14979 | _
=> if isNms c
then endit
req (c
,f
)
14980 else let val _
= catError(catPos f
,ERR_ILLEGAL_HERE(c
,LOC_NOCOMMENT
))
14981 in doit
req (getChar f
)
14986 val skipWS
= skipScomm
true
14987 val skipCommWS
= (skipScomm
false) o skipCopt
14988 val skipWSComm
= skipScomm
false
14992 val cf1
= skipWS cf
14993 val cf2
= let val (_
,cf
') = parseName cf1
14996 handle NotFound cf
=> cf
14999 let val (_
,cf1
) = parseSysLit
false cf
15000 in doit (skipWS cf1
)
15002 handle NotFound(c
,f
) => (c
,f
)
15009 val cf1
= skipWS cf
15010 val (lit
,cf2
) = parseSysLit
true cf1
15011 val cf3
= skipWS cf2
15013 (SOME(E_BASE lit
),cf3
)
15016 fun parseExtend cf
=
15018 val cf1
= skipWS cf
15019 val (lit
,cf2
) = parseSysLit
true cf1
15020 val cf3
= skipWS cf2
15022 (SOME(E_EXTEND lit
),cf3
)
15025 fun parseDelegate cf
=
15027 val cf1
= skipWS cf
15028 val (pub
,cf2
) = parsePubLit cf1
15029 val cf3
= skipWS cf2
15030 val (sys
,cf4
) = parseSysLit
true cf3
15031 val cf5
= skipWS cf4
15033 (SOME(E_DELEGATE(pub
,sys
)),cf5
)
15036 fun parseRemap cf
=
15038 val cf1
= skipWS cf
15039 val (sys0
,cf2
) = parseSysLit
true cf1
15040 val cf3
= skipWS cf2
15041 val (sys
,cf4
) = parseSysLit
true cf3
15042 val cf5
= skipWS cf4
15044 (SOME(E_REMAP(sys0
,sys
)),cf5
)
15049 val cf1
= skipWS cf
15050 val (pub
,cf2
) = parsePubLit cf1
15051 val cf3
= skipWS cf2
15052 val (sys
,cf4
) = parseSysLit
true cf3
15053 val cf5
= skipWS cf4
15055 (SOME(E_MAP(pub
,sys
)),cf5
)
15060 fun do_lit
q (c
,f
) =
15061 if c
=0wx00
then (c
,f
)
15062 else if c
=q
then getChar f
15063 else do_lit
q (getChar f
)
15067 |
0wx2D
=> let val (c1
,f1
) = getChar f
15068 in if c1
=0wx2D
then getChar f1
15069 else do_com (c1
,f1
)
15071 | _
=> do_com (getChar f
)
15075 |
0wx22
=> doit (do_lit
c (getChar f
))
15076 |
0wx27
=> doit (do_lit
c (getChar f
))
15077 |
0wx2D
=> let val (c1
,f1
) = getChar f
15078 in if c1
=0wx2D
then doit (do_com (getChar f1
))
15081 | _
=> if isNms c
then (c
,f
)
15082 else doit (getChar f
)
15086 fun parseEntry (cf
as (c
,f
)) =
15087 let val (kw
,cf1
) = parseKeyword cf
handle NotFound cf
=> raise SyntaxError cf
15089 of KW_BASE
=> parseBase cf1
15090 | KW_CATALOG
=> parseExtend cf1
15091 | KW_DELEGATE
=> parseDelegate cf1
15092 | KW_SYSTEM
=> parseRemap cf1
15093 | KW_PUBLIC
=> parseMap cf1
15094 | KW_OTHER _
=> skipOther cf1
15096 handle SyntaxError cf
=> (NONE
,recover cf
)
15098 fun parseDocument cf
=
15101 if c
=0wx0
then nil
before catCloseFile f
15102 else let val (opt
,cf1
) = parseEntry (c
,f
)
15103 val entries
= doit cf1
15106 | SOME entry
=> entry
::entries
15109 val cf1
= skipCommWS cf
15114 fun parseSoCat uri
=
15116 val f
= catOpenFile uri
15117 val cf1
= getChar f
15119 (uri
,parseDocument cf1
)
15122 (* stop
of ../../Catalog
/socatParse
.sml
*)
15123 (* start
of ../../Catalog
/catDtd
.sml
*)
15129 val delegateIdx
: int
15130 val extendIdx
: int
15138 val Index2AttNot
: Dtd
-> int -> UniChar
.Data
15139 val Index2Element
: Dtd
-> int -> UniChar
.Data
15146 val baseGi
= UniChar
.String2Data
"Base"
15147 val delegateGi
= UniChar
.String2Data
"Delegate"
15148 val extendGi
= UniChar
.String2Data
"Extend"
15149 val mapGi
= UniChar
.String2Data
"Map"
15150 val remapGi
= UniChar
.String2Data
"Remap"
15152 val hrefAtt
= UniChar
.String2Data
"HRef"
15153 val pubidAtt
= UniChar
.String2Data
"PublicId"
15154 val sysidAtt
= UniChar
.String2Data
"SystemId"
15156 fun initDtdTables () =
15158 val dtd
= Dtd
.initDtdTables()
15159 val _
= app (ignore
o (Element2Index dtd
)) [baseGi
,delegateGi
,extendGi
,mapGi
,remapGi
]
15160 val _
= app (ignore
o (AttNot2Index dtd
)) [hrefAtt
,pubidAtt
,sysidAtt
]
15165 val dtd
= initDtdTables()
15167 val baseIdx
= Element2Index dtd baseGi
15168 val delegateIdx
= Element2Index dtd delegateGi
15169 val extendIdx
= Element2Index dtd extendGi
15170 val mapIdx
= Element2Index dtd mapGi
15171 val remapIdx
= Element2Index dtd remapGi
15173 val hrefIdx
= AttNot2Index dtd hrefAtt
15174 val pubidIdx
= AttNot2Index dtd pubidAtt
15175 val sysidIdx
= AttNot2Index dtd sysidAtt
15178 (* stop
of ../../Catalog
/catDtd
.sml
*)
15179 (* start
of ../../Parser
/Params
/ignore
.sml
*)
15180 structure IgnoreHooks
=
15182 type AppData
= unit
15183 type AppFinal
= unit
15185 fun hookXml(a
,_
) = a
15186 fun hookFinish a
= a
15188 fun hookError(a
,_
) = a
15189 fun hookWarning(a
,_
) = a
15191 fun hookProcInst(a
,_
) = a
15192 fun hookComment(a
,_
) = a
15193 fun hookWhite(a
,_
) = a
15194 fun hookDecl (a
,_
) = a
15196 fun hookStartTag(a
,_
) = a
15197 fun hookEndTag(a
,_
) = a
15198 fun hookCData(a
,_
) = a
15199 fun hookData(a
,_
) = a
15201 fun hookCharRef(a
,_
) = a
15202 fun hookGenRef(a
,_
) = a
15203 fun hookParRef(a
,_
) = a
15204 fun hookEntEnd(a
,_
) = a
15206 fun hookDocType(a
,_
) = a
15207 fun hookSubset(a
,_
) = a
15208 fun hookExtSubset(a
,_
) = a
15209 fun hookEndDtd(a
,_
) = a
15211 (* stop
of ../../Parser
/Params
/ignore
.sml
*)
15212 (* start
of ../../Catalog
/catHooks
.sml
*)
15213 signature CatHooks
=
15215 type AppData
= CatData
.CatEntry list
15217 val initCatHooks
: unit
-> AppData
15220 functor CatHooks (structure Params
: CatParams
15221 structure Dtd
: CatDtd
) =
15224 Dtd HookData IgnoreHooks Params UniChar UniClasses Uri UtilList
15227 type AppData
= Dtd
* CatEntry list
15228 type AppFinal
= CatEntry list
15230 fun initCatHooks dtd
= (dtd
,nil
)
15232 fun hookError (a
,(pos
,err
)) = a
before catError (pos
,ERR_XML err
)
15234 fun getAtt
dtd (pos
,elem
,att
,trans
) atts
=
15236 val cvOpt
= findAndMap
15237 (fn (i
,ap
,_
) => if i
<>att
then NONE
15239 of AP_DEFAULT(_
,cv
,_
) => SOME cv
15240 |
AP_PRESENT(_
,cv
,_
) => SOME cv
15244 of SOME cv
=> trans (pos
,att
) cv
15245 | NONE
=> NONE
before catError
15246 (pos
,ERR_MISSING_ATT(Index2Element dtd elem
,Index2AttNot dtd att
))
15249 fun makePubid
dtd (pos
,att
) cv
=
15252 (fn (c
,(cs
,bad
)) => if isPubid c
then (Char2char c
::cs
,bad
)
15255 in if null bad
then SOME(String.implode cs
)
15256 else NONE
before catError(pos
,ERR_NON_PUBID(Index2AttNot dtd att
,bad
))
15259 fun makeUri (pos
,att
) cv
= SOME cv
15261 fun hookStartTag (a
as (dtd
,items
),((_
,pos
),elem
,atts
,_
,_
)) =
15263 then let val hrefOpt
= getAtt
dtd (pos
,elem
,hrefIdx
,makeUri
) atts
15266 | SOME href
=> (dtd
,E_BASE (Vector2Uri href
)::items
)
15268 else if elem
=delegateIdx
15269 then let val hrefOpt
= getAtt
dtd (pos
,elem
,hrefIdx
,makeUri
) atts
15270 val pubidOpt
= getAtt
dtd (pos
,elem
,pubidIdx
,makePubid dtd
) atts
15271 in case (hrefOpt
,pubidOpt
)
15272 of (SOME href
,SOME pubid
) =>
15273 (dtd
,E_DELEGATE(pubid
,Vector2Uri href
)::items
)
15276 else if elem
=extendIdx
15277 then let val hrefOpt
= getAtt
dtd (pos
,elem
,hrefIdx
,makeUri
) atts
15280 | SOME href
=> (dtd
,E_EXTEND (Vector2Uri href
)::items
)
15282 else if elem
=mapIdx
15283 then let val hrefOpt
= getAtt
dtd (pos
,elem
,hrefIdx
,makeUri
) atts
15284 val pubidOpt
= getAtt
dtd (pos
,elem
,pubidIdx
,makePubid dtd
) atts
15285 in case (hrefOpt
,pubidOpt
)
15286 of (SOME href
,SOME pubid
) =>
15287 (dtd
,E_MAP(pubid
,Vector2Uri href
)::items
)
15290 else if elem
=remapIdx
15291 then let val hrefOpt
= getAtt
dtd (pos
,elem
,hrefIdx
,makeUri
) atts
15292 val sysidOpt
= getAtt
dtd (pos
,elem
,sysidIdx
,makeUri
) atts
15293 in case (hrefOpt
,sysidOpt
)
15294 of (SOME href
,SOME sysid
) =>
15295 (dtd
,E_REMAP(Vector2Uri sysid
,Vector2Uri href
)::items
)
15300 fun hookFinish (_
,items
) = rev items
15302 (* stop
of ../../Catalog
/catHooks
.sml
*)
15303 (* start
of ../../Catalog
/catParse
.sml
*)
15304 signature CatParse
=
15306 val parseCatalog
: Uri
.Uri
-> CatData
.Catalog
15309 functor CatParse (structure Params
: CatParams
) : CatParse
=
15311 structure SocatParse
= SocatParse (structure Params
= Params
)
15313 structure ParserOptions
=
15315 structure Options
= ParserOptions()
15319 fun setDefaults() =
15321 val _
= setParserDefaults()
15323 val _
= O_WARN_MULT_ENUM
:= false
15324 val _
= O_WARN_XML_DECL
:= false
15325 val _
= O_WARN_ATT_NO_ELEM
:= false
15326 val _
= O_WARN_MULT_ENT_DECL
:= false
15327 val _
= O_WARN_MULT_NOT_DECL
:= false
15328 val _
= O_WARN_MULT_ATT_DEF
:= false
15329 val _
= O_WARN_MULT_ATT_DECL
:= false
15330 val _
= O_WARN_SHOULD_DECLARE
:= false
15332 val _
= O_VALIDATE
:= false
15333 val _
= O_COMPATIBILITY
:= false
15334 val _
= O_INTEROPERABILITY
:= false
15336 val _
= O_INCLUDE_EXT_PARSED
:= true
15340 val setParserDefaults
= setDefaults
15344 structure CatHooks
= CatHooks (structure Params
= Params
15345 structure Dtd
= CatDtd
)
15346 structure Parse
= Parse (structure Dtd
= CatDtd
15347 structure Hooks
= CatHooks
15348 structure Resolve
= ResolveNull
15349 structure ParserOptions
= ParserOptions
)
15351 open CatHooks CatDtd Parse ParserOptions SocatParse Uri
15353 fun parseXmlCat uri
=
15355 val _
= setParserDefaults()
15356 val dtd
= initDtdTables()
15357 val items
= parseDocument (SOME uri
) (SOME dtd
) (initCatHooks dtd
)
15362 fun isSocatSuffix x
= x
="soc" orelse x
="SOC"
15363 fun isXmlSuffix x
= x
="xml" orelse x
="XML"
15365 fun parseCatalog uri
=
15366 let val suffix
= uriSuffix uri
15367 in if isSocatSuffix suffix
then parseSoCat uri
15368 else (if isXmlSuffix suffix
then parseXmlCat uri
15369 else (if !O_PREFER_SOCAT
then parseSoCat uri
15370 else parseXmlCat uri
))
15373 (* stop
of ../../Catalog
/catParse
.sml
*)
15374 (* start
of ../../Catalog
/catalog
.sml
*)
15384 signature Catalog
=
15386 val resolveExtId
: string option
* (Uri
.Uri
* Uri
.Uri
) option
-> Uri
.Uri option
15389 functor Catalog ( structure Params
: CatParams
) : Catalog
=
15391 structure CatParse
= CatParse ( structure Params
= Params
)
15393 open CatData CatParse Params Uri UriDict
15395 val catDict
= makeDict("catalog",6,NONE
:Catalog option
)
15397 fun getCatalog uri
=
15398 let val idx
= getIndex(catDict
,uri
)
15399 in case getByIndex(catDict
,idx
)
15401 | NONE
=> let val cat
= parseCatalog uri
15402 val _
= setByIndex(catDict
,idx
,SOME cat
)
15407 datatype SearchType
=
15410 datatype SearchResult
=
15412 | NOTFOUND
of Uri list
15416 fun searchOne (base
,other
) nil
= NOTFOUND other
15417 |
searchOne (base
,other
) (entry
::entries
) =
15420 let val newBase
= uriJoin(base
,path
)
15421 in searchOne (newBase
,other
) entries
15424 let val fullPath
= uriJoin(base
,path
)
15425 in searchOne (base
,fullPath
::other
) entries
15427 |
E_DELEGATE(prefix
,path
) =>
15429 of PUB pid
=> if String.isPrefix prefix pid
15430 then let val fullPath
= uriJoin(base
,path
)
15431 in searchOne (base
,fullPath
::other
) entries
15433 else searchOne (base
,other
) entries
15434 | SYS _
=> searchOne (base
,other
) entries
)
15435 |
E_MAP(pubid
,path
) =>
15437 of PUB pid
=> if pubid
=pid
then FOUND (base
,path
)
15438 else searchOne (base
,other
) entries
15439 | _
=> searchOne (base
,other
) entries
)
15440 |
E_REMAP(sysid
,path
) =>
15442 of SYS sid
=> if sysid
=sid
then FOUND(base
,path
)
15443 else searchOne (base
,other
) entries
15444 | _
=> searchOne (base
,other
) entries
)
15446 fun searchLevel other nil
= NOTFOUND(rev other
)
15447 | searchLevel
other (fname
::fnames
) =
15449 val (base
,entries
) = getCatalog fname
15451 case searchOne (base
,other
) entries
15452 of FOUND bp
=> FOUND bp
15453 | NOTFOUND other
' => searchLevel other
' fnames
15456 fun searchAll fnames
=
15457 if null fnames
then NONE
15458 else case searchLevel nil fnames
15459 of FOUND bp
=> SOME bp
15460 | NOTFOUND other
=> searchAll other
15462 val fnames
= !O_CATALOG_FILES
15465 of PUB _
=> searchAll fnames
15466 | SYS _
=> if !O_SUPPORT_REMAP
then searchAll fnames
else NONE
15469 fun resolveExtId (pub
,sys
) =
15471 fun resolvePubCat () =
15474 | SOME id
=> case searchId (PUB id
)
15476 |
SOME(base
,sysid
) => case searchId (SYS sysid
)
15477 of NONE
=> SOME(base
,sysid
)
15480 fun resolveSysCat () =
15483 |
SOME(base
,id
) => searchId (SYS id
)
15485 fun resolveCat () =
15487 then case resolveSysCat ()
15488 of NONE
=> resolvePubCat ()
15490 else case resolvePubCat ()
15491 of NONE
=> resolveSysCat ()
15495 if !O_PREFER_CATALOG
15496 then case resolveCat ()
15497 of NONE
=> (case sys
15499 |
SOME(base
,id
) => SOME(base
,id
))
15502 of NONE
=> resolvePubCat ()
15503 |
SOME(base
,id
) => SOME(base
,id
)
15505 if null (!O_CATALOG_FILES
)
15508 |
SOME(base
,id
) => SOME (uriJoin (base
,id
))
15509 else case resolve ()
15511 | SOME bp
=> SOME (uriJoin bp
)
15514 (* stop
of ../../Catalog
/catalog
.sml
*)
15515 (* start
of ../../Catalog
/catResolve
.sml
*)
15523 functor ResolveCatalog ( structure Params
: CatParams
) : Resolve
=
15525 structure Catalog
= Catalog ( structure Params
= Params
)
15529 fun resolveExtId (id
as EXTID(pub
,sys
)) =
15530 let val pub1
= case pub
15532 |
SOME (str
,_
) => SOME str
15533 val sys1
= case sys
15535 |
SOME (base
,file
,_
) => SOME(base
,file
)
15536 in case Catalog
.resolveExtId (pub1
,sys1
)
15537 of NONE
=> raise NoSuchFile ("","Could not generate system identifier")
15541 (* stop
of ../../Catalog
/catResolve
.sml
*)
15542 (* start
of ../../Catalog
/catOptions
.sml
*)
15543 signature CatOptions
=
15545 val O_CATALOG_FILES
: Uri
.Uri list ref
15546 val O_PREFER_SOCAT
: bool ref
15547 val O_PREFER_SYSID
: bool ref
15548 val O_PREFER_CATALOG
: bool ref
15549 val O_SUPPORT_REMAP
: bool ref
15550 val O_CATALOG_ENC
: Encoding
.Encoding ref
15552 val setCatalogDefaults
: unit
-> unit
15553 val setCatalogOptions
: Options
.Option list
* (string -> unit
) -> Options
.Option list
15555 val catalogUsage
: Options
.Usage
15558 functor CatOptions () : CatOptions
=
15560 open Encoding Options Uri
15562 val O_CATALOG_FILES
= ref nil
: Uri list ref
15563 val O_PREFER_SOCAT
= ref
false
15564 val O_PREFER_SYSID
= ref
false
15565 val O_PREFER_CATALOG
= ref
true
15566 val O_SUPPORT_REMAP
= ref
true
15567 val O_CATALOG_ENC
= ref LATIN1
15569 fun setCatalogDefaults() =
15571 val _
= O_CATALOG_FILES
:= nil
15572 val _
= O_PREFER_SOCAT
:= false
15573 val _
= O_PREFER_SYSID
:= false
15574 val _
= O_PREFER_CATALOG
:= true
15575 val _
= O_SUPPORT_REMAP
:= true
15576 val _
= O_CATALOG_ENC
:= LATIN1
15581 [U_ITEM(["-C <url>","--catalog=<url>"],"Use catalog <url>"),
15582 U_ITEM(["--catalog-syntax=(soc|xml)"],"Default syntax for catalogs (xml)"),
15583 U_ITEM(["--catalog-encoding=<enc>"],"Default encoding for Socat catalogs (LATIN1)"),
15584 U_ITEM(["--catalog-remap=[(yes|no)]"],"Support remapping of system identifiers (yes)"),
15585 U_ITEM(["--catalog-priority=(map|remap|sys)"],"Resolving strategy in catalogs (map)")
15588 fun setCatalogOptions (opts
,doError
) =
15590 val catalogs
= ref nil
:string list ref
15592 fun hasNoArg key
= "option "^key^
" has no argument"
15593 fun mustHave key
= String.concat
["option ",key
," must have an argument"]
15594 fun mustBe(key
,what
) = String.concat
["the argument to --",key
," must be ",what
]
15596 val yesNo
= "'yes' or 'no'"
15597 val mapRemapSys
= "'map', 'remap' or 'sys'"
15598 val encName
= "'ascii', 'latin1', 'utf8' or 'utf16'"
15599 val syntaxName
= "'soc' or 'xml'"
15601 fun do_catalog valOpt
=
15603 of NONE
=> doError(mustHave
"--catalog")
15604 | SOME s
=> catalogs
:= s
::(!catalogs
)
15606 fun do_prio valOpt
=
15607 let fun set(cat
,sys
) = (O_PREFER_CATALOG
:= cat
; O_PREFER_SYSID
:= sys
)
15609 of NONE
=> doError(mustHave
"--catalog-priority")
15610 | SOME
"map" => set(true,false)
15611 | SOME
"remap" => set(true,true)
15612 | SOME
"sys" => set(false,true)
15613 | SOME s
=> doError(mustBe("catalog-priority",mapRemapSys
))
15616 fun do_enc valOpt
=
15618 of NONE
=> doError(mustHave
"--catalog-encoding")
15619 | SOME s
=> case isEncoding s
15620 of NOENC
=> doError("unsupported encoding "^s
)
15621 | enc
=> O_CATALOG_ENC
:= enc
15623 fun do_remap valOpt
=
15625 of NONE
=> doError(mustHave
"--catalog-remap")
15626 | SOME
"no" => O_SUPPORT_REMAP
:= false
15627 | SOME
"yes" => O_SUPPORT_REMAP
:= true
15628 | SOME s
=> doError(mustBe("catalog-remap",yesNo
))
15630 fun do_syntax valOpt
=
15632 of NONE
=> doError(mustHave
"--catalog-syntax")
15633 | SOME
"soc" => O_PREFER_SOCAT
:= true
15634 | SOME
"xml" => O_PREFER_SOCAT
:= false
15635 | SOME s
=> doError(mustBe("catalog-remap",syntaxName
))
15637 fun do_long(key
,valOpt
) =
15639 of "catalog" => true before do_catalog valOpt
15640 |
"catalog-remap" => true before do_remap valOpt
15641 |
"catalog-syntax" => true before do_syntax valOpt
15642 |
"catalog-encoding" => true before do_enc valOpt
15643 |
"catalog-priority" => true before do_prio valOpt
15646 fun do_short cs opts
=
15648 of nil
=> doit opts
15651 of OPT_STRING s
::opts1
=> (catalogs
:= s
::(!catalogs
);
15653 | _
=> let val _
= doError (mustHave
"-C")
15657 let val cs1
= List.filter
15658 (fn c
=> if #
"C"<>c
then true
15659 else false before doError (mustHave
"-C")) cs
15660 in if null cs1
then doit opts
else (OPT_SHORT cs1
)::doit opts
15664 |
doit (opt
::opts
) =
15666 of OPT_NOOPT
=> opts
15667 |
OPT_LONG(key
,value
) => if do_long(key
,value
) then doit opts
15668 else opt
::doit opts
15669 | OPT_SHORT cs
=> do_short cs opts
15670 | OPT_NEG cs
=> opt
::doit opts
15671 | OPT_STRING s
=> opt
::doit opts
15673 val opts1
= doit opts
15674 val uris
= map
String2Uri (!catalogs
)
15675 val _
= O_CATALOG_FILES
:= uris
15679 (* stop
of ../../Catalog
/catOptions
.sml
*)
15680 (* start
of nullOptions
.sml
*)
15681 signature NullOptions
=
15683 val O_SILENT
: bool ref
15684 val O_ERROR_DEVICE
: TextIO.outstream ref
15685 val O_ERROR_LINEWIDTH
: int ref
15687 val setNullDefaults
: unit
-> unit
15688 val setNullOptions
: Options
.Option list
* (string -> unit
)
15689 -> bool * bool * string option
* string option
15691 val nullUsage
: Options
.Usage
15694 structure NullOptions
: NullOptions
=
15698 val O_SILENT
= ref
false
15699 val O_ERROR_DEVICE
= ref
TextIO.stdErr
15700 val O_ERROR_LINEWIDTH
= ref
80
15703 [U_ITEM(["-s","--silent"],"Suppress reporting of errors and warnings"),
15704 U_ITEM(["-e <file>","--error-output=<file>"],"Redirect errors to file (stderr)"),
15706 U_ITEM(["--version"],"Print the version number and exit"),
15707 U_ITEM(["-?","--help"],"Print this text and exit"),
15708 U_ITEM(["--"],"Do not recognize remaining arguments as options")
15711 fun setNullDefaults () =
15713 val _
= O_SILENT
:= false
15714 val _
= O_ERROR_DEVICE
:= TextIO.stdErr
15718 fun setNullOptions (opts
,optError
) =
15720 fun onlyOne what
= "at most one "^what^
" may be specified"
15721 fun unknown pre opt
= String.concat
["unknown option ",pre
,opt
]
15722 fun hasNoArg pre key
= String.concat
["option ",pre
,key
," expects no argument"]
15723 fun mustHave pre key
= String.concat
["option ",pre
,key
," must have an argument"]
15725 fun check_noarg(key
,valOpt
) =
15726 if isSome valOpt
then optError (hasNoArg
"--" key
) else ()
15728 fun do_long (pars
as (v
,h
,e
,f
)) (key
,valOpt
) =
15730 of "help" => (v
,true,e
,f
) before check_noarg(key
,valOpt
)
15731 |
"version" => (true,h
,e
,f
) before check_noarg(key
,valOpt
)
15732 |
"silent" => pars
before O_SILENT
:= true before check_noarg(key
,valOpt
)
15733 |
"error-output" =>
15735 of NONE
=> pars
before optError (mustHave
"--" key
)
15736 | SOME s
=> (v
,h
,SOME s
,f
))
15737 | _
=> pars
before optError(unknown
"--" key
)
15739 fun do_short (pars
as (v
,h
,e
,f
)) (cs
,opts
) =
15741 of nil
=> doit pars opts
15742 |
[#
"e"] => (case opts
15743 of OPT_STRING s
::opts1
=> doit (v
,h
,SOME s
,f
) opts1
15744 | _
=> (optError (hasNoArg
"-" "e"); doit pars opts
))
15745 | cs
=> doit (foldr
15748 of #
"e" => pars
before optError (hasNoArg
"-" "e")
15749 | #
"s" => pars
before O_SILENT
:= true
15750 | #
"?" => (v
,true,e
,f
)
15752 optError (unknown
"-" (String.implode
[c
])))
15755 and doit pars nil
= pars
15756 |
doit (pars
as (v
,h
,e
,f
)) (opt
::opts
) =
15758 of OPT_LONG(key
,valOpt
) => doit (do_long
pars (key
,valOpt
)) opts
15759 | OPT_SHORT cs
=> do_short
pars (cs
,opts
)
15760 | OPT_STRING s
=> if isSome f
15761 then let val _
= optError(onlyOne
"input file")
15764 else doit (v
,h
,e
,SOME s
) opts
15765 | OPT_NOOPT
=> doit pars opts
15766 | OPT_NEG cs
=> let val _
= if null cs
then ()
15767 else app (fn c
=> optError
15768 (unknown
"-n" (String.implode
[c
]))) cs
15771 in doit (false,false,NONE
,NONE
) opts
15774 (* stop
of nullOptions
.sml
*)
15775 (* start
of nullHooks
.sml
*)
15776 structure NullHooks
=
15778 open Errors IgnoreHooks NullOptions
15780 type AppData
= OS
.Process
.status
15781 type AppFinal
= AppData
15782 val nullStart
= OS
.Process
.success
15784 fun printError(pos
,err
) = if !O_SILENT
then () else TextIO.output
15785 (!O_ERROR_DEVICE
,formatMessage (4,!O_ERROR_LINEWIDTH
)
15786 (Position2String pos
15787 ::(if isFatalError err
then "Fatal error:" else "Error:")
15788 ::errorMessage err
))
15789 fun printWarning(pos
,warn
) = if !O_SILENT
then () else TextIO.output
15790 (!O_ERROR_DEVICE
,formatMessage (4,!O_ERROR_LINEWIDTH
)
15791 (Position2String pos^
" Warning:"::warningMessage warn
))
15793 fun hookError (_
,pe
) = OS
.Process
.failure
before printError pe
15794 fun hookWarning (status
,pw
) = status
before printWarning pw
15796 (* stop
of nullHooks
.sml
*)
15797 (* start
of null
.sml
*)
15800 structure ParserOptions
= ParserOptions ()
15801 structure CatOptions
= CatOptions ()
15802 structure CatParams
=
15804 open CatError CatOptions NullOptions Uri UtilError
15806 fun catError(pos
,err
) = if !O_SILENT
then () else TextIO.output
15807 (!O_ERROR_DEVICE
,formatMessage (4,!O_ERROR_LINEWIDTH
)
15808 (Position2String pos^
" Error in catalog:"::catMessage err
))
15810 structure Resolve
= ResolveCatalog (structure Params
= CatParams
)
15811 structure ParseNull
= Parse (structure Dtd
= Dtd
15812 structure Hooks
= NullHooks
15813 structure Resolve
= Resolve
15814 structure ParserOptions
= ParserOptions
)
15816 fun parseNull uri
= ParseNull
.parseDocument uri NONE NullHooks
.nullStart
15819 CatOptions NullOptions Options ParserOptions Uri
15821 val usage
= List.concat
[parserUsage
,[U_SEP
],catalogUsage
,[U_SEP
],nullUsage
]
15823 exception Exit
of OS
.Process
.status
15825 fun null(prog
,args
) =
15828 val hadError
= ref
false
15831 let val _
= TextIO.output(TextIO.stdErr
,msg^
".\n")
15832 in hadError
:= true
15834 fun exitError msg
=
15835 let val _
= TextIO.output(TextIO.stdErr
,msg^
".\n")
15836 in raise Exit OS
.Process
.failure
15838 fun exitHelp prog
=
15839 let val _
= printUsage
TextIO.stdOut prog usage
15840 in raise Exit OS
.Process
.success
15842 fun exitVersion prog
=
15843 let val _
= app print
[prog
," version ",Version
.FXP_VERSION
,"\n"]
15844 in raise Exit OS
.Process
.success
15847 fun summOpt prog
= "For a summary of options type "^prog^
" --help"
15848 fun noFile(f
,cause
) = "can't open file '"^f^
"': "^exnMessage cause
15850 val opts
= parseOptions args
15851 val _
= setParserDefaults()
15852 val opts1
= setParserOptions (opts
,optError
)
15853 val _
= setCatalogDefaults()
15854 val opts2
= setCatalogOptions (opts1
,optError
)
15855 val _
= setNullDefaults()
15856 val (vers
,help
,err
,file
) = setNullOptions (opts2
,optError
)
15857 val _
= if !hadError
then exitError (summOpt prog
) else ()
15858 val _
= if vers
then exitVersion prog
else ()
15859 val _
= if help
then exitHelp prog
else ()
15861 of SOME
"-" => O_ERROR_DEVICE
:= TextIO.stdErr
15862 | SOME f
=> (O_ERROR_DEVICE
:= TextIO.openOut f
15863 handle IO
.Io
{cause
,...} => exitError(noFile(f
,cause
)))
15865 val f
= valOf file
handle Option
=> "-"
15866 val uri
= if f
="-" then NONE
else SOME(String2Uri f
)
15867 val status
= parseNull uri
15868 val _
= if isSome err
then TextIO.closeOut (!O_ERROR_DEVICE
) else ()
15871 handle Exit status
=> status
15873 let val _
= TextIO.output
15874 (TextIO.stdErr
,prog^
": Unexpected exception: "^exnMessage exn^
".\n")
15875 in OS
.Process
.failure
15879 (* stop
of null
.sml
*)
15880 (* start
of call
-null
.sml
*)
15881 val _
= Null
.null (CommandLine
.name (), CommandLine
.arguments ())
15882 (* stop
of call
-null
.sml
*)