(* MLton 20010629 (built Fri Jun 29 11:14:21 2001 on starlinux) *) (* created this file on Fri Jun 29 13:02:19 2001. *) (* Do not edit this file. *) (* Flag settings: *) (* aux: false *) (* chunk: chunk per function *) (* debug: false *) (* defines: [NODEBUG,MLton_safe=TRUE,MLton_detectOverflow=TRUE] *) (* detect overflow: true *) (* fixed heap: None *) (* indentation: 3 *) (* includes: [mlton.h] *) (* inline: NonRecursive {product = 320,small = 60} *) (* input file: fxp.cm *) (* instrument: false *) (* instrument Sxml: false *) (* keep Cps: false *) (* native: true *) (* native commented: 0 *) (* native copy prop: true *) (* future: 64 *) (* native ieee fp: false *) (* native live transfer: true *) (* native move hoist: true *) (* native optimize: 1 *) (* native split: Some (20000) *) (* polyvariance: Some ({rounds = 2,small = 30,product = 300}) *) (* print at fun entry: false *) (* profile: false *) (* safe: true *) (* show types: false *) (* static: false *) (* use basis library: true *) (* verbosity: Silent *) (* start of ../../Util/utilTime.sml *) (*--------------------------------------------------------------------------*) (* Structure: UtilTime *) (* *) (* Depends on: *) (* *) (* Exceptions raised by functions in this structure: *) (* time : none *) (* timeN : none *) (*--------------------------------------------------------------------------*) signature UtilTime = sig val time : ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} val timeN : int -> ('a -> 'b) -> 'a -> 'b * {usr:Time.time, sys:Time.time, gc:Time.time} end structure UtilTime : UtilTime = struct (*--------------------------------------------------------------------*) (* run f on x, and measure the runtime. return the result and time. *) (*--------------------------------------------------------------------*) fun time f x = let val timer = Timer.startCPUTimer () val y = f x val ptime = Timer.checkCPUTimer timer in (y,ptime) end (*--------------------------------------------------------------------*) (* run f n times on x, and measure the runtime. return the time. *) (*--------------------------------------------------------------------*) fun timeN n f x = let fun iter m = if m<=1 then f x else (ignore (f x); iter (m-1)) in time iter n end end (* stop of ../../Util/utilTime.sml *) (* start of ../../Util/utilString.sml *) (*--------------------------------------------------------------------------*) (* Structure: UtilString *) (*--------------------------------------------------------------------------*) signature UtilString = sig val quoteString : char -> string -> string val numberNth : int -> string val prependAnA : string -> string val nBlanks : int -> string val padxLeft : char -> string * int -> string val padxRight : char -> string * int -> string val breakLines : int -> string -> string list val toUpperFirst : string -> string val toUpperString : string -> string val Int2String : int -> string val Bool2xString : string * string -> bool -> string val Bool2String : bool -> string val Option2xString : string * (('a -> string) -> 'a -> string) -> ('a -> string) -> 'a option -> string val Option2String0 : ('a -> string) -> 'a option -> string val Option2String : ('a -> string) -> 'a option -> string val List2xString : string * string * string -> ('a -> string) -> 'a list -> string val List2String0 : ('a -> string) -> 'a list -> string val List2String : ('a -> string) -> 'a list -> string val Vector2xString : string * string * string -> ('a -> string) -> 'a vector -> string val Vector2String : ('a -> string) -> 'a vector -> string end structure UtilString : UtilString = struct fun quoteString q s = let val quote = String.implode [q] in quote^s^quote end (*--------------------------------------------------------------------*) (* generate a string with the ordinal number of n, by appending *) (* "st", "nd", "rd" or "th" to the number. *) (*--------------------------------------------------------------------*) fun numberNth n = let val suffix = case n mod 9 of 1 => "st" | 2 => "nd" | 3 => "rd" | _ => "th" in Int.toString n^suffix end (*--------------------------------------------------------------------*) (* is the single character c represented by a word starting with a *) (* vocal in the alphabet? (l~ell->true, k~kay->false) *) (*--------------------------------------------------------------------*) fun vocalLetter c = case Char.toLower c of #"a" => true | #"f" => true | #"h" => true | #"i" => true | #"l" => true | #"m" => true | #"n" => true | #"o" => true | #"r" => true | #"s" => true | #"x" => true | #"8" => true | _ => false (*--------------------------------------------------------------------*) (* is character c a vocal? *) (*--------------------------------------------------------------------*) fun isVocal c = case Char.toLower c of #"a" => true | #"e" => true | #"i" => true | #"o" => true | #"u" => true | _ => false (*--------------------------------------------------------------------*) (* does a word require "an" as undefinite article? true if: *) (* - it is a single letter that starts with a vocal in the alphabet *) (* - its first two letters are capitals, i.e. it is an abbreviation, *) (* and its first letter starts with a vocal in the alphabet *) (* - it has more than one letter, is not an abbreviation, and either *) (* + it starts with a, i or o *) (* + it starts with e and the second letter is not a u (europe) *) (* + it starts with a u and continues with a vocal (very unlikely, *) (* only in c.s., like uuencoded or uid *) (* + it starts with a u, continues with a consonant not followed by *) (* an i (like in unicode); that is something like un-... *) (* This ruleset is not complete since it does not cover, e.g., the *) (* word uninvented, but sufficient for most cases. *) (* (Is english pronounciation decidable at all?) *) (*--------------------------------------------------------------------*) fun extendsAtoAn word = case String.explode word of nil => false | [c] => vocalLetter c | c1::c2::cs => if not (Char.isLower c1 orelse Char.isLower c2) then vocalLetter c1 else case Char.toLower c1 of #"a" => true | #"i" => true | #"o" => true | #"e" => Char.toLower c2 <> #"u" | #"u" => if isVocal c2 then false else (case cs of nil => true | c3::_ => Char.toLower c3 <> #"i") | _ => false (*--------------------------------------------------------------------*) (* add an undefinite article to a word. *) (*--------------------------------------------------------------------*) fun prependAnA word = if extendsAtoAn word then "an "^word else "a "^word (*--------------------------------------------------------------------*) (* generate a list/string of n times character c. *) (*--------------------------------------------------------------------*) fun nCharsC c n = if n>0 then c::nCharsC c (n-1) else nil fun nChars c n = String.implode (nCharsC c n) val nBlanks = nChars #" " (*--------------------------------------------------------------------*) (* add a minimal number of characters c to the left/right of a string *) (* in order to make its length at least n. *) (*--------------------------------------------------------------------*) fun padxLeft c (s,n) = (nChars c (n-String.size s))^s fun padxRight c (s,n) = s^(nChars c (n-String.size s)) val padLeft = padxLeft #" " val padRight = padxRight #" " (*--------------------------------------------------------------------*) (* break a string into several lines of length width. *) (*--------------------------------------------------------------------*) fun breakLines width str = let val tokens = String.tokens (fn c => #" "=c) str fun makeLine(toks,lines) = if null toks then lines else (String.concat (rev toks))::lines fun doit w (toks,lines) nil = makeLine(toks,lines) | doit w (toks,lines) (one::rest) = let val l = String.size one val w1 = w+l in if w1=width then doit 0 (nil,one::makeLine(toks,lines)) rest else doit (l+1) ([" ",one],makeLine(toks,lines)) rest end in List.rev (doit 0 (nil,nil) tokens) end (*--------------------------------------------------------------------*) (* convert the first/all characters of a string to upper case *) (*--------------------------------------------------------------------*) fun toUpperFirst str = case String.explode str of nil => "" | c::cs => String.implode (Char.toUpper c::cs) fun toUpperString str = String.implode(map Char.toUpper (String.explode str)) (*--------------------------------------------------------------------*) (* return a string representation of an int, char or unit. *) (*--------------------------------------------------------------------*) val Int2String = Int.toString val Char2String = Char.toString fun Unit2String() = "()" (*--------------------------------------------------------------------*) (* return a string representation of a boolean. *) (*--------------------------------------------------------------------*) fun Bool2xString (t,f) b = if b then t else f val Bool2String = Bool2xString ("true","false") (*--------------------------------------------------------------------*) (* return a string representation of an option. *) (* the first arg is a string for the NONE case, the second a function *) (* that converts x to a string, given a function for doing so. *) (*--------------------------------------------------------------------*) fun Option2xString (none,Some2String) x2String opt = case opt of NONE => none | SOME x => Some2String x2String x fun Option2String0 x2String = Option2xString ("",fn f => fn x => f x) x2String fun Option2String x2String = Option2xString ("NONE",fn f => fn x => "SOME "^f x) x2String (*--------------------------------------------------------------------*) (* return a string representation of list; start with pre, separate *) (* with sep and finish with post; use X2String for each element. *) (*--------------------------------------------------------------------*) fun List2xString (pre,sep,post) X2String nil = pre^post | List2xString (pre,sep,post) X2String l = let fun doit nil _ = [post] | doit (x::r) str = str::X2String x::doit r sep in String.concat (doit l pre) end fun List2String X2String nil = "[]" | List2String X2String l = let fun doit nil _ = ["]"] | doit (x::r) str = str::X2String x::doit r "," in String.concat (doit l "[") end fun List2String0 X2String nil = "" | List2String0 X2String l = let fun doit nil _ = nil | doit (x::r) str = str::X2String x::doit r " " in String.concat (doit l "") end (* a compiler bug in smlnj 110 makes the following uncompilable: *) (* fun List2String X2String xs = List2xString ("[",",","]") X2String xs *) (* fun List2String0 X2String xs = List2xString (""," ","") X2String xs *) (*--------------------------------------------------------------------*) (* return a string representation of list; start with pre, separate *) (* with sep and finish with post; use X2String for each element. *) (*--------------------------------------------------------------------*) fun Vector2xString (pre,sep,post) X2String vec = if Vector.length vec=0 then pre^post else String.concat (pre::X2String(Vector.sub(vec,0)):: Vector.foldri (fn (_,x,yet) => sep::X2String x::yet) [post] (vec,1,NONE)) fun Vector2String X2String vec = Vector2xString ("#[",",","]") X2String vec end (* stop of ../../Util/utilString.sml *) (* start of ../../Util/utilCompare.sml *) signature UtilCompare = sig type 'a Comparer = 'a * 'a -> order val comparePair : 'a Comparer * 'b Comparer -> ('a * 'b) Comparer val compareTriple : 'a Comparer * 'b Comparer * 'c Comparer -> ('a * 'b * 'c) Comparer val compareOption : 'a Comparer -> 'a option Comparer val compareList : 'a Comparer -> 'a list Comparer val compareVector : 'a Comparer -> 'a vector Comparer val compareInt : int Comparer val compareIntPair : (int * int) Comparer val compareIntTriple : (int * int * int) Comparer val compareWord : word Comparer val compareWordPair : (word * word) Comparer val compareWordTriple : (word * word * word) Comparer end structure UtilCompare : UtilCompare = struct type 'a Comparer = 'a * 'a -> order fun comparePair (compareA,compareB) ((a1,b1),(a2,b2)) = case compareA(a1,a2) of EQUAL => compareB(b1,b2) | order => order fun compareTriple (compareA,compareB,compareC) ((a1,b1,c1),(a2,b2,c2)) = case compareA(a1,a2) of EQUAL => (case compareB(b1,b2) of EQUAL => compareC(c1,c2) | order => order) | order => order val compareInt = Int.compare fun compareIntPair((x1,y1),(x2,y2)) = case Int.compare(x1,x2) of EQUAL => Int.compare (y1,y2) | order => order fun compareIntTriple((x1,y1,z1),(x2,y2,z2)) = case Int.compare(x1,x2) of EQUAL => (case Int.compare (y1,y2) of EQUAL => Int.compare (z1,z2) | order => order) | order => order val compareWord = Word.compare fun compareWordPair((x1,y1),(x2,y2)) = case Word.compare(x1,x2) of EQUAL => Word.compare (y1,y2) | order => order fun compareWordTriple((x1,y1,z1),(x2,y2,z2)) = case Word.compare(x1,x2) of EQUAL => (case Word.compare (y1,y2) of EQUAL => Word.compare (z1,z2) | order => order) | order => order fun compareOption compareA opts = case opts of (NONE,NONE) => EQUAL | (NONE,SOME x) => LESS | (SOME x,NONE) => GREATER | (SOME x,SOME y) => compareA(x,y) fun compareList compA ll = let fun doit (nil,nil) = EQUAL | doit (nil,_) = LESS | doit (_,nil) = GREATER | doit (a1::as1,a2::as2) = case compA(a1,a2) of EQUAL => doit(as1,as2) | order => order in doit ll end fun compareVector compA (vec1,vec2) = let val (l,l2) = (Vector.length vec1,Vector.length vec2) in case Int.compare(l,l2) of EQUAL => let fun doit i = if i>=l then EQUAL else case compA(Vector.sub(vec1,i),Vector.sub(vec2,i)) of EQUAL => doit (i+1) | order => order in doit 0 end | order => order end end (* stop of ../../Util/utilCompare.sml *) (* start of ../../Util/utilHash.sml *) signature UtilHash = sig val hashPair : ('a -> word) * ('b -> word) -> 'a * 'b -> word val hashTriple : ('a -> word) * ('b -> word) * ('c -> word) -> 'a * 'b * 'c -> word val hashOption : ('a -> word) -> 'a option -> word val hashList : ('a -> word) -> 'a list -> word val hashVector : ('a -> word) -> 'a vector -> word val hashString : string -> word val hashInt : int -> word val hashIntPair : int * int -> word val hashIntTriple : int * int * int -> word val hashWord : word -> word val hashWordPair : word * word -> word val hashWordTriple : word * word * word -> word end structure UtilHash : UtilHash = struct fun hashPair (hashA,hashB) (a,b) = 0w1327 * hashA a + 0w3853 * hashB b fun hashTriple (hashA,hashB,hashC) (a,b,c) = 0w1327 * hashA a + 0w3853 * hashB b + 0w2851 * hashC c val hashInt = Word.fromInt fun hashIntPair (i,j) = 0w1327 * Word.fromInt i + 0w3853 * Word.fromInt j fun hashIntTriple (i,j,k) = 0w1327 * Word.fromInt i + 0w3853 * Word.fromInt j + 0w2851 * Word.fromInt k fun hashWord w = w fun hashWordPair (i,j) = 0w1327 * i + 0w3853 * j fun hashWordTriple (i,j,k) = 0w1327 * i + 0w3853 * j + 0w2851 * k val hashChar = Word.fromInt o ord fun hashString s = case String.size s of 0 => 0wx0 | 1 => 0w1 + hashChar(String.sub(s,0)) | 2 => let val w1 = String.sub(s,0) val w2 = String.sub(s,1) in 0w2 + hashChar w1 * 0wx1327 + hashChar w2 end | n => let val w1 = String.sub(s,0) val w2 = String.sub(s,1) val wn = String.sub(s,n-1) in 0w3 + hashChar w1 * 0wx3853 + hashChar w2 * 0wx1327 + hashChar wn end fun hashOption hashA opt = case opt of NONE => 0w0 | SOME a => 0w1 + hashA a fun hashList hashA l = case l of nil => 0wx0 | [a] => 0w1 + hashA a | a1::a2::_ => 0w2 + 0w3853 * hashA a1 + 0wx1327 * hashA a2 fun hashVector hashA cv = case Vector.length cv of 0 => 0wx0 | 1 => 0w1 + hashA(Vector.sub(cv,0)) | 2 => let val w1 = Vector.sub(cv,0) val w2 = Vector.sub(cv,1) in 0w2 + hashA w1 * 0wx1327 + hashA w2 end | n => let val w1 = Vector.sub(cv,0) val w2 = Vector.sub(cv,1) val wn = Vector.sub(cv,n-1) in 0w3 + hashA w1 * 0wx3853 + hashA w2 * 0wx1327 + hashA wn end end (* stop of ../../Util/utilHash.sml *) (* start of ../../Util/SymDict/key.sml *) (*--------------------------------------------------------------------------*) (* In order to be used as a dictinary/symbol table key, a type must have a *) (* null value, hash to words, must be comparable and printable. *) (*--------------------------------------------------------------------------*) signature Key = sig type Key val null : Key val hash : Key -> word val compare : Key * Key -> order val toString : Key -> string end (* stop of ../../Util/SymDict/key.sml *) (* start of ../../Util/utilInt.sml *) (*--------------------------------------------------------------------------*) (* Structure: UtilInt *) (* *) (* Depends on: *) (* *) (* Exceptions raised by functions in this structure: *) (* appInterval : none *) (* insertInt : none *) (* insertNewInt : none *) (* nextPowerTwo : none *) (*--------------------------------------------------------------------------*) signature UtilInt = sig val intervalList : (int * int) -> int list val appInterval : (int -> unit) -> (int * int) -> unit val insertInt : int * int list -> int list val insertNewInt : int * int list -> int list option val powerOfTwo : int -> int val nextPowerTwo : int -> int end structure UtilInt : UtilInt = struct (*--------------------------------------------------------------------*) (* generate the list [n,...,m] *) (*--------------------------------------------------------------------*) fun intervalList(n,m) = if n>m then nil else n::intervalList(n+1,m) (*--------------------------------------------------------------------*) (* apply f to each number in [n...m] *) (*--------------------------------------------------------------------*) fun appInterval f (n,m) = let fun doit i = if i>m then () else let val _ = f i in doit (i+1) end in doit n end (*--------------------------------------------------------------------*) (* insert an integer into a sorted list without duplicates. *) (*--------------------------------------------------------------------*) fun insertInt (x:int,l) = let fun go nil = [x] | go (l as y::ys) = case Int.compare (x,y) of LESS => x::l | EQUAL => l | GREATER => y::go ys in go l end (*--------------------------------------------------------------------*) (* insert an integer into a sorted list if it is not yet in it. *) (*--------------------------------------------------------------------*) fun insertNewInt (x:int,l) = let fun go nil = SOME [x] | go (l as y::ys) = case Int.compare (x,y) of LESS => SOME(x::l) | EQUAL => NONE | GREATER => case go ys of NONE => NONE | SOME xys => SOME(y::xys) in go l end (*--------------------------------------------------------------------*) (* compute the power to the base of two. *) (*--------------------------------------------------------------------*) fun powerOfTwo n = if n=0 then 1 else if n mod 2=0 then let val x=powerOfTwo (n div 2) in x*x end else let val x=powerOfTwo (n-1) in 2*x end (*--------------------------------------------------------------------*) (* find the smallest p with 2^p >= n. *) (*--------------------------------------------------------------------*) fun nextPowerTwo n = let fun doit (p,m) = if m>=n then p else if m*m<2*n then doit (2*p,m*m) else doit (1+p,2*m) in doit (1,2) end end (* stop of ../../Util/utilInt.sml *) (* start of ../../Util/utilError.sml *) signature UtilError = sig exception InternalError of string * string * string exception NoSuchFile of string * string val formatMessage : int * int -> string list -> string end structure UtilError : UtilError = struct open UtilString exception InternalError of string * string * string exception NoSuchFile of string * string fun formatMessage (indentWidth,lineWidth) strs = let val indent = nBlanks indentWidth val nl = "\n"^indent val blank = " " val dot = "." fun isSep c = #" "=c orelse #"\n"=c orelse #"\t"=c fun go (w,yet) nil = List.rev ("\n"::yet) | go (w,yet) (x::xs) = let val y = if null xs then x^dot else x val l = String.size y val w1 = w+l val (w2,yet2) = if w1<=lineWidth then (w1,y::yet) else (indentWidth+l,y::nl::yet) val (w3,yet3) = if null xs then (w2,yet2) else (if w2 'a Dict val makeDict : string * int * 'a -> 'a Dict val clearDict : 'a Dict * int option -> unit val hasIndex : 'a Dict * Key -> int option val getIndex : 'a Dict * Key -> int val getKey : 'a Dict * int -> Key val getByIndex : 'a Dict * int -> 'a val getByKey : 'a Dict * Key -> 'a val setByIndex : 'a Dict * int * 'a -> unit val setByKey : 'a Dict * Key * 'a -> unit val usedIndices : 'a Dict -> int val extractDict : 'a Dict -> (Key * 'a) array val printDict : ('a -> string) -> 'a Dict -> unit end functor Dict (structure Key : Key) : Dict = struct open UtilError UtilInt type Key = Key.Key exception NoSuchIndex (*--------------------------------------------------------------------*) (* a dictionary can have at most size MAX_WIDTH. This is because *) (* arrays may at most have Array.maxLen elements. We only use powers *) (* of two as sizes, so we are really only interested in the position *) (* of maxLen's highest bit. That would be the maximal width for hash *) (* tables, and thus we must decrease it by one for obtaining the max *) (* table width. *) (*--------------------------------------------------------------------*) fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1)) val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1 type Bucket = (Key * int) list val nullBucket = nil : Bucket (*--------------------------------------------------------------------*) (* buckets are unsorted - they are probably small, so comparing the *) (* keys might be overkill. *) (*--------------------------------------------------------------------*) fun addToBucket (ni as (key,_),bucket) = let fun doit nil = [ni] | doit (nis as (ni' as (key',_))::rest) = case Key.compare (key',key) of LESS => ni'::doit rest | EQUAL => ni::rest | GREATER => ni::nis in doit bucket end fun searchBucket (key,bucket) = let fun doit nil = NONE | doit ((key',i)::rest) = case Key.compare (key',key) of LESS => doit rest | EQUAL => SOME i | GREATER => NONE in doit bucket end (*--------------------------------------------------------------------*) (* a dictionary consists of *) (* - a string desc saying what is stored in this dictionary *) (* - an array tab holding for each index its key and value *) (* - a hash table, i.e. Bucket array, of double size than tab *) (* - a hashFun mapping Key to the range of the hash table *) (* - an integer width for computing table sizes *) (* - an integer size wich is the size of the value table *) (* - an integer count holding the next free index *) (* - a default value for the value table *) (*--------------------------------------------------------------------*) type 'a Dict = {desc : string, tab : (Key * 'a) array ref, hashTab : Bucket array ref, hashFun : (Key -> int) ref, width : int ref, (* bit width *) size : int ref, (* tab size=2^width, hash size is double *) count : int ref, (* number of entries *) def : 'a (* default for values *) } fun nullDict (desc,def) = {desc = desc, tab = ref (Array.array(1,(Key.null,def))), hashTab = ref (Array.array(2,nullBucket)), hashFun = ref (fn _ => 0), count = ref 0, size = ref 1, width = ref 0, def = def} (*--------------------------------------------------------------------*) (* how many entries are in the dictionary? *) (*--------------------------------------------------------------------*) fun usedIndices ({count,...}:'a Dict) = !count (*--------------------------------------------------------------------*) (* what is the table load, i.e. percentage of number of entries to *) (* hash table size = 100*count/(2*size) = 50*count/size. *) (*--------------------------------------------------------------------*) fun hashRatio({count,size,...}:'a Dict) = 50 * !count div !size handle Div => 100 (*--------------------------------------------------------------------*) (* this is the hash function. Key.hash hashes data to arbitrary *) (* words, that are mapped to the hash range by this function, where *) (* mask is the bitmask corresponding to the size of the hash table: *) (* 1. square the word produced by Key.hash *) (* 2. take the width bits from the middle of the square, these are *) (* the bit-places influenced by all input bit-places: *) (* - shift to the right by half of the destination width *) (* - mask out all bits to the left of destination *) (* this is a simple strategy but experiences good results. *) (*--------------------------------------------------------------------*) fun square (x:word) = Word.*(x,x) fun hashKey(half,mask) x = Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half))) fun makeHashFun(size,width) = let val mask = 0w2*Word.fromInt size-0w1 val half = Word.fromInt((width+1) div 2) in hashKey(half,mask) end (*--------------------------------------------------------------------*) (* create a new dictionary for 2^w, but at least 2 and at most 2^m *) (* entries, where m is the value of MAX_WIDTH. *) (*--------------------------------------------------------------------*) fun makeDict (desc,w,def) = let val width= Int.min(Int.max(1,w),MAX_WIDTH) val size = Word.toInt(Word.<<(0w1,Word.fromInt(width-1))) in {desc = desc, tab = ref (Array.array(size,(Key.null,def))), hashTab = ref (Array.array(2*size,nullBucket)), hashFun = ref (makeHashFun(size,width)), width = ref width, size = ref size, count = ref 0, def = def} end (*--------------------------------------------------------------------*) (* clear a dictionary. If the 2nd arg is SOME w, use w for resizing. *) (*--------------------------------------------------------------------*) fun clearDict (dict:'a Dict,widthOpt) = case widthOpt of NONE => let val {tab=ref tab,hashTab=ref hashTab,size,count,def,...} = dict val _ = appInterval (fn i => Array.update(tab,i,(Key.null,def))) (0,!count-1) val _ = appInterval (fn i => Array.update(hashTab,i,nullBucket)) (0,!size*2-1) in count := 0 end | SOME w => let val {tab,hashTab,hashFun,width,size,count,def,...} = dict val newWidth = Int.min(Int.max(1,w),MAX_WIDTH) val newSize = Word.toInt(Word.<<(0w1,Word.fromInt(newWidth-1))) val _ = tab := (Array.array(newSize,(Key.null,def))) val _ = hashTab := (Array.array(2*newSize,nullBucket)) val _ = hashFun := (makeHashFun(newSize,newWidth)) val _ = width := newWidth val _ = size := newSize in count := 0 end (*--------------------------------------------------------------------*) (* grow a dictionary to the double size. raise InternalError if the *) (* dictionary already has maximal size. *) (*--------------------------------------------------------------------*) fun growDictionary ({desc,tab,hashTab,hashFun,width,size,count,def}:'a Dict) = let val oldTab = !tab val _ = if !width < MAX_WIDTH then width := !width+1 else raise InternalError ("Dict","growDictionary", String.concat ["growing the ",desc," dictionary ", "exceeded the system maximum size of ", Int.toString Array.maxLen," for arrays"]) val _ = size := !size*2 val _ = tab := Array.array(!size,(Key.null,def)) val _ = hashTab := Array.array(!size*2,nullBucket) val _ = hashFun := makeHashFun(!size,!width) fun addTo (i,kv as (key,_)) = let val idx = !hashFun key val _ = Array.update(!hashTab,idx,addToBucket((key,i),Array.sub(!hashTab,idx))) val _ = Array.update(!tab,i,kv) in () end in Array.appi addTo (oldTab,0,NONE) end (*--------------------------------------------------------------------*) (* lookup the key for an index of the dictionary. *) (*--------------------------------------------------------------------*) fun getKey({tab,count,...}:'a Dict,idx) = if !count>idx then #1(Array.sub(!tab,idx)) else raise NoSuchIndex (*--------------------------------------------------------------------*) (* map a Key to its index in the dictionary. if it is not in the *) (* dictionary yet, add a new entry with a new index. grow the table *) (* if there is no more free index in the dictionary. *) (*--------------------------------------------------------------------*) fun getIndex(dict as {tab,hashTab,hashFun,size,count,def,...}:'a Dict,key) = let val k = !hashFun key val bucket = Array.sub(!hashTab,k) in case searchBucket(key,bucket) of SOME idx => idx | NONE => let val idx = !count val (k',buck') = if !size>idx then (k,bucket) else let val _ = growDictionary dict val k' = !hashFun key val buck' = Array.sub(!hashTab,k') in (k',buck') end val _ = Array.update(!hashTab,k',addToBucket((key,idx),buck')) val _ = Array.update(!tab,idx,(key,def)) val _ = count := idx+1 in idx end end (*--------------------------------------------------------------------*) (* does a Key have an entry in a dictionary? *) (*--------------------------------------------------------------------*) fun hasIndex({hashTab,hashFun,...}:'a Dict,key) = let val idx = !hashFun key val bucket = Array.sub(!hashTab,idx) in searchBucket(key,bucket) end (*--------------------------------------------------------------------*) (* get the value stored for index idx *) (*--------------------------------------------------------------------*) fun getByIndex({tab,count,...}:'a Dict,idx) = if !count>idx then #2(Array.sub(!tab,idx)) else raise NoSuchIndex (*--------------------------------------------------------------------*) (* get the value stored for a key *) (*--------------------------------------------------------------------*) fun getByKey(dict,key) = getByIndex(dict,getIndex(dict,key)) (*--------------------------------------------------------------------*) (* enter a value for index idx. *) (*--------------------------------------------------------------------*) fun setByIndex({tab,count,...}:'a Dict,idx,a) = if !count>idx then let val (key,_) = Array.sub(!tab,idx) in Array.update(!tab,idx,(key,a)) end else raise NoSuchIndex (*--------------------------------------------------------------------*) (* enter a value for a key. *) (*--------------------------------------------------------------------*) fun setByKey(dict,key,v) = setByIndex(dict,getIndex(dict,key),v) (*--------------------------------------------------------------------*) (* extract the contents of the dictionary to an array. *) (*--------------------------------------------------------------------*) fun extractDict({count,tab,...}:'a Dict) = Array.tabulate(!count,fn i => Array.sub(!tab,i)) (*--------------------------------------------------------------------*) (* print the contents of the dictionary. *) (*--------------------------------------------------------------------*) fun printDict X2String ({desc,tab,count,...}:'a Dict) = (print (desc^" dictionary:\n"); Array.appi (fn (n,(key,value)) => print (" "^Int.toString n^": "^Key.toString key^" = "^X2String value^"\n")) (!tab,0,SOME (!count))) end (* stop of ../../Util/SymDict/dict.sml *) (* start of ../../Util/SymDict/symbolTable.sml *) (*--------------------------------------------------------------------------*) (* Functor: SymbolTable *) (* *) (* Exceptions raised by functions in this structure: *) (* getSymIndex : Key.InternalError *) (* getSymKey : NoSuchSymbol *) (* hasSymIndex : none *) (* makeSymTable : none *) (* nullSymTable : none *) (* printSymTable : none *) (* usedSymbols : none *) (*--------------------------------------------------------------------------*) (* A symbol table maps Keys to consecutive integers. *) (*--------------------------------------------------------------------------*) signature SymTable = sig type Key type SymTable exception NoSuchSymbol val nullSymTable : string -> SymTable val makeSymTable : string * int -> SymTable val clearSymTable : SymTable * int option -> unit val hasSymIndex : SymTable * Key -> int option val getSymIndex : SymTable * Key -> int val getSymKey : SymTable * int -> Key val usedSymbols : SymTable -> int val assignSymIndex : SymTable * Key * int -> unit val reserveSymIndex : SymTable -> int val extractSymTable : SymTable -> Key vector val printSymTable : SymTable -> unit end functor SymTable (structure Key : Key) : SymTable = struct open UtilError UtilInt exception NoSuchSymbol type Key = Key.Key (*--------------------------------------------------------------------*) (* a symbol table can have at most size MAX_WIDTH. This is because *) (* arrays may at most have Array.maxLen elements. We only use powers *) (* of two as sizes, so we are really only interested in the position *) (* of maxLen's highest bit. That would be the maximal width for hash *) (* tables, and thus we must decrease it by one for obtaining the max *) (* table width. *) (*--------------------------------------------------------------------*) fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1)) val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1 type Bucket = (Key * int) list val nullBucket = nil : Bucket (*--------------------------------------------------------------------*) (* buckets are sorted - though they are probably small. *) (*--------------------------------------------------------------------*) fun addToBucket (ni as (key,_),bucket) = let fun doit nil = [ni] | doit (nis as (ni' as (key',_))::rest) = case Key.compare (key',key) of LESS => ni'::doit rest | EQUAL => ni::rest | GREATER => ni::nis in doit bucket end fun searchBucket (key,bucket) = let fun doit nil = NONE | doit ((key',i)::rest) = case Key.compare (key',key) of LESS => doit rest | EQUAL => SOME i | GREATER => NONE in doit bucket end (*--------------------------------------------------------------------*) (* a symbol table consists of *) (* - an array tab holding for each index its key *) (* - a hash table, i.e. Bucket array, of double size than tab *) (* - a hashFun mapping Key to the range of the hash table *) (* - an integer width for computing table sizes *) (* - an integer size wich is the size of the value table *) (* - an integer count holding the next free index *) (*--------------------------------------------------------------------*) type SymTable = {desc : string, tab : Key array ref, hash : Bucket array ref, hashFun : (Key -> int) ref, width : int ref, (* bit width *) size : int ref, (* tab size=2^width, hash size is double *) count : int ref (* number of entries *) } fun nullSymTable desc = {desc = desc, tab = ref (Array.array(1,Key.null)), hash = ref (Array.array(2,nullBucket)), hashFun = ref (fn _ => 0), count = ref 0, size = ref 1, width = ref 0} : SymTable (*--------------------------------------------------------------------*) (* how many entries are in the symtable? *) (*--------------------------------------------------------------------*) fun usedSymbols ({count,...}:SymTable) = !count (*--------------------------------------------------------------------*) (* what is the table load, i.e. percentage of number of entries to *) (* hash table size = 100*count/(2*size) = 50*count/size. *) (*--------------------------------------------------------------------*) fun hashRatio({count,size,...}:SymTable) = 50 * !count div !size handle Div => 100 (*--------------------------------------------------------------------*) (* this is the hash function. Key.hash hashes data to arbitrary *) (* words, that are mapped to the hash range by this function, where *) (* mask is the bitmask corresponding to the size of the hash table: *) (* 1. square the word produced by Key.hash *) (* 2. take the width bits from the middle of the square, these are *) (* the bit-places influenced by all input bit-places: *) (* - shift to the right by half of the destination width *) (* - mask out all bits to the left of destination *) (* this is a simple strategy but experiences good results. *) (*--------------------------------------------------------------------*) fun square (x:word) = Word.*(x,x) fun hashKey(half,mask) x = Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half))) fun makeHashFun(size,width) = let val mask = Word.fromInt(2*size-1) val half = Word.fromInt((width+1) div 2) in hashKey(half,mask) end (*--------------------------------------------------------------------*) (* create a new symtable for 2^w, but at least 2 and at most 2^m *) (* entries, where m is the value of MAX_WIDTH. *) (*--------------------------------------------------------------------*) fun makeSymTable (desc,w) = let val width= Int.min(Int.max(1,w),MAX_WIDTH) val size = Word.toInt(Word.<<(0w1,Word.fromInt(width-1))) in {desc = desc, tab = ref (Array.array(size,Key.null)), hash = ref (Array.array(2*size,nullBucket)), hashFun = ref (makeHashFun(size,width)), width = ref width, size = ref size, count = ref 0} end (*--------------------------------------------------------------------*) (* clear a dictionary. If the 2nd arg is SOME w, use w for resizing. *) (*--------------------------------------------------------------------*) fun clearSymTable (symTab:SymTable,widthOpt) = case widthOpt of NONE => let val {tab=ref tab,hash=ref hash,size,count,...} = symTab val _ = appInterval (fn i => Array.update(tab,i,Key.null)) (0,!count-1) val _ = appInterval (fn i => Array.update(hash,i,nullBucket)) (0,!size*2-1) in count := 0 end | SOME w => let val {tab,hash,hashFun,width,size,count,...} = symTab val newWidth = Int.min(Int.max(1,w),MAX_WIDTH) val newSize = Word.toInt(Word.<<(0w1,Word.fromInt(newWidth-1))) val _ = tab := (Array.array(newSize,Key.null)) val _ = hash := (Array.array(2*newSize,nullBucket)) val _ = hashFun := (makeHashFun(newSize,newWidth)) val _ = width := newWidth val _ = size := newSize in count := 0 end (*--------------------------------------------------------------------*) (* grow a symtable to the double size. raise InternalError if the *) (* table already has maximal size. *) (*--------------------------------------------------------------------*) fun growTable ({desc,tab,hash,hashFun,width,size,count}:SymTable) = let val newWidth = if !width < MAX_WIDTH then !width+1 else raise InternalError ("SymTable","growTable", String.concat ["growing the ",desc," symbol table ", "exceeded the system maximum size of ", Int.toString Array.maxLen," for arrays"]) val newSize = !size*2 val oldTab = !tab val newTab = Array.array(newSize,Key.null) val newHash = Array.array(2*newSize,nullBucket) val newHashFun = makeHashFun(newSize,newWidth) fun addToNew (inv as (i,key)) = let val idx = newHashFun key val _ = Array.update(newHash,idx,addToBucket((key,i),Array.sub(newHash,idx))) val _ = Array.update(newTab,i,key) in () end val _ = Array.appi addToNew (!tab,0,NONE) val _ = tab := newTab val _ = hash := newHash val _ = size := newSize val _ = width := newWidth val _ = hashFun := newHashFun in () end (*--------------------------------------------------------------------*) (* lookup the key for an index of the symbol table. *) (*--------------------------------------------------------------------*) fun getSymKey({tab,count,...}:SymTable,idx) = if !count>idx then Array.sub(!tab,idx) else raise NoSuchSymbol (*--------------------------------------------------------------------*) (* map a Key to its index in the symbol table. if it is not in the *) (* symbol table yet, add a new entry with a new index. grow the table *) (* if there is no more free index in the table. *) (*--------------------------------------------------------------------*) fun getSymIndex(st as {tab,hash,hashFun,size,count,...}:SymTable,key) = let val idx = !hashFun key val bucket = Array.sub(!hash,idx) in case searchBucket(key,bucket) of SOME i => i | NONE => let val i = !count val (idx',buck') = if !size>i then (idx,bucket) else let val _ = growTable st val idx' = !hashFun key val buck' = Array.sub(!hash,idx') in (idx',buck') end val _ = Array.update(!hash,idx',addToBucket((key,i),buck')) val _ = Array.update(!tab,i,key) val _ = count := i+1 in i end end (*--------------------------------------------------------------------*) (* does a Key have an entry in a symbol table? *) (*--------------------------------------------------------------------*) fun hasSymIndex({hash,hashFun,...}:SymTable,key) = let val idx = !hashFun key val buck = Array.sub(!hash,idx) in searchBucket(key,buck) end (*--------------------------------------------------------------------*) (* reserve an index for a (yet unknown) key. *) (*--------------------------------------------------------------------*) fun reserveSymIndex(st as {size,count=count as ref i,...}:SymTable) = let val _ = if !size>i then () else growTable st val _ = count := i+1 in i end (*--------------------------------------------------------------------*) (* assign an index to a (previously reserved) index. *) (*--------------------------------------------------------------------*) fun assignSymIndex(st as {count,hash,hashFun,tab,...}:SymTable,key,i) = if !count<=i then raise NoSuchSymbol else let val idx = !hashFun key val buck = Array.sub(!hash,idx) val newBuck = addToBucket((key,i),buck) val _ = Array.update(!hash,idx,newBuck) val _ = Array.update(!tab,i,key) in () end (*--------------------------------------------------------------------*) (* extract the contents of a symbol table to a vector. *) (*--------------------------------------------------------------------*) fun extractSymTable({count,tab,...}:SymTable) = Array.extract(!tab,0,SOME(!count)) (*--------------------------------------------------------------------*) (* print the contents of the symbol table. *) (*--------------------------------------------------------------------*) fun printSymTable ({desc,tab,count,...}:SymTable) = (print (desc^" table:\n"); Array.appi (fn (n,key) => print (" "^Int.toString n^": "^Key.toString key^"\n")) (!tab,0,SOME (!count))) end (* stop of ../../Util/SymDict/symbolTable.sml *) (* start of ../../Util/SymDict/intListDict.sml *) structure KeyIntList : Key = struct type Key = int list val null = nil val hash = UtilHash.hashList Word.fromInt val compare = UtilCompare.compareList Int.compare val toString = UtilString.List2String Int.toString end structure IntListDict = Dict (structure Key = KeyIntList) structure IntListSymTab = SymTable (structure Key = KeyIntList) (* stop of ../../Util/SymDict/intListDict.sml *) (* start of ../../Util/SymDict/intDict.sml *) structure KeyInt : Key = struct type Key = int val null = 0 val hash = Word.fromInt val compare = Int.compare val toString = Int.toString end structure IntDict = Dict (structure Key = KeyInt) structure IntSymTab = SymTable (structure Key = KeyInt) (* stop of ../../Util/SymDict/intDict.sml *) (* start of ../../Unicode/Chars/uniChar.sml *) (*--------------------------------------------------------------------------*) (* Structure: UniChar *) (* *) (* Depends on: *) (* UtilString *) (* *) (* Exceptions raised by functions in this structure: *) (*--------------------------------------------------------------------------*) signature UniChar = sig structure Chars : WORD type Char = Chars.word type Data = Char list type Vector = Char vector val nullData : Data val nullVector : Vector val hashChar : Char -> word val hashData : Data -> word val hashVector : Vector -> word val compareChar : Char * Char -> order val compareData : Data * Data -> order val compareVector : Vector * Vector -> order val char2Char : char -> Char val Char2char : Char -> char val Char2Uni : Char -> string val Char2String : Char -> string val String2Data : string -> Data val Data2String : Data -> string val Latin2String : Data -> string val Data2Vector : Data -> Vector val Vector2Data : Vector -> Data val String2Vector : string -> Vector val Vector2String : Vector -> string val quoteUni : Char -> string -> string val quoteChar : Char -> Char -> string val quoteData : Char -> Data -> string val quoteVector : Char -> Vector -> string end structure UniChar : UniChar = struct val O_VECTOR_PRINTLEN = 48 structure Chars = Word val _ = if Chars.wordSize > 21 then () else let val str = ("UniChar: Chars.wordSize is too small.\n"^ "Cannot compile on this system!\n" ) val _ = print str in raise Fail str end type Char = Chars.word type Data = Char list type CharInterval = Char * Char type CharRange = CharInterval list type Vector = Char vector val nullChar = 0wx0:Char val nullData = nil:Data val nullVector = Vector.fromList nullData val hashChar = Word.fromLargeWord o Chars.toLargeWord val hashData = UtilHash.hashList hashChar val hashVector = UtilHash.hashVector hashChar val compareChar = Chars.compare val compareData = UtilCompare.compareList compareChar val compareVector = UtilCompare.compareVector compareChar val char2Char = Chars.fromLargeWord o Word8.toLargeWord o Byte.charToByte val Char2char = Byte.byteToChar o Word8.fromLargeWord o Chars.toLargeWord fun Char2Uni c = "U+"^UtilString.toUpperString(StringCvt.padLeft #"0" 4 (Chars.toString c)) fun Char2String c = case c of 0wx9 => "\\t" | 0wxA => "\\n" | _ => if c<0wx100 then String.implode [Char2char c] else Char2Uni c fun String2Data s = map char2Char (String.explode s) fun Data2String cs = String.concat (map Char2String cs) fun Latin2String cs = String.implode (map Char2char cs) val Data2Vector = Vector.fromList fun String2Vector s = Vector.tabulate(String.size s,fn i => char2Char(String.sub(s,i))) fun Vector2Data vec = Vector.foldr (op ::) nil vec fun Vector2String vec = let val maxlen = O_VECTOR_PRINTLEN val len = Vector.length vec in if len<=maxlen orelse maxlen=0 then Data2String (Vector2Data vec) else let val cs1 = Vector.foldri (fn (_,c,cs) => c::cs) nil (vec,0,SOME (maxlen div 2)) val cs2 = Vector.foldri (fn (_,c,cs) => c::cs) nil (vec,len-3-maxlen div 2,NONE) in Data2String cs1^"..."^Data2String cs2 end end fun quoteUni q s = let val sQ = Char2String q in sQ^s^sQ end fun quoteChar q c = if c=0wx0 then "entity end" else quoteUni q (Char2String c) fun quoteData q cs = quoteUni q (Data2String cs) fun quoteVector q v = quoteUni q (Vector2String v) end (* stop of ../../Unicode/Chars/uniChar.sml *) (* start of ../../Unicode/Chars/charVecDict.sml *) structure KeyVector : Key = struct type Key = UniChar.Vector val null = UniChar.nullVector val compare = UniChar.compareVector val toString = UniChar.Vector2String val hash = UniChar.hashVector end structure VectorDict = Dict (structure Key = KeyVector) (* stop of ../../Unicode/Chars/charVecDict.sml *) (* start of ../../Util/SymDict/stringDict.sml *) structure KeyString : Key = struct type Key = string val null = "" val hash = UtilHash.hashString val compare = String.compare fun toString str = str end structure StringDict = Dict (structure Key = KeyString) (* stop of ../../Util/SymDict/stringDict.sml *) (* start of ../../Unicode/encoding.sml *) signature Encoding = sig datatype Encoding = NOENC | ASCII | EBCDIC | LATIN1 | UCS4B | UCS4L | UCS4SB | UCS4SL | UCS2B | UCS2L | UTF16B | UTF16L | UTF8 val UCS2 : Encoding val UCS4 : Encoding val UTF16 : Encoding val encodingName : Encoding -> string val isEncoding : string -> Encoding val switchEncoding : Encoding * Encoding -> Encoding end structure Encoding : Encoding = struct open StringDict datatype Encoding = NOENC | ASCII | EBCDIC | LATIN1 | UCS4B | UCS4L | UCS4SB | UCS4SL | UCS2B | UCS2L | UTF16B | UTF16L | UTF8 val UCS2 = UCS2B val UCS4 = UCS4B val UTF16 = UTF16B fun encodingName enc = case enc of NOENC => "NONE" | ASCII => "ASCII" | EBCDIC => "EBCDIC" | LATIN1 => "ISO-8859-1" | UCS2B => "UCS-2" | UCS2L => "UCS-2" | UCS4B => "UCS-4" | UCS4L => "UCS-4" | UCS4SB => "UCS-4" | UCS4SL => "UCS-4" | UTF8 => "UTF-8" | UTF16B => "UTF-16" | UTF16L => "UTF-16" val encDict = makeDict("encoding",6,NOENC) val encAliases = [(ASCII,["ANSI_X3.4-1968","ANSI_X3.4-1986","ASCII","US-ASCII","US", "ISO646-US","ISO-IR-6","ISO_646.IRV:1991","IBM367","CP367"]), (EBCDIC,["EBCDIC"]), (LATIN1,["ISO_8859-1:1987","ISO-8859-1","ISO_8859-1", "ISO-IR-100","CP819","IBM819","L1","LATIN1"]), (UCS2,["UCS-2","ISO-10646-UCS-2"]), (UCS4,["UCS-4","ISO-10646-UCS-4"]), (UTF16,["UTF-16"]), (UTF8,["UTF-8"]) ] val _ = app (fn (x,ys) => app (fn y => setByKey(encDict,y,x)) ys) encAliases fun isEncoding name = getByKey(encDict,name) fun compatAscii new = case new of ASCII => new | LATIN1 => new | UTF8 => new | _ => NOENC fun compatUcs4 (old,new) = if new=UCS4 then old else NOENC fun switchEncoding(old,new) = case old of NOENC => NOENC | ASCII => compatAscii new | EBCDIC => if new=EBCDIC then new else NOENC | LATIN1 => compatAscii new | UCS4B => compatUcs4(old,new) | UCS4L => compatUcs4(old,new) | UCS4SB => compatUcs4(old,new) | UCS4SL => compatUcs4(old,new) | UTF16B => if new=UTF16 then old else if new=UCS2 then UCS2B else NOENC | UTF16L => if new=UTF16 then old else if new=UCS2 then UCS2L else NOENC | UCS2B => if new=UCS2 then old else if new=UTF16 then UTF16B else NOENC | UCS2L => if new=UCS2 then old else if new=UTF16 then UTF16L else NOENC | UTF8 => compatAscii new end (* stop of ../../Unicode/encoding.sml *) (* start of ../../Unicode/Encode/encodeBasic.sml *) (*--------------------------------------------------------------------------*) (* Structure: EncodeBasic *) (* *) (* Exceptions raised by functions in this structure: *) (* closeFile : none *) (* fileName : none *) (* openFile : NoSuchFile *) (* writeByte : Io *) (*--------------------------------------------------------------------------*) signature EncodeBasic = sig type File val stdOutFile : File val closeFile : File -> unit val fileName : File -> string val openFile : string -> File val writeByte : File * Word8.word -> File end structure EncodeBasic : EncodeBasic = struct open UtilError type outstream = TextIO.outstream val closeOut = TextIO.closeOut val openOut = TextIO.openOut val output1 = TextIO.output1 val stdOut = TextIO.stdOut type File = string * outstream val stdOutFile = ("-",stdOut) fun closeFile(fname,s) = if fname="-" then () else closeOut s fun fileName(fname,_) = if fname="-" then "" else fname fun openFile fname = if fname = "-" then (fname,stdOut) else (fname,openOut fname) handle IO.Io {name,cause,...} => raise NoSuchFile(name,exnMessage cause) fun writeByte (f as (_,s),b) = f before output1(s,chr(Word8.toInt b)) end (* stop of ../../Unicode/Encode/encodeBasic.sml *) (* start of ../../Unicode/Encode/encodeError.sml *) signature EncodeError = sig datatype EncodeError = ERR_ILLEGAL_CHAR of UniChar.Char * string val encodeMessage : EncodeError -> string list exception EncodeError of EncodeBasic.File * EncodeError end structure EncodeError : EncodeError = struct open UtilString UniChar datatype EncodeError = ERR_ILLEGAL_CHAR of UniChar.Char * string fun encodeMessage err = case err of ERR_ILLEGAL_CHAR(c,what) => [Char2Uni c,"is not",prependAnA what,"character"] exception EncodeError of EncodeBasic.File * EncodeError end (* stop of ../../Unicode/Encode/encodeError.sml *) (* start of ../../Unicode/Encode/encodeMisc.sml *) (* require "basis.__word"; require "basis.__word8"; require "basis.__word8_vector"; require "chars"; require "encodeBasic"; require "encodeError"; *) signature EncodeMisc = sig val writeCharAscii : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharEbcdic : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharLatin1 : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs4B : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs4L : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs4SB : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs4SL : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUtf8 : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUtf16B : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUtf16L : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs2B : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val writeCharUcs2L : UniChar.Char * EncodeBasic.File -> EncodeBasic.File val validCharAscii : UniChar.Char -> bool val validCharEbcdic : UniChar.Char -> bool val validCharLatin1 : UniChar.Char -> bool end structure EncodeMisc : EncodeMisc = struct open UniChar EncodeBasic EncodeError infix 8 >> infix 7 && infix 6 || val op && = Chars.andb val op >> = Chars.>> val op || = Word8.orb fun splitSurrogates (c : Char) = (((c-0wx10000) >> 0w10)+0wxD800,c && 0wx3FF + 0wxDC00) fun Char2Byte c = Word8.fromLargeWord(Chars.toLargeWord c) (*---------------------------------------------------------------------*) (* Ascii *) (*---------------------------------------------------------------------*) fun validCharAscii (c : Char) = c<0wx80 fun writeCharAscii(c,f) = if c<0wx80 then writeByte(f,Char2Byte c) else raise EncodeError(f,ERR_ILLEGAL_CHAR(c,"ASCII")) (*---------------------------------------------------------------------*) (* Ebcdic *) (*---------------------------------------------------------------------*) val latin2ebcdicTab = Word8Vector.fromList [0wx00,0wx01,0wx02,0wx03,0wx37,0wx2D,0wx2E,0wx2F, 0wx16,0wx05,0wx25,0wx0B,0wx0C,0wx0D,0wx0E,0wx0F, 0wx10,0wx11,0wx12,0wx13,0wx3C,0wx3D,0wx32,0wx26, 0wx18,0wx19,0wx3F,0wx27,0wx1C,0wx1D,0wx1E,0wx1F, 0wx40,0wx4F,0wx7F,0wx7B,0wx5B,0wx6C,0wx50,0wx7D, 0wx4D,0wx5D,0wx5C,0wx4E,0wx6B,0wx60,0wx4B,0wx61, 0wxF0,0wxF1,0wxF2,0wxF3,0wxF4,0wxF5,0wxF6,0wxF7, 0wxF8,0wxF9,0wx7A,0wx5E,0wx4C,0wx7E,0wx6E,0wx6F, 0wx7C,0wxC1,0wxC2,0wxC3,0wxC4,0wxC5,0wxC6,0wxC7, 0wxC8,0wxC9,0wxD1,0wxD2,0wxD3,0wxD4,0wxD5,0wxD6, 0wxD7,0wxD8,0wxD9,0wxE2,0wxE3,0wxE4,0wxE5,0wxE6, 0wxE7,0wxE8,0wxE9,0wx4A,0wxE0,0wx5A,0wx5F,0wx6D, 0wx79,0wx81,0wx82,0wx83,0wx84,0wx85,0wx86,0wx87, 0wx88,0wx89,0wx91,0wx92,0wx93,0wx94,0wx95,0wx96, 0wx97,0wx98,0wx99,0wxA2,0wxA3,0wxA4,0wxA5,0wxA6, 0wxA7,0wxA8,0wxA9,0wxC0,0wx6A,0wxD0,0wxA1,0wx07, 0wx20,0wx21,0wx22,0wx23,0wx24,0wx15,0wx06,0wx17, 0wx28,0wx29,0wx2A,0wx2B,0wx2C,0wx09,0wx0A,0wx1B, 0wx30,0wx31,0wx1A,0wx33,0wx34,0wx35,0wx36,0wx08, 0wx38,0wx39,0wx3A,0wx3B,0wx04,0wx14,0wx3E,0wxE1, 0wx41,0wx42,0wx43,0wx44,0wx45,0wx46,0wx47,0wx48, 0wx49,0wx51,0wx52,0wx53,0wx54,0wx55,0wx56,0wx57, 0wx58,0wx59,0wx62,0wx63,0wx64,0wx65,0wx66,0wx67, 0wx68,0wx69,0wx70,0wx71,0wx72,0wx73,0wx74,0wx75, 0wx76,0wx77,0wx78,0wx80,0wx8A,0wx8B,0wx8C,0wx8D, 0wx8E,0wx8F,0wx90,0wx9A,0wx9B,0wx9C,0wx9D,0wx9E, 0wx9F,0wxA0,0wxAA,0wxAB,0wxAC,0wxAD,0wxAE,0wxAF, 0wxB0,0wxB1,0wxB2,0wxB3,0wxB4,0wxB5,0wxB6,0wxB7, 0wxB8,0wxB9,0wxBA,0wxBB,0wxBC,0wxBD,0wxBE,0wxBF, 0wxCA,0wxCB,0wxCC,0wxCD,0wxCE,0wxCF,0wxDA,0wxDB, 0wxDC,0wxDD,0wxDE,0wxDF,0wxEA,0wxEB,0wxEC,0wxED, 0wxEE,0wxEF,0wxFA,0wxFB,0wxFC,0wxFD,0wxFE,0wxFF ] fun validCharEbcdic (c : Char) = c<0wx100 fun writeCharEbcdic(c,f) = if c<0wx100 then writeByte(f,Word8Vector.sub(latin2ebcdicTab,Chars.toInt c)) else raise EncodeError(f,ERR_ILLEGAL_CHAR(c,"EBCDIC")) (*---------------------------------------------------------------------*) (* Latin1 *) (*---------------------------------------------------------------------*) fun validCharLatin1 (c : Char) = c<0wx100 fun writeCharLatin1(c,f) = if c<0wx100 then writeByte(f,Char2Byte c) else raise EncodeError(f,ERR_ILLEGAL_CHAR(c,"LATIN-1")) (*---------------------------------------------------------------------*) (* UCS-4 *) (*---------------------------------------------------------------------*) fun ucs4Bytes c = (Char2Byte(c >> 0w24), Char2Byte(c >> 0w16), Char2Byte(c >> 0w8), Char2Byte c) fun writeCharUcs4 perm = fn (c,f) => let val bytes = ucs4Bytes c val (b1,b2,b3,b4) = perm bytes val f1 = writeByte(f,b1) val f2 = writeByte(f1,b2) val f3 = writeByte(f2,b3) val f4 = writeByte(f3,b4) in f4 end fun permUcs4B x = x fun permUcs4L (b1,b2,b3,b4) = (b4,b3,b2,b1) fun permUcs4SB (b1,b2,b3,b4) = (b2,b1,b4,b3) fun permUcs4SL (b1,b2,b3,b4) = (b3,b4,b1,b2) val writeCharUcs4B = writeCharUcs4 permUcs4B val writeCharUcs4L = writeCharUcs4 permUcs4L val writeCharUcs4SB = writeCharUcs4 permUcs4SB val writeCharUcs4SL = writeCharUcs4 permUcs4SL (*---------------------------------------------------------------------*) (* UTF-8 *) (*---------------------------------------------------------------------*) fun writeCharUtf8(c,f) = if c<0wx80 then writeByte(f,Char2Byte c) else if c<0wx800 then let val f1 = writeByte(f,0wxC0 || Char2Byte(c >> 0w6)) val f2 = writeByte(f1,0wx80 || Char2Byte(c && 0wx3F)) in f2 end else if c<0wx10000 then let val f1 = writeByte(f, 0wxE0 || Char2Byte(c >> 0w12)) val f2 = writeByte(f1,0wx80 || Char2Byte((c >> 0w6) && 0wx3F)) val f3 = writeByte(f2,0wx80 || Char2Byte(c && 0wx3F)) in f3 end else if c<0wx200000 then let val f1 = writeByte(f, 0wxF0 || Char2Byte(c >> 0w18)) val f2 = writeByte(f1,0wx80 || Char2Byte((c >> 0w12) && 0wx3F)) val f3 = writeByte(f2,0wx80 || Char2Byte((c >> 0w6) && 0wx3F)) val f4 = writeByte(f3,0wx80 || Char2Byte(c && 0wx3F)) in f4 end else if c<0wx4000000 then let val f1 = writeByte(f, 0wxF8 || Char2Byte(c >> 0w24)) val f2 = writeByte(f1,0wx80 || Char2Byte((c >> 0w18) && 0wx3F)) val f3 = writeByte(f2,0wx80 || Char2Byte((c >> 0w12) && 0wx3F)) val f4 = writeByte(f3,0wx80 || Char2Byte((c >> 0w6) && 0wx3F)) val f5 = writeByte(f4,0wx80 || Char2Byte(c && 0wx3F)) in f5 end else let val f1 = writeByte(f, 0wxFC || Char2Byte(c >> 0w30)) val f2 = writeByte(f1,0wx80 || Char2Byte((c >> 0w24) && 0wx3F)) val f3 = writeByte(f2,0wx80 || Char2Byte((c >> 0w18) && 0wx3F)) val f4 = writeByte(f3,0wx80 || Char2Byte((c >> 0w12) && 0wx3F)) val f5 = writeByte(f4,0wx80 || Char2Byte((c >> 0w6) && 0wx3F)) val f6 = writeByte(f5,0wx80 || Char2Byte(c && 0wx3F)) in f6 end (*---------------------------------------------------------------------*) (* UTF-16 *) (*---------------------------------------------------------------------*) fun oneUtf16 isL (c,f) = let val (b1,b2) = (Char2Byte(c >> 0w8),Char2Byte c) in if isL then writeByte(writeByte(f,b2),b1) else writeByte(writeByte(f,b1),b2) end fun writeCharUtf16 isL = fn (c,f) => if c<0wx10000 then oneUtf16 isL (c,f) else let val (hi,lo) = splitSurrogates c val f1 = oneUtf16 isL (hi,f) val f2 = oneUtf16 isL (lo,f1) in f2 end val writeCharUtf16B = writeCharUtf16 false val writeCharUtf16L = writeCharUtf16 true (*---------------------------------------------------------------------*) (* UCS-2 *) (*---------------------------------------------------------------------*) fun writeCharUcs2 isL = fn (c,f) => if c<0wx10000 then let val (b1,b2) = (Char2Byte(c >> 0w8),Char2Byte c) in if isL then writeByte(writeByte(f,b2),b1) else writeByte(writeByte(f,b1),b2) end else raise EncodeError(f,ERR_ILLEGAL_CHAR(c,"UCS-2")) val writeCharUcs2B = writeCharUcs2 false val writeCharUcs2L = writeCharUcs2 true end (* stop of ../../Unicode/Encode/encodeMisc.sml *) (* start of ../../Unicode/Encode/encode.sml *) signature Encode = sig include EncodeError type File type EncFile val encNoFile : EncFile val encStdOut : EncFile val encOpenFile : string * Encoding.Encoding * string -> EncFile val encCloseFile : EncFile -> unit val encAdapt : EncFile * File -> EncFile val encPutChar : EncFile * UniChar.Char -> EncFile val encValidChar : EncFile * UniChar.Char -> bool end structure Encode : Encode = struct open Encoding UtilError EncodeBasic EncodeError EncodeMisc type EncFile = Encoding * File val encNoFile = (NOENC,stdOutFile) val encStdOut = (LATIN1,stdOutFile) fun encAdapt((enc,_),f) = (enc,f) fun encValidChar((enc,_),c) = case enc of ASCII => validCharAscii c | EBCDIC => validCharEbcdic c | LATIN1 => validCharLatin1 c | _ => true fun encPutChar((enc,f),c) = let val f1 = case enc of NOENC => f | ASCII => (writeCharAscii(c,f)) | EBCDIC => (writeCharEbcdic(c,f)) | LATIN1 => (writeCharLatin1(c,f)) | UCS2B => (writeCharUcs2B(c,f)) | UCS2L => (writeCharUcs2L(c,f)) | UCS4B => (writeCharUcs4B(c,f)) | UCS4L => (writeCharUcs4L(c,f)) | UCS4SB => (writeCharUcs4SB(c,f)) | UCS4SL => (writeCharUcs4SL(c,f)) | UTF8 => (writeCharUtf8(c,f)) | UTF16B => (writeCharUtf16B(c,f)) | UTF16L => (writeCharUtf16L(c,f)) in (enc,f1) end fun encCloseFile(_,f) = closeFile f fun encOpenFile (fname,enc,name) = let val outEnc = case enc of NOENC => (case isEncoding name of NOENC => raise NoSuchFile(fname,"Unsupported encoding \""^name^"\"") | enc => enc) | enc => enc val f = openFile fname val f1 = case outEnc of UTF16B => writeByte(writeByte(f,0wxFE),0wxFF) | UTF16L => writeByte(writeByte(f,0wxFF),0wxFE) | _ => f in (outEnc,f1) end end (* stop of ../../Unicode/Encode/encode.sml *) (* start of nullHard.sml *) (* structure NullHard = struct fun parseNull uri = NullParse.parseDocument uri NONE NullHooks.nullStart open NullCatOptions NullOptions Options NullParserOptions Uri val usage = List.concat [parserUsage,[("","")],catalogUsage,[("","")],nullUsage] exception Exit of OS.Process.status fun null(prog,args) = let val prog = "fxp" val hadError = ref false fun optError msg = let val _ = TextIO.output(TextIO.stdErr,msg^".\n") in hadError := true end fun exitError msg = let val _ = TextIO.output(TextIO.stdErr,msg^".\n") in raise Exit OS.Process.failure end fun exitHelp prog = let val _ = printUsage TextIO.stdOut prog usage in raise Exit OS.Process.success end fun exitVersion prog = let val _ = app print [prog," version ",Version.FXP_VERSION,"\n"] in raise Exit OS.Process.success end fun summOpt prog = "For a summary of options type "^prog^" --help" fun noFile(f,cause) = "can't open file '"^f^"': "^exnMessage cause val opts = parseOptions args val _ = setParserDefaults() val opts1 = setParserOptions (opts,optError) val _ = setCatalogDefaults() val opts2 = setCatalogOptions (opts1,optError) val _ = setNullDefaults() val (vers,help,err,file) = setNullOptions (opts2,optError) val _ = if !hadError then exitError (summOpt prog) else () val _ = if vers then exitVersion prog else () val _ = if help then exitHelp prog else () val _ = case err of SOME "-" => O_ERROR_DEVICE := TextIO.stdErr | SOME f => (O_ERROR_DEVICE := TextIO.openOut f handle IO.Io {cause,...} => exitError(noFile(f,cause))) | NONE => () val f = valOf file handle Option => "-" val uri = if f="-" then NONE else SOME(String2Uri f) val status = parseNull uri val _ = if isSome err then TextIO.closeOut (!O_ERROR_DEVICE) else () in status end handle Exit status => status | exn => let val _ = TextIO.output (TextIO.stdErr,prog^": Unexpected exception: "^exnMessage exn^".\n") in OS.Process.failure end end *) structure NullHard = struct end (* stop of nullHard.sml *) (* start of ../../Util/options.sml *) signature Options= sig datatype Option = OPT_LONG of string * string option | OPT_SHORT of char list | OPT_NEG of char list | OPT_NOOPT | OPT_STRING of string val parseOptions : string list -> Option list datatype UsageItem = U_SEP | U_TITLE of string | U_ITEM of string list * string type Usage = UsageItem list val printUsage : TextIO.outstream -> string -> Usage -> unit end structure Options : Options = struct exception BadOption of string datatype Option = OPT_LONG of string * string option | OPT_SHORT of char list | OPT_NEG of char list | OPT_NOOPT | OPT_STRING of string datatype UsageItem = U_SEP | U_TITLE of string | U_ITEM of string list * string type Usage = UsageItem list fun parseOptions ss = let fun doOne opt = if String.isPrefix "--" opt then let val opt1 = Substring.extract(opt,2,NONE) val (key0,opt2) = Substring.splitl (fn c => #"="<>c) opt1 val key = if Substring.isEmpty key0 then raise BadOption opt else Substring.string key0 val valOpt = if Substring.isPrefix "=" opt2 then let val val0 = Substring.triml 1 opt2 in if Substring.isEmpty val0 then raise BadOption opt else SOME(Substring.string val0) end else NONE in OPT_LONG(key,valOpt) end handle BadOption s => if opt="--" then OPT_NOOPT else OPT_STRING opt else if String.isPrefix "-" opt then let val chars = tl(String.explode opt) (* val _ = app (fn c => if Char.isAlphaNum c then () else raise BadOption opt) chars *) in case chars of nil => OPT_STRING opt | #"n"::(cs as _::_) => OPT_NEG cs | _ => OPT_SHORT chars end handle BadOption s => OPT_STRING opt else OPT_STRING opt fun doAll nil = nil | doAll (s::ss) = let val opt = doOne s in case opt of OPT_NOOPT => opt::map OPT_STRING ss | _ => opt::doAll ss end in doAll ss end fun printUsage stream prog usage = let val KEY_WIDTH = 30 val LINE_WIDTH = 80 val EMPTY_KEY = UtilString.nBlanks KEY_WIDTH fun appendKeys col nil = if col>KEY_WIDTH then "\n"^EMPTY_KEY else UtilString.nBlanks (KEY_WIDTH-col) | appendKeys col [key] = key^" "^appendKeys (col+1+String.size key) nil | appendKeys col (key::keys) = let val col1 = col+2+String.size key in if col1>KEY_WIDTH then key^",\n"^appendKeys 0 keys else key^", "^appendKeys col1 keys end fun makeKey keylist = appendKeys 0 keylist val makeText = UtilString.breakLines(LINE_WIDTH-KEY_WIDTH) fun format (keylist,text) = let val key = makeKey keylist in case makeText text of nil => [key] | line::lines => key^line::map (fn line => EMPTY_KEY^line) lines end val _ = app (fn x => TextIO.output(stream,x)) ["Usage: ",prog," [option ...] file\n","where option is one of:\n\n"] val _ = app (fn item => app (fn x => TextIO.output(stream,x^"\n")) (case item of U_SEP => [""] | U_TITLE txt => ["",txt] | U_ITEM option => format option)) usage in () end end (* stop of ../../Util/options.sml *) (* start of ../../config.sml *) structure Config = struct (*---------------------------------------------------------------------*) (* The OS command for retrieving a URI from the internet and storing *) (* it in a local file, where *) (* %1 is replaced by the URI. *) (* %2 is replaced by the local filename. *) (* It is recommended that the command exits with failure in case the *) (* URI cannot be retrieved. If the command generates a HTML error *) (* message instead (like, e.g., lynx), this HTML file is considered *) (* to be XML and will probably cause a mess of parsing errors. If you *) (* don't need URI retrieval, use "exit 1" which always fails. *) (* Sensible values are, e.g.: *) (* val retrieveCommand = "wget -qO %2 %1" *) (* val retrieveCommand = "got_it -o %2 %1" *) (* val retrieveCommand = "urlget -s -o %2 %1" *) (*---------------------------------------------------------------------*) val retrieveCommand = "wget -qO %2 %1" end (* stop of ../../config.sml *) (* start of ../../Unicode/Chars/charClasses.sml *) (*--------------------------------------------------------------------------*) (* Structure: CharClasses *) (* *) (* Notes: *) (* This implementation uses the UNSAFE array operations, and does NO *) (* range checks. This is for efficiency reasons. *) (* If class=makeCharClass(lo,hi) then a filed of size hi-lo+1 is allo- *) (* cated. In order to lookup a character, first make sure it in [lo..hi], *) (* then subtract lo before calling inCharClass! *) (* The same holds for addChar. *) (* *) (* Depends on: *) (* UniChar *) (* UtilInt *) (* *) (* Exceptions raised by functions in this structure: *) (* addChar : none *) (* addCharClass : none *) (* inCharClass : none *) (* makeCharClass : none *) (*--------------------------------------------------------------------------*) signature CharClasses = sig type CharClass type MutableClass type CharInterval = UniChar.Char * UniChar.Char type CharRange = CharInterval list val initialize : CharInterval -> MutableClass val finalize : MutableClass -> CharClass val addChar : MutableClass * UniChar.Char * UniChar.Char * UniChar.Char -> unit val addCharRange : MutableClass * UniChar.Char * UniChar.Char * CharRange -> CharRange val inCharClass : UniChar.Char * CharClass -> bool end structure CharClasses : CharClasses = struct open UniChar type CharInterval = Char * Char type CharRange = CharInterval list val Char2Word = Word.fromLargeWord o Chars.toLargeWord (*--------------------------------------------------------------------*) (* helpers *) (*--------------------------------------------------------------------*) infix 5 >> >>> <<< infix 6 || ||| infix 6 -- infix 7 & && &&& val op >> = Chars.>> val op -- = Chars.- val op || = Chars.orb val op && = Chars.andb val op >>> = Word32.>> val op <<< = Word32.<< val op &&& = Word32.andb val op ||| = Word32.orb val op & = Word.andb val max32 = Word32.notb 0wx0 (*--------------------------------------------------------------------*) (* a char class is an array of words, interpreted as bitvectors. *) (*--------------------------------------------------------------------*) type MutableClass = Word32.word array type CharClass = Word32.word vector (*--------------------------------------------------------------------*) (* each word in a char class holds 32 entries. Thus the for a char c *) (* is c div 32 == c >> 5. The bitmask is a word of zeros, only the *) (* significant bit for c, i.e. the (c && 31==0x1F)th bit set to one. *) (*--------------------------------------------------------------------*) fun indexMask c = let val idx = Chars.toInt(c>>0w5) val mask = 0wx1 <<< Char2Word c & 0w31 in (idx,mask) end (*--------------------------------------------------------------------*) (* generate index and mask, then lookup. *) (*--------------------------------------------------------------------*) fun inCharClass(c,vec) = let val (idx,mask) = indexMask c in mask &&& Vector.sub(vec,idx) <> 0wx0 end (*--------------------------------------------------------------------*) (* generate a CharClass large enough to hold (max-min+1) characters. *) (*--------------------------------------------------------------------*) fun initialize(min,max) = Array.array((Chars.toInt max-Chars.toInt min+1) div 32+1,0wx0):MutableClass fun finalize arr = Array.extract(arr,0,NONE) (*--------------------------------------------------------------------*) (* add a single character to a CharClass. *) (*--------------------------------------------------------------------*) fun addChar(cls,min,max,c) = let val (idx,new) = indexMask c val old = Array.sub(cls,idx) in Array.update(cls,idx,old|||new) end (*--------------------------------------------------------------------*) (* add a full range of characters to a CharClass. *) (* this is the only function that computes the offset before access *) (* to the array. *) (*--------------------------------------------------------------------*) fun addCharRange(cls,min,max,range) = let fun doOne (lo,hi) = let val (l,h) = (lo-min,hi-min) val (idxL,idxH) = ((Chars.toInt l) div 32,(Chars.toInt h) div 32) val (bitL,bitH) = (Char2Word l & 0w31,Char2Word h & 0w31) in if idxL=idxH then let val new = (max32>>>(0w31-bitH+bitL))<<>>(0w31-bitH) val oldL = Array.sub(cls,idxL) val oldH = Array.sub(cls,idxH) val _ = Array.update(cls,idxL,oldL|||newL) val _ = Array.update(cls,idxH,oldH|||newH) val _ = UtilInt.appInterval (fn i => Array.update(cls,i,max32)) (idxL+1,idxH-1) in () end else () end fun doAll nil = nil | doAll ((lh as (lo,hi))::lhs) = if himax then lh::doAll lhs else if lo=min andalso hi<=max then (doOne lh; doAll lhs) else if lo>=min andalso hi>max then (doOne(lo,max); (max+0w1,hi)::lhs) else (doOne(min,max); (max+0w1,hi)::lhs) val _ = doAll range in doAll range end end (* stop of ../../Unicode/Chars/charClasses.sml *) (* start of ../../Unicode/Chars/uniRanges.sml *) structure UniRanges = struct val digitRange = [(0wx0030,0wx0039), (0wx0660,0wx0669), (0wx06F0,0wx06F9), (0wx0966,0wx096F), (0wx09E6,0wx09EF), (0wx0A66,0wx0A6F), (0wx0AE6,0wx0AEF), (0wx0B66,0wx0B6F), (0wx0BE7,0wx0BEF), (0wx0C66,0wx0C6F), (0wx0CE6,0wx0CEF), (0wx0D66,0wx0D6F), (0wx0E50,0wx0E59), (0wx0ED0,0wx0ED9), (0wx0F20,0wx0F29) ] : CharClasses.CharRange val digitRange09 = [(0wx0030,0wx0039), (0wx0660,0wx0669), (0wx06F0,0wx06F9), (0wx0E50,0wx0E59), (0wx0ED0,0wx0ED9), (0wx0F20,0wx0F29) ] : CharClasses.CharRange val digitRange6F = [(0wx0966,0wx096F), (0wx09E6,0wx09EF), (0wx0A66,0wx0A6F), (0wx0AE6,0wx0AEF), (0wx0B66,0wx0B6F), (0wx0BE7,0wx0BEF), (0wx0C66,0wx0C6F), (0wx0CE6,0wx0CEF), (0wx0D66,0wx0D6F) ] : CharClasses.CharRange val baseRange = [(0wx0041,0wx005A), (0wx0061,0wx007A), (0wx00C0,0wx00D6), (0wx00D8,0wx00F6), (0wx00F8,0wx00FF), (0wx0100,0wx0131), (0wx0134,0wx013E), (0wx0141,0wx0148), (0wx014A,0wx017E), (0wx0180,0wx01C3), (0wx01CD,0wx01F0), (0wx01F4,0wx01F5), (0wx01FA,0wx0217), (0wx0250,0wx02A8), (0wx02BB,0wx02C1), (0wx0386,0wx0386), (0wx0388,0wx038A), (0wx038C,0wx038C), (0wx038E,0wx03A1), (0wx03A3,0wx03CE), (0wx03D0,0wx03D6), (0wx03DA,0wx03DA), (0wx03DC,0wx03DC), (0wx03DE,0wx03DE), (0wx03E0,0wx03E0), (0wx03E2,0wx03F3), (0wx0401,0wx040C), (0wx040E,0wx044F), (0wx0451,0wx045C), (0wx045E,0wx0481), (0wx0490,0wx04C4), (0wx04C7,0wx04C8), (0wx04CB,0wx04CC), (0wx04D0,0wx04EB), (0wx04EE,0wx04F5), (0wx04F8,0wx04F9), (0wx0531,0wx0556), (0wx0559,0wx0559), (0wx0561,0wx0586), (0wx05D0,0wx05EA), (0wx05F0,0wx05F2), (0wx0621,0wx063A), (0wx0641,0wx064A), (0wx0671,0wx06B7), (0wx06BA,0wx06BE), (0wx06C0,0wx06CE), (0wx06D0,0wx06D3), (0wx06D5,0wx06D5), (0wx06E5,0wx06E6), (0wx0905,0wx0939), (0wx093D,0wx093D), (0wx0958,0wx0961), (0wx0985,0wx098C), (0wx098F,0wx0990), (0wx0993,0wx09A8), (0wx09AA,0wx09B0), (0wx09B2,0wx09B2), (0wx09B6,0wx09B9), (0wx09DC,0wx09DD), (0wx09DF,0wx09E1), (0wx09F0,0wx09F1), (0wx0A05,0wx0A0A), (0wx0A0F,0wx0A10), (0wx0A13,0wx0A28), (0wx0A2A,0wx0A30), (0wx0A32,0wx0A33), (0wx0A35,0wx0A36), (0wx0A38,0wx0A39), (0wx0A59,0wx0A5C), (0wx0A5E,0wx0A5E), (0wx0A72,0wx0A74), (0wx0A85,0wx0A8B), (0wx0A8D,0wx0A8D), (0wx0A8F,0wx0A91), (0wx0A93,0wx0AA8), (0wx0AAA,0wx0AB0), (0wx0AB2,0wx0AB3), (0wx0AB5,0wx0AB9), (0wx0ABD,0wx0ABD), (0wx0AE0,0wx0AE0), (0wx0B05,0wx0B0C), (0wx0B0F,0wx0B10), (0wx0B13,0wx0B28), (0wx0B2A,0wx0B30), (0wx0B32,0wx0B33), (0wx0B36,0wx0B39), (0wx0B3D,0wx0B3D), (0wx0B5C,0wx0B5D), (0wx0B5F,0wx0B61), (0wx0B85,0wx0B8A), (0wx0B8E,0wx0B90), (0wx0B92,0wx0B95), (0wx0B99,0wx0B9A), (0wx0B9C,0wx0B9C), (0wx0B9E,0wx0B9F), (0wx0BA3,0wx0BA4), (0wx0BA8,0wx0BAA), (0wx0BAE,0wx0BB5), (0wx0BB7,0wx0BB9), (0wx0C05,0wx0C0C), (0wx0C0E,0wx0C10), (0wx0C12,0wx0C28), (0wx0C2A,0wx0C33), (0wx0C35,0wx0C39), (0wx0C60,0wx0C61), (0wx0C85,0wx0C8C), (0wx0C8E,0wx0C90), (0wx0C92,0wx0CA8), (0wx0CAA,0wx0CB3), (0wx0CB5,0wx0CB9), (0wx0CDE,0wx0CDE), (0wx0CE0,0wx0CE1), (0wx0D05,0wx0D0C), (0wx0D0E,0wx0D10), (0wx0D12,0wx0D28), (0wx0D2A,0wx0D39), (0wx0D60,0wx0D61), (0wx0E01,0wx0E2E), (0wx0E30,0wx0E30), (0wx0E32,0wx0E33), (0wx0E40,0wx0E45), (0wx0E81,0wx0E82), (0wx0E84,0wx0E84), (0wx0E87,0wx0E88), (0wx0E8A,0wx0E8A), (0wx0E8D,0wx0E8D), (0wx0E94,0wx0E97), (0wx0E99,0wx0E9F), (0wx0EA1,0wx0EA3), (0wx0EA5,0wx0EA5), (0wx0EA7,0wx0EA7), (0wx0EAA,0wx0EAB), (0wx0EAD,0wx0EAE), (0wx0EB0,0wx0EB0), (0wx0EB2,0wx0EB3), (0wx0EBD,0wx0EBD), (0wx0EC0,0wx0EC4), (0wx0F40,0wx0F47), (0wx0F49,0wx0F69), (0wx10A0,0wx10C5), (0wx10D0,0wx10F6), (0wx1100,0wx1100), (0wx1102,0wx1103), (0wx1105,0wx1107), (0wx1109,0wx1109), (0wx110B,0wx110C), (0wx110E,0wx1112), (0wx113C,0wx113C), (0wx113E,0wx113E), (0wx1140,0wx1140), (0wx114C,0wx114C), (0wx114E,0wx114E), (0wx1150,0wx1150), (0wx1154,0wx1155), (0wx1159,0wx1159), (0wx115F,0wx1161), (0wx1163,0wx1163), (0wx1165,0wx1165), (0wx1167,0wx1167), (0wx1169,0wx1169), (0wx116D,0wx116E), (0wx1172,0wx1173), (0wx1175,0wx1175), (0wx119E,0wx119E), (0wx11A8,0wx11A8), (0wx11AB,0wx11AB), (0wx11AE,0wx11AF), (0wx11B7,0wx11B8), (0wx11BA,0wx11BA), (0wx11BC,0wx11C2), (0wx11EB,0wx11EB), (0wx11F0,0wx11F0), (0wx11F9,0wx11F9), (0wx1E00,0wx1E9B), (0wx1EA0,0wx1EF9), (0wx1F00,0wx1F15), (0wx1F18,0wx1F1D), (0wx1F20,0wx1F45), (0wx1F48,0wx1F4D), (0wx1F50,0wx1F57), (0wx1F59,0wx1F59), (0wx1F5B,0wx1F5B), (0wx1F5D,0wx1F5D), (0wx1F5F,0wx1F7D), (0wx1F80,0wx1FB4), (0wx1FB6,0wx1FBC), (0wx1FBE,0wx1FBE), (0wx1FC2,0wx1FC4), (0wx1FC6,0wx1FCC), (0wx1FD0,0wx1FD3), (0wx1FD6,0wx1FDB), (0wx1FE0,0wx1FEC), (0wx1FF2,0wx1FF4), (0wx1FF6,0wx1FFC), (0wx2126,0wx2126), (0wx212A,0wx212B), (0wx212E,0wx212E), (0wx2180,0wx2182), (0wx3041,0wx3094), (0wx30A1,0wx30FA), (0wx3105,0wx312C), (0wxAC00,0wxD7A3) ] : CharClasses.CharRange val ideoRange = [(0wx3007,0wx3007), (0wx3021,0wx3029), (0wx4E00,0wx9FA5) ] : CharClasses.CharRange val combRange = [(0wx0300,0wx0345), (0wx0360,0wx0361), (0wx0483,0wx0486), (0wx0591,0wx05A1), (0wx05A3,0wx05B9), (0wx05BB,0wx05BD), (0wx05BF,0wx05BF), (0wx05C1,0wx05C2), (0wx05C4,0wx05C4), (0wx064B,0wx0652), (0wx0670,0wx0670), (0wx06D6,0wx06DC), (0wx06DD,0wx06DF), (0wx06E0,0wx06E4), (0wx06E7,0wx06E8), (0wx06EA,0wx06ED), (0wx0901,0wx0903), (0wx093C,0wx093C), (0wx093E,0wx094C), (0wx094D,0wx094D), (0wx0951,0wx0954), (0wx0962,0wx0963), (0wx0981,0wx0983), (0wx09BC,0wx09BC), (0wx09BE,0wx09BE), (0wx09BF,0wx09BF), (0wx09C0,0wx09C4), (0wx09C7,0wx09C8), (0wx09CB,0wx09CD), (0wx09D7,0wx09D7), (0wx09E2,0wx09E3), (0wx0A02,0wx0A02), (0wx0A3C,0wx0A3C), (0wx0A3E,0wx0A3E), (0wx0A3F,0wx0A3F), (0wx0A40,0wx0A42), (0wx0A47,0wx0A48), (0wx0A4B,0wx0A4D), (0wx0A70,0wx0A71), (0wx0A81,0wx0A83), (0wx0ABC,0wx0ABC), (0wx0ABE,0wx0AC5), (0wx0AC7,0wx0AC9), (0wx0ACB,0wx0ACD), (0wx0B01,0wx0B03), (0wx0B3C,0wx0B3C), (0wx0B3E,0wx0B43), (0wx0B47,0wx0B48), (0wx0B4B,0wx0B4D), (0wx0B56,0wx0B57), (0wx0B82,0wx0B83), (0wx0BBE,0wx0BC2), (0wx0BC6,0wx0BC8), (0wx0BCA,0wx0BCD), (0wx0BD7,0wx0BD7), (0wx0C01,0wx0C03), (0wx0C3E,0wx0C44), (0wx0C46,0wx0C48), (0wx0C4A,0wx0C4D), (0wx0C55,0wx0C56), (0wx0C82,0wx0C83), (0wx0CBE,0wx0CC4), (0wx0CC6,0wx0CC8), (0wx0CCA,0wx0CCD), (0wx0CD5,0wx0CD6), (0wx0D02,0wx0D03), (0wx0D3E,0wx0D43), (0wx0D46,0wx0D48), (0wx0D4A,0wx0D4D), (0wx0D57,0wx0D57), (0wx0E31,0wx0E31), (0wx0E34,0wx0E3A), (0wx0E47,0wx0E4E), (0wx0EB1,0wx0EB1), (0wx0EB4,0wx0EB9), (0wx0EBB,0wx0EBC), (0wx0EC8,0wx0ECD), (0wx0F18,0wx0F19), (0wx0F35,0wx0F35), (0wx0F37,0wx0F37), (0wx0F39,0wx0F39), (0wx0F3E,0wx0F3E), (0wx0F3F,0wx0F3F), (0wx0F71,0wx0F84), (0wx0F86,0wx0F8B), (0wx0F90,0wx0F95), (0wx0F97,0wx0F97), (0wx0F99,0wx0FAD), (0wx0FB1,0wx0FB7), (0wx0FB9,0wx0FB9), (0wx20D0,0wx20DC), (0wx20E1,0wx20E1), (0wx302A,0wx302F), (0wx3099,0wx3099), (0wx309A,0wx309A) ] : CharClasses.CharRange val extRange = [(0wx00B7,0wx00B7), (0wx02D0,0wx02D0), (0wx02D1,0wx02D1), (0wx0387,0wx0387), (0wx0640,0wx0640), (0wx0E46,0wx0E46), (0wx0EC6,0wx0EC6), (0wx3005,0wx3005), (0wx3031,0wx3035), (0wx309D,0wx309E), (0wx30FC,0wx30FE) ] : CharClasses.CharRange val nmsRange = List.concat [[(0wx3A,0wx3A),(0wx5F,0wx5F)](* :_ *), baseRange, ideoRange] val nameRange = List.concat [[(0wx2D,0wx2D),(0wx2E,0wx2E)](* -. *), digitRange, combRange, extRange, nmsRange] val pubidRange = List.concat [map (fn c => (c,c)) [0wx0A,0wx0D,0wx20], (* space,cr,lf *) map (fn c => (c,c)) (UniChar.String2Data "-'()+,./:=?;!*#@$_%"), [(0wx0030,0wx0039),(0wx0041,0wx005A),(0wx0061,0wx007A)] (* [0-9A-Za-z] *) ] : CharClasses.CharRange val encRange = [(0wx002D,0wx002E), (* -. *) (0wx0030,0wx0039), (* 0-9 *) (0wx0041,0wx005A), (* A-Z *) (0wx005F,0wx005F), (* _ *) (0wx0061,0wx007A) (* a-z *) ] : CharClasses.CharRange end (* stop of ../../Unicode/Chars/uniRanges.sml *) (* start of ../../Unicode/Chars/uniClasses.sml *) (*--------------------------------------------------------------------------*) (* Structure: UniClasses *) (* *) (* Notes: *) (* read CharClasses in order to understand how CharClasses are handled. *) (* *) (* Depends on: *) (* UniChar *) (* CharClasses *) (* *) (* Exceptions raised by functions in this structure: *) (* decValue : none *) (* hexValue : none *) (* isAsciiLetter : none *) (* isEnc : none *) (* isEncS : none *) (* isName : none *) (* isNms : none *) (* isPubid : none *) (* isS : none *) (* isXml : none *) (* isUnicode : none *) (* isVers : none *) (*--------------------------------------------------------------------------*) signature UniClasses = sig val isName : UniChar.Char -> bool val isNms : UniChar.Char -> bool val isPubid : UniChar.Char -> bool val isS : UniChar.Char -> bool val isEnc : UniChar.Char -> bool val isEncS : UniChar.Char -> bool val isVers : UniChar.Char -> bool val isDec : UniChar.Char -> bool val isHex : UniChar.Char -> bool val isXml : UniChar.Char -> bool val isUnicode : UniChar.Char -> bool val decValue : UniChar.Char -> UniChar.Char option val hexValue : UniChar.Char -> UniChar.Char option val isAsciiLetter : UniChar.Char -> bool end structure UniClasses : UniClasses = struct open UniChar CharClasses UniRanges (*--------------------------------------------------------------------*) (* initialize the character classes. *) (*--------------------------------------------------------------------*) local val nmsTemp = initialize(0wx0000,0wx3FFF) val restNms = addCharRange(nmsTemp,0wx0000,0wx3FFF,nmsRange) val _ = if restNms=[(0wxAC00,0wxD7A3),(0wx4E00,0wx9FA5)] then () else print ("Warning: extra characters after computing nms char class.\n") val nameTemp = initialize(0wx0000,0wxFFFF) val restName = addCharRange(nameTemp,0wx0000,0wx3FFF,nameRange) val _ = if restName=[(0wxAC00,0wxD7A3),(0wx4E00,0wx9FA5)] then () else print ("Warning: extra characters after computing name char class.\n") val pubTemp = initialize(0wx0000,0wx007F) val restPubid = addCharRange(pubTemp,0wx0000,0wx007F,pubidRange) val _ = if restPubid=nil then () else print ("Warning: extra characters after computing pubid char class.\n") val encTemp = initialize(0wx0000,0wx007F) val restEnc = addCharRange(encTemp,0wx0000,0wx007F,encRange) val _ = if restEnc=nil then () else print ("Warning: extra characters after computing enc char class.\n") in val nmsClass = finalize nmsTemp val nameClass = finalize nameTemp val pubClass = finalize pubTemp val encClass = finalize encTemp end (*--------------------------------------------------------------------*) (* is a character a name start char? *) (*--------------------------------------------------------------------*) fun isNms c = if c<0wx4000 then inCharClass(c,nmsClass) else c>=0wx4E00 andalso c<=0wx9FA5 orelse c>=0wxAC00 andalso c<=0wxD7A3 (*--------------------------------------------------------------------*) (* is a character a name char? *) (*--------------------------------------------------------------------*) fun isName c = if c<0wx4000 then inCharClass(c,nameClass) else c>=0wx4E00 andalso c<=0wx9FA5 orelse c>=0wxAC00 andalso c<=0wxD7A3 (*--------------------------------------------------------------------*) (* is a character a pubid char? *) (*--------------------------------------------------------------------*) fun isPubid c = c<0wx80 andalso inCharClass(c,pubClass) (*--------------------------------------------------------------------*) (* is a character valid in an encoding name, at its start, or in a *) (* version number? *) (*--------------------------------------------------------------------*) fun isEnc c = c<0wx80 andalso inCharClass(c,encClass) fun isEncS (c:UniChar.Char) = c>=0wx41 andalso c<=0wx5A orelse c>=0wx61 andalso c<=0wx7A fun isVers c = isEnc c orelse c=0wx3A (* #":" *) (*--------------------------------------------------------------------*) (* these are the valid Unicode characters (including surrogates). *) (*--------------------------------------------------------------------*) fun isUnicode (c:UniChar.Char) = c<=0wx10FFFF (*--------------------------------------------------------------------*) (* XML characters if not checked for Unicode char in advance. *) (*--------------------------------------------------------------------*) fun isXml (c:UniChar.Char) = c>=0wx0020 andalso c<=0wxD7FF orelse c>=0wxE000 andalso c<=0wxFFFD orelse c>=0wx10000 andalso c<=0wx10FFFF orelse c=0wx9 orelse c=0wxA orelse c=0wxD (*--------------------------------------------------------------------*) (* the frontend supresses 0wxD (carriage return), but its is still *) (* present when encoding is recognized. *) (*--------------------------------------------------------------------*) fun isS (c:UniChar.Char) = case c of 0wx09 => true | 0wx0A => true | 0wx0D => true | 0wx20 => true | _ => false (*--------------------------------------------------------------------*) (* is this character an ascii decimal/hexadecimal digit? *) (*--------------------------------------------------------------------*) fun isDec (c:UniChar.Char) = c>=0wx30 andalso c<=0wx39 fun isHex (c:UniChar.Char) = c>=0wx30 andalso c<=0wx39 orelse c>=0wx41 andalso c<=0wx46 orelse c>=0wx61 andalso c<=0wx66 (*--------------------------------------------------------------------*) (* calculate the decimal/hexadecimal value of an ascii (hex-)digit. *) (*--------------------------------------------------------------------*) fun decValue (c:UniChar.Char) = let val v = c-0wx30 in if v<=0wx9 then SOME v else NONE end fun hexValue (c:UniChar.Char) = let val v = c-0wx30 in if v<=0wx9 then SOME v else (if c>=0wx41 andalso c<=0wx46 then SOME(c-0wx37) else if c>=0wx61 andalso c<=0wx66 then SOME(c-0wx57) else NONE) end (*--------------------------------------------------------------------*) (* is c in [a-z]+[A-Z]? *) (*--------------------------------------------------------------------*) fun isAsciiLetter (c:UniChar.Char) = c>=0wx41 andalso c<=0wx5A orelse c>=0wx61 andalso c<=0wx7A end (* stop of ../../Unicode/Chars/uniClasses.sml *) (* start of ../../Unicode/Uri/uriDecode.sml *) signature UriDecode = sig val decodeUriLatin : string -> string val decodeUriUtf8 : string -> string end structure UriDecode : UriDecode = struct open UniChar UtilInt infix 8 << <<< infix 7 && infix 6 || ||| val op && = Word8.andb val op << = Word8.<< val op <<< = Chars.<< val op || = Word8.orb val op ||| = Chars.orb val Byte2Char = Chars.fromLargeWord o Word8.toLargeWord fun hexValue c = if #"0"<=c andalso #"9">=c then SOME (Byte.charToByte c-0wx30) else if #"A"<=c andalso #"F">=c then SOME (Byte.charToByte c-0wx37) else if #"a"<=c andalso #"f">=c then SOME (Byte.charToByte c-0wx57) else NONE exception Failed of char list fun getQuads cs = case cs of c1::c2::cs1 => (case (hexValue c1,hexValue c2) of (SOME b1,SOME b2) => ((b1 << 0w4 || b2),cs1) | _ => raise Failed cs1) | _ => raise Failed nil (*--------------------------------------------------------------------*) (* decode UTF-8 *) (*--------------------------------------------------------------------*) val byte1switch = Array.array(256,1) (* 1 byte *) val _ = appInterval (fn i => Array.update(byte1switch,i,0)) (0x80,0xBF) (* Error *) val _ = appInterval (fn i => Array.update(byte1switch,i,2)) (0xC0,0xDF) (* 2 bytes *) val _ = appInterval (fn i => Array.update(byte1switch,i,3)) (0xE0,0xEF) (* 3 bytes *) val _ = appInterval (fn i => Array.update(byte1switch,i,4)) (0xF0,0xF7) (* 4 bytes *) val _ = appInterval (fn i => Array.update(byte1switch,i,5)) (0xF8,0xFB) (* 5 bytes *) val _ = appInterval (fn i => Array.update(byte1switch,i,6)) (0xFC,0xFD) (* 6 bytes *) val diff2 = 0wx00003080 val diff3 = diff2 <<< 0wx6 ||| 0wx00020080 val diff4 = diff3 <<< 0wx6 ||| 0wx00400080 val diff5 = diff4 <<< 0wx6 ||| 0wx08000080 val diff6 = diff5 <<< 0wx6 ||| 0wx00000080 val diffsByLen = Vector.fromList [0w0,0w0,diff2,diff3,diff4,diff5,diff6] fun getByte cs = case cs of #"%"::cs1 => getQuads cs1 | c::cs1 => (Byte.charToByte c,cs1) | nil => raise Failed nil fun getBytes(b,cs,n) = let fun do_err (cs,m) = if n do_err(cs,m+1) val w1 = if b && 0wxC0 = 0wx80 then w <<< 0w6 + Byte2Char b else do_err(cs1,m+1) in doit (w1,cs1,m+1) end val (w,cs1) = doit (Byte2Char b,cs,2) val diff = Vector.sub(diffsByLen,n) val c = w-diff in if c<0wx100 then (Char2char c,cs1) else raise Failed cs1 end fun getCharUtf8 cs = let val (b,cs1) = getQuads cs in case Array.sub(byte1switch,Word8.toInt b) of 0 (* error *) => raise Failed cs1 | 1 (* 1 byte *) => (Byte.byteToChar b,cs1) | n (* n bytes *) => getBytes(b,cs1,n) end fun decodeUriUtf8 str = let val cs = String.explode str fun doit yet nil = yet | doit yet (c::cs) = if #"%"<>c then doit (c::yet) cs else let val (yet1,cs1) = let val (ch,cs1) = getCharUtf8 cs in (ch::yet,cs1) end handle Failed cs => (yet,cs) in doit yet1 cs1 end in String.implode(rev(doit nil cs)) end (*--------------------------------------------------------------------*) (* decode Latin *) (*--------------------------------------------------------------------*) fun getChar cs = case cs of #"%"::cs1 => let val (b,cs2) = getQuads cs1 in (Byte.byteToChar b,cs2) end | c::cs1 => (c,cs1) | nil => raise Failed nil fun decodeUriLatin str = let val cs = String.explode str fun doit yet nil = yet | doit yet (c::cs) = let val (yet1,cs1) = let val (ch,cs1) = getChar cs in (ch::yet,cs1) end handle Failed cs => (yet,cs) in doit yet1 cs1 end in String.implode(rev(doit nil cs)) end end (* stop of ../../Unicode/Uri/uriDecode.sml *) (* start of ../../Unicode/Uri/uriEncode.sml *) signature UriEncode = sig val Data2UriUtf8 : UniChar.Data -> string val Data2UriLatin : UniChar.Data -> string val Vector2UriUtf8 : UniChar.Vector -> string val Vector2UriLatin : UniChar.Vector -> string val String2UriUtf8 : string -> string val String2UriLatin : string -> string end structure UriEncode : UriEncode = struct open UniChar UniClasses infix 8 >> >>> infix 7 && &&& infix 6 || val op && = Word8.andb val op &&& = Chars.andb val op >> = Word8.>> val op >>> = Chars.>> val op || = Word8.orb val Char2Byte = Word8.fromLargeWord o Chars.toLargeWord fun encodeCharUtf8 c = if c<0wx80 then [Char2Byte c] else if c<0wx800 then [0wxC0 || Char2Byte(c >>> 0w6), 0wx80 || Char2Byte(c &&& 0wx3F)] else if c<0wx10000 then [0wxE0 || Char2Byte(c >>> 0w12), 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), 0wx80 || Char2Byte(c &&& 0wx3F)] else if c<0wx200000 then [0wxF0 || Char2Byte(c >>> 0w18), 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), 0wx80 || Char2Byte(c &&& 0wx3F)] else if c<0wx4000000 then [0wxF8 || Char2Byte(c >>> 0w24), 0wx80 || Char2Byte((c >>> 0w18) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), 0wx80 || Char2Byte(c &&& 0wx3F)] else [0wxFC || Char2Byte(c >>> 0w30), 0wx80 || Char2Byte((c >>> 0w24) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w18) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w12) &&& 0wx3F), 0wx80 || Char2Byte((c >>> 0w6) &&& 0wx3F), 0wx80 || Char2Byte(c &&& 0wx3F)] fun Byte2Cc b = let fun Quad2C b = if b<0wxA then Byte.byteToChar(b+0wx30) else Byte.byteToChar(b+0wx37) in (Quad2C(b >> 0w4),Quad2C(b && 0wx0F)) end fun precedesHex (i,cv) = if Vector.length cv <= i+2 then false else let val (c1,c2) = (Vector.sub(cv,i+1),Vector.sub(cv,i+2)) in isHex c1 andalso isHex c2 end fun Vector2UriUtf8 cv = let val revd = Vector.foldli (fn (i,c,s) => if c<0wx80 andalso (c<>0wx25 orelse precedesHex(i,cv)) then Char2char c::s else foldl (fn (b,s) => let val (c1,c2) = Byte2Cc b in c2::c1:: #"%"::s end) s (encodeCharUtf8 c)) nil (cv,0,NONE) in String.implode (rev revd) end fun Vector2UriLatin cv = let val revd = Vector.foldli (fn (i,c,s) => if c<0wx80 andalso (c<>0wx25 orelse precedesHex(i,cv)) then Char2char c::s else (if c>= 0w100 then s else let val (c1,c2) = Byte2Cc (Char2Byte c) in c2::c1:: #"%"::s end)) nil (cv,0,NONE) in String.implode (rev revd) end val Data2UriUtf8 = Vector2UriUtf8 o Data2Vector val Data2UriLatin = Vector2UriLatin o Data2Vector val String2UriUtf8 = Vector2UriUtf8 o String2Vector val String2UriLatin = Vector2UriLatin o String2Vector end (* stop of ../../Unicode/Uri/uriEncode.sml *) (* start of ../../Unicode/Uri/uri.sml *) (* require "basis.__array"; require "basis.__byte"; require "basis.__string"; require "basis.__vector"; require "basis.__word"; require "basis.__word8"; require "util.unsafe"; require "util.utilInt"; require "chars"; require "naming"; *) signature Uri = sig eqtype Uri val emptyUri : Uri val hashUri : Uri -> word val compareUri : Uri * Uri -> order val uriJoin : Uri * Uri -> Uri val uriSuffix : Uri -> string val Data2Uri : UniChar.Data -> Uri val Vector2Uri : UniChar.Vector -> Uri val String2Uri : string -> Uri val Uri2String : Uri -> string val retrieveUri : Uri -> string * string * bool end structure Uri :> Uri = struct open UniChar UniClasses UriDecode UriEncode UtilError UtilInt (*--------------------------------------------------------------------*) (* decoding *) (*--------------------------------------------------------------------*) type Uri = string val emptyUri = "" val Vector2Uri = Vector2UriUtf8 val Data2Uri = Data2UriUtf8 val String2Uri = String2UriUtf8 val Uri2String = decodeUriUtf8 val slash = "/" fun uriSuffix s = let fun search i = if i<0 then NONE else case String.sub(s,i) of #"." => SOME i | #"/" => NONE | _ => search (i-1) in case search (String.size s-1) of NONE => "" | SOME i => String.extract(s,i+1,NONE) end fun isScheme c = Char.isAlphaNum c orelse #"+"=c orelse #"-"=c orelse #"."=c fun uriAbsolute uri = let fun search i = if i>=String.size uri then false else let val c=String.sub(uri,i) in if #":"=c then true else if isScheme c then search (i+1) else false end in if uri="" then false else if Char.isAlpha (String.sub(uri,0)) then search 1 else false end fun uriRelative uri = not (uriAbsolute uri) fun uriLocal uri = if String.isPrefix "file:" uri then SOME(String.extract(uri,5,NONE)) else if uriRelative uri then SOME uri else NONE fun uriPath s = let fun search (i,hadSlash) = if i<0 then if hadSlash then SOME 0 else NONE else case String.sub(s,i) of #"/" => if hadSlash then NONE else search(i-1,true) | _ => if hadSlash then SOME(i+1) else search(i-1,false) val len = String.size s val posOpt = search(len-1,false) in case posOpt of NONE => emptyUri | SOME i => if i=0 then slash else String.extract(s,0,SOME(i+1)) end fun uriAuth uri = let fun searchScheme i = if i>=String.size uri then NONE else let val c=String.sub(uri,i) in if #":"=c then SOME i else if isScheme c then searchScheme (i+1) else NONE end fun searchSlash i = if i>=String.size uri then NONE else let val c=String.sub(uri,i) in if #"/"=c then SOME i else searchSlash (i+1) end in if uri="" then "" else if not (Char.isAlpha(String.sub(uri,0))) then "" else case searchScheme 1 of NONE => "" | SOME i => if String.size uri<=i+2 then String.extract(uri,0,SOME(i+1)) else if #"/"=String.sub(uri,i+1) andalso #"/"=String.sub(uri,i+2) then case searchSlash (i+3) of NONE => uri | SOME j => String.extract(uri,0,SOME j) else String.extract(uri,0,SOME(i+1)) end fun uriScheme uri = let fun searchScheme i = if i>=String.size uri then NONE else let val c=String.sub(uri,i) in if #":"=c then SOME i else if isScheme c then searchScheme (i+1) else NONE end in if uri="" then "" else if not (Char.isAlpha(String.sub(uri,0))) then "" else case searchScheme 1 of NONE => "" | SOME i => String.extract(uri,0,SOME(i+1)) end fun uriJoin(abs,rel) = if rel="" then uriPath abs else if abs="" then rel else if String.isPrefix "//" rel then uriScheme abs^rel else if #"/"=String.sub(rel,0) then uriAuth abs^rel else if uriAbsolute rel then rel else uriPath abs^rel val compareUri = String.compare val hashUri = UtilHash.hashString fun convertCommand str (src,dst) = let val s = Substring.all str fun doit ss s = if Substring.isEmpty s then ss else let val (sl,sr) = Substring.splitr (fn c => #"%"<>c) s in if Substring.isEmpty sl then sr::ss else let val sl' = Substring.trimr 1 sl in case Substring.first sr of SOME #"1" => let val sr' = Substring.triml 1 sr in doit (Substring.all src::sr'::ss) sl' end | SOME #"2" => let val sr' = Substring.triml 1 sr in doit (Substring.all dst::sr'::ss) sl' end | _ => doit (Substring.all "%"::sr::ss) sl' end end val ss = doit nil s val s = Substring.concat ss in s end fun retrieveRemote uri = let val tmp = OS.FileSys.tmpName() val cmd = convertCommand Config.retrieveCommand (uri,tmp) val status = OS.Process.system cmd val _ = if status = OS.Process.success then () else let val _ = (OS.FileSys.remove tmp handle OS.SysErr _ => ()) val cmd = convertCommand Config.retrieveCommand ("",tmp) in raise NoSuchFile (uri,"command '"^cmd^"' failed") end in (Uri2String uri,tmp,true) end fun retrieveUri uri = case uriLocal uri of SOME f => (Uri2String uri,Uri2String f,false) | NONE => retrieveRemote uri end (* stop of ../../Unicode/Uri/uri.sml *) (* start of ../../Parser/version.sml *) structure Version = struct val FXP_VERSION = "1.4.4" end (* stop of ../../Parser/version.sml *) (* start of ../../Util/utilList.sml *) (*--------------------------------------------------------------------------*) (* Structure: UtilList *) (* *) (* Depends on: *) (* *) (* Exceptions raised by functions in this structure: *) (* member : none *) (* findAndDelete : none *) (*--------------------------------------------------------------------------*) signature UtilList = sig val split : ('a -> bool) -> 'a list -> 'a list list val member : ''a -> ''a list -> bool val mapAllPairs : ('a * 'b -> 'c) -> 'a list * 'b list -> 'c list val findAndMap : ('a -> 'b option) -> 'a list -> 'b option val findAndDelete : ('a -> bool) -> 'a list -> ('a option * 'a list) val sort : ('a * 'a -> order) -> 'a list -> 'a list val merge : ('a * 'a -> order) -> 'a list * 'a list -> 'a list val diff : ('a * 'a -> order) -> 'a list * 'a list -> 'a list val cap : ('a * 'a -> order) -> 'a list * 'a list -> 'a list val sub : ('a * 'a -> order) -> 'a list * 'a list -> bool val insert : ('a * 'a -> order) -> 'a * 'a list -> 'a list val delete : ('a * 'a -> order) -> 'a * 'a list -> 'a list val elem : ('a * 'a -> order) -> 'a * 'a list -> bool end structure UtilList : UtilList = struct (*--------------------------------------------------------------------*) (* split a list into a list of lists at each element fullfilling p. *) (*--------------------------------------------------------------------*) fun split p l = let val (one,ls) = foldr (fn (a,(curr,ls)) => if p a then (nil,curr::ls) else (a::curr,ls)) (nil,nil) l in one::ls end (*--------------------------------------------------------------------*) (* is x a member of l? *) (*--------------------------------------------------------------------*) fun member x l = List.exists (fn y => x=y) l (*--------------------------------------------------------------------*) (* for [a1,...,an] and [b1,...,bk], generate *) (* [f(a1,b1),f(a1,b2),...,f(an,bk-1),f(an,bk)]. *) (*--------------------------------------------------------------------*) fun mapAllPairs f (ass,bs) = foldr (fn (a,cs) => foldr (fn (b,cs) => f(a,b)::cs) cs bs) nil ass (*--------------------------------------------------------------------*) (* find the first element x of l such that f x = SOME y, and return *) (* f x. If there is no such x, return NONE. *) (*--------------------------------------------------------------------*) fun findAndMap _ nil = NONE | findAndMap f (x::xs) = case f x of NONE => findAndMap f xs | y => y (*--------------------------------------------------------------------*) (* find the first element x of l such that f x = true, delete it from *) (* l, and return SOME x with the modified list. If there is no such x *) (* return (NONE,l). *) (*--------------------------------------------------------------------*) fun findAndDelete _ nil = (NONE,nil) | findAndDelete f (x::xs) = if f x then (SOME x,xs) else let val (y,ys) = findAndDelete f xs in (y,x::ys) end (*--------------------------------------------------------------------*) (* given a function that compares elements, merge two sorted lists. *) (*--------------------------------------------------------------------*) fun merge comp (l1,l2) = let fun go (nil,l) = l | go (l,nil) = l | go (l1 as (x1::r1),l2 as (x2::r2)) = case comp(x1,x2) of LESS => x1::go(r1,l2) | EQUAL => go(l1,r2) | GREATER => x2::go(l1,r2) in go(l1,l2) end (*--------------------------------------------------------------------*) (* given a comparing function, compute the intersection of two *) (* ordered lists. *) (*--------------------------------------------------------------------*) fun cap comp (l1,l2) = let fun go (nil,l) = nil | go (l,nil) = nil | go (l1 as (x1::r1),l2 as (x2::r2)) = case comp(x1,x2) of LESS => go(r1,l2) | EQUAL => x1::go(r1,r2) | GREATER => go(l1,r2) in go(l1,l2) end (*--------------------------------------------------------------------*) (* given a comparing function, compute the difference of two *) (* ordered lists. *) (*--------------------------------------------------------------------*) fun diff comp (l1,l2) = let fun go (nil,l) = nil | go (l,nil) = l | go (l1 as (x1::r1),l2 as (x2::r2)) = case comp(x1,x2) of LESS => x1::go(r1,l2) | EQUAL => go(r1,r2) | GREATER => go(l1,r2) in go(l1,l2) end (*--------------------------------------------------------------------*) (* given a comparing function, find out whether an ordered list is *) (* contained in an other ordered list. *) (*--------------------------------------------------------------------*) fun sub comp (l1,l2) = let fun go (nil,l) = true | go (l,nil) = false | go (l1 as (x1::r1),l2 as (x2::r2)) = case comp(x1,x2) of LESS => false | EQUAL => go(r1,r2) | GREATER => go(l1,r2) in go(l1,l2) end (*--------------------------------------------------------------------*) (* given a function that compares elements, insert an element into an *) (* ordered list. *) (*--------------------------------------------------------------------*) fun insert comp (x,l) = let fun go nil = [x] | go (l as y::ys) = case comp(x,y) of LESS => x::l | EQUAL => l | GREATER => y::go ys in go l end (*--------------------------------------------------------------------*) (* given a function that compares elements, delete an element from *) (* an ordered list. *) (*--------------------------------------------------------------------*) fun delete comp (x,l) = let fun go nil = [x] | go (l as y::ys) = case comp(x,y) of LESS => l | EQUAL => ys | GREATER => y::go ys in go l end (*--------------------------------------------------------------------*) (* given a function that compares elements, insert an element into an *) (* ordered list. *) (*--------------------------------------------------------------------*) fun elem comp (x,l) = let fun go nil = false | go (l as y::ys) = case comp(x,y) of LESS => false | EQUAL => true | GREATER => go ys in go l end (*--------------------------------------------------------------------*) (* merge-sort a list of elements comparable with the function in the *) (* 1st argument. Preserve duplicate elements. *) (*--------------------------------------------------------------------*) fun sort _ nil = nil | sort comp l = let fun mergeOne (x::y::l) = merge comp (x,y)::mergeOne l | mergeOne l = l fun mergeAll [l] = l | mergeAll ls = mergeAll (mergeOne ls) val singles = map (fn x => [x]) l in mergeAll singles end end (* stop of ../../Util/utilList.sml *) (* start of ../../Parser/Dfa/dfaOptions.sml *) signature DfaOptions = sig val O_DFA_INITIAL_WIDTH : int ref val O_DFA_MAX_STATES : int ref val O_DFA_WARN_TOO_LARGE : bool ref val setDfaDefaults : unit -> unit val setDfaOptions : Options.Option list * (string -> unit) -> Options.Option list val dfaUsage : Options.Usage end functor DfaOptions () : DfaOptions = struct open Options UtilInt val O_DFA_INITIAL_WIDTH = ref 4 val O_DFA_MAX_STATES = ref 256 val O_DFA_WARN_TOO_LARGE = ref true fun setDfaDefaults() = let val _ = O_DFA_INITIAL_WIDTH := 4 val _ = O_DFA_MAX_STATES := 256 val _ = O_DFA_WARN_TOO_LARGE := true in () end val dfaUsage = [U_ITEM(["--dfa-initial-size=n"],"Initial size of DFA transition tables (16)"), U_ITEM(["--dfa-initial-width=n"],"Same as --dfa-initial-size=2^n (4)"), U_ITEM(["--dfa-max-size=n"],"Maximal size of DFAs for ambiguous content models (256)"), U_ITEM(["--dfa-warn-size[=(yes|no)]"],"Warn about too large DFAs (yes)") ] fun setDfaOptions(opts,doError) = let exception Failed of string option fun getNat str = if str="" then raise Failed NONE else let val cs = String.explode str in foldl (fn (c,n) => if #"0">c orelse #"9" raise Failed (SOME("number "^str^" is too large for this system")) end val yesNo = "'yes' or 'no'" fun tooLarge n = String.concat ["number ",n," is too large for this system"] fun mustHave key = String.concat ["option --",key," must have an argument"] fun mustBe key what = String.concat ["the argument to option --",key," must be ",what] fun do_yesno(key,valOpt,flag) = case valOpt of NONE => flag := true | SOME "yes" => flag := true | SOME "no" => flag := false | SOME s => doError (mustBe key yesNo) fun do_num(key,valOpt,flag) = case valOpt of NONE => doError (mustHave key) | SOME s => flag := getNat s handle Failed NONE => doError (mustBe key "a number") | Failed (SOME s) => doError s fun do_dfa_ts(key,valOpt,toWidth) = case valOpt of NONE => doError (mustHave key) | SOME s => O_DFA_INITIAL_WIDTH := toWidth (getNat s) handle Failed NONE => doError (mustBe key "a number") | Failed (SOME s) => doError s fun do_long(key,valOpt) = case key of "dfa-initial-size" => true before do_dfa_ts(key,valOpt,nextPowerTwo) | "dfa-initial-width" => true before do_dfa_ts(key,valOpt,fn i => i) | "dfa-max-size" => true before do_num(key,valOpt,O_DFA_MAX_STATES) | "dfa-warn-size" => true before do_yesno(key,valOpt,O_DFA_WARN_TOO_LARGE) | _ => false and doit nil = nil | doit (opt::opts) = case opt of OPT_NOOPT => opts | OPT_LONG(key,value) => if do_long(key,value) then doit opts else opt::doit opts | OPT_NEG _ => opt::doit opts | OPT_SHORT _ => opt::doit opts | OPT_STRING _ => opt::doit opts in doit opts end end (* stop of ../../Parser/Dfa/dfaOptions.sml *) (* start of ../../Parser/Params/parserOptions.sml *) (*--------------------------------------------------------------------------*) (* Structure: ParserOptions *) (* *) (* Depends on: none *) (*--------------------------------------------------------------------------*) signature ParserOptions = sig structure DfaOptions : DfaOptions val O_CHECK_ISO639 : bool ref val O_CHECK_LANGID : bool ref val O_CHECK_PREDEFINED : bool ref val O_CHECK_RESERVED : bool ref val O_CHECK_VERSION : bool ref val O_WARN_MULT_ENUM : bool ref val O_WARN_XML_DECL : bool ref val O_WARN_ATT_NO_ELEM : bool ref val O_WARN_MULT_ENT_DECL : bool ref val O_WARN_MULT_NOT_DECL : bool ref val O_WARN_MULT_ATT_DEF : bool ref val O_WARN_MULT_ATT_DECL : bool ref val O_WARN_SHOULD_DECLARE : bool ref val O_WARN_NON_ASCII_URI : bool ref val O_ERROR_MINIMIZE : bool ref val O_VALIDATE : bool ref val O_COMPATIBILITY : bool ref val O_INTEROPERABILITY : bool ref val O_INCLUDE_EXT_PARSED : bool ref val O_INCLUDE_PARAM_ENTS : bool ref val setParserDefaults : unit -> unit val setParserOptions : Options.Option list * (string -> unit) -> Options.Option list val parserUsage : Options.Usage end functor ParserOptions () : ParserOptions = struct structure DfaOptions = DfaOptions () open DfaOptions Options UtilInt UtilList val O_CHECK_VERSION = ref true (* check for conforming xml version? *) val O_CHECK_ISO639 = ref true (* check whether a two-letter LangCode *) (* is acording to ISO 639? *) val O_CHECK_LANGID = ref true (* check whether a LangCode fullfills *) (* IETF RFC 1766? *) val O_CHECK_RESERVED = ref false(* check for names starting with xml? *) val O_CHECK_PREDEFINED = ref true (* check declarations of predefined *) val O_WARN_MULT_ENUM = ref true (* check whether a token occurs *) (* twice in the enumerated attribute *) (* types of the same element *) val O_WARN_XML_DECL = ref false (* warn if the XML decl is missing? *) val O_WARN_ATT_NO_ELEM = ref true (* warn for undeclared elements *) (* in att def list declarations? *) val O_WARN_MULT_ENT_DECL = ref true (* warn about redefined entities *) val O_WARN_MULT_NOT_DECL = ref true (* warn about redefined notations*) val O_WARN_SHOULD_DECLARE = ref true (* warn if predefined entities *) (* are not declared in the dtd *) val O_WARN_MULT_ATT_DEF = ref true (* warn if an attributes is defd *) (* twice for the same element? *) val O_WARN_MULT_ATT_DECL = ref true (* warn if there are multiple att *) (* def lists for one element? *) val O_WARN_NON_ASCII_URI = ref true (* warn about non-ascii chars in *) (* system identifiers? *) val O_ERROR_MINIMIZE = ref true (* try to avoid repeating errors? *) val O_VALIDATE = ref true val O_COMPATIBILITY = ref true val O_INTEROPERABILITY = ref false val O_INCLUDE_EXT_PARSED = ref false val O_INCLUDE_PARAM_ENTS = ref false fun setParserDefaults() = let val _ = setDfaDefaults() val _ = O_CHECK_ISO639 := false val _ = O_CHECK_LANGID := false val _ = O_CHECK_PREDEFINED := true val _ = O_CHECK_RESERVED := false val _ = O_CHECK_VERSION := true val _ = O_WARN_MULT_ENUM := true val _ = O_WARN_XML_DECL := false val _ = O_WARN_ATT_NO_ELEM := false val _ = O_WARN_MULT_ENT_DECL := false val _ = O_WARN_MULT_NOT_DECL := false val _ = O_WARN_MULT_ATT_DEF := false val _ = O_WARN_MULT_ATT_DECL := false val _ = O_WARN_SHOULD_DECLARE := true val _ = O_WARN_NON_ASCII_URI := true val _ = O_VALIDATE := true val _ = O_COMPATIBILITY := true val _ = O_INTEROPERABILITY := false val _ = O_ERROR_MINIMIZE := true val _ = O_INCLUDE_EXT_PARSED := false val _ = O_INCLUDE_PARAM_ENTS := false in () end val parserUsage = [U_ITEM(["-[n]v","--validate[=(yes|no)]"],"Turn on or off validation (yes)"), U_ITEM(["-[n]c","--compat[=(yes|no)]","--compatibility[=(yes|no)]"], "Turn on or off compatibility checking (yes)"), U_ITEM(["-[n]i","--interop[=(yes|no)]","--interoperability[=(yes|no)]"], "Turn on or off interoperability checking (no)"), U_SEP, U_ITEM(["--few-errors[=(yes|no)]"],"Report fewer errors (no)"), U_ITEM(["--check-reserved[=(yes|no)]"], "Checking for reserved names (no)"), U_ITEM(["--check-predef[=(yes|no)]","--check-predefined[=(yes|no)]"], "Check declaration of predefined entities (yes)"), U_ITEM(["--check-lang-id[=(yes|no)]"],"Checking language identifiers (no)"), U_ITEM(["--check-iso639[=(yes|no)]"],"Check ISO 639 language codes (no)"), U_ITEM(["--check-xml-version[=(yes|no)]"], "Check XML version number (yes)"), U_SEP, U_ITEM(["--warn-xml-decl[=(yes|no)]"],"Warn if there is no XML declaration (no)"), U_ITEM(["--warn-att-elem[=(yes|no)]"], "Warn about attlist declarations for undeclared elements (no)"), U_ITEM(["--warn-predefined[=(yes|no)]"], "Warn if the predefined entities are not declared (no)"), U_ITEM(["--warn-mult-decl[=]"],"Warn about multiple declarations (none)"), U_ITEM(["--warn-uri[=(yes|no)]"],"Warn about non-ASCII characters in URIs (yes)"), U_ITEM(["--warn[=all]"],"Warn about nearly everything"), U_ITEM(["--warn=none"],"Do not print warnings"), U_SEP, U_ITEM(["--include-ext[=(yes|no)]","--include-external[=(yes|no)]"], "Include external entities in non-validating mode (no)"), U_ITEM(["--include-par[=(yes|no)]","--include-parameter[=(yes|no)]"], "Include parameter entities and external subset in "^ "non-validating mode (no)"), U_SEP] @dfaUsage fun setParserOptions(opts,doError) = let datatype What = ATT | ATTLIST | ENT | NOT exception Failed of string option fun getNat str = if str="" then raise Failed NONE else let val cs = String.explode str in foldl (fn (c,n) => if #"0">c orelse #"9" raise Failed (SOME("number "^str^" is too large for this system")) end val allNone = "'all' or 'none'" val yesNo = "'yes' or 'no'" val yesNoWhat = "'yes', 'no' or a list of 'att', 'attlist', 'ent' and 'not'" fun errorMustBe(key,what) = doError (String.concat ["the argument to option --",key," must be ",what]) fun errorNoArg key = doError (String.concat ["option --",key," has no argument"]) fun do_mult_decl(key,valOpt) = let val all = [ATT,ATTLIST,ENT,NOT] fun setFlags whats = app (fn (what,flag) => flag := member what whats) [(ATT,O_WARN_MULT_ATT_DEF),(ATTLIST,O_WARN_MULT_ATT_DECL), (ENT,O_WARN_MULT_ENT_DECL),(NOT,O_WARN_MULT_NOT_DECL)] in case valOpt of NONE => setFlags all | SOME "yes" => setFlags all | SOME "no" => setFlags nil | SOME s => let val fields = String.fields (fn c => #","=c) s val whats = map (fn s => case s of "att" => ATT | "attlist" => ATTLIST | "ent" => ENT | "not" => NOT | _ => raise Failed NONE) fields in setFlags whats end handle Failed _ => errorMustBe(key,yesNoWhat) end fun do_noarg(key,valOpt,flag) = case valOpt of NONE => flag := true | SOME _ => errorNoArg key fun do_yesno(key,valOpt,flag) = case valOpt of NONE => flag := true | SOME "yes" => flag := true | SOME "no" => flag := false | SOME s => errorMustBe(key,yesNo) fun do_num(key,valOpt,flag) = case valOpt of NONE => errorMustBe(key,"a number") | SOME s => flag := getNat s handle Failed NONE => errorMustBe(key,"a number") | Failed (SOME s) => doError s fun do_warn(key,valOpt) = let val all = [O_WARN_MULT_ENUM,O_WARN_ATT_NO_ELEM, O_WARN_MULT_ENT_DECL,O_WARN_MULT_NOT_DECL,O_WARN_MULT_ATT_DEF, O_WARN_MULT_ATT_DECL,O_WARN_SHOULD_DECLARE,O_WARN_XML_DECL] fun setFlags value = app (fn flag => flag := value) all in case valOpt of NONE => setFlags true | SOME "all" => setFlags true | SOME "none" => setFlags false | SOME _ => errorMustBe(key,allNone) end fun do_long(key,valOpt) = case key of "validate" => true before do_yesno(key,valOpt,O_VALIDATE) | "compat" => true before do_yesno(key,valOpt,O_COMPATIBILITY) | "compatibility" => true before do_yesno(key,valOpt,O_COMPATIBILITY) | "interop" => true before do_yesno(key,valOpt,O_INTEROPERABILITY) | "interoperability" => true before do_yesno(key,valOpt,O_INTEROPERABILITY) | "few-errors" => true before do_yesno(key,valOpt,O_ERROR_MINIMIZE) | "check-reserved" => true before do_yesno(key,valOpt,O_CHECK_RESERVED) | "check-predef" => true before do_yesno(key,valOpt,O_CHECK_PREDEFINED) | "check-predefined" => true before do_yesno(key,valOpt,O_CHECK_PREDEFINED) | "check-lang-id" => true before do_yesno(key,valOpt,O_CHECK_LANGID) | "check-iso639" => true before do_yesno(key,valOpt,O_CHECK_ISO639) | "check-xml-version" => true before do_yesno(key,valOpt,O_CHECK_VERSION) | "warn" => true before do_warn(key,valOpt) | "warn-xml-decl" => true before do_yesno(key,valOpt,O_WARN_XML_DECL) | "warn-att-elem" => true before do_yesno(key,valOpt,O_WARN_ATT_NO_ELEM) | "warn-predefined" => true before do_yesno(key,valOpt,O_WARN_SHOULD_DECLARE) | "warn-mult-decl" => true before do_mult_decl(key,valOpt) | "warn-uri" => true before do_yesno(key,valOpt,O_WARN_NON_ASCII_URI) | "include-ext" => true before do_yesno(key,valOpt,O_INCLUDE_EXT_PARSED) | "include-external" => true before do_yesno(key,valOpt,O_INCLUDE_EXT_PARSED) | "include-par" => true before do_yesno(key,valOpt,O_INCLUDE_PARAM_ENTS) | "include-parameter" => true before do_yesno(key,valOpt,O_INCLUDE_PARAM_ENTS) | _ => false fun do_short cs = let fun doOne c = case c of #"v" => false before O_VALIDATE := true | #"c" => false before O_COMPATIBILITY := true | #"i" => false before O_INTEROPERABILITY := true | _ => true in List.filter doOne cs end fun do_neg cs = let fun doOne c = case c of #"v" => false before O_VALIDATE := false | #"c" => false before O_COMPATIBILITY := false | #"i" => false before O_INTEROPERABILITY := false | _ => true in List.filter doOne cs end and doit nil = nil | doit (opt::opts) = case opt of OPT_NOOPT => opts | OPT_LONG(key,value) => if do_long(key,value) then doit opts else opt::doit opts | OPT_SHORT cs => (case do_short cs of nil => doit opts | rest => OPT_SHORT rest::doit opts) | OPT_NEG cs => (case do_neg cs of nil => doit opts | rest => OPT_NEG rest::doit opts) | OPT_STRING s => opt::doit opts val opts1 = setDfaOptions (opts,doError) in doit opts1 end end (* stop of ../../Parser/Params/parserOptions.sml *) (* start of ../../Util/intLists.sml *) signature IntLists = sig type IntList = int list val emptyIntList : IntList val singleIntList : int -> IntList val fullIntList : int -> IntList val isEmptyIntList : IntList -> bool val inIntList : int * IntList -> bool val subIntList : IntList * IntList -> bool val compareIntLists: IntList * IntList -> order val hashIntList : IntList -> word val addIntList : int * IntList -> IntList val delIntList : int * IntList -> IntList val cupIntLists : IntList * IntList -> IntList val capIntLists : IntList * IntList -> IntList val diffIntLists : IntList * IntList -> IntList val IntList2String : IntList -> string end structure IntLists : IntLists = struct open UtilCompare UtilHash UtilInt UtilList UtilString type IntList = int list val emptyIntList = nil : IntList fun fullIntList n = intervalList(0,n) fun singleIntList n = [n] val isEmptyIntList = null val inIntList = elem Int.compare val subIntList = sub Int.compare val addIntList = insert Int.compare val delIntList = delete Int.compare val capIntLists = cap Int.compare val cupIntLists = merge Int.compare val diffIntLists = diff Int.compare val compareIntLists = compareList Int.compare val hashIntList = hashList hashInt val IntList2String = List2String Int.toString end (* stop of ../../Util/intLists.sml *) (* start of ../../Unicode/Chars/dataDict.sml *) structure KeyData : Key = struct type Key = UniChar.Data val null = UniChar.nullData val hash = UniChar.hashData val compare = UniChar.compareData val toString = UniChar.Data2String end structure DataDict = Dict (structure Key = KeyData) structure DataSymTab = SymTable (structure Key = KeyData) (* stop of ../../Unicode/Chars/dataDict.sml *) (* start of ../../Parser/Dfa/dfaData.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaData *) (* *) (* Depends on: *) (* *) (* Exceptions raised by functions in this structure: *) (* boundsFollow : none *) (* mergeFirst : ConflictFirst *) (* mergeFollow : ConflictFollow *) (*--------------------------------------------------------------------------*) signature DfaData = sig type Dfa datatype ContentModel = CM_ELEM of int | CM_OPT of ContentModel | CM_REP of ContentModel | CM_PLUS of ContentModel | CM_ALT of ContentModel list | CM_SEQ of ContentModel list end structure DfaBase = struct (*--- visible to the parser ---*) datatype ContentModel = CM_ELEM of int | CM_OPT of ContentModel | CM_REP of ContentModel | CM_PLUS of ContentModel | CM_ALT of ContentModel list | CM_SEQ of ContentModel list type Sigma = int type State = int val dfaDontCare = ~2 val dfaError = ~1 val dfaInitial = 0 exception DfaTooLarge of int exception Ambiguous of Sigma * int * int exception ConflictFirst of Sigma * State * State exception ConflictFollow of Sigma * State * State type Empty = bool type First = (State * Sigma) list type Follow = First type Info = State * Empty * First datatype CM' = ELEM of Sigma | OPT of CM | REP of CM | PLUS of CM | ALT of CM list | SEQ of CM list withtype CM = CM' * Info type Row = Sigma * Sigma * State vector * bool val nullRow : Row = (1,0,Vector.fromList nil,false) type Dfa = Row vector val emptyDfa : Dfa = Vector.fromList [(1,0,Vector.fromList nil,true)] end structure DfaData = DfaBase : DfaData (* stop of ../../Parser/Dfa/dfaData.sml *) (* start of ../../Unicode/Decode/decodeFile.sml *) (*--------------------------------------------------------------------------*) (* Structure: DecodeBasic *) (* *) (* Exceptions raised by functions in this structure: *) (* closeFile : none *) (* filePos : none *) (* fileName : none *) (* nextByte : EndOfFile *) (* openFile : NoSuchFile *) (*--------------------------------------------------------------------------*) signature DecodeFile = sig structure Bytes : WORD type File type Byte = Bytes.word exception EndOfFile of File val Char2Byte : UniChar.Char -> Byte val Byte2Char : Byte -> UniChar.Char val Byte2Hex : Byte -> string val openFile : Uri.Uri option -> File val closeFile : File -> unit val getByte : File -> Byte * File val ungetBytes : File * Byte list -> File val fileUri : File -> Uri.Uri val fileName : File -> string end structure DecodeFile : DecodeFile = struct open UniChar Uri UtilError structure Bytes = Word8 type Byte = Bytes.word fun Byte2Char b = Chars.fromLargeWord(Bytes.toLargeWord b) fun Byte2Hex b = "0x"^UtilString.toUpperString(StringCvt.padLeft #"0" 2 (Bytes.toString b)) fun Char2Byte c = Bytes.fromLargeWord(Chars.toLargeWord c) type instream = TextIO.instream val closeIn = TextIO.closeIn val input = TextIO.input val input1 = TextIO.input1 val openIn = TextIO.openIn val stdIn = TextIO.stdIn (*--------------------------------------------------------------------*) (* a file type is stdin or a uri with its string representation and *) (* the file it is mapped to. *) (* a file position is a stream, an int position and a file type. *) (* a file is a file position, a buffer, its size and current index. *) (*--------------------------------------------------------------------*) datatype FileType = STD | FNAME of (Uri * string * string * bool) type FilePos = FileType * instream * int type File = FilePos * Word8Vector.vector * int * int exception EndOfFile of File val nullVec = Word8Vector.fromList nil (*--------------------------------------------------------------------*) (* return the uri of a file. *) (*--------------------------------------------------------------------*) fun fileUri ((typ,_,_),_,_,_) = case typ of STD => emptyUri | FNAME(uri,_,_,_) => uri (*--------------------------------------------------------------------*) (* return the uri string name of a file. *) (*--------------------------------------------------------------------*) fun fileName ((typ,_,_),_,_,_) = case typ of STD => "" | FNAME(_,str,_,_) => str (*--------------------------------------------------------------------*) (* return the uri string and the position in the the file. *) (*--------------------------------------------------------------------*) fun filePos ((typ,_,p),_,s,i) = case typ of STD => ("",p+i-s) | FNAME(_,str,_,_) => (str,p+i-s) (*--------------------------------------------------------------------*) (* open a file; report IO errors by raising NoSuchFile. *) (*--------------------------------------------------------------------*) fun openFile uriOpt = let val (typ,stream) = case uriOpt of NONE => (STD,stdIn) | SOME uri => let val (str,fname,tmp) = retrieveUri uri in (FNAME(uri,str,fname,tmp),openIn fname) end handle IO.Io {name,cause,...} => raise NoSuchFile(name,exnMessage cause) in ((typ,stream,0),nullVec,0,0) end (*--------------------------------------------------------------------*) (* close the file; ignore IO errors. *) (*--------------------------------------------------------------------*) fun closeStream (typ,stream,_) = case typ of STD => () | FNAME(_,uri,fname,tmp) => let val _ = closeIn stream handle IO.Io _ => () val _ = (if tmp andalso OS.FileSys.access(fname,nil) then OS.FileSys.remove fname else ()) handle exn as OS.SysErr _ => TextIO.output(TextIO.stdErr,String.concat ["Error removing temporary file ",fname,"for URI",uri, "(",exnMessage exn,")\n"]) in () end fun closeFile (tsp,_,_,_) = closeStream tsp (*--------------------------------------------------------------------*) (* read a byte from the file; if at the end of buffer, reload it. *) (* if a reload fails or returns an IO error, raise EndOfFile. --------*) (*--------------------------------------------------------------------*) fun getByte (tsp,vec,s,i) = if i nullVec val s = Word8Vector.length v in if s=0 then let val _ = closeStream tsp in raise EndOfFile(tsp,v,0,0) end else (Word8Vector.sub(v,0),((typ,stream,pos+s),v,s,1)) end (*--------------------------------------------------------------------*) (* un-get some bytes. this should only happen while checking for a *) (* byte-order mark or xml/text declaration. It should be efficient in *) (* that case, otherwise might be very space-consuming. *) (*--------------------------------------------------------------------*) fun ungetBytes ((tsp,vec,s,i),bs) = let val len = length bs in if len<=i then (tsp,vec,s,i-len) else let val diff = len-i val vec0 = Word8Vector.fromList(List.take(bs,diff)) in (tsp,Word8Vector.concat [vec0,vec],s+diff,0) end end end (* stop of ../../Unicode/Decode/decodeFile.sml *) (* start of ../../Unicode/Decode/decodeError.sml *) (*--------------------------------------------------------------------------*) (* Structure: DecodeError *) (* *) (* Exceptions raised by functions in this structure: *) (* decodeMessage : none *) (*--------------------------------------------------------------------------*) signature DecodeError = sig datatype DecodeError = ERR_ILLEGAL_CHAR of DecodeFile.Byte * string | ERR_NON_UNI_UCS4 of UniChar.Char | ERR_EOF_UCS4 of int * DecodeFile.Byte list | ERR_NON_DIRECT_UTF7 of DecodeFile.Byte | ERR_PADDING_UTF7 of UniChar.Char | ERR_ILLFORMED_UTF8 of DecodeFile.Byte * int * int | ERR_ILLEGAL_UTF8 of DecodeFile.Byte | ERR_INVALID_UTF8_SEQ of DecodeFile.Byte list | ERR_EOF_UTF8 of int * int | ERR_NON_UNI_UTF8 of UniChar.Char * int | ERR_EOF_UCS2 of DecodeFile.Byte | ERR_EOF_UTF16 of DecodeFile.Byte | ERR_LOW_SURROGATE of UniChar.Char | ERR_HIGH_SURROGATE of UniChar.Char * UniChar.Char | ERR_EOF_SURROGATE of UniChar.Char | ERR_NO_ENC_DECL of string | ERR_UNSUPPORTED_ENC of string | ERR_INCOMPATIBLE_ENC of string * string val decodeMessage : DecodeError -> string list exception DecodeError of DecodeFile.File * bool * DecodeError end structure DecodeError : DecodeError = struct open DecodeFile UtilString UniChar datatype DecodeError = ERR_ILLEGAL_CHAR of DecodeFile.Byte * string | ERR_NON_UNI_UCS4 of UniChar.Char | ERR_EOF_UCS4 of int * DecodeFile.Byte list | ERR_NON_DIRECT_UTF7 of DecodeFile.Byte | ERR_PADDING_UTF7 of UniChar.Char | ERR_ILLFORMED_UTF8 of DecodeFile.Byte * int * int | ERR_ILLEGAL_UTF8 of DecodeFile.Byte | ERR_INVALID_UTF8_SEQ of DecodeFile.Byte list | ERR_EOF_UTF8 of int * int | ERR_NON_UNI_UTF8 of UniChar.Char * int | ERR_EOF_UCS2 of DecodeFile.Byte | ERR_EOF_UTF16 of DecodeFile.Byte | ERR_LOW_SURROGATE of UniChar.Char | ERR_HIGH_SURROGATE of UniChar.Char * UniChar.Char | ERR_EOF_SURROGATE of UniChar.Char | ERR_NO_ENC_DECL of string | ERR_UNSUPPORTED_ENC of string | ERR_INCOMPATIBLE_ENC of string * string fun Char2Hex c = "0x"^UtilString.toUpperString(StringCvt.padLeft #"0" 4 (Chars.toString c)) fun decodeMessage err = case err of ERR_ILLEGAL_CHAR(b,what) => [Byte2Hex b,"is not",prependAnA what,"character"] | ERR_NON_UNI_UCS4 c => ["UCS-4 coded non-Unicode character",Char2Uni c] | ERR_EOF_UCS4(pos,bytes) => ["End of file after",Int2String pos,"bytes of UCS-4 character", "starting with ",List2String0 Byte2Hex bytes] | ERR_NON_DIRECT_UTF7 b => ["Indirect UTF-7 character ",Byte2Hex b,"in non-shifted mode"] | ERR_PADDING_UTF7 pad => ["Non-zero padding",Char2Hex pad,"at end of UTF-7 shifted sequence"] | ERR_ILLFORMED_UTF8 (b,len,pos) => [numberNth pos,"byte",Byte2Hex b,"of a",Int2String len^"-byte", "UTF-8 sequence does not start with bits 10"] | ERR_ILLEGAL_UTF8 b => ["Byte",Byte2Hex b,"is neither ASCII nor does it start", "a valid multi-byte UTF-8 sequence"] | ERR_EOF_UTF8 (len,pos) => ["End of file terminates a ",Int2String len^"-byte", "UTF-8 sequence before the ",numberNth pos,"byte"] | ERR_NON_UNI_UTF8 (c,len) => [Int2String len^"-byte UTF-8 sequence decodes to non-Unicode character",Char2Uni c] | ERR_INVALID_UTF8_SEQ bs => ["Invalid UTF-8 sequence",List2xString (""," ","") Byte2Hex bs] | ERR_EOF_UCS2 b => ["End of file before second byte of UCS-2 character starting with",Byte2Hex b] | ERR_EOF_UTF16 b => ["End of file before second byte of UTF-16 character starting with",Byte2Hex b] | ERR_LOW_SURROGATE c => ["Low surrogate",Char2Uni c,"without preceding high surrogate"] | ERR_HIGH_SURROGATE (c,c1) => ["High surrogate",Char2Uni c,"followed by",Char2Uni c1,"instead of low surrogate"] | ERR_EOF_SURROGATE c => ["High surrogate",Char2Uni c,"followed by the end of file"] | ERR_NO_ENC_DECL auto => ["Couldn't parse encoding declaration but auto-detected encoding",auto,"required so"] | ERR_UNSUPPORTED_ENC enc => ["Unsupported encoding",enc] | ERR_INCOMPATIBLE_ENC (enc,auto) => ["Encoding",enc,"is incompatible with auto-detected encoding",auto] exception DecodeError of File * bool * DecodeError end (* stop of ../../Unicode/Decode/decodeError.sml *) (* start of ../../Unicode/Decode/decodeUtil.sml *) (* require "basis.__word"; require "basis.__word8"; require "chars"; require "decodeBasic"; require "decodeError"; *) (*--------------------------------------------------------------------------*) (* Structure: DecodeUtil *) (* *) (* Exceptions raised by functions in this structure: *) (* combineSurrogates : none *) (* combineUcs4big : none *) (* combineUcs4little : none *) (* combineUcs4strangeBig : none *) (* combineUcs4strangeLittle : none *) (* combineUtf16big : none *) (* combineUtf16little : none *) (* isLowSurrogate : none *) (* isHighSurrogate : none *) (* isSurrogate : none *) (*--------------------------------------------------------------------------*) signature DecodeUtil = sig val isSurrogate : UniChar.Char -> bool val isLowSurrogate : UniChar.Char -> bool val isHighSurrogate : UniChar.Char -> bool val combineSurrogates : UniChar.Char * UniChar.Char -> UniChar.Char end structure DecodeUtil : DecodeUtil = struct open UniChar DecodeFile DecodeError fun isSurrogate c = Chars.orb(c,0wx7FF)=0wxDFFF fun isLowSurrogate c = Chars.orb(c,0wx3FF)=0wxDFFF fun isHighSurrogate c = Chars.orb(c,0wx3FF)=0wxDBFF fun combineSurrogates(hi,lo) = (hi-0wxD800)*0wx400+lo+0wx2400 : Char end (* stop of ../../Unicode/Decode/decodeUtil.sml *) (* start of ../../Unicode/Decode/decodeUcs2.sml *) signature DecodeUcs2 = sig val getCharUcs2b : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharUcs2l : DecodeFile.File -> UniChar.Char * DecodeFile.File end structure DecodeUcs2 : DecodeUcs2 = struct open UniChar Encoding DecodeFile DecodeError DecodeUtil fun getCharUcs2b f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS2 b1) val c = Chars.orb(Chars.<<(Byte2Char b1,0w8),Byte2Char b2) in (c,f2) end fun getCharUcs2l f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS2 b1) val c = Chars.orb(Chars.<<(Byte2Char b2,0w8),Byte2Char b1) in (c,f2) end end (* stop of ../../Unicode/Decode/decodeUcs2.sml *) (* start of ../../Unicode/Decode/decodeMisc.sml *) signature DecodeMisc = sig val getCharAscii : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharEbcdic : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharEof : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharLatin1 : DecodeFile.File -> UniChar.Char * DecodeFile.File end structure DecodeMisc : DecodeMisc = struct open UniChar DecodeFile DecodeError fun getCharEof f = raise EndOfFile f (*--------------------------------------------------------------------*) (* ASCII characters must be lower than 0wx80 *) (*--------------------------------------------------------------------*) fun getCharAscii f = let val (b,f1) = getByte f in if b<0wx80 then (Byte2Char b,f1) else raise DecodeError(f1,false,ERR_ILLEGAL_CHAR(b,"ASCII")) end (*--------------------------------------------------------------------*) (* LATIN-1 is the first plane of Unicode. *) (*--------------------------------------------------------------------*) fun getCharLatin1 f = let val (b,f1) = getByte f in (Byte2Char b,f1) end (*--------------------------------------------------------------------*) (* EBCDIC is mapped to the first plane of Unicode. *) (*--------------------------------------------------------------------*) (* according to rfc-1345 (and gnu recode experiments) *) val ebcdic2latinTab = Vector.fromList [0wx00,0wx01,0wx02,0wx03,0wx9C,0wx09,0wx86,0wx7F, 0wx97,0wx8D,0wx8E,0wx0B,0wx0C,0wx0D,0wx0E,0wx0F, 0wx10,0wx11,0wx12,0wx13,0wx9D,0wx85,0wx08,0wx87, 0wx18,0wx19,0wx92,0wx8F,0wx1C,0wx1D,0wx1E,0wx1F, 0wx80,0wx81,0wx82,0wx83,0wx84,0wx0A,0wx17,0wx1B, 0wx88,0wx89,0wx8A,0wx8B,0wx8C,0wx05,0wx06,0wx07, 0wx90,0wx91,0wx16,0wx93,0wx94,0wx95,0wx96,0wx04, 0wx98,0wx99,0wx9A,0wx9B,0wx14,0wx15,0wx9E,0wx1A, 0wx20,0wxA0,0wxA1,0wxA2,0wxA3,0wxA4,0wxA5,0wxA6, 0wxA7,0wxA8,0wx5B,0wx2E,0wx3C,0wx28,0wx2B,0wx21, 0wx26,0wxA9,0wxAA,0wxAB,0wxAC,0wxAD,0wxAE,0wxAF, 0wxB0,0wxB1,0wx5D,0wx24,0wx2A,0wx29,0wx3B,0wx5E, 0wx2D,0wx2F,0wxB2,0wxB3,0wxB4,0wxB5,0wxB6,0wxB7, 0wxB8,0wxB9,0wx7C,0wx2C,0wx25,0wx5F,0wx3E,0wx3F, 0wxBA,0wxBB,0wxBC,0wxBD,0wxBE,0wxBF,0wxC0,0wxC1, 0wxC2,0wx60,0wx3A,0wx23,0wx40,0wx27,0wx3D,0wx22, 0wxC3,0wx61,0wx62,0wx63,0wx64,0wx65,0wx66,0wx67, 0wx68,0wx69,0wxC4,0wxC5,0wxC6,0wxC7,0wxC8,0wxC9, 0wxCA,0wx6A,0wx6B,0wx6C,0wx6D,0wx6E,0wx6F,0wx70, 0wx71,0wx72,0wxCB,0wxCC,0wxCD,0wxCE,0wxCF,0wxD0, 0wxD1,0wx7E,0wx73,0wx74,0wx75,0wx76,0wx77,0wx78, 0wx79,0wx7A,0wxD2,0wxD3,0wxD4,0wxD5,0wxD6,0wxD7, 0wxD8,0wxD9,0wxDA,0wxDB,0wxDC,0wxDD,0wxDE,0wxDF, 0wxE0,0wxE1,0wxE2,0wxE3,0wxE4,0wxE5,0wxE6,0wxE7, 0wx7B,0wx41,0wx42,0wx43,0wx44,0wx45,0wx46,0wx47, 0wx48,0wx49,0wxE8,0wxE9,0wxEA,0wxEB,0wxEC,0wxED, 0wx7D,0wx4A,0wx4B,0wx4C,0wx4D,0wx4E,0wx4F,0wx50, 0wx51,0wx52,0wxEE,0wxEF,0wxF0,0wxF1,0wxF2,0wxF3, 0wx5C,0wx9F,0wx53,0wx54,0wx55,0wx56,0wx57,0wx58, 0wx59,0wx5A,0wxF4,0wxF5,0wxF6,0wxF7,0wxF8,0wxF9, 0wx30,0wx31,0wx32,0wx33,0wx34,0wx35,0wx36,0wx37, 0wx38,0wx39,0wxFA,0wxFB,0wxFC,0wxFD,0wxFE,0wxFF ] fun ebcdic2latin b = Vector.sub(ebcdic2latinTab,Word8.toInt b) fun getCharEbcdic f = let val (b,f1) = getByte f in (ebcdic2latin b,f1) end end (* stop of ../../Unicode/Decode/decodeMisc.sml *) (* start of ../../Unicode/Decode/decodeUcs4.sml *) signature DecodeUcs4 = sig val getCharUcs4b : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharUcs4l : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharUcs4sb : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharUcs4sl : DecodeFile.File -> UniChar.Char * DecodeFile.File end structure DecodeUcs4 : DecodeUcs4 = struct open UniChar UniClasses DecodeFile DecodeError DecodeUtil fun getCharUcs4b f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1])) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2])) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2,b3])) val c = Chars.orb(Chars.orb(Chars.<<(Byte2Char b1,0w24), Chars.<<(Byte2Char b2,0w16)), Chars.orb(Chars.<<(Byte2Char b3,0w08), Byte2Char b4)) in if isUnicode c then (c,f4) else raise DecodeError(f4,false,ERR_NON_UNI_UCS4 c) end fun getCharUcs4l f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1])) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2])) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2,b3])) val c = Chars.orb(Chars.orb(Chars.<<(Byte2Char b4,0w24), Chars.<<(Byte2Char b3,0w16)), Chars.orb(Chars.<<(Byte2Char b2,0w08), Byte2Char b1)) in if isUnicode c then (c,f4) else raise DecodeError(f4,false,ERR_NON_UNI_UCS4 c) end fun getCharUcs4sb f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1])) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2])) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2,b3])) val c = Chars.orb(Chars.orb(Chars.<<(Byte2Char b2,0w24), Chars.<<(Byte2Char b1,0w16)), Chars.orb(Chars.<<(Byte2Char b4,0w08), Byte2Char b3)) in if isUnicode c then (c,f4) else raise DecodeError(f4,false,ERR_NON_UNI_UCS4 c) end fun getCharUcs4sl f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1])) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2])) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UCS4(1,[b1,b2,b3])) val c = Chars.orb(Chars.orb(Chars.<<(Byte2Char b3,0w24), Chars.<<(Byte2Char b4,0w16)), Chars.orb(Chars.<<(Byte2Char b1,0w08), Byte2Char b2)) in if isUnicode c then (c,f4) else raise DecodeError(f4,false,ERR_NON_UNI_UCS4 c) end end (* stop of ../../Unicode/Decode/decodeUcs4.sml *) (* start of ../../Unicode/Decode/decodeUtf16.sml *) signature DecodeUtf16 = sig val getCharUtf16b : DecodeFile.File -> UniChar.Char * DecodeFile.File val getCharUtf16l : DecodeFile.File -> UniChar.Char * DecodeFile.File end structure DecodeUtf16 : DecodeUtf16 = struct open UniChar Encoding DecodeFile DecodeError DecodeUtil fun getCharUtf16b f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF16 b1) val c = Chars.orb(Chars.<<(Byte2Char b1,0w8),Byte2Char b2) in if isSurrogate c then (* Chars.orb(c,0wx7FF)=0wxDFFF *) if isLowSurrogate c then raise DecodeError(f2,false,ERR_LOW_SURROGATE c) else let val (b3,f3) = getByte f2 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_SURROGATE c) val (b4,f4) = getByte f3 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF16 b3) val c1 = Chars.orb(Chars.<<(Byte2Char b3,0w8),Byte2Char b4) in if isLowSurrogate c1 then (combineSurrogates(c,c1),f4) else raise DecodeError(f4,false,ERR_HIGH_SURROGATE(c,c1)) end else (c,f2) end fun getCharUtf16l f = let val (b1,f1) = getByte f val (b2,f2) = getByte f1 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF16 b1) val c = Chars.orb(Chars.<<(Byte2Char b2,0w8),Byte2Char b1) in if isSurrogate c then if isLowSurrogate c then raise DecodeError(f2,false,ERR_LOW_SURROGATE c) else let val (b3,f3) = getByte f2 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_SURROGATE c) val (b4,f4) = getByte f3 handle exn as EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF16 b3) val c1 = Chars.orb(Chars.<<(Byte2Char b4,0w8),Byte2Char b3) in if isLowSurrogate c1 then (combineSurrogates(c,c1),f4) else raise DecodeError(f4,false,ERR_HIGH_SURROGATE(c,c1)) end else (c,f2) end end (* stop of ../../Unicode/Decode/decodeUtf16.sml *) (* start of ../../Unicode/Decode/decodeUtf8.sml *) signature DecodeUtf8 = sig val getCharUtf8 : DecodeFile.File -> UniChar.Char * DecodeFile.File end structure DecodeUtf8 : DecodeUtf8 = struct open UniChar UniClasses UtilError UtilInt DecodeFile DecodeError DecodeUtil val THIS_MODULE = "DecodeUtf8" infix 8 <<< infix 7 && infix 6 ||| val op && = Bytes.andb val op <<< = Chars.<< val op ||| = Chars.orb val byte1switch = Vector.tabulate (256,fn i => if i<0x80 then 1 else if i<0xC0 then 0 else if i<0xE0 then 2 else if i<0xF0 then 3 else if i<0xF8 then 4 else if i<0xFC then 5 else if i<0xFE then 6 else 0) val diff2 : Char = 0wx00003080 val diff3 : Char = diff2 <<< 0wx6 ||| 0wx00020080 val diff4 : Char = diff3 <<< 0wx6 ||| 0wx00400080 val diff5 : Char = diff4 <<< 0wx6 ||| 0wx08000080 val diff6 : Char = diff5 <<< 0wx6 ||| 0wx00000080 fun getCharUtf8 f = let val (b1,f1) = getByte f in if b1<0wx80 then (Byte2Char b1,f1) else let val n = Vector.sub(byte1switch,Word8.toInt b1) in case n of 0 (* error *) => raise DecodeError(f1,false,ERR_ILLEGAL_UTF8 b1) | 1 => (Byte2Char b1,f1) | 2 => let val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,2)) in if b2 && 0wxC0 <> 0wx80 then raise DecodeError(f2,false,ERR_ILLFORMED_UTF8(b2,n,2)) else let val c = Byte2Char b1 <<< 0w6 + Byte2Char b2 - diff2 in if c>=0wx80 then (c,f2) else raise DecodeError(f2,false,ERR_INVALID_UTF8_SEQ [b1,b2]) end end | 3 => let val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,2)) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,3)) in if b2 && 0wxC0 <> 0wx80 then raise DecodeError(f3,false,ERR_ILLFORMED_UTF8(b2,n,2)) else if b3 && 0wxC0 <> 0wx80 then raise DecodeError(f3,false,ERR_ILLFORMED_UTF8(b2,n,3)) else let val c = (Byte2Char b1 <<< 0w12 + Byte2Char b2 <<< 0w06 + Byte2Char b3 - diff3) in if c>=0wx800 then (c,f3) else raise DecodeError (f3,false,ERR_INVALID_UTF8_SEQ [b1,b2,b3]) end end | 4 => let val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,2)) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,3)) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,4)) in if b2 && 0wxC0 <> 0wx80 then raise DecodeError(f4,false,ERR_ILLFORMED_UTF8(b2,n,2)) else if b3 && 0wxC0 <> 0wx80 then raise DecodeError(f4,false,ERR_ILLFORMED_UTF8(b2,n,3)) else if b4 && 0wxC0 <> 0wx80 then raise DecodeError(f4,false,ERR_ILLFORMED_UTF8(b2,n,4)) else let val c = (Byte2Char b1 <<< 0w18 + Byte2Char b2 <<< 0w12 + Byte2Char b3 <<< 0w06 + Byte2Char b4 - diff4) in if c>=0wx100000 andalso c<=0wx10FFFF then (c,f4) else if c<0wx10000 then raise DecodeError (f4,false,ERR_INVALID_UTF8_SEQ [b1,b2,b3,b4]) else raise DecodeError (f4,false,ERR_NON_UNI_UTF8(c,n)) end end | 5 => let val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,2)) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,3)) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,4)) val (b5,f5) = getByte f4 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,5)) in if b2 && 0wxC0 <> 0wx80 then raise DecodeError(f5,false,ERR_ILLFORMED_UTF8(b2,n,2)) else if b3 && 0wxC0 <> 0wx80 then raise DecodeError(f5,false,ERR_ILLFORMED_UTF8(b2,n,3)) else if b4 && 0wxC0 <> 0wx80 then raise DecodeError(f5,false,ERR_ILLFORMED_UTF8(b2,n,4)) else if b5 && 0wxC0 <> 0wx80 then raise DecodeError(f5,false,ERR_ILLFORMED_UTF8(b2,n,5)) else let val c = (Byte2Char b1 <<< 0w24 + Byte2Char b2 <<< 0w18 + Byte2Char b3 <<< 0w12 + Byte2Char b4 <<< 0w06 + Byte2Char b5 - diff5) in if c<0wx200000 then raise DecodeError (f5,false,ERR_INVALID_UTF8_SEQ [b1,b2,b3,b4,b5]) else raise DecodeError (f5,false,ERR_NON_UNI_UTF8(c,n)) end end | 6 => let val (b2,f2) = getByte f1 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,2)) val (b3,f3) = getByte f2 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,3)) val (b4,f4) = getByte f3 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,4)) val (b5,f5) = getByte f4 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,5)) val (b6,f6) = getByte f5 handle EndOfFile f => raise DecodeError(f,true,ERR_EOF_UTF8(n,6)) in if b2 && 0wxC0 <> 0wx80 then raise DecodeError(f6,false,ERR_ILLFORMED_UTF8(b2,n,2)) else if b3 && 0wxC0 <> 0wx80 then raise DecodeError(f6,false,ERR_ILLFORMED_UTF8(b2,n,3)) else if b4 && 0wxC0 <> 0wx80 then raise DecodeError(f6,false,ERR_ILLFORMED_UTF8(b2,n,4)) else if b5 && 0wxC0 <> 0wx80 then raise DecodeError(f6,false,ERR_ILLFORMED_UTF8(b2,n,5)) else if b6 && 0wxC0 <> 0wx80 then raise DecodeError(f6,false,ERR_ILLFORMED_UTF8(b2,n,6)) else let val c = (Byte2Char b1 <<< 0w30 + Byte2Char b2 <<< 0w24 + Byte2Char b3 <<< 0w18 + Byte2Char b4 <<< 0w12 + Byte2Char b5 <<< 0w06 + Byte2Char b6 - diff6) in if c<0wx4000000 then raise DecodeError (f6,false,ERR_INVALID_UTF8_SEQ [b1,b2,b3,b4,b5,b6]) else raise DecodeError (f6,false,ERR_NON_UNI_UTF8(c,n)) end end | _ => raise InternalError(THIS_MODULE,"getCharUtf8", "byte1switch holds "^Int.toString n^ ">6 for byte "^Bytes.toString b1) end end end (* stop of ../../Unicode/Decode/decodeUtf8.sml *) (* start of ../../Unicode/Decode/decode.sml *) (*--------------------------------------------------------------------------*) (* Structure: Decode *) (* *) (* Exceptions raised by functions in this structure: *) (* checkEncoding : NoSuchFile *) (* encCloseFile : none *) (* encFileName : none *) (*--------------------------------------------------------------------------*) signature Decode = sig structure Error : DecodeError type DecFile exception DecEof of DecFile exception DecError of DecFile * bool * Error.DecodeError val decUri : DecFile -> Uri.Uri val decName : DecFile -> string val decEncoding : DecFile -> Encoding.Encoding val decOpenXml : Uri.Uri option -> DecFile val decOpenUni : Uri.Uri option * Encoding.Encoding -> DecFile val decClose : DecFile -> DecFile val decCommit : DecFile -> unit val decSwitch : DecFile * string -> DecFile val decGetChar : DecFile -> UniChar.Char * DecFile val decGetArray : DecFile -> UniChar.Char array -> int * DecFile * Error.DecodeError option end structure Decode : Decode = struct structure Error = DecodeError open UniChar Encoding Error DecodeFile DecodeMisc DecodeUcs2 DecodeUcs4 DecodeUtf16 DecodeUtf8 DecodeUtil type DecFile = Encoding * File exception DecEof of DecFile exception DecError of DecFile * bool * DecodeError (*--------------------------------------------------------------------*) (* close an encoded entity. *) (*--------------------------------------------------------------------*) fun decClose (_,f) = (NOENC,f) before closeFile f (*--------------------------------------------------------------------*) (* get the uri string of an encoded entity. *) (*--------------------------------------------------------------------*) fun decName (_,f) = fileName f (*--------------------------------------------------------------------*) (* get the uri of an encoded entity. *) (*--------------------------------------------------------------------*) fun decUri (_,f) = fileUri f (*--------------------------------------------------------------------*) (* get the encoding of an encoded entity. *) (*--------------------------------------------------------------------*) fun decEncoding (enc,_) = enc (*--------------------------------------------------------------------*) (* commit the auto-detected encoding. *) (*--------------------------------------------------------------------*) fun decCommit (enc,f) = case enc of UTF8 => () | UTF16B => () | UTF16L => () | _ => raise DecError((enc,f),false,ERR_NO_ENC_DECL(encodingName enc)) (*--------------------------------------------------------------------*) (* change to another - compatible - encoding. *) (*--------------------------------------------------------------------*) fun decSwitch ((enc,f),decl) = let val decEnc = isEncoding decl val _ = if decEnc<>NOENC then () else raise DecError((enc,f),false,ERR_UNSUPPORTED_ENC decl) val newEnc = switchEncoding(enc,decEnc) val _ = if decEnc<>NOENC orelse enc=NOENC then () else raise DecError((enc,f),false,ERR_INCOMPATIBLE_ENC(encodingName enc,decl)) in (newEnc,f) end (*--------------------------------------------------------------------*) (* get a character from an encoded entity. *) (*--------------------------------------------------------------------*) fun decGetChar (enc,f) = let val (c,f1) = case enc of NOENC => raise EndOfFile f | ASCII => getCharAscii f | EBCDIC => getCharEbcdic f | LATIN1 => getCharLatin1 f | UCS2B => getCharUcs2b f | UCS2L => getCharUcs2l f | UCS4B => getCharUcs4b f | UCS4L => getCharUcs4l f | UCS4SB => getCharUcs4sb f | UCS4SL => getCharUcs4sl f | UTF8 => getCharUtf8 f | UTF16B => getCharUtf16b f | UTF16L => getCharUtf16l f in (c,(enc,f1)) end handle EndOfFile f => raise DecEof(NOENC,f) | DecodeError(f,eof,err) => raise DecError((enc,f),eof,err) (*--------------------------------------------------------------------*) (* Load new characters, depending on the current entity's encoding. *) (*--------------------------------------------------------------------*) fun decGetArray (enc,f) arr = let (*--------------------------------------------------------------*) (* Load the buffer with len new characters, or until the entity *) (* end is reached. Close the current file in that case. *) (* Local exception Ended is needed in order to preserve tail *) (* recursion. *) (*--------------------------------------------------------------*) fun loadArray getChar = let val ende = Array.length arr exception Error of int * exn fun doit (idx,f) = if idx=ende then (ende,(enc,f),NONE) else let val (c,f1) = getChar f handle exn => raise Error (idx,exn) val _ = Array.update(arr,idx,c) in doit (idx+1,f1) end in doit (0,f) handle Error(idx,exn) => case exn of EndOfFile f => (idx,(NOENC,f),NONE) | DecodeError (f,_,err) => (idx,(enc,f),SOME err) | _ => raise exn end in case enc of NOENC => (0,(NOENC,f),NONE) | ASCII => loadArray getCharAscii | EBCDIC => loadArray getCharEbcdic | LATIN1 => loadArray getCharLatin1 | UCS2B => loadArray getCharUcs2b | UCS2L => loadArray getCharUcs2l | UCS4B => loadArray getCharUcs4b | UCS4L => loadArray getCharUcs4l | UCS4SB => loadArray getCharUcs4sb | UCS4SL => loadArray getCharUcs4sl | UTF8 => loadArray getCharUtf8 | UTF16B => loadArray getCharUtf16b | UTF16L => loadArray getCharUtf16l end (*--------------------------------------------------------------------*) (* open an XML file and try to auto-detect its encoding. *) (*--------------------------------------------------------------------*) (* Auto-detection of the encoding of XML entities according to App. F *) (* of the XML recommendation. *) (* *) (* The file is opened in basic mode and upto four bytes are read from *) (* it in order to detect the encoding: if they constitute a prefix of *) (* " (nil,f) fun detect bs = case bs of [0wx0,0wx0,0wxFE,0wxFF] => (UCS4B,nil) | [0wxFF,0wxFE,0wx0,0wx0] => (UCS4L,nil) | [0wxFE,0wxFF,0wx0,b4] => if b4 <> 0wx0 then (UTF16B,[0wx0,b4]) else (UTF8,bs) | [0wxFF,0wxFE,b3,0wx0] => if b3 <> 0wx0 then (UTF16L,[b3,0wx0]) else (UTF8,bs) | [0wxEF,0wxBB,0wxBF,b4] => (UTF8,[b4]) | [0wx0,0wx0,0wx0,0wx3C] => (UCS4B,bs) | [0wx3C,0wx0,0wx0,0wx0] => (UCS4L,bs) | [0wx0,0wx0,0wx3C,0wx0] => (UCS4SB,bs) | [0wx0,0wx3C,0wx0,0wx0] => (UCS4SL,bs) | [0wx0,b2,b3,b4] => if (b2=0wx3C orelse b2=0wx25 orelse b2=0wx20 orelse b2=0wx09 orelse b2=0wx0D orelse b2=0wx0A) andalso (b3<>0wx0 orelse b4<>0wx0) then (UTF16B,bs) else (UTF8,bs) | [b1,0wx0,b3,b4] => if (b1=0wx3C orelse b1=0wx25 orelse b1=0wx20 orelse b1=0wx09 orelse b1=0wx0D orelse b1=0wx0A) andalso (b3<>0wx0 orelse b4<>0wx0) then (UTF16L,bs) else (UTF8,bs) | [0wx4C,0wx6F,0wxA7,0wx94] => (EBCDIC,bs) | _ => (UTF8,bs) val f = openFile uri val (bs,f1) = get4Bytes(0,f) val (enc,unget) = detect bs in (enc,ungetBytes(f1,unget)) end (*--------------------------------------------------------------------*) (* open a Unicode file. Check whether it starts with a byte order *) (* mark. If yes, chose UTF16 encoding, otherwise use the default that *) (* is provided as second argument. *) (* *) (* return the encoded file, a list of bytes looked ahead and the *) (* encoding. *) (*--------------------------------------------------------------------*) fun decOpenUni (uri,default) = let fun def(f,bs) = (default,ungetBytes(f,bs)) fun detect f = let val (b1,f1) = getByte f in case b1 of 0wxFE => (let val (b2,f2) = getByte f1 in if b2 = 0wxFF then (UTF16B,f2) else def(f2,[b1,b2]) end handle EndOfFile f => def(f,[b1])) | 0wxFF => (let val (b2,f2) = getByte f1 in if b2 = 0wxFE then (UTF16L,f2) else def(f2,[b1,b2]) end handle EndOfFile f => def(f,[b1])) | _ => def(f1,[b1]) end handle EndOfFile f => def(f,nil) val f = openFile uri val (enc,f1) = detect f in (enc,f1) end end (* stop of ../../Unicode/Decode/decode.sml *) (* start of ../../Parser/Error/errorData.sml *) structure ErrorData = struct (*--------------------------------------------------------------------*) (* a position holds the filename, line and column number. *) (*--------------------------------------------------------------------*) type Position = string * int * int val nullPosition = ("",0,0) datatype ExpItem = EXP_CHAR of UniChar.Char | EXP_DATA of UniChar.Data | EXP_STRING of string type Expected = ExpItem list type Found = UniChar.Data datatype Location = LOC_NONE | LOC_AFTER_DTD | LOC_ATT_DECL | LOC_ATT_DEFAULT of Position | LOC_ATT_VALUE | LOC_CDATA | LOC_CHOICE | LOC_COMMENT | LOC_CONTENT | LOC_DECL | LOC_DOC_DECL | LOC_ELEM_DECL | LOC_ENCODING | LOC_ENT_DECL | LOC_ENT_VALUE | LOC_EPILOG | LOC_ETAG | LOC_IGNORED | LOC_INCLUDED | LOC_INT_DECL | LOC_INT_SUBSET | LOC_LITERAL | LOC_MIXED | LOC_NOT_DECL | LOC_OUT_COND | LOC_PROC | LOC_PROLOG | LOC_PUB_LIT | LOC_SEQ | LOC_STAG | LOC_SUBSET | LOC_SYS_LIT | LOC_TEXT_DECL | LOC_VERSION | LOC_XML_DECL datatype EntityClass = ENT_GENERAL | ENT_PARAMETER | ENT_EXTERNAL | ENT_UNPARSED datatype Item = IT_ATT_NAME | IT_CDATA | IT_CHAR of UniChar.Char | IT_CHAR_REF | IT_COND | IT_DATA of UniChar.Data | IT_DECL | IT_DTD | IT_ELEM | IT_ENT_NAME | IT_ETAG | IT_GEN_ENT | IT_ID_NAME | IT_LANG_ID | IT_NAME | IT_NMTOKEN | IT_NOT_NAME | IT_NOTATION | IT_PAR_ENT | IT_PAR_REF | IT_REF | IT_STAG | IT_TARGET datatype Error = (* syntax errors *) ERR_EMPTY of Location | ERR_ENDED_BY_EE of Location | ERR_EXPECTED of Expected * Found | ERR_NON_XML_CHAR of UniChar.Char | ERR_MISSING_WHITE | ERR_NON_XML_CHARREF of UniChar.Char (* other well-formedness errors *) | ERR_CANT_PARSE of Location | ERR_ELEM_ENT_NESTING of UniChar.Data | ERR_ELEM_TYPE_MATCH of UniChar.Data * UniChar.Data | ERR_OMITTED_END_TAG of UniChar.Data | ERR_IGNORED_END_TAG of UniChar.Data * UniChar.Data | ERR_ENDED_IN_PROLOG | ERR_FORBIDDEN_HERE of Item * Location | ERR_ILLEGAL_ENTITY of EntityClass * UniChar.Data * Location | ERR_MULTIPLE_DTD | ERR_MULT_ATT_SPEC of UniChar.Data | ERR_RECURSIVE_ENTITY of EntityClass * UniChar.Data | ERR_UNDEC_ENTITY of EntityClass * UniChar.Data (* validity errors concerning attributes *) | ERR_AT_LEAST_ONE of Item | ERR_AT_MOST_ONE of Item | ERR_ATT_IS_NOT of UniChar.Data * Item | ERR_EXACTLY_ONE of Item | ERR_FIXED_VALUE of UniChar.Data * UniChar.Vector * UniChar.Vector | ERR_ID_DEFAULT | ERR_MISSING_ATT of UniChar.Data | ERR_MULT_ID_ELEM of UniChar.Data | ERR_MUST_BE_AMONG of Item * UniChar.Data * UniChar.Data list | ERR_MUST_BE_UNPARSED of UniChar.Data * Location | ERR_REPEATED_ID of UniChar.Data | ERR_UNDECL_ATT of UniChar.Data * UniChar.Data | ERR_UNDECL_ID of UniChar.Data * Position list (* validity errors concerning elements *) | ERR_BAD_ELEM of UniChar.Data * UniChar.Data | ERR_ELEM_CONTENT of Item | ERR_EMPTY_TAG of UniChar.Data | ERR_ENDED_EARLY of UniChar.Data | ERR_MULT_MIXED of UniChar.Data | ERR_NONEMPTY of UniChar.Data | ERR_REDEC_ELEM of UniChar.Data | ERR_ROOT_ELEM of UniChar.Data * UniChar.Data (* other validity errors *) | ERR_DECL_ENT_NESTING of Location | ERR_EE_INT_SUBSET | ERR_GROUP_ENT_NESTING of Location | ERR_NO_DTD | ERR_STANDALONE_DEF of UniChar.Data | ERR_STANDALONE_ELEM of UniChar.Data | ERR_STANDALONE_ENT of EntityClass *UniChar.Data | ERR_STANDALONE_NORM of UniChar.Data | ERR_UNDECLARED of Item * UniChar.Data * Location (* miscellaneous errors *) | ERR_DECL_PREDEF of UniChar.Data * UniChar.Vector | ERR_NO_SUCH_FILE of string * string | ERR_RESERVED of UniChar.Data * Item | ERR_VERSION of string | ERR_XML_SPACE (* compatibility errors *) | ERR_AMBIGUOUS of UniChar.Data * int * int | ERR_MUST_ESCAPE of UniChar.Char (* interoperability errors *) | ERR_EMPTY_TAG_INTER of UniChar.Data | ERR_MUST_BE_EMPTY of UniChar.Data (* decoding errors *) | ERR_DECODE_ERROR of Decode.Error.DecodeError datatype Warning = WARN_NO_XML_DECL | WARN_MULT_DECL of Item * UniChar.Data | WARN_SHOULD_DECLARE of UniChar.Data list | WARN_ATT_UNDEC_ELEM of UniChar.Data | WARN_MULT_ATT_DECL of UniChar.Data | WARN_MULT_ATT_DEF of UniChar.Data * UniChar.Data | WARN_ENUM_ATTS of UniChar.Data * UniChar.Data list | WARN_DFA_TOO_LARGE of UniChar.Data * int | WARN_NON_ASCII_URI of UniChar.Char end (* stop of ../../Parser/Error/errorData.sml *) (* start of ../../Parser/Error/errorString.sml *) signature ErrorString = sig val errorChar2String : UniChar.Char -> string val errorData2String : UniChar.Data -> string val errorVector2String : UniChar.Vector -> string val quoteErrorChar0 : UniChar.Char -> string val quoteErrorChar : UniChar.Char -> string val quoteErrorData : UniChar.Data -> string val quoteErrorString : string -> string val quoteErrorVector : UniChar.Vector -> string val Position2String : ErrorData.Position -> string val Expected2String : ErrorData.Expected -> string val Found2String : ErrorData.Found -> string val Item2String : ErrorData.Item -> string val AnItem2String : ErrorData.Item -> string val Location2String : ErrorData.Location -> string val InLocation2String : ErrorData.Location -> string val EntityClass2String : ErrorData.EntityClass -> string end structure ErrorString : ErrorString = struct open ErrorData UniChar UtilString fun errorChar2String c = case c of 0wx9 => "\\t" | 0wxA => "\\n" | _ => if c>=0wx20 andalso c<0wx100 then String.implode [Char2char c] else "U+"^UtilString.toUpperString (StringCvt.padLeft #"0" 4 (Chars.toString c)) fun errorData2String cs = String.concat (map errorChar2String cs) fun errorVector2String vec = errorData2String (Vector.foldr (op ::) nil vec) val QUOTE = "'" fun quoteErrorChar0 c = QUOTE^errorChar2String c^QUOTE fun quoteErrorChar c = if c=0wx0 then "entity end" else QUOTE^errorChar2String c^QUOTE fun quoteErrorData cs = QUOTE^errorData2String cs^QUOTE fun quoteErrorString s = QUOTE^s^QUOTE fun quoteErrorVector v = QUOTE^errorVector2String v^QUOTE fun Position2String (fname,l,c) = if fname="" then "" else String.concat ["[",fname,":",Int2String l,".",Int2String c,"]"] fun ExpItem2String exp = case exp of EXP_CHAR c => quoteErrorChar c | EXP_DATA cs => quoteErrorData cs | EXP_STRING s => s fun Expected2String exp = case exp of nil => "nothing" | [one] => ExpItem2String one | _ => let val l=List.length exp in List2xString ("",", ","") ExpItem2String (List.take (exp,l-1)) ^" or "^ExpItem2String (List.last exp) end fun Found2String fnd = case fnd of [0wx0] => "entity end" | cs => quoteErrorData cs fun Location2String loc = case loc of LOC_NONE => "nothing" | LOC_AFTER_DTD => "document instance" | LOC_ATT_DECL => "attribute list declaration" | LOC_ATT_DEFAULT pos => "default value declared at "^Position2String pos | LOC_ATT_VALUE => "attribute value" | LOC_CDATA => "CDATA section" | LOC_CHOICE => "choice list" | LOC_COMMENT => "comment" | LOC_CONTENT => "content" | LOC_DECL => "declaration" | LOC_DOC_DECL => "document type declaration" | LOC_ELEM_DECL => "element type declaration" | LOC_ENCODING => "encoding name" | LOC_ENT_DECL => "entity declaration" | LOC_ENT_VALUE => "entity value" | LOC_EPILOG => "epilog" | LOC_ETAG => "end-tag" | LOC_IGNORED => "ignored section" | LOC_INCLUDED => "included section" | LOC_INT_DECL => "declaration in the internal subset" | LOC_INT_SUBSET => "internal subset" | LOC_LITERAL => "literal" | LOC_MIXED => "Mixed list" | LOC_NOT_DECL => "notation declaration" | LOC_OUT_COND => "outside a conditional section" | LOC_PROLOG => "prolog" | LOC_PROC => "processing instruction" | LOC_PUB_LIT => "public identifier" | LOC_SEQ => "sequence list" | LOC_STAG => "start-tag" | LOC_SUBSET => "declaration subset" | LOC_SYS_LIT => "system identifier" | LOC_TEXT_DECL => "text declaration" | LOC_VERSION => "version number" | LOC_XML_DECL => "XML declaration" fun InLocation2String loc = case loc of LOC_NONE => "" | LOC_AFTER_DTD => "after the DTD" | LOC_CONTENT => "in content" | LOC_ATT_DEFAULT pos => "in default value declared at "^Position2String pos | LOC_DOC_DECL => "in the document type declaration" | LOC_EPILOG => "after the root element" | LOC_INT_SUBSET => "in the internal subset" | LOC_OUT_COND => "outside a conditional section" | LOC_PROLOG => "in prolog" | LOC_SUBSET => "in the declaration subset" | LOC_XML_DECL => "in the XML declaration" | _ => "in "^prependAnA (Location2String loc) fun EntityClass2String ent = case ent of ENT_GENERAL => "general" | ENT_PARAMETER => "parameter" | ENT_UNPARSED => "unparsed" | ENT_EXTERNAL => "external" fun Item2String item = case item of IT_ATT_NAME => "attribute name" | IT_CDATA => "CDATA section" | IT_CHAR c => "character "^quoteErrorChar c | IT_CHAR_REF => "character reference" | IT_COND => "conditional section" | IT_DATA cs => if null cs then "character data" else quoteErrorData cs | IT_DECL => "declaration" | IT_DTD => "document type declaration" | IT_ELEM => "element type" | IT_ENT_NAME => "entity name" | IT_ETAG => "end-tag" | IT_GEN_ENT => "general entity" | IT_ID_NAME => "ID name" | IT_LANG_ID => "language identifier" | IT_NAME => "name" | IT_NMTOKEN => "name token" | IT_NOT_NAME => "notation name" | IT_NOTATION => "notation" | IT_PAR_ENT => "parameter entity" | IT_PAR_REF => "parameter entity reference" | IT_REF => "reference" | IT_STAG => "start-tag" | IT_TARGET => "target name" fun AnItem2String item = case item of IT_CHAR c => Item2String item | IT_DATA cs => Item2String item | _ => prependAnA (Item2String item) end (* stop of ../../Parser/Error/errorString.sml *) (* start of ../../Parser/Error/errorMessage.sml *) signature ErrorMessage = sig val errorMessage : ErrorData.Error -> string list val warningMessage : ErrorData.Warning -> string list end structure ErrorMessage : ErrorMessage = struct open Decode UtilString ErrorData ErrorString val quoteChar0 = quoteErrorChar0 val quoteChar = quoteErrorChar val quoteData = quoteErrorData val quoteString = quoteErrorString val quoteVector = quoteErrorVector fun errorMessage err = case err (* syntax errors *) of ERR_EMPTY loc => ["Empty",Location2String loc] | ERR_ENDED_BY_EE loc => [toUpperFirst (Location2String loc),"ended by entity end"] | ERR_EXPECTED (exp,found) => ["Expected",Expected2String exp,"but found",Found2String found] | ERR_MISSING_WHITE => ["Missing white space"] | ERR_NON_XML_CHAR c => ["Non-XML character",quoteChar0 c] | ERR_NON_XML_CHARREF c => ["Reference to non-XML character",quoteChar0 c] (* other well-formedness errors *) | ERR_CANT_PARSE loc => ["Cannot parse",Location2String loc] | ERR_ELEM_ENT_NESTING elem => ["The first and last character of element",quoteData elem, "are in different entities"] | ERR_ELEM_TYPE_MATCH (elem,other) => ["Element",quoteData elem,"was ended by an end-tag for",quoteData other] | ERR_IGNORED_END_TAG(elem,other) => ["An end-tag for element type",quoteData other,"is not allowed in the", "content of element",quoteData elem] | ERR_OMITTED_END_TAG elem => ["Element",quoteData elem,"has no end-tag"] | ERR_ENDED_IN_PROLOG => ["Document entity ended in prolog"] | ERR_FORBIDDEN_HERE(what,loc) => [AnItem2String what,"is not allowed",InLocation2String loc] | ERR_ILLEGAL_ENTITY(what,ent,loc) => ["Reference to",EntityClass2String what,"entity",quoteData ent,InLocation2String loc] | ERR_MULTIPLE_DTD => ["Repeated document type declaration"] | ERR_MULT_ATT_SPEC att => ["A value for attribute",quoteData att,"was already specified in this tag"] | ERR_RECURSIVE_ENTITY(what,ent) => ["Reference to",EntityClass2String what,"entity",quoteData ent, "that is already open"] | ERR_UNDEC_ENTITY(what,ent) => ["Reference to undeclared",EntityClass2String what,"entity",quoteData ent] (* validity errors concerning attributes *) | ERR_AT_LEAST_ONE what => ["At least one",Item2String what,"must be specified"] | ERR_AT_MOST_ONE what => ["Only one",Item2String what,"may be specified"] | ERR_ATT_IS_NOT(cs,what) => [quoteData cs,"is not",AnItem2String what] | ERR_EXACTLY_ONE what => [toUpperFirst (AnItem2String what),"must be specified"] | ERR_FIXED_VALUE(att,value,fixed) => ["Attribute",quoteData att,"has the value",quoteVector value, "but was declared with a fixed default value of",quoteVector fixed] | ERR_ID_DEFAULT => ["An ID attribute must have a default value of #IMPLIED or #REQUIRED"] | ERR_MISSING_ATT att => ["No value was specified for required attribute",quoteData att] | ERR_MULT_ID_ELEM elem => ["Element type",quoteData elem,"already has an ID attribute"] | ERR_MUST_BE_AMONG (what,x,ys) => [toUpperFirst (Item2String what),quoteData x,"is none of", List2xString ("",",","") quoteData ys] | ERR_MUST_BE_UNPARSED (name,loc) => [quoteData name,InLocation2String loc,"is not the name of an unparsed entity"] | ERR_REPEATED_ID name => ["ID name",quoteData name,"already occurred as an attribute value"] | ERR_UNDECL_ATT(att,elem) => ["Attribute",quoteData att,"was not declared for element type",quoteData elem] | ERR_UNDECL_ID(name,refs) => (if null refs then ["Reference to non-existent ID",quoteData name] else ["Reference to non-existent ID",quoteData name, "(also referenced at",List2xString ("",", ",")") Position2String refs]) (* validity errors concerning elements *) | ERR_BAD_ELEM (curr,elem) => ["Element type",quoteData elem,"not allowed at this point", "in the content of element",quoteData curr] | ERR_ELEM_CONTENT what => [toUpperFirst (AnItem2String what),"is not allowed in element content"] | ERR_EMPTY_TAG elem => ["Empty-element tag for element type",quoteData elem, "whose content model requires non-empty content"] | ERR_ENDED_EARLY elem => ["Element",quoteData elem,"ended before its content was completed"] | ERR_MULT_MIXED elem => ["Element type",quoteData elem,"already occurred in this mixed-content declaration"] | ERR_NONEMPTY elem => ["The end-tag for element",quoteData elem,"with declared EMPTY content", "must follow immediately after its start-tag"] | ERR_REDEC_ELEM elem => ["Element type",quoteData elem,"was already declared"] | ERR_ROOT_ELEM (dec,root) => ["Document element",quoteData root,"does not match the name", quoteData dec,"in the document type declaration"] (* other validity errors *) | ERR_DECL_ENT_NESTING loc => ["The first and last character of this",Location2String loc, "are not in the same entity replacement text"] | ERR_EE_INT_SUBSET => ["An entity end is not allowed in a declaration in the internal subset"] | ERR_GROUP_ENT_NESTING loc => ["The opening and closing parentheses of this",Location2String loc, "are not in the same entity replacement text"] | ERR_NO_DTD => ["There is no document type declaration. Switching to semi-validating mode", "(will not check for declaredness of entities, elements, etc.)"] | ERR_STANDALONE_DEF att => ["Externally declared attribute",quoteData att,"was defaulted,", "although the standalone declaration is",quoteString "yes"] | ERR_STANDALONE_ELEM elem => ["White space occurred in the content of externally declared", "element",quoteData elem,"with declared element content", "although the standalone declaration is",quoteString "yes"] | ERR_STANDALONE_ENT(what,ent) => ["Reference to externally declared",EntityClass2String what,"entity", quoteData ent^",","although the standalone declaration is",quoteString "yes"] | ERR_STANDALONE_NORM att => ["The value for externally declared attribute", quoteData att,"was changed as a result of normalization,", "although the standalone declaration is",quoteString "yes"] | ERR_UNDECLARED (what,x,loc) => ["Undeclared",Item2String what,quoteData x,InLocation2String loc] (* miscellaneous errors *) | ERR_DECL_PREDEF(ent,def) => ["General entity",quoteData ent,"must be declared as internal entity", "with replacement text",quoteVector def] | ERR_NO_SUCH_FILE(f,msg) => ["Could not open file",quoteString f,"("^msg^")"] | ERR_RESERVED(name,what) => [quoteData name,"is reserved for standardization and therefore not allowed as", AnItem2String what] | ERR_VERSION version => ["XML version",quoteString version,"is not supported"] | ERR_XML_SPACE => ["Attribute",quoteString "xml:space","must be given an enumeration type", "with values",quoteString "default","and",quoteString "preserve","only"] (* compatibility errors *) | ERR_AMBIGUOUS(a,n1,n2) => ["Content model is ambiguous: conflict between the",numberNth n1, "and the",numberNth n2,"occurrence of element",quoteData a^".", "Using an approximation instead"] | ERR_MUST_ESCAPE c => ["Character",quoteChar c,"must be escaped for compatibility"] (* interoperability errors *) | ERR_EMPTY_TAG_INTER elem => ["Empty-element tag for element",quoteData elem,"with non-EMPTY declared content"] | ERR_MUST_BE_EMPTY elem => ["An empty-element tag must be used for element type", quoteData elem,"with EMPTY declared content"] (* decoding errors *) | ERR_DECODE_ERROR err => "Decoding error:"::Decode.Error.decodeMessage err fun warningMessage warn = case warn of WARN_NO_XML_DECL => ["Document entity has no XML declaration"] | WARN_MULT_DECL(what,name) => ["Repeated declaration for",Item2String what,quoteData name] | WARN_SHOULD_DECLARE(ents) => let val (one,more) = (hd ents,tl ents) in case more of nil => ["The predefined entity",quoteData one,"should have been declared"] | _ => ["The predefined entities",List2xString ("",", ","") quoteData more, "and",quoteData one,"should have been declared"] end | WARN_ATT_UNDEC_ELEM elem => ["Attribute-list declaration for undeclared element type",quoteData elem] | WARN_MULT_ATT_DECL elem => ["Repeated attribute-list declaration for element type",quoteData elem] | WARN_MULT_ATT_DEF(elem,att) => ["Repeated definition of attribute",quoteData att,"for element type",quoteData elem] | WARN_ENUM_ATTS(elem,names) => ["The following name tokens occur more than once in the enumerated attribute", "types of element",quoteData elem^":",List2xString ("",", ","") quoteData names] | WARN_DFA_TOO_LARGE (elem,max) => ["The finite state machine for the content model of element type", quoteData elem,"would have more than the maximal allowed number of", Int2String max,"states. Using an approximation instead"] | WARN_NON_ASCII_URI c => ["System identifier contains non-ASCII character",quoteChar c] end (* stop of ../../Parser/Error/errorMessage.sml *) (* start of ../../Parser/Error/errorUtil.sml *) signature ErrorUtil = sig val isFatalError : ErrorData.Error -> bool val isDecodeError : ErrorData.Error -> bool val isSyntaxError : ErrorData.Error -> bool val isValidityError : ErrorData.Error -> bool val isWellFormedError : ErrorData.Error -> bool end structure ErrorUtil : ErrorUtil = struct open ErrorData fun isDecodeError err = case err of ERR_DECODE_ERROR _ => true | _ => false fun isSyntaxError err = case err of ERR_EMPTY _ => true | ERR_ENDED_BY_EE _ => true | ERR_EXPECTED _ => true | ERR_MISSING_WHITE => true | ERR_NON_XML_CHAR _ => true | ERR_NON_XML_CHARREF _ => true | _ => false fun isWellFormedError err = case err of ERR_CANT_PARSE _ => true | ERR_ELEM_ENT_NESTING _ => true | ERR_ELEM_TYPE_MATCH _ => true | ERR_OMITTED_END_TAG _ => true | ERR_IGNORED_END_TAG _ => true | ERR_ENDED_IN_PROLOG => true | ERR_FORBIDDEN_HERE _ => true | ERR_ILLEGAL_ENTITY _ => true | ERR_MULTIPLE_DTD => true | ERR_MULT_ATT_SPEC _ => true | ERR_RECURSIVE_ENTITY _ => true | ERR_UNDEC_ENTITY _ => true | _ => isSyntaxError err fun isFatalError err = case err of ERR_NO_SUCH_FILE _ => true | _ => isWellFormedError err fun isValidityError err = case err of ERR_AT_LEAST_ONE _ => true | ERR_AT_MOST_ONE _ => true | ERR_ATT_IS_NOT _ => true | ERR_EXACTLY_ONE _ => true | ERR_FIXED_VALUE _ => true | ERR_ID_DEFAULT => true | ERR_MISSING_ATT _ => true | ERR_MULT_ID_ELEM _ => true | ERR_MUST_BE_AMONG _ => true | ERR_MUST_BE_UNPARSED _ => true | ERR_REPEATED_ID _ => true | ERR_UNDECL_ATT _ => true | ERR_UNDECL_ID _ => true | ERR_BAD_ELEM _ => true | ERR_ELEM_CONTENT _ => true | ERR_EMPTY_TAG _ => true | ERR_ENDED_EARLY _ => true | ERR_MULT_MIXED _ => true | ERR_NONEMPTY _ => true | ERR_REDEC_ELEM _ => true | ERR_ROOT_ELEM _ => true | ERR_DECL_ENT_NESTING _ => true | ERR_EE_INT_SUBSET => true | ERR_GROUP_ENT_NESTING _ => true | ERR_NO_DTD => true | ERR_STANDALONE_DEF _ => true | ERR_STANDALONE_ELEM _ => true | ERR_STANDALONE_ENT _ => true | ERR_STANDALONE_NORM _ => true | ERR_UNDECLARED _ => true | _ => false end (* stop of ../../Parser/Error/errorUtil.sml *) (* start of ../../Parser/Error/expected.sml *) structure Expected = struct local open UniChar ErrorData in val expAnElemName = [EXP_STRING "an element name"] val expAnEntName = [EXP_STRING "an entity name"] val expAName = [EXP_STRING "a name"] val expANameToken = [EXP_STRING "a name token"] val expANotName = [EXP_STRING "a notation name"] val expATarget = [EXP_STRING "a target name"] val expAttDefKey = [EXP_DATA (String2Data "REQUIRED"),EXP_DATA (String2Data "IMPLIED"), EXP_DATA (String2Data "FIXED")] val expAttNameGt = [EXP_STRING "an attribute name",EXP_CHAR 0wx3E] val expAttSTagEnd = [EXP_STRING "an attribute name",EXP_CHAR 0wx3E, EXP_DATA(String2Data "/>")] val expAttType = [EXP_CHAR 0wx28,EXP_DATA (String2Data "CDATA"), EXP_DATA (String2Data "ID"),EXP_DATA (String2Data "IDREF"), EXP_DATA (String2Data "IDREFS"),EXP_DATA (String2Data "ENTITY"), EXP_DATA (String2Data "ENTITIES"),EXP_DATA (String2Data "NMTOKEN"), EXP_DATA (String2Data "NMTOKENS"),EXP_DATA (String2Data "NOTATION")] val expBarRpar = [EXP_CHAR 0wx29,EXP_CHAR 0wx7C] val expCdata = [EXP_DATA (String2Data "CDATA")] fun expConCRpar c = [EXP_CHAR 0wx29,EXP_CHAR c] val expConRpar = [EXP_CHAR 0wx29,EXP_CHAR 0wx2C,EXP_CHAR 0wx7C] val expCondStatus = [EXP_DATA (String2Data "IGNORE"),EXP_DATA (String2Data "INCLUDE")] val expContSpec = [EXP_CHAR 0wx28,EXP_DATA (String2Data "ANY"), EXP_DATA (String2Data "EMPTY")] val expElemLpar = [EXP_STRING "an element name",EXP_CHAR 0wx28] val expEncStand = [EXP_DATA (String2Data "encoding"), EXP_DATA (String2Data "standalone")] val expDash = [EXP_CHAR 0wx2D] val expDashDocLbrk = [EXP_CHAR 0wx2D,EXP_CHAR 0wx5B,EXP_DATA (String2Data "DOCTYPE")] val expDashLbrack = [EXP_CHAR 0wx2D,EXP_CHAR 0wx5B] val expDigitX = [EXP_STRING "a digit",EXP_CHAR 0wx78] val expEncoding = [EXP_DATA (String2Data "encoding")] val expEncVers = [EXP_DATA (String2Data "encoding"),EXP_DATA (String2Data "version")] val expEntNamePero = [EXP_STRING "an entity name",EXP_CHAR 0wx25] val expEq = [EXP_CHAR 0wx3D] val expExclQuest = [EXP_CHAR 0wx21,EXP_CHAR 0wx3F] val expExtId = [EXP_DATA (String2Data "PUBLIC"),EXP_DATA (String2Data "SYSTEM")] val expGt = [EXP_CHAR 0wx3E] val expGtNdata = [EXP_CHAR 0wx3E,EXP_DATA (String2Data "NDATA")] val expHexDigit = [EXP_STRING "a hexadecimal digit"] val expInSubset = [EXP_CHAR 0wx3C,EXP_CHAR 0wx5D,EXP_CHAR 0wx25, EXP_STRING "white space"] val expLbrack = [EXP_CHAR 0wx5B] val expLitQuote = [EXP_CHAR 0wx22,EXP_CHAR 0wx27] val expLitQuotExt = [EXP_CHAR 0wx22,EXP_CHAR 0wx27, EXP_DATA (String2Data "PUBLIC"),EXP_DATA (String2Data "SYSTEM")] val expLpar = [EXP_CHAR 0wx28] val expNoYes = [EXP_DATA (String2Data "no"),EXP_DATA (String2Data "yes")] val expPcdata = [EXP_DATA (String2Data "PCDATA")] val expProcEnd = [EXP_DATA (String2Data "?>")] val expQuoteRni = [EXP_CHAR 0wx22,EXP_CHAR 0wx27,EXP_CHAR 0wx23] val expRbrack = [EXP_CHAR 0wx5D] val expRep = [EXP_CHAR 0wx2A] val expSemi = [EXP_CHAR 0wx3B] val expStandOpt = [EXP_DATA (String2Data "standalone"),EXP_DATA (String2Data "?>")] val expStartEnc = [EXP_STRING "a letter"] val expStartMarkup = [EXP_DATA (String2Data "--"),EXP_DATA (String2Data "ATTLIST"), EXP_DATA (String2Data "ELEMENT"),EXP_DATA (String2Data "ENTITY"), EXP_DATA (String2Data "NOTATION")] val expVersion = [EXP_DATA (String2Data "version")] end end (* stop of ../../Parser/Error/expected.sml *) (* start of ../../Parser/Error/errors.sml *) structure Errors = struct open UtilError ErrorData ErrorMessage ErrorString ErrorUtil Expected end (* stop of ../../Parser/Error/errors.sml *) (* start of ../../Parser/Base/baseData.sml *) (*--------------------------------------------------------------------------*) (* Structure: BaseData *) (*--------------------------------------------------------------------------*) structure BaseData = struct open DfaData (*--- external ids may have a public id and must have a system id ---*) (*--- for notations, however, also the system id can be optional ----*) datatype ExternalId = EXTID of (string * UniChar.Char) option * (Uri.Uri * Uri.Uri * UniChar.Char) option (*--- external ids may have a public id and must have a system id ---*) type NotationInfo = ExternalId option (*--- replacement of a general entity ---*) datatype GenEntity = GE_NULL | GE_INTERN of UniChar.Vector * UniChar.Vector | GE_EXTERN of ExternalId | GE_UNPARSED of ExternalId * int * Errors.Position type GenEntInfo = GenEntity * bool fun isExtGen (GE_EXTERN _) = true | isExtGen _ = false (*--- replacement of a parameter entity ---*) datatype ParEntity = PE_NULL | PE_INTERN of UniChar.Vector * UniChar.Vector | PE_EXTERN of ExternalId type ParEntInfo = ParEntity * bool fun isExtPar (PE_EXTERN _) = true | isExtPar _ = false (*--- declared type of an attribute ---*) datatype AttType = AT_CDATA | AT_NMTOKEN | AT_NMTOKENS | AT_ID | AT_IDREF | AT_IDREFS | AT_ENTITY | AT_ENTITIES | AT_GROUP of int list | AT_NOTATION of int list (*--- typed attribute value ---*) datatype AttValue = AV_CDATA of UniChar.Vector | AV_NMTOKEN of UniChar.Data | AV_NMTOKENS of UniChar.Data list | AV_ID of int | AV_IDREF of int | AV_IDREFS of int list | AV_ENTITY of int | AV_ENTITIES of int list | AV_GROUP of int list * int | AV_NOTATION of int list * int fun isIdType at = at=AT_ID (*--- default values of attributes ---*) datatype AttDefault = AD_IMPLIED | AD_REQUIRED | AD_DEFAULT of (UniChar.Vector * UniChar.Vector * AttValue option) * (Errors.Position * bool ref) | AD_FIXED of (UniChar.Vector * UniChar.Vector * AttValue option) * (Errors.Position * bool ref) (*--- attribute definition (list) ---*) (*--- the boolean says whether it was externally declared ---*) type AttDef = int * AttType * AttDefault * bool type AttDefList = AttDef list (*--- content specification ---*) fun defaultAttDef idx = (idx,AT_CDATA,AD_IMPLIED,false) (*--- content specification ---*) datatype ContentSpec = CT_ANY | CT_EMPTY | CT_MIXED of int list | CT_ELEMENT of DfaData.ContentModel * DfaData.Dfa fun isMixed ct = case ct of CT_ANY => true | CT_MIXED _ => true | _ => false type ElemInfo = {decl : (ContentSpec * bool) option, atts : (AttDefList * bool) option, errAtts : int list} val nullElemInfo : ElemInfo = {decl=NONE, atts=NONE, errAtts=nil} (*--------------------------------------------------------------------*) (* the id info tells whether an id value has occurred for a name and *) (* the list of all positions where it occurred as an idref value. *) (*--------------------------------------------------------------------*) type IdInfo = bool * Errors.Position list val nullIdInfo : IdInfo = (false,nil) end (* stop of ../../Parser/Base/baseData.sml *) (* start of ../../Parser/Dfa/dfaString.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaString *) (* *) (* Notes: *) (* This structure is needed for debugging of content models and tables. *) (* *) (* Depends on: *) (* DfaData *) (* UtilString *) (* *) (* Exceptions raised by functions in this structure: *) (* Table2String : none *) (* ContentModel2String : none *) (*--------------------------------------------------------------------------*) signature DfaString = sig val ContentModel2String : (int -> string) -> DfaData.ContentModel -> string val Dfa2String : (int -> string) -> DfaData.Dfa -> string end structure DfaString : DfaString = struct open DfaBase UtilString fun State2String q = if q=dfaError then "Error" else Int2String q fun Info2String Elem2String (q,mt,fst) = String.concat (State2String q::Bool2xString ("[empty]","") mt ::map (fn (q,a) => " "^Elem2String a^"->"^State2String q) fst) fun ContentModel2String Elem2String cm = case cm of CM_ELEM i => Elem2String i | CM_OPT cm => ContentModel2String Elem2String cm^"?" | CM_REP cm => ContentModel2String Elem2String cm^"*" | CM_PLUS cm => ContentModel2String Elem2String cm^"+" | CM_ALT cms => List2xString ("(","|",")") (ContentModel2String Elem2String) cms | CM_SEQ cms => List2xString ("(",",",")") (ContentModel2String Elem2String) cms fun CM2String Elem2String = let fun cm2s indent cm = case cm of (ELEM a,info) => String.concat [indent,Elem2String a," ",Info2String Elem2String info,"\n"] | (OPT cm',info) => String.concat [indent,"? ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] | (REP cm',info) => String.concat [indent,"* ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] | (PLUS cm',info) => String.concat [indent,"+ ",Info2String Elem2String info,"\n",cm2s (indent^" ") cm'] | (ALT cms,info) => String.concat (indent^"| "::Info2String Elem2String info::"\n" ::map (cm2s (indent^" ")) cms) | (SEQ cms,info) => String.concat (indent^", "::Info2String Elem2String info::"\n" ::map (cm2s (indent^" ")) cms) in cm2s "" end fun Row2String Elem2String (lo,hi,tab,fin) = String.concat (Vector.foldri (fn (i,q,yet) => if q<0 then yet else " "::Elem2String (i+lo)::"->"::State2String q::yet) (if fin then [" [Final]"] else nil) (tab,0,NONE)) fun Dfa2String Elem2String tab = String.concat (Vector.foldri (fn (q,row,yet) => State2String q::":"::Row2String Elem2String row::yet) nil (tab,0,NONE)) end (* stop of ../../Parser/Dfa/dfaString.sml *) (* start of ../../Parser/Base/baseString.sml *) (*--------------------------------------------------------------------------*) (* Structure: BaseString *) (* *) (* Depends on: *) (* UniChar *) (* Dfa *) (* BaseData *) (* UtilString *) (* *) (* Exceptions raised by functions in this structure: *) (* ElemInfo2xString : InternalError *) (* ExternalId2String : none *) (* GenEntity2xString : none *) (* Notation2String : none *) (* IdInfo2String : none *) (* ParEntity2String : none *) (*--------------------------------------------------------------------------*) signature BaseString = sig val ExternalId2String : BaseData.ExternalId -> string val NotationInfo2String : BaseData.NotationInfo -> string val GenEntity2xString : (int -> string) -> BaseData.GenEntity -> string val ParEntity2String : BaseData.ParEntity -> string val ElemInfo2xString : (int -> string) * (int -> string) * (int -> string) * (int -> string) * (int -> string) -> BaseData.ElemInfo -> string val IdInfo2String : BaseData.IdInfo -> string end structure BaseString : BaseString = struct open UtilString Uri Errors UniChar DfaString BaseData val THIS_MODULE = "BaseString" fun ExternalId2String (EXTID id) = case id of (SOME(p,pq),SOME(rel,s,sq)) => String.concat ["PUBLIC ",quoteUni pq p, " ",quoteUni sq (Uri2String rel), " @ ",quoteUni sq (Uri2String s)] | (SOME(p,pq),NONE) => String.concat ["PUBLIC ",quoteUni pq p] | (NONE,SOME(rel,s,sq)) => String.concat ["SYSTEM ",quoteUni sq (Uri2String rel), " @ ",quoteUni sq (Uri2String s)] | (NONE,NONE) => "" fun NotationInfo2String not = case not of NONE => "undeclared" | SOME extId => ExternalId2String extId fun GenEntity2xString NotIdx2String ge = case ge of GE_NULL => "NULL" | GE_INTERN(lit,cv) => let val quote = Vector.sub(lit,0) in String.concat ["INTERN ",Vector2String lit, " - ",quoteVector quote cv] end | GE_EXTERN id => "EXTERN "^ExternalId2String id | GE_UNPARSED(id,not,_) => "UNPARSED "^ExternalId2String id^" "^NotIdx2String not fun ParEntity2String pe = case pe of PE_NULL => "NULL" | PE_INTERN(lit,cv) => let val quote = Vector.sub(lit,0) in String.concat ["INTERN ",Vector2String lit, " - ",quoteVector quote cv] end | PE_EXTERN id => "EXTERN "^ExternalId2String id fun ContentSpec2String Elem2String cs = case cs of CT_ANY => "ANY" | CT_EMPTY => "EMPTY" | CT_MIXED is => List2xString ("MIXED (","|",")") Elem2String is | CT_ELEMENT(cm,_) => "ELEMENT "^ContentModel2String Elem2String cm fun AttValue2xString (Att2String,Ent2String,Id2String,Not2String) quote av = quoteUni quote (case av of AV_CDATA buf => Vector2String buf | AV_NMTOKEN cs => Data2String cs | AV_NMTOKENS css => List2xString (""," ","") Data2String css | AV_ID idx => Id2String idx | AV_IDREF idx => Id2String idx | AV_IDREFS idxs => List2xString (""," ","") Id2String idxs | AV_ENTITY idx => Ent2String idx | AV_ENTITIES idxs => List2xString (""," ","") Ent2String idxs | AV_GROUP(_,idx) => Att2String idx | AV_NOTATION(_,idx) => Not2String idx) fun AttDefault2xString funs ad = case ad of AD_DEFAULT ((lit,cv,av),_) => let val quote = Vector.sub(lit,0) in String.concat [quoteVector quote cv," ", Option2String0 (AttValue2xString funs quote) av] end | AD_FIXED ((lit,cv,av),_) => let val quote = Vector.sub(lit,0) in String.concat ["#FIXED ",quoteVector quote cv," ", Option2String0 (AttValue2xString funs quote) av] end | AD_IMPLIED => "#IMPLIED" | AD_REQUIRED => "#REQUIRED" fun AttType2xString (Att2String,Not2String) at = case at of AT_CDATA => "CDATA" | AT_NMTOKEN => "NMTOKEN" | AT_NMTOKENS => "NMTOKENS" | AT_ID => "ID" | AT_IDREF => "IDREF" | AT_IDREFS => "IDREFS" | AT_ENTITY => "ENTITY" | AT_ENTITIES => "ENTITIES" | AT_GROUP idxs => List2xString ("(","|",")") Att2String idxs | AT_NOTATION idxs => List2xString ("NOTATION(","|",")") Not2String idxs fun AttDef2xString (funs as (Att2String,_,_,Not2String)) (idx,attType,default,ext) = String.concat [Att2String idx," ", AttType2xString (Att2String,Not2String) attType," ", AttDefault2xString funs default, Bool2xString ("[external]","") ext] fun AttDefList2xString funs adl = List2xString ("",",","") (AttDef2xString funs) adl fun ElemInfo2xString (Att2String,Elem2String,Ent2String,Id2String,Not2String) ({decl,atts,...}:ElemInfo) = let val dec = case decl of NONE => "elem undeclared" | SOME(cont,ext) => String.concat ["elem declared ",if ext then "ex" else "in","ternally: ", ContentSpec2String Elem2String cont] val att = case atts of NONE => "no atts declared" | SOME(defs,hadId) => String.concat ["atts were declared",if hadId then "(has id attribute): " else ": ", AttDefList2xString (Att2String,Ent2String,Id2String,Not2String) defs] in dec^att end fun IdInfo2String (decl,refs) = Bool2xString ("declared","undeclared") decl^"/"^ (if null refs then "no references" else List2xString ("references: ",", ","") Position2String refs) end (* stop of ../../Parser/Base/baseString.sml *) (* start of ../../Parser/Base/base.sml *) structure Base = struct open BaseData BaseString end (* stop of ../../Parser/Base/base.sml *) (* start of ../../Parser/Params/dtd.sml *) (*--------------------------------------------------------------------------*) (* Structure: Dtd *) (* *) (* Exceptions raised by functions in this structure: *) (* AttNot2Index : none *) (* Element2Index : none *) (* GenEnt2Index : none *) (* Id2Index : none *) (* Index2AttNot : NoSuchIndex *) (* Index2Element : NoSuchIndex *) (* Index2GenEnt : NoSuchIndex *) (* Index2Id : NoSuchIndex *) (* Index2ParEnt : NoSuchIndex *) (* ParEnt2Index : none *) (* entitiesWellformed : none *) (* getElement : NoSuchIndex *) (* getGenEnt : NoSuchIndex *) (* getId : NoSuchIndex *) (* getNotation : NoSuchIndex *) (* getParEnt : NoSuchIndex *) (* hasNotation : NoSuchIndex *) (* initDtdTables : none *) (* maxUsedElem : none *) (* maxUsedId : none *) (* printAttNotTable : none *) (* printIdTable : none *) (* printParEntTable : none *) (* printxElementTable : none *) (* printxGenEntTable : none *) (* setElement : NoSuchIndex *) (* setGenEnt : NoSuchIndex *) (* setId : NoSuchIndex *) (* setNotation : NoSuchIndex *) (* setParEnt : NoSuchIndex *) (*--------------------------------------------------------------------------*) signature Dtd = sig type Dtd val hasDtd : Dtd -> bool val hasExternal : Dtd -> bool val standsAlone : Dtd -> bool val setHasDtd : Dtd -> unit val setExternal : Dtd -> unit val setStandAlone : Dtd -> bool -> unit val entitiesWellformed : Dtd -> bool val validPredef : int -> UniChar.Vector val isRedefined : Dtd -> int -> bool val setRedefined : Dtd -> int -> unit val notRedefined : Dtd -> UniChar.Data list val AttNot2Index : Dtd -> UniChar.Data -> int val Element2Index : Dtd -> UniChar.Data -> int val Id2Index : Dtd -> UniChar.Data -> int val GenEnt2Index : Dtd -> UniChar.Data -> int val ParEnt2Index : Dtd -> UniChar.Data -> int val Index2Element : Dtd -> int -> UniChar.Data val Index2Id : Dtd -> int -> UniChar.Data val Index2GenEnt : Dtd -> int -> UniChar.Data val Index2AttNot : Dtd -> int -> UniChar.Data val Index2ParEnt : Dtd -> int -> UniChar.Data val getId : Dtd -> int -> Base.IdInfo val getElement : Dtd -> int -> Base.ElemInfo val getGenEnt : Dtd -> int -> Base.GenEntInfo val getNotation : Dtd -> int -> Base.NotationInfo val getParEnt : Dtd -> int -> Base.ParEntInfo val hasNotation : Dtd -> int -> bool val setId : Dtd -> int * Base.IdInfo -> unit val setElement : Dtd -> int * Base.ElemInfo -> unit val setGenEnt : Dtd -> int * Base.GenEntInfo -> unit val setNotation : Dtd -> int * Base.ExternalId -> unit val setParEnt : Dtd -> int * Base.ParEntInfo -> unit val maxUsedId : Dtd -> int val maxUsedElem : Dtd -> int val maxUsedGen : Dtd -> int val initDtdTables : unit -> Dtd val printDtdTables : Dtd -> unit val printAttNotTable : Dtd -> unit val printIdTable : Dtd -> unit val printElementTable : Dtd -> unit val printGenEntTable : Dtd -> unit val printParEntTable : Dtd -> unit val defaultIdx : int val preserveIdx : int val xmlLangIdx : int val xmlSpaceIdx : int val xmlSpaceType : Base.AttType end structure Dtd :> Dtd = struct open UtilInt Base UniChar DataDict DataSymTab val O_TS_ELEM = ref 6 (* Initial size of element table *) val O_TS_GEN_ENT = ref 6 (* Initial size of general entity table *) val O_TS_ID = ref 6 (* Initial size of id attribute table *) val O_TS_ATT_NOT = ref 6 (* Initial size of notation table *) val O_TS_PAR_ENT = ref 6 (* Initial size of parameter entity table *) (*--------------------------------------------------------------------*) (* this is how the predefined entities must be declared. *) (*--------------------------------------------------------------------*) val predefined = Vector.fromList (map (fn (x,y,z) => (String2Data x,String2Vector y,String2Vector z)) [("","",""), ("amp" ,"'&'","&"), ("lt" ,"'<'","<"), ("gt" ,"'>'",">"), ("apos","\"'\"" ,"'" ), ("quot","'\"'" ,"\"" )]) fun validPredef i = #3(Vector.sub(predefined,i)) (*--------------------------------------------------------------------*) (* this type holds all information relevent to the DTD. *) (*--------------------------------------------------------------------*) type Dtd = {hasDtdFlag : bool ref, standAloneFlag : bool ref, externalFlag : bool ref, elDict : ElemInfo DataDict.Dict, genDict : GenEntInfo DataDict.Dict, idDict : IdInfo DataDict.Dict, notDict : NotationInfo DataDict.Dict, parDict : ParEntInfo DataDict.Dict, preRedef : bool array } fun newDtd() = {hasDtdFlag = ref false, standAloneFlag = ref false, externalFlag = ref false, elDict = nullDict ("element",nullElemInfo), idDict = nullDict ("ID name",nullIdInfo), genDict = nullDict ("general entity",(GE_NULL,false)), notDict = nullDict ("attribute and notation",NONE:NotationInfo), parDict = nullDict ("parameter entity",(PE_NULL,false)), preRedef = Array.array(6,false) } : Dtd val default = String2Data "default" val preserve = String2Data "preserve" val xmlLang = String2Data "xml:lang" val xmlSpace = String2Data "xml:space" (*--------------------------------------------------------------------*) (* standalone status, existance of a DTD and of external declarations *) (* externalFlag is true if there is an external subset or a (not nece-*) (* ssarily external) parameter entity reference in the DTD. (cf. 4.1) *) (*--------------------------------------------------------------------*) fun standsAlone (dtd:Dtd) = !(#standAloneFlag dtd) fun hasExternal (dtd:Dtd) = !(#externalFlag dtd) fun hasDtd (dtd:Dtd) = !(#hasDtdFlag dtd) fun setHasDtd (dtd:Dtd) = #hasDtdFlag dtd := true fun setExternal (dtd:Dtd) = #externalFlag dtd := true fun setStandAlone (dtd:Dtd) x = #standAloneFlag dtd := x (*--------------------------------------------------------------------*) (* 4.1: *) (* Well-Formedness Constraint: Entity Declared *) (* In a document without any DTD, a document with only an internal *) (* DTD subset which contains no parameter entity references, or a *) (* document with "standalone='yes'", the Name given in the entity *) (* reference must match that in an entity declaration ... Note that *) (* if entities are declared in the external subset or in external *) (* parameter entities, a non-validating processor is not obligated *) (* to read and process their declarations; for such documents, the *) (* rule that an entity must be declared is a well-formedness *) (* constraint only if standalone='yes'. *) (* *) (* Thus a reference to an undeclared entity is a well-formedness *) (* error if either #hasDtdFlag or #externalFlag is false, or if *) (* #standaloneFlag is true *) (*--------------------------------------------------------------------*) (* bug fixed 080600: changed !hasDtdFlag to not(!hasDtdFlag) *) (*--------------------------------------------------------------------*) fun entitiesWellformed ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) = not (!hasDtdFlag andalso !externalFlag) orelse !standAloneFlag fun initStandalone ({hasDtdFlag,standAloneFlag,externalFlag,...}:Dtd) = (hasDtdFlag := false; standAloneFlag := false; externalFlag := false) (*--------------------------------------------------------------------*) (* this array tells whether the predefined entities (index 1-5) have *) (* been declared in the dtd. *) (*--------------------------------------------------------------------*) fun isRedefined (dtd:Dtd) i = Array.sub(#preRedef dtd,i) fun setRedefined (dtd:Dtd) i = Array.update(#preRedef dtd,i,true) fun notRedefined dtd = List.mapPartial (fn i => if isRedefined dtd i then NONE else SOME(#1(Vector.sub(predefined,i)))) [1,2,3,4,5] fun AttNot2Index (dtd:Dtd) name = getIndex(#notDict dtd,name) fun Element2Index (dtd:Dtd) name = getIndex(#elDict dtd,name) fun GenEnt2Index (dtd:Dtd) name = getIndex(#genDict dtd,name) fun Id2Index (dtd:Dtd) name = getIndex(#idDict dtd,name) fun ParEnt2Index (dtd:Dtd) name = getIndex(#parDict dtd,name) fun Index2AttNot (dtd:Dtd) idx = getKey(#notDict dtd,idx) fun Index2Element (dtd:Dtd) idx = getKey(#elDict dtd,idx) fun Index2GenEnt (dtd:Dtd) idx = getKey(#genDict dtd,idx) fun Index2Id (dtd:Dtd) idx = getKey(#idDict dtd,idx) fun Index2ParEnt (dtd:Dtd) idx = getKey(#parDict dtd,idx) fun getElement (dtd:Dtd) idx = getByIndex(#elDict dtd,idx) fun getGenEnt (dtd:Dtd) idx = getByIndex(#genDict dtd,idx) fun getId (dtd:Dtd) idx = getByIndex(#idDict dtd,idx) fun getNotation (dtd:Dtd) idx = getByIndex(#notDict dtd,idx) fun getParEnt (dtd:Dtd) idx = getByIndex(#parDict dtd,idx) fun hasNotation (dtd:Dtd) idx = isSome(getByIndex(#notDict dtd,idx)) fun setElement (dtd:Dtd) (idx,el) = setByIndex(#elDict dtd,idx,el) fun setGenEnt (dtd:Dtd) (idx,ge) = setByIndex(#genDict dtd,idx,ge) fun setId (dtd:Dtd) (idx,a) = setByIndex(#idDict dtd,idx,a) fun setNotation (dtd:Dtd) (idx,nt) = setByIndex(#notDict dtd,idx,SOME nt) fun setParEnt (dtd:Dtd) (idx,pe) = setByIndex(#parDict dtd,idx,pe) fun maxUsedElem (dtd:Dtd) = usedIndices(#elDict dtd)-1 fun maxUsedGen (dtd:Dtd) = usedIndices(#genDict dtd)-1 fun maxUsedId (dtd:Dtd) = usedIndices(#idDict dtd)-1 (*--------------------------------------------------------------------*) (* initialize the attribute tables. Make sure that indices 0...3 are *) (* assigned to "default", "preserve", "xml:lang" and "xml:space". *) (*--------------------------------------------------------------------*) fun initAttNotTable (dtd as {idDict,notDict,...}:Dtd) = let val _ = clearDict(notDict,SOME(!O_TS_ATT_NOT)) val _ = clearDict(idDict,SOME(!O_TS_ID)) val _ = AttNot2Index dtd default val _ = AttNot2Index dtd preserve val _ = AttNot2Index dtd xmlLang val _ = AttNot2Index dtd xmlSpace in () end fun initElementTable (dtd:Dtd) = clearDict(#elDict dtd,SOME(!O_TS_ELEM)) (*--------------------------------------------------------------------*) (* reserve 0 for gen entity -, i.e., the document entity. *) (* reserve 1 for gen entity amp, i.e., "&#38;" *) (* reserve 2 for gen entity lt, i.e., "&#60;" *) (* reserve 3 for gen entity gt, i.e., ">" *) (* reserve 4 for gen entity apos, i.e., "'" *) (* reserve 5 for gen entity quot, i.e., """ *) (* reserve 0 for par entity -, i.e., the external dtd subset. *) (* *) (* Cf. 4.1: *) (* *) (* ... except that well-formed documents need not declare any of *) (* the following entities: amp, lt, gt, apos, quot. *) (* *) (* and 4.6: *) (* *) (* *) (* *) (* *) (* *) (* *) (*--------------------------------------------------------------------*) fun initEntityTables (dtd as {genDict,parDict,preRedef,...}:Dtd) = let val _ = clearDict(genDict,SOME(!O_TS_GEN_ENT)) val _ = clearDict(parDict,SOME(!O_TS_PAR_ENT)) val _ = map (fn i => Array.update(preRedef,i,false)) [1,2,3,4,5] val _ = GenEnt2Index dtd [0wx2D] (* "-" *) val _ = ParEnt2Index dtd [0wx2D] (* "-" *) val _ = Vector.appi (fn (_,(name,lit,cs)) => (setGenEnt dtd (GenEnt2Index dtd name,(GE_INTERN(lit,cs),false)))) (predefined,1,NONE) in () end fun initDtdTables() = let val dtd = newDtd() val _ = initAttNotTable dtd val _ = initElementTable dtd val _ = initEntityTables dtd val _ = initStandalone dtd in dtd end local val dtd = initDtdTables() in val defaultIdx = AttNot2Index dtd default val preserveIdx = AttNot2Index dtd preserve val xmlLangIdx = AttNot2Index dtd xmlLang val xmlSpaceIdx = AttNot2Index dtd xmlSpace val xmlSpaceType = AT_GROUP (IntLists.addIntList (preserveIdx,[defaultIdx])) end fun printAttNotTable (dtd:Dtd) = printDict NotationInfo2String (#notDict dtd) fun printElementTable dtd = printDict (ElemInfo2xString (UniChar.Data2String o (Index2AttNot dtd), UniChar.Data2String o (Index2Element dtd), UniChar.Data2String o (Index2GenEnt dtd), UniChar.Data2String o (Index2Id dtd), UniChar.Data2String o (Index2AttNot dtd))) (#elDict dtd) fun printGenEntTable dtd = printDict (fn (ent,ext) => GenEntity2xString (Data2String o (Index2AttNot dtd)) ent ^(if ext then "[external]" else "")) (#genDict dtd) fun printIdTable (dtd:Dtd) = printDict (IdInfo2String) (#idDict dtd) fun printParEntTable (dtd:Dtd) = printDict (fn (ent,ext) => ParEntity2String ent ^(if ext then "[external]" else "")) (#parDict dtd) fun printDtdTables dtd = (printAttNotTable dtd; printElementTable dtd; printGenEntTable dtd; printIdTable dtd; printParEntTable dtd) end (* stop of ../../Parser/Params/dtd.sml *) (* start of ../../Parser/Params/hookData.sml *) structure HookData = struct type StartEnd = Errors.Position * Errors.Position (*--------------------------------------------------------------------*) (* a text declaration consists of a version info and an encoding decl.*) (* an xml declaration has an additional standalone decl. *) (*--------------------------------------------------------------------*) type TextDecl = string option * string option type XmlDecl = string option * string option * bool option type XmlInfo = Uri.Uri * Encoding.Encoding * XmlDecl option type ExtSubsetInfo = Uri.Uri * Encoding.Encoding * TextDecl option type SubsetInfo = Errors.Position type EndDtdInfo = Errors.Position type ErrorInfo = Errors.Position * Errors.Error type WarningInfo = Errors.Position * Errors.Warning type NoFileInfo = string * string type CommentInfo = StartEnd * UniChar.Vector type ProcInstInfo = StartEnd * UniChar.Data * Errors.Position * UniChar.Vector type DtdInfo = int * Base.ExternalId option datatype AttPresent = AP_IMPLIED | AP_MISSING | AP_DEFAULT of UniChar.Vector * UniChar.Vector * Base.AttValue option | AP_PRESENT of UniChar.Vector * UniChar.Vector * Base.AttValue option type AttSpec = int * AttPresent * (UniChar.Data * UniChar.Data) option type AttSpecList = AttSpec list type EndTagInfo = StartEnd * int * (int * UniChar.Data) option type StartTagInfo = StartEnd * int * AttSpecList * UniChar.Data * bool type WhiteInfo = UniChar.Vector type CDataInfo = StartEnd * UniChar.Vector type DataInfo = StartEnd * UniChar.Vector * bool type CharRefInfo = StartEnd * UniChar.Char * UniChar.Vector type GenRefInfo = StartEnd * int * Base.GenEntity * bool type ParRefInfo = StartEnd * int * Base.ParEntity * bool type EntEndInfo = Errors.Position datatype MarkupDecl = DEC_ATTLIST of int * (int * Base.AttType * Base.AttDefault) list * bool | DEC_ELEMENT of int * Base.ContentSpec * bool | DEC_GEN_ENT of int * Base.GenEntity * bool | DEC_PAR_ENT of int * Base.ParEntity * bool | DEC_NOTATION of int * Base.ExternalId * bool type DeclInfo = StartEnd * MarkupDecl fun isExtDecl decl = case decl of DEC_ATTLIST(_,_,ext) => ext | DEC_ELEMENT(_,_,ext) => ext | DEC_GEN_ENT(_,_,ext) => ext | DEC_PAR_ENT(_,_,ext) => ext | DEC_NOTATION(_,_,ext) => ext end (* stop of ../../Parser/Params/hookData.sml *) (* start of ../../Parser/Params/hooks.sml *) signature Hooks = sig type AppData type AppFinal val hookXml : AppData * HookData.XmlInfo -> AppData val hookFinish : AppData -> AppFinal val hookError : AppData * HookData.ErrorInfo -> AppData val hookWarning : AppData * HookData.WarningInfo -> AppData val hookProcInst : AppData * HookData.ProcInstInfo -> AppData val hookComment : AppData * HookData.CommentInfo -> AppData val hookWhite : AppData * HookData.WhiteInfo -> AppData val hookDecl : AppData * HookData.DeclInfo -> AppData val hookStartTag : AppData * HookData.StartTagInfo -> AppData val hookEndTag : AppData * HookData.EndTagInfo -> AppData val hookCData : AppData * HookData.CDataInfo -> AppData val hookData : AppData * HookData.DataInfo -> AppData val hookCharRef : AppData * HookData.CharRefInfo -> AppData val hookGenRef : AppData * HookData.GenRefInfo -> AppData val hookParRef : AppData * HookData.ParRefInfo -> AppData val hookEntEnd : AppData * HookData.EntEndInfo -> AppData val hookDocType : AppData * HookData.DtdInfo -> AppData val hookSubset : AppData * HookData.SubsetInfo -> AppData val hookExtSubset : AppData * HookData.ExtSubsetInfo -> AppData val hookEndDtd : AppData * HookData.EndDtdInfo -> AppData end (* stop of ../../Parser/Params/hooks.sml *) (* start of ../../Parser/Params/resolve.sml *) signature Resolve = sig val resolveExtId : Base.ExternalId -> Uri.Uri end structure ResolveNull : Resolve = struct open Base Errors Uri fun resolveExtId (EXTID(_,sys)) = case sys of NONE => raise NoSuchFile ("","Could not generate system identifier") | SOME (base,file,_) => uriJoin(base,file) end (* stop of ../../Parser/Params/resolve.sml *) (* start of ../../Parser/Dfa/dfaUtil.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaUtil *) (* *) (* Depends on: *) (* DfaData *) (* UtilInt *) (* *) (* Exceptions raised by functions in this structure: *) (* boundsFollow : none *) (* cmSymbols : none *) (* makeRow : none *) (* mergeFirst : ConflictFirst *) (* mergeFollow : ConflictFollow *) (*--------------------------------------------------------------------------*) signature DfaUtil = sig val mergeFirst : bool -> DfaBase.First * DfaBase.First -> DfaBase.First val mergeFollow : bool -> DfaBase.Follow * DfaBase.Follow -> DfaBase.Follow val boundsFollow : DfaBase.Follow -> DfaBase.Sigma * DfaBase.Sigma val cmSymbols : DfaBase.ContentModel -> DfaBase.Sigma list val makeRow : DfaBase.Follow * bool -> DfaBase.Row end structure DfaUtil : DfaUtil = struct open UtilInt DfaBase (*--------------------------------------------------------------------*) (* merge two First sets, raise ConflictFirst at conflict: there may *) (* not be two entries (q1,a) and (q2,a) in the same First set, if *) (* nondet is false. *) (*--------------------------------------------------------------------*) fun mergeFirst nondet ll = let fun go_det (nil,l) = l | go_det (l,nil) = l | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = case Int.compare(a1,a2) of LESS => x1::go_det(r1,l2) | GREATER => x2::go_det(l1,r2) | EQUAL => raise ConflictFirst(a1,q1,q2) fun go_nondet (nil,l) = l | go_nondet (l,nil) = l | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = case Int.compare(a1,a2) of LESS => x1::go_nondet(r1,l2) | GREATER => x2::go_nondet(l1,r2) | EQUAL => case Int.compare(q1,q2) of LESS => x1::go_nondet(r1,l2) | GREATER => x2::go_nondet(l1,r2) | EQUAL => go_nondet(l1,r2) in if nondet then go_nondet ll else go_det ll end (*--------------------------------------------------------------------*) (* merge two Follow sets, raise ConflictFollow at conflict. there may *) (* not be two entries (q1,a) and (q2,a) with q1<>q2 in the same Follow*) (* set, if nondet is false. Note that, e.g. for (a+)+, Follow(a) = *) (* Follow(a+) U First(a+), so duplicate occurrences of the same (q,a) *) (* are possible (as opposed to First). *) (*--------------------------------------------------------------------*) fun mergeFollow nondet ll = let fun go_det (nil,l) = l | go_det (l,nil) = l | go_det (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = case Int.compare(a1,a2) of LESS => x1::go_det(r1,l2) | GREATER => x2::go_det(l1,r2) | EQUAL => if q1=q2 then go_det(l1,r2) else raise ConflictFollow(a1,q1,q2) fun go_nondet (nil,l) = l | go_nondet (l,nil) = l | go_nondet (l1 as (x1 as (q1,a1))::r1,l2 as (x2 as (q2,a2))::r2) = case Int.compare(a1,a2) of LESS => x1::go_nondet(r1,l2) | GREATER => x2::go_nondet(l1,r2) | EQUAL => case Int.compare(q1,q2) of LESS => x1::go_nondet(r1,l2) | GREATER => x2::go_nondet(l1,r2) | EQUAL => go_nondet(l1,r2) in if nondet then go_nondet ll else go_det ll end (*--------------------------------------------------------------------*) (* what are the least and largest symbol occurring in a Follow set? *) (*--------------------------------------------------------------------*) fun boundsFollow (nil:Follow) = (1,0) | boundsFollow [(q,a)] = (a,a) | boundsFollow ((q,a)::xs) = (a,#2(List.last xs)) (*--------------------------------------------------------------------*) (* return the list of all symbols occurring in a content model. *) (*--------------------------------------------------------------------*) fun cmSymbols cm = let fun do_cm(cm,yet) = case cm of CM_ELEM a => insertInt(a,yet) | CM_OPT cm => do_cm(cm,yet) | CM_REP cm => do_cm(cm,yet) | CM_PLUS cm => do_cm(cm,yet) | CM_ALT cms => foldr do_cm yet cms | CM_SEQ cms => foldr do_cm yet cms in do_cm(cm,nil) end (*--------------------------------------------------------------------*) (* given the follow set and the final flag, make a row in the dfa. *) (*--------------------------------------------------------------------*) fun makeRow (flw,fin) = let val (lo,hi) = boundsFollow flw val tab = Array.array(hi-lo+1,dfaError) val _ = app (fn (q,a) => Array.update (tab,a-lo,q)) flw in (lo,hi,Array.extract (tab,0,NONE),fin) end end (* stop of ../../Parser/Dfa/dfaUtil.sml *) (* start of ../../Util/intSets.sml *) signature IntSets = sig eqtype IntSet val emptyIntSet : IntSet val singleIntSet : int -> IntSet val fullIntSet : int -> IntSet val isEmptyIntSet : IntSet -> bool val inIntSet : int * IntSet -> bool val compareIntSets: IntSet * IntSet -> order val hashIntSet : IntSet -> word val addIntSet : int * IntSet -> IntSet val delIntSet : int * IntSet -> IntSet val cupIntSets : IntSet * IntSet -> IntSet val capIntSets : IntSet * IntSet -> IntSet val diffIntSets : IntSet * IntSet -> IntSet val IntSet2List : IntSet -> int list val IntList2Set : int list -> IntSet end structure IntSets : IntSets = struct structure W = Word32 val wordSize = W.wordSize type IntSet = W.word vector infix 7 << >> infix 6 && infix 5 || val op >> = W.>> val op << = W.<< val op && = W.andb val op || = W.orb val !! = W.notb fun normalize (vec:IntSet) = let val max = Vector.foldli (fn (i,w,max) => if w=0wx0 then i else max) 0 (vec,0,NONE) in Vector.extract (vec,0,SOME max) end val emptyIntSet = Vector.fromList nil : IntSet fun fullIntSet n = let val size = (n+wordSize-1) div wordSize val full = 0w0-0w1:W.word val bits = (n-1) mod wordSize+1 val last = full >> (Word.fromInt (wordSize-bits)) in Vector.tabulate(n div wordSize+1, fn i => if i if i=idx then mask else 0w0):IntSet end fun isEmptyIntSet vec = Vector.length vec=0 fun inIntSet(n,vec) = let val idx = n div wordSize in if idx>=Vector.length vec then false else let val mask = 0w1 << (Word.fromInt (n mod wordSize)) in Vector.sub(vec,idx) && mask <> 0w0 end end fun addIntSet(n,vec) = let val idx = n div wordSize val mask = 0w1 << (Word.fromInt (n mod wordSize)) val size = Vector.length vec in if size>idx then Vector.mapi (fn (i,x) => if i=idx then x||mask else x) (vec,0,NONE) else Vector.tabulate (idx+1,fn i => if i if i=idx then x && mask else x) (vec,0,NONE) end in normalize vec1 end fun capIntSets(vec1,vec2) = let val l12 = Int.min(Vector.length vec1,Vector.length vec2) val v12 = Vector.tabulate(l12,fn i => Vector.sub(vec1,i) && Vector.sub(vec2,i)) in normalize v12 end fun cupIntSets(vec1,vec2) = let val (l1,l2) = (Vector.length vec1,Vector.length vec2) val (shorter,longer,v) = if l1<=l2 then (l1,l2,vec2) else (l2,l1,vec1) in Vector.tabulate (longer,fn i => if i>=shorter then Vector.sub(v,i) else Vector.sub(vec1,i) || Vector.sub(vec2,i)) end fun diffIntSets(vec1,vec2) = let val (l1,l2) = (Vector.length vec1,Vector.length vec2) val vec1 = Vector.tabulate (l1,fn i => if i>=l2 then Vector.sub(vec1,i) else Vector.sub(vec1,i) && !!(Vector.sub(vec2,i))) in normalize vec1 end fun IntList2Set l = List.foldl addIntSet emptyIntSet l fun IntSet2List vec = let val size = Vector.length vec fun doOne (w,off,yet) = let fun doit (i,mask) = if i=wordSize then yet else if w&&mask=0w0 then doit(i+1,mask<<0wx1) else (off+i)::doit(i+1,mask<<0wx1) in doit(0,0wx1) end fun doAll i = if i>=size then nil else doOne(Vector.sub(vec,i),wordSize*i,(doAll (i+1))) in doAll 0 end fun compareIntSets (vec1,vec2:IntSet) = let val (l1,l2) = (Vector.length vec1,Vector.length vec2) val (l12,ifEq) = case Int.compare(l1,l2) of LESS => (l1,LESS) | order => (l2,order) fun doit i = if i>=l12 then ifEq else case W.compare(Vector.sub(vec1,i),Vector.sub(vec2,i)) of EQUAL => doit (i+1) | order => order in doit 0 end val intShift = case Int.precision of NONE => 0w0 | SOME x => Word.fromInt(Int.max(wordSize-x+1,0)) fun hashIntSet vec = case Vector.length vec of 0 => 0w0 | 1 => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0),intShift))) | l => Word.fromInt(W.toInt(W.>>(Vector.sub(vec,0)+Vector.sub(vec,l-1),intShift))) end (* stop of ../../Util/intSets.sml *) (* start of ../../Util/SymDict/intSetDict.sml *) structure KeyIntSet : Key = struct open IntSets UtilString type Key = IntSet val null = emptyIntSet val hash = hashIntSet val compare = compareIntSets val toString = (List2xString ("{",",","}") Int2String) o IntSet2List end structure IntSetDict = Dict (structure Key = KeyIntSet) structure IntSetSymTab = SymTable (structure Key = KeyIntSet) (* stop of ../../Util/SymDict/intSetDict.sml *) (* start of ../../Parser/Dfa/dfaPassThree.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaPassThree *) (* *) (* Depends on: *) (* DfaData *) (* DfaUtil *) (* IntSets *) (* IntSetDict *) (* ParseOptions *) (* *) (* Exceptions raised by functions in this structure: *) (* passThree : TooLarge *) (*--------------------------------------------------------------------------*) signature DfaPassThree = sig val passThree: bool -> (DfaBase.Follow * bool) vector -> DfaBase.Dfa end functor DfaPassThree (structure DfaOptions : DfaOptions) : DfaPassThree = struct open IntSets IntSetDict DfaBase DfaOptions DfaUtil (*--------------------------------------------------------------------*) (* do the subset construction. *) (*--------------------------------------------------------------------*) (* given an automaton (Q,q0,F,delta), the subset automaton is *) (* (Q',q0',F',delta') with: *) (* - Q' = 2^Q *) (* - q0'= {q0} *) (* - F' = {S | S cap F <> empty} *) (* - delta'(S,a) = {p | (q,a,p) in delta, q in S} *) (*--------------------------------------------------------------------*) fun makeDet tab = let (* the new start state is the singleton of the old start state *) val sNull = singleIntSet 0 (* create a dictionary for the subsets, make sNull get index 0 *) val tau = makeDict("",!O_DFA_INITIAL_WIDTH,(nil:Follow,false)) val pInitial = getIndex(tau,sNull) (* enter a new set state. raise DfaTooLarge if the new state *) (* would have a too large index *) fun makeState s = let val (max,i) = (!O_DFA_MAX_STATES,getIndex(tau,s)) in if max>i then i else raise DfaTooLarge max end (* compute the follow set for a set state from the follow sets *) (* of its members *) fun makeFollow NONE nil = nil | makeFollow (SOME(s,a)) nil = [(makeState s,a)] | makeFollow NONE ((q,a)::qas) = makeFollow (SOME(singleIntSet q,a)) qas | makeFollow (SOME(s,a)) ((q,b)::qas) = if a=b then makeFollow (SOME(addIntSet(q,s),a)) qas else (makeState s,a)::makeFollow (SOME(singleIntSet q,b)) qas (* continue until all entries in the state dictionary are done -*) fun doit i = if i>=usedIndices tau then i else let val sI = getKey(tau,i) val lI = IntSet2List sI val ffs = map (fn j => Vector.sub(tab,j)) lI val (followJs,finI) = foldl (fn ((flwJ,finJ),(flw,fin)) => (mergeFollow true (flwJ,flw), finJ orelse fin)) (nil,false) ffs val followI = makeFollow NONE followJs val _ = setByIndex(tau,i,(followI,finI)) in doit (i+1) end val size = doit 0 in (* finally create a vector holding the new follow/fin pairs *) Vector.tabulate (size,fn i => getByIndex(tau,i)) end (*--------------------------------------------------------------------*) (* given a vector of Follow and boolean final condition, make a dfa *) (* out of it. if the first arg is true, then the content model was *) (* ambiguous; in this case we must first apply a subset construction *) (* in order to obtain a deterministic finite machine. *) (*--------------------------------------------------------------------*) fun passThree nondet tab = let val det = if nondet then makeDet tab else tab in Vector.map makeRow det end end (* stop of ../../Parser/Dfa/dfaPassThree.sml *) (* start of ../../Parser/Dfa/dfaError.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaError *) (* *) (* Note: *) (* The function in this structure is for producing good error messages *) (* for ambiguous content models. It numbers the nodes of a cm exactly *) (* like passOne does, but counts the occurrences of symbol a in order to *) (* indicate which are in conflict. It is only executed in case of error. *) (* *) (* Depends on: *) (* DfaData *) (* *) (* Exceptions raised by functions in this structure: *) (* countOccs : none *) (*--------------------------------------------------------------------------*) signature DfaError = sig val countOccs : DfaBase.Sigma * DfaBase.State * DfaBase.State -> DfaBase.ContentModel -> DfaBase.Sigma * int * int end structure DfaError : DfaError = struct open DfaBase fun countOccs (a,q1,q2) cm = let val (q1,q2) = if q1>q2 then (q2,q1) else (q1,q2) fun next a nil = (1,[(a,2)]) | next a ((b,n)::rest) = if a=b then (n,(b,n+1)::rest) else if a insert a yet | CM_OPT cmi => doit (cmi,yet) | CM_REP cmi => doit (cmi,yet) | CM_PLUS cmi => doit (cmi,yet) | CM_ALT cmis => foldl doit yet cmis | CM_SEQ cmis => foldl doit yet cmis val (_,_,n1,n2) = doit (cm,(1,nil,0,0)) in (a,n1,n2) end end (* stop of ../../Parser/Dfa/dfaError.sml *) (* start of ../../Parser/Dfa/dfaPassOne.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaPassOne *) (* *) (* Depends on: *) (* DfaData *) (* DfaUtil *) (* *) (* Exceptions raised by functions in this structure: *) (* passOne : ConflictFirst *) (*--------------------------------------------------------------------------*) signature DfaPassOne = sig val passOne : bool -> DfaBase.ContentModel -> DfaBase.CM end structure DfaPassOne : DfaPassOne = struct open DfaBase DfaUtil (*--------------------------------------------------------------------*) (* Given a content model, number the leafs, compute Empty and First *) (* for each node, and construct a corresponding CM annotated with *) (* these informations. *) (* *) (* Numbering: *) (* The leafs are numbered in left-to-right, depth-first order, *) (* starting with 1 (0 will be the start state of the DFA). *) (* *) (* Empty a = false *) (* Empty e? = Empty e* = true *) (* Empty e+ = Empty e *) (* Empty e1|...|eN = Empty e1 \/ ... \/ Empty eN *) (* Empty e1,...,eN = Empty e1 /\ ... /\ Empty eN *) (* *) (* First a = {q,a}, where q is the number of this leaf. *) (* First e? = First e* = First e+ = First e *) (* First e1|...|eN = First e1 ++ ... ++ First eN *) (* First e1,...,eN = ++{First eI | Empty eJ=False forall ja1 forall (q1,a1) in F1, (q1,a1) in F1} *) (* error, if exist (q1,a) in F1, (q2,a) in F2 *) (* then raise ConflictFirst(a,q1,q2) *) (*--------------------------------------------------------------------*) fun passOne nondet cm = let fun und(a,b) = a andalso b fun oder(a,b) = a orelse b fun op_fst_seq (fst,fsts,mt) = if mt then mergeFirst nondet (fst,fsts) else fst fun op_fst_or (fst,fsts,_) = mergeFirst nondet (fst,fsts) fun do_cm cm q = case cm of CM_ELEM a => (ELEM a,(q+1,false,[(q+1,a)])) | CM_OPT cm => let val cmi as (_,(q1,_,fst)) = do_cm cm q in (OPT cmi,(q1,true,fst)) end | CM_REP cm => let val cmi as (_,(q1,_,fst)) = do_cm cm q in (REP cmi,(q1,true,fst)) end | CM_PLUS cm => let val cmi as (_,info1) = do_cm cm q in (PLUS cmi,info1) end | CM_ALT cms => do_cms (ALT,false,oder,op_fst_or) cms q | CM_SEQ cms => do_cms (SEQ,true,und,op_fst_seq) cms q and do_cms(con,null_mt,op_mt,op_fst) cms q = let fun doit [] q = ([],(q,null_mt,[])) | doit (cm::cms) q = let val cmi as (_,(q1,mt1,fst1)) = do_cm cm q val (cmis,(q2,mt2,fst2)) = doit cms q1 in (cmi::cmis,(q2,op_mt(mt1,mt2),op_fst(fst1,fst2,mt1))) end val (cmis,info1) = doit cms q in (con cmis,info1) end in do_cm cm 0 end end (* stop of ../../Parser/Dfa/dfaPassOne.sml *) (* start of ../../Parser/Dfa/dfaPassTwo.sml *) (*--------------------------------------------------------------------------*) (* Structure: DfaPassTwo *) (* *) (* Depends on: *) (* DfaData *) (* DfaUtil *) (* *) (* Exceptions raised by functions in this structure: *) (* passTwo : ConflictFollow *) (*--------------------------------------------------------------------------*) signature DfaPassTwo = sig val passTwo: bool -> DfaBase.CM -> (DfaBase.Follow * bool) vector end structure DfaPassTwo : DfaPassTwo = struct open DfaBase DfaUtil (*--------------------------------------------------------------------*) (* Given a CM annotated with leaf numbers (states), Empty and First, *) (* compute Follow and Fin foreach node, and generate the transition *) (* row if node is a leaf. Follow and Fin are computed top-down: *) (* *) (* (Top-Level): *) (* Follow e = {}, Fin e = true *) (* *) (* (e=e1?): *) (* Follow e1 = Follow e, Fin e1 = Fin e *) (* *) (* (e=e1*, e=e1+) *) (* Follow e1 = Follow e1 ++ First e1, Fin e1 = Fin e *) (* *) (* (e=e1|...|eN) = *) (* Follow eI = Follow e, Fin eI = Fin e for i=0...n *) (* *) (* (e=e1,...,eN) = *) (* Follow eN = Follow e, Fin eN = Fin e *) (* Follow eI = First eI+1, if Empty eI+1 = false, ia1 forall (q1,a1) in F1, (q1,a1) in F1} *) (* error, if exist (q1,a) in F1, (q2,a) in F2 *) (* then raise ConflictFirst(a,q1,q2) *) (*--------------------------------------------------------------------*) fun passTwo nondet (cmi as (_,(n,mt,fst))) = let val table = Array.array(n+1,(nil,false)) val _ = Array.update(table,0,(fst,mt)) fun do_cm (ff as (flw,fin)) (cm,(q,mt,fst)) = case cm of ELEM a => Array.update(table,q,ff) | OPT cmi => do_cm ff cmi | REP cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi | PLUS cmi => do_cm (mergeFollow nondet (fst,flw),fin) cmi | ALT cmis => app (do_cm ff) cmis | SEQ cmis => ignore (do_seq ff cmis) and do_seq ff cmis = foldr (fn (cmi as (_,(_,mt,fst)),ff as (flw,fin)) => (do_cm ff cmi; if mt then (mergeFollow nondet (fst,flw),fin) else (fst,false))) ff cmis val _ = do_cm (nil,true) cmi in Array.extract (table,0,NONE) end end (* stop of ../../Parser/Dfa/dfaPassTwo.sml *) (* start of ../../Parser/Dfa/dfa.sml *) (*--------------------------------------------------------------------------*) (* Structure: Dfa *) (* *) (* Depends on: *) (* DfaData *) (* DfaError *) (* DfaPassOne *) (* DfaPassTwo *) (* DfaString *) (* DfaUtil *) (* *) (* Exceptions raised by functions in this structure: *) (* ContentModel2String : none *) (* dfaFinal : none *) (* dfaTrans : none *) (* makeAmbiguous : DfaTooLarge *) (* makeChoiceDfa : none *) (* makeDfa : Ambiguous *) (* Dfa2String : none *) (*--------------------------------------------------------------------------*) signature Dfa = sig eqtype DfaState val dfaError : DfaState val dfaInitial : DfaState exception DfaTooLarge of int exception Ambiguous of int * int * int val emptyDfa : DfaData.Dfa val makeDfa : DfaData.ContentModel -> DfaData.Dfa val makeAmbiguous : DfaData.ContentModel -> DfaData.Dfa val makeChoiceDfa : DfaData.ContentModel -> DfaData.Dfa val dfaFinal : DfaData.Dfa * DfaState -> bool val dfaTrans : DfaData.Dfa * DfaState * int -> DfaState end functor Dfa (structure DfaOptions : DfaOptions) : Dfa = struct structure DfaPassThree = DfaPassThree (structure DfaOptions = DfaOptions) open DfaBase DfaError DfaPassOne DfaPassTwo DfaString DfaUtil type DfaState = State (*--------------------------------------------------------------------*) (* Create a dfa for the content model (a1|...|aN)*, where a1,...,aN *) (* are the symbols occurring in the input dfa. *) (*--------------------------------------------------------------------*) fun makeChoiceDfa cm = let val syms = cmSymbols cm val flw = map (fn a => (dfaInitial,a)) syms in Vector.fromList [makeRow(flw,true)] end (*--------------------------------------------------------------------*) (* create a dfa for an ambiguous content model. Raise DfaTooLarge if *) (* the subset construction yields too many states. *) (*--------------------------------------------------------------------*) fun makeAmbiguous cm = let val cmi = DfaPassOne.passOne true cm val tab = DfaPassTwo.passTwo true cmi val dfa = DfaPassThree.passThree true tab in dfa end (*--------------------------------------------------------------------*) (* generate a dfa for a content model. Raise Ambiguous if the content *) (* model is ambiguous. *) (*--------------------------------------------------------------------*) fun makeDfa cm = let val cmi = DfaPassOne.passOne false cm val tab = DfaPassTwo.passTwo false cmi val dfa = DfaPassThree.passThree false tab in dfa end handle ConflictFirst aqq => raise Ambiguous (countOccs aqq cm) | ConflictFollow aqq => raise Ambiguous (countOccs aqq cm) (*--------------------------------------------------------------------*) (* make one transitions in the dfa. *) (*--------------------------------------------------------------------*) fun dfaTrans(tab,q,a) = if q<0 then dfaDontCare else let val (lo,hi,tab,_) = Vector.sub(tab,q) in if a>=lo andalso a<=hi then Vector.sub(tab,a-lo) else dfaError end (*--------------------------------------------------------------------*) (* check whether a dfa's state is an accepting state. *) (*--------------------------------------------------------------------*) fun dfaFinal (tab,q) = q<0 orelse #4(Vector.sub(tab,q):Row) end (* stop of ../../Parser/Dfa/dfa.sml *) (* start of ../../Parser/entities.sml *) (*--------------------------------------------------------------------------*) (* Structure: Entities *) (* *) (* Exceptions raised by functions in this structure: *) (* closeAll : none *) (* getChar : none *) (* getEntId : none *) (* getPos : none *) (* inInternalSubset : none *) (* isOpenEntity : none *) (* isSpecialEnd : none *) (* Position2String : none *) (* pushDummy : none *) (* pushExtern : NoSuchFile *) (* pushIntern : none *) (* pushSpecial : NoSuchFile *) (* statePos : none *) (*--------------------------------------------------------------------------*) (* This module maintains the entity stack. For each open entity it holds a *) (* buffer to read characters from. When the buffer is exceeded, it gets re- *) (* filled with new characters, depending on the entity's encoding. *) (* *) (* End-of-line handling as specified in 2.11 is performed: *) (* *) (* ... To simplify the tasks of applications, wherever an external parsed *) (* entity or the literal entity value of an internal parsed entity *) (* contains either the literal two-character sequence "#xD#xA" or a *) (* standalone literal #xD, an XML processor must pass to the application *) (* the single character #xA. *) (* (This behavior can conveniently be produced by normalizing all line *) (* breaks to #xA on input, before parsing.) *) (* *) (* It also checks for illegal characters, cf. 2.2: *) (* *) (* [2] Char ::= #x9 | #xA | #xD /* any Unicode character, *) (* | [#x20-#xD7FF] excluding the surrogate *) (* | [#xE000-#xFFFD] blocks, FFFE, and FFFF. */ *) (* | [#x10000-#x10FFFF] *) (* *) (* More precisely, it assumes that all decoded characters are valid Unicode *) (* characters. It thus only checks for control characters other than #x9, *) (* #xA or #xD. *) (*--------------------------------------------------------------------------*) signature Entities = sig include Hooks type State eqtype EntId datatype Special = DOC_ENTITY | EXT_SUBSET exception CantOpenFile of (string * string) * AppData val pushIntern : State * int * bool * UniChar.Vector -> State val pushExtern : State * int * bool * Uri.Uri -> State * Encoding.Encoding val pushSpecial : Special * Uri.Uri option -> State * Encoding.Encoding val closeAll : State -> unit val commitAuto : AppData * State -> AppData * State val changeAuto : AppData * State * string -> AppData * State * Encoding.Encoding val getEntId : State -> EntId val getPos : State -> Errors.Position val getUri : State -> Uri.Uri val getChar : AppData * State -> UniChar.Char * AppData * State val ungetChars : State * UniChar.Data -> State val isOpen : int * bool * State -> bool val isSpecial : State -> bool val inDocEntity : State -> bool end functor Entities (structure Hooks : Hooks) : Entities = struct open UniChar Decode Decode.Error Errors Hooks Uri UtilError val THIS_MODULE = "Entities" val BUFSIZE = 1024 type CharBuffer = UniChar.Char array (*--------------------------------------------------------------------*) (* A special entity can not be popped from the stack by getChar, so *) (* it must be popped explicitly. This is for the document entity and *) (* the external subset. *) (*--------------------------------------------------------------------*) datatype Special = DOC_ENTITY | EXT_SUBSET (*--------------------------------------------------------------------*) (* In order to distinguish a general entity from a paramter entity, *) (* entity idxs are marked with this datatype. *) (*--------------------------------------------------------------------*) datatype EntId = GENERAL of int | PARAMETER of int (*--------------------------------------------------------------------*) (* Make an EntId from the entity's index. *) (*--------------------------------------------------------------------*) fun makeEntId(idx,isParam) = if isParam then PARAMETER idx else GENERAL idx (*--------------------------------------------------------------------*) (* A non-empty stack is: *) (* *) (* an internal entity INT(buf,size,idx,(id,other)): *) (* - (vec,idx,size) is a buffer,current index and its size; *) (* - id is the index of the entity's name in the entity table. *) (* - other contains the underlying entities (the rest of the stack). *) (* The components are nested according to access frequency. *) (* *) (* an external entity has three forms: *) (* EXT2(buf,size,idx,line,col,break,(dec,err,typ)) *) (* - (buf,size,idx) is a buffer, its size and current index; *) (* - (line,col) are the line and column; *) (* - break is a boolean indicating whether the last character was a *) (* carriage return (0xD) (then a succeeding line feed (0xA) must be *) (* supressed); *) (* - err is an option: if it is SOME(f,ee,err) then it indicates that *) (* the array was finished by a decoding error err, with the basic *) (* file f; f was at end of file if ee is true. Otherwise there was *) (* no error when loading the array. *) (* - dec describies the encoding of the entity and thus, how more *) (* data can be loaded; *) (* - typ is either of the form SPECIAL spec indicating a special *) (* entity; then this is the only entity on the stack. Otherwise it *) (* is NORMAL(id,other) for a normal external entity, with: *) (* + id is the index of the entity's name in the DTD; *) (* + other is the underlying stack. *) (* The components are nested according to access frequency. *) (* *) (* The second form of an external entity is *) (* EXT1(dec,line,col,break,typ). This is an unbuffered *) (* entity whose encoding declaration is being read. We may not load *) (* an array of characters as a whole because the encoding might still *) (* change. The components have the same meaning as for EXT2. *) (* *) (* A closed entity remains on the stack until the next getChar, for *) (* purposes of error printing. A closed external entity has the form *) (* CLOSED(dec,l,col,typ); components have the same meaning *) (* as for open external entities. A closed internal entity has the *) (* form ENDED(id,other) with components as above. *) (* *) (* Sometimes (for parsing xml/decl declarations) we need a lookahead. *) (* LOOKING(cs,q) is a state remembering all chars cs looked ahead up *) (* to state q, in reverse order. LOOKED(cs,q) is an undone lookahead, *) (* the looked-ahead chars now in the right order. *) (*--------------------------------------------------------------------*) datatype ExtType = SPECIAL of Special | NORMAL of EntId * State and State = LOOKED of Data * State | ENDED of EntId * State | CLOSED of DecFile * int * int * ExtType | INT of Vector * int * int * (EntId * State) | EXT1 of DecFile * int * int * bool * ExtType | EXT2 of CharBuffer * int * int * int * int * bool * (DecFile * DecodeError option * ExtType) exception CantOpenFile of (string * string) * AppData (*--------------------------------------------------------------------*) (* Extract the unique number from a state. *) (*--------------------------------------------------------------------*) fun getExtEntId extType = case extType of SPECIAL DOC_ENTITY => GENERAL 0 | SPECIAL EXT_SUBSET => PARAMETER 0 | NORMAL(id,_) => id fun getEntId q = case q of LOOKED (_,q) => getEntId q | ENDED(id,_) => id | CLOSED(_,_,_,extType) => getExtEntId extType | INT(_,_,_,(id,_)) => id | EXT1(_,_,_,_,extType) => getExtEntId extType | EXT2(_,_,_,_,_,_,(_,_,extType)) => getExtEntId extType (*--------------------------------------------------------------------*) (* Find the nearest enclosing external entity, and return its *) (* filename, line and column number. *) (*--------------------------------------------------------------------*) fun getPos q = case q of ENDED(_,other) => getPos other | INT(_,_,_,(_,other)) => getPos other | CLOSED(dec,l,col,_) => (decName dec,l,col) | EXT1(dec,l,col,_,_) => (decName dec,l,col) | EXT2(_,_,_,l,col,_,(dec,_,_)) => (decName dec,l,col) | LOOKED (cs,q) => let val (f,l,c) = getPos q val k = length cs in if c>=k then (f,l,c-k) else (f,l,0) end (*--------------------------------------------------------------------*) (* get the path of the nearest enclosing external entity. *) (*--------------------------------------------------------------------*) fun getUri q = case q of LOOKED (_,q) => getUri q | ENDED(_,other) => getUri other | INT(_,_,_,(_,other)) => getUri other | CLOSED(dec,l,col,_) => decUri dec | EXT1(dec,l,col,_,_) => decUri dec | EXT2(_,_,_,l,col,_,(dec,_,_)) => decUri dec (*--------------------------------------------------------------------*) (* close all files, return nothing. *) (*--------------------------------------------------------------------*) fun closeAll q = case q of LOOKED(_,other) => closeAll other | ENDED(_,other) => closeAll other | CLOSED(_,_,_,SPECIAL _) => () | CLOSED(_,_,_,NORMAL(_,other)) => closeAll other | INT(_,_,_,(_,other)) => closeAll other | EXT1(dec,_,_,_,SPECIAL _) => ignore(decClose dec) | EXT1(dec,_,_,_,NORMAL(_,other)) => (decClose dec; closeAll other) | EXT2(_,_,_,_,_,_,(dec,_,SPECIAL _)) => ignore(decClose dec) | EXT2(_,_,_,_,_,_,(dec,_,NORMAL(_,other))) => (decClose dec; closeAll other) (*--------------------------------------------------------------------*) (* is this entity already on the stack? *) (*--------------------------------------------------------------------*) fun isOpen (idx,isParam,q) = let val id = makeEntId(idx,isParam) fun doit q = case q of LOOKED (_,other) => doit other | ENDED(id',other) => id=id' orelse doit other | CLOSED(_,_,_,SPECIAL _) => false | CLOSED(_,_,_,NORMAL(id',other)) => id=id' orelse doit other | INT(_,_,_,(id',other)) => id=id' orelse doit other | EXT1(_,_,_,_,SPECIAL _) => false | EXT1(_,_,_,_,NORMAL(id',other)) => id=id' orelse doit other | EXT2(_,_,_,_,_,_,(_,_,SPECIAL _)) => false | EXT2(_,_,_,_,_,_,(_,_,NORMAL(id',other))) => id=id' orelse doit other in doit q end (*--------------------------------------------------------------------*) (* are we in the internal subset, i.e., in the document entity? *) (* The internal subset can only be in the document entity, since no *) (* parameter entities are declared prior to it. The document entity *) (* is then the only entity on the stack. *) (*--------------------------------------------------------------------*) fun inDocEntity q = case q of LOOKED (_,q) => inDocEntity q | ENDED(_,other) => inDocEntity other | INT(_,_,_,(_,other)) => inDocEntity other | CLOSED(_,_,_,NORMAL _) => false | CLOSED(_,_,_,SPECIAL what) => what=DOC_ENTITY | EXT1(_,_,_,_,NORMAL _) => false | EXT1(_,_,_,_,SPECIAL what) => what=DOC_ENTITY | EXT2(_,_,_,_,_,_,(_,_,NORMAL _)) => false | EXT2(_,_,_,_,_,_,(_,_,SPECIAL what)) => what=DOC_ENTITY (*--------------------------------------------------------------------*) (* is this state the document end, i.e., are all entities closed? *) (*--------------------------------------------------------------------*) fun isSpecial q = case q of LOOKED (_,q) => isSpecial q | CLOSED(_,_,_,SPECIAL _) => true | EXT1(_,_,_,_,SPECIAL _) => true | EXT2(_,_,_,_,_,_,(_,_,SPECIAL _)) => true | _ => false (*--------------------------------------------------------------------*) (* Initialize and load a new buffer when opening an external entity. *) (*--------------------------------------------------------------------*) fun initArray dec = let val arr = Array.array(BUFSIZE,0wx0) val (n,dec1,err) = decGetArray dec arr in (arr,n,dec1,err) end (*--------------------------------------------------------------------*) (* Open an external/internal entity. *) (*--------------------------------------------------------------------*) fun pushIntern(q,id,isParam,vec) = INT(vec,Vector.length vec,0,(makeEntId(id,isParam),q)) fun pushExtern(q,id,isParam,uri) = let val dec = decOpenXml (SOME uri) val auto = decEncoding dec val q1 = EXT1(dec,1,0,false,NORMAL(makeEntId(id,isParam),q)) in (q1,auto) end fun pushSpecial(what,uri) = let val dec = decOpenXml uri val auto = decEncoding dec val q = EXT1(dec,1,0,false,SPECIAL what) in (q,auto) end (*--------------------------------------------------------------------*) (* confirm the autodetected encoding of an external entity. *) (*--------------------------------------------------------------------*) fun commitAuto(a,q) = case q of EXT1(dec,l,col,brk,typ) => let val a1 = a before decCommit dec handle DecError(_,_,err) => hookError(a,(getPos q,ERR_DECODE_ERROR err)) val (arr,n,dec1,err) = initArray dec in (a1,EXT2(arr,n,0,l,col,brk,(dec1,err,typ))) end (* in (a1,EXT1(dec,l,col,brk,typ)) end *) | LOOKED(cs,q1) => let val (a1,q2) = commitAuto (a,q1) in (a1,LOOKED(cs,q2)) end | CLOSED _ => (a,q) | _ => raise InternalError(THIS_MODULE,"commitAuto", "entity is neither EXT1 nor CLOSED nor LOOKED") (*--------------------------------------------------------------------*) (* change from the autodetected encoding to the declared one. *) (*--------------------------------------------------------------------*) fun changeAuto (a,q,decl) = case q of EXT1(dec,l,col,brk,typ) => let val dec1 = decSwitch(dec,decl) handle DecError(dec,_,err) => let val a1 = hookError(a,(getPos q,ERR_DECODE_ERROR err)) val _ = decClose dec val uri = decName dec val msg = case err of ERR_UNSUPPORTED_ENC _ => "Unsupported encoding" | _ => "Declared encoding incompatible" ^"with auto-detected encoding" in raise CantOpenFile ((uri,msg),a1) end val newEnc = decEncoding dec1 val (arr,n,dec2,err) = initArray dec1 in (a,EXT2(arr,n,0,l,col,brk,(dec2,err,typ)),newEnc) end (* in (a,EXT1(dec1,l,col,brk,typ),newEnc) end *) | LOOKED(cs,q1) => let val (a2,q2,enc2) = changeAuto(a,q1,decl) in (a2,LOOKED(cs,q2),enc2) end | CLOSED(dec,_,_,_) => (a,q,decEncoding dec) | _ => raise InternalError(THIS_MODULE,"changeAuto", "entity is neither EXT1 nor CLOSED nor LOOKED") (*--------------------------------------------------------------------*) (* Get one character from the current entity. Possibly reload buffer. *) (* Return 0wx0 at entity end. Otherwise check whether the character *) (* is valid (cf. 2.2). If the last character was a carriage return *) (* (0xD) supress a line feed (0xA). *) (*--------------------------------------------------------------------*) fun getChar (a,q) = case q of ENDED(_,other) => getChar(a,other) | CLOSED(_,_,_,typ) => (case typ of SPECIAL _ => raise InternalError (THIS_MODULE,"getChar", "attempt to read beyond special entity end") | NORMAL(_,other) => getChar(a,other)) | INT(vec,s,i,io) => if i>=s then (0wx0,a,ENDED io) else (Vector.sub(vec,i),a,INT(vec,s,i+1,io)) | EXT1(dec,l,col,br,typ) => (let val (c,dec1) = decGetChar dec in if (* c>=0wx20 orelse c=0wx09 *) c>=0wx0020 andalso (c<=0wxD7FF orelse c>=0wxE000 andalso (c<=0wxFFFD orelse c>=0wx10000)) orelse c=0wx9 then (c,a,EXT1(dec1,l,col+1,false,typ)) else if c=0wxA then if br then getChar(a,EXT1(dec1,l,col,false,typ)) else (c,a,EXT1(dec1,l+1,0,false,typ)) else (if c=0wxD then (0wxA,a,EXT1(dec1,l+1,0,true,typ)) else let val a1 = hookError(a,(getPos q,ERR_NON_XML_CHAR c)) in getChar(a1,EXT1(dec1,l,col+1,false,typ)) end) end handle DecEof dec => (0wx0,a,CLOSED(dec,l,col,typ)) | DecError(dec,eof,err) => let val err = ERR_DECODE_ERROR err val a1 = hookError(a,(getPos q,err)) in if eof then (0wx0,a,CLOSED(dec,l,col,typ)) else getChar(a1,EXT1(dec,col,l,br,typ)) end) | EXT2(arr,s,i,l,col,br,det) => if i=0wx20 orelse c=0wx09 *) (* c>=0wx0020 andalso c<=0wxD7FF orelse c=0wx9 orelse *) (* c>=0wxE000 andalso c<=0wxFFFD orelse c>=0wx10000 *) c>=0wx0020 andalso (c<=0wxD7FF orelse c>=0wxE000 andalso (c<=0wxFFFD orelse c>=0wx10000)) orelse c=0wx9 then (c,a,EXT2(arr,s,i+1,l,col+1,false,det)) else if c=0wxA then if br then getChar(a,EXT2(arr,s,i+1,l,col,false,det)) else (c,a,EXT2(arr,s,i+1,l+1,0,false,det)) else (if c=0wxD then (0wxA,a,EXT2(arr,s,i+1,l+1,0,true,det)) else let val a1 = hookError(a,(getPos q,ERR_NON_XML_CHAR c)) in getChar(a1,EXT2(arr,s,i+1,l,col+1,false,det)) end) end else let val (dec,err,typ) = det val (a1,(n,dec1,err1)) = case err of NONE => if s=BUFSIZE then (a,decGetArray dec arr) else (a,(0,dec,NONE)) | SOME err => (hookError(a,(getPos q,ERR_DECODE_ERROR err)), decGetArray dec arr) in if n=0 andalso not (isSome err1) then (0wx0,a1,CLOSED(dec1,l,col,typ)) else getChar(a1,EXT2(arr,n,0,l,col,br,(dec1,err1,typ))) end | LOOKED(nil,q) => getChar(a,q) | LOOKED(c::cs,q) => (c,a,LOOKED(cs,q)) (*--------------------------------------------------------------------*) (* unget a list of characters. *) (*--------------------------------------------------------------------*) fun ungetChars (q,cs) = LOOKED(cs,q) end (* stop of ../../Parser/entities.sml *) (* start of ../../Parser/Dtd/dtdDeclare.sml *) (*--------------------------------------------------------------------------*) (* Structure: DtdDeclare *) (* *) (*--------------------------------------------------------------------------*) (* Functor: DtdDeclare *) (*--------------------------------------------------------------------------*) (* This module provides functions for adding declarations to the DTD tables *) (* and for doing checks on components of declarations. *) (*--------------------------------------------------------------------------*) functor DtdDeclare (structure Dtd : Dtd structure Entities : Entities structure ParserOptions : ParserOptions) = struct open UtilInt UtilList Base Dtd Errors Entities ParserOptions UniChar UniClasses (*--------------------------------------------------------------------*) (* check whether a sequence a chars is the b-adic representation of a *) (* character's code, terminated by ";". base will be 10 or 16, isBase *) (* will check for a character being a decimal/hexadecimal number. *) (*--------------------------------------------------------------------*) fun checkBasimal (base,baseValue) (ch:Char,cs) = let fun doit _ (nil:Data) = false | doit yet [0wx3B] = yet=ch | doit yet (c::cs) = case baseValue c of NONE => false | SOME v => doit (base*yet+v) cs in doit 0w0 cs end val checkDecimal = checkBasimal (0w10,decValue) val checkHeximal = checkBasimal (0wx10,hexValue) (*--------------------------------------------------------------------*) (* check a character reference for identifying a character. *) (*--------------------------------------------------------------------*) fun checkRef (ch,0wx26::0wx23::0wx78::cs) (* "&#x..." *) = checkHeximal(ch,cs) | checkRef (ch,0wx26::0wx23::cs) (* "&#..." *) = checkDecimal(ch,cs) | checkRef _ = false (*--------------------------------------------------------------------*) (* check for a single character ch. *) (*--------------------------------------------------------------------*) fun checkSingle (ch,[c]) = c=ch | checkSingle _ = false (*--------------------------------------------------------------------*) (* check a predefined entity for being well defined. Note that both *) (* a single char and a char ref representation are allowed, except *) (* for 'amp' which must be escaped. *) (*--------------------------------------------------------------------*) fun checkPredef (idx,cs) = case idx of 1 => checkRef(0wx26,cs) | 2 => checkSingle(0wx3C,cs) orelse checkRef(0wx3C,cs) | 3 => checkSingle(0wx3E,cs) orelse checkRef(0wx3E,cs) | 4 => checkSingle(0wx27,cs) orelse checkRef(0wx27,cs) | 5 => checkSingle(0wx22,cs) orelse checkRef(0wx22,cs) | _ => true (*--------------------------------------------------------------------*) (* Given the declaration of an entity check whether it is predefined. *) (* If no return false. If yes, check whether is was already declared *) (* and whether it is correctly declared. See 4.6: *) (* *) (* All XML processors must recognize these entities whether they *) (* are declared or not. For interoperability, valid XML documents *) (* should declare these entities, like any others, before using *) (* them. If the entities in question are declared, they must be *) (* declared as internal entities whose replacement text is the *) (* single character being escaped or a character reference to that *) (* character, as shown below. *) (* *) (* *) (* *) (* *) (* *) (* *) (* *) (* Note that the < and & characters in the declarations of "lt" and *) (* "amp" are doubly escaped to meet the requirement that entity *) (* replacement be well-formed. *) (* *) (* print an error if the entity was already declared. *) (* print an error if the declaration is not correct. *) (*--------------------------------------------------------------------*) fun checkPredefined dtd (a,q) (idx,ent) = if !O_VALIDATE andalso idx>=1 andalso idx<=5 then let val a1 = if !O_WARN_MULT_ENT_DECL andalso isRedefined dtd idx then let val warn = WARN_MULT_DECL(IT_GEN_ENT,Index2GenEnt dtd idx) in hookWarning(a,(getPos q,warn)) end else a before setRedefined dtd idx val a2 = if !O_CHECK_PREDEFINED then let val correct = case ent of GE_INTERN(_,rep) => checkPredef (idx,Vector2Data rep) | _ => false in if correct then a1 else let val err = ERR_DECL_PREDEF(Index2GenEnt dtd idx,validPredef idx) in hookError(a1,(getPos q,err)) end end else a1 in (true,a2) end else (false,a) (*--------------------------------------------------------------------*) (* add an entity declaration to the DTD tables. 4.2 *) (* *) (* ... If the same entity is declared more than once, the first *) (* declaration encountered is binding; at user option, an XML *) (* processor may issue a warning if entities are declared multiple *) (* times. *) (* *) (* For general entities, check whether it is a predefined entity and *) (* if so, whether it is declared correctly. *) (*--------------------------------------------------------------------*) (* print a warning and ignore the declaration if the notation was *) (* declared previously. *) (*--------------------------------------------------------------------*) fun addGenEnt dtd (a,q) (idx,ent,ext) = case getGenEnt dtd idx of (GE_NULL,_) => a before setGenEnt dtd (idx,(ent,ext)) | _ => let val (pre,a1) = checkPredefined dtd (a,q) (idx,ent) in if pre orelse not (!O_WARN_MULT_ENT_DECL) then a1 else hookWarning(a1,(getPos q,WARN_MULT_DECL (IT_GEN_ENT,Index2GenEnt dtd idx))) end fun addParEnt dtd (a,q) (idx,ent,ext) = case getParEnt dtd idx of (PE_NULL,_) => a before setParEnt dtd (idx,(ent,ext)) | _ => if !O_WARN_MULT_ENT_DECL then hookWarning(a,(getPos q,WARN_MULT_DECL (IT_PAR_ENT,Index2ParEnt dtd idx))) else a (*--------------------------------------------------------------------*) (* at option print a warning if not all predefined entities have been *) (* declared. Cf. 4.1: *) (* *) (* For interoperability, valid documents should declare the *) (* entities amp, lt, gt, apos, quot, in the form specified in *) (* "4.6 Predefined Entities". *) (*--------------------------------------------------------------------*) fun checkPreDefined dtd (a,q) = if !O_VALIDATE andalso !O_INTEROPERABILITY andalso !O_WARN_SHOULD_DECLARE andalso hasDtd dtd then case notRedefined dtd of nil => a | ents => hookWarning(a,(getPos q,WARN_SHOULD_DECLARE ents)) else a (*--------------------------------------------------------------------*) (* add a notation declaration to the DTD tables. *) (* *) (* though the rec. says nothing about repeated notation declarations, *) (* I assume that the intention is to treat them like entities, i.e. *) (* ignore repeated declarations with an optional warning. *) (* *) (* print a warning and ignore the declaration if the notation was *) (* declared previously. *) (*--------------------------------------------------------------------*) fun addNotation dtd (a,q) (idx,nt) = if hasNotation dtd idx then if !O_WARN_MULT_NOT_DECL then hookWarning(a,(getPos q,WARN_MULT_DECL (IT_NOTATION,Index2AttNot dtd idx))) else a else a before setNotation dtd (idx,nt) (*--------------------------------------------------------------------*) (* add an element declaration to the element table. Only the content *) (* part of the element info is updated. 3.2: *) (* *) (* Validity Constraint: Unique Element Type Declaration *) (* No element type may be declared more than once. *) (* *) (* print an error and ignore the declaration if the element was *) (* declared previously. *) (*--------------------------------------------------------------------*) fun addElement dtd (a,q) (idx,cont,ext) = let val {decl,atts,errAtts,...} = getElement dtd idx in case decl of NONE => a before setElement dtd (idx,{decl = SOME(cont,ext), atts = atts, errAtts = errAtts}) | SOME _ => if !O_VALIDATE then hookError(a,(getPos q,ERR_REDEC_ELEM(Index2Element dtd idx))) else a end (*--------------------------------------------------------------------*) (* at option, pretend an element is declared by adding a default *) (* declaration. Only the decl flag of the element info is updated. *) (*--------------------------------------------------------------------*) fun handleUndeclElement dtd idx = let val {atts,errAtts,...} = getElement dtd idx val newInfo = {decl = SOME(CT_ANY,false), atts = atts, errAtts = errAtts} in newInfo before setElement dtd (idx,newInfo) end (*--------------------------------------------------------------------*) (* check whether an element is declared and whether it already had an *) (* attribute list declaration. Cf. 3.3: *) (* *) (* At user option, an XML processor may issue a warning if *) (* attributes are declared for an element type not itself declared, *) (* but this is not an error. *) (* *) (* ... an XML processor may at user option issue a warning when *) (* more than one attribute-list declaration is provided for a given *) (* element type, ... *) (* *) (* print a warning if the element is not declared or already had an *) (* attribute list declaration. *) (*--------------------------------------------------------------------*) fun enterAttList dtd (a,q) idx = let val {decl,atts,errAtts,...} = getElement dtd idx val a1 = if isSome decl orelse not (!O_WARN_ATT_NO_ELEM) then a else hookWarning(a,(getPos q,WARN_ATT_UNDEC_ELEM(Index2Element dtd idx))) in case atts of NONE => a1 before setElement dtd (idx,{decl=decl,atts=SOME(nil,false),errAtts=errAtts}) | _ => if !O_INTEROPERABILITY andalso !O_WARN_MULT_ATT_DECL then hookWarning(a1,(getPos q,WARN_MULT_ATT_DECL(Index2Element dtd idx))) else a1 end (*--------------------------------------------------------------------*) (* check whether attribute "xml:space" is declared correctly. 2.10: *) (* *) (* A special attribute named xml:space may be attached ... In valid *) (* documents, this attribute, like any other, must be declared if *) (* it is used. When declared, it must be given as an enumerated *) (* type whose only possible values are "default" and "preserve". *) (*--------------------------------------------------------------------*) fun checkAttDef (a,q) (aidx,attType,_,_) = if aidx<>xmlSpaceIdx orelse attType=xmlSpaceType then a else hookError(a,(getPos q,ERR_XML_SPACE)) (*--------------------------------------------------------------------*) (* enter a definition of a single attribute to the element table. *) (* ignore the definition if the attribute is already defined for that *) (* element. Cf. 3.3: *) (* *) (* When more than one AttlistDecl is provided for a given element *) (* type, the contents of all those provided are merged. When more *) (* than one definition is provided for the same attribute of a *) (* given element type, the first declaration is binding and later *) (* declarations are ignored. For interoperability, an XML processor *) (* may at user option issue a warning when ... more than one *) (* attribute definition is provided for a given attribute, but this *) (* is not an error. *) (* *) (* If the attribute type is ID, check whether an element already has *) (* an attribute of that type. 3.3.1: *) (* *) (* Validity Constraint: One ID per Element Type *) (* No element type may have more than one ID attribute specified. *) (*--------------------------------------------------------------------*) (* print an error if the element already has an ID attribute. *) (* print a warning if the attr. is already defined for this element. *) (*--------------------------------------------------------------------*) (* return the new application data. *) (*--------------------------------------------------------------------*) fun addAttribute dtd (a,q) (eidx,attDef as (att,attType,attDefault,_)) = let val a1 = checkAttDef (a,q) attDef fun doit nil = (false,[attDef],a) | doit (atts as (ad as (aidx,_,_,_))::rest) = if aidx=att then let val a1 = if !O_INTEROPERABILITY andalso !O_WARN_MULT_ATT_DEF then let val warn = WARN_MULT_ATT_DEF (Index2Element dtd eidx,Index2AttNot dtd att) in hookWarning(a,(getPos q,warn)) end else a in (true,atts,a1) end else (if aidx (c1=0wx58 orelse c1=0wx78) andalso (c2=0wx4D orelse c2=0wx6D) andalso (c3=0wx4C orelse c3=0wx6C) | _ => false fun checkAttName (a,q) name = if !O_CHECK_RESERVED andalso startsWithXml name then case name of [0wx78,0wx6d,0wx6c,0wx3a,0wx6c,0wx61,0wx6e,0wx67] (* ":lang" *) => a | [0wx78,0wx6d,0wx6c,0wx3a,0wx73,0wx70,0wx61,0wx63,0wx65] (* ":space" *) => a | _ => hookError(a,(getPos q,ERR_RESERVED(name,IT_ATT_NAME))) else a fun checkElemName (a,q) name = if !O_CHECK_RESERVED andalso startsWithXml name then hookError(a,(getPos q,ERR_RESERVED(name,IT_ELEM))) else a (*--------------------------------------------------------------------*) (* check for each element in the dtd, whether a name token occurs *) (* more than once in its enumerated attribute types. *) (* *) (* print a warning for each element where this is true. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) fun checkMultEnum dtd (a,q) = if !O_INTEROPERABILITY andalso !O_WARN_MULT_ENUM then let fun doElem a idx = let (*-----------------------------------------------------*) (* for each i, add i to yet if it not in that list. *) (* otherwise add it to dup. *) (*-----------------------------------------------------*) fun do_list yd nil = yd | do_list (yet,dup) (i::is) = let val yd' = case insertNewInt (i,yet) of NONE => (yet,insertInt (i,dup)) | SOME new => (new,dup) in do_list yd' is end (*-----------------------------------------------------*) (* For each enumerated attribute type call the appro- *) (* priate function. *) (*-----------------------------------------------------*) fun doit (yet,dup) nil = dup | doit (yet,dup) ((_,attType,_,_)::rest) = case attType of AT_GROUP is => doit (do_list (yet,dup) is) rest | AT_NOTATION is => doit (do_list (yet,dup) is) rest | _ => doit (yet,dup) rest val defs = case #atts(getElement dtd idx) of NONE => nil | SOME(defs,_) => defs val dup = doit (nil,nil) defs in if null dup then a else hookWarning(a,(getPos q,WARN_ENUM_ATTS (Index2Element dtd idx,map (Index2AttNot dtd) dup))) end (*-----------------------------------------------------------*) (* the highest used index is usedIndices-1. *) (*-----------------------------------------------------------*) val maxIdx = maxUsedElem dtd fun doit a i = if i>maxIdx then a else doit (doElem a i) (i+1) in doit a 0 end else a (*--------------------------------------------------------------------*) (* check for all id names refereneced by some IDREF attribute whether *) (* it was also declared by an ID attribute. *) (* *) (* print an error if a referenced ID name was not defined. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) fun checkDefinedIds dtd (a,q) = if !O_VALIDATE then let val maxId = maxUsedId dtd fun doOne a i = let val (decl,refs) = getId dtd i in if decl orelse null refs then a else hookError(a,(hd refs,ERR_UNDECL_ID(Index2Id dtd i,tl refs))) end fun doAll a i = if i>maxId then a else doAll (doOne a i) (i+1) in doAll a 0 end else a (*--------------------------------------------------------------------*) (* check for all declared unparsed entities, whether their notations *) (* have been declared. *) (* *) (* print an error if a notation was not declared. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) fun checkUnparsed dtd a = if !O_VALIDATE then let val maxGen = maxUsedGen dtd fun doOne a i = case getGenEnt dtd i of (GE_UNPARSED(_,nidx,pos),_) => if hasNotation dtd nidx then a else hookError(a,(pos,ERR_UNDECLARED (IT_NOTATION,Index2AttNot dtd nidx,LOC_NONE))) | _ => a fun doAll a i = if i>maxGen then a else doAll (doOne a i) (i+1) in doAll a 0 end else a end (* stop of ../../Parser/Dtd/dtdDeclare.sml *) (* start of ../../Parser/Dtd/dtdAttributes.sml *) (*--------------------------------------------------------------------------*) (* Structure: DtdAttributes *) (* *) (* Exceptions raised by functions in this structure: *) (* checkAttValue : AttValue InternalError *) (* checkDefinedIds : none *) (* genMissingAtts : none *) (* makeAttValue : AttValue InternalError *) (*--------------------------------------------------------------------------*) functor DtdAttributes (structure Dtd : Dtd structure Entities : Entities structure ParserOptions : ParserOptions) = struct structure DtdDeclare = DtdDeclare (structure Dtd = Dtd structure Entities = Entities structure ParserOptions = ParserOptions) open UniChar UniClasses UtilList Base Dtd DtdDeclare Errors Entities HookData ParserOptions val THIS_MODULE = "DtdAttributes" exception AttValue of AppData (*--------------------------------------------------------------------*) (* this is the list of language codes in ISO 639. *) (*--------------------------------------------------------------------*) val iso639codes = Vector.fromList ["AA","AB","AF","AM","AR","AS","AY","AZ", "BA","BE","BG","BH","BI","BN","BO","BR", "CA","CO","CS","CY", "DA","DE","DZ", "EL","EN","EO","ES","ET","EU", "FA","FI","FJ","FO","FR","FY", "GA","GD","GL","GN","GU", "HA","HE","HI","HR","HU","HY", "IA","ID","IE","IK","IN","IS","IT","IU","IW", "JA","JI","JW", "KA","KK","KL","KM","KN","KO","KS","KU","KY", "LA","LN","LO","LT","LV", "MG","MI","MK","ML","MN","MO","MR","MS","MT","MY", "NA","NE","NL","NO", "OC","OM","OR", "PA","PL","PS","PT", "QU", "RM","RN","RO","RU","RW", "SA","SD","SG","SH","SI","SK","SL","SM","SN","SO","SQ","SR","SS","ST","SU","SV","SW", "TA","TE","TG","TH","TI","TK","TL","TN","TO","TR","TS","TT","TW", "UG","UK","UR","UZ", "VI","VO", "WO", "XH", "YI","YO", "ZA","ZH","ZU"] (*--------------------------------------------------------------------*) (* a two-dimensional field [0..25][0..25] of booleans for ISO 639. *) (*--------------------------------------------------------------------*) val iso639field = let val arr = Array.tabulate(26,fn _ => Array.array(26,false)) val _ = Vector.map (fn s => Array.update(Array.sub(arr,ord(String.sub(s,0))-65), ord(String.sub(s,1))-65, true)) iso639codes in Vector.tabulate(26,fn i => Array.extract (Array.sub(arr,i),0,NONE)) end (*--------------------------------------------------------------------*) (* for a letter, compute ord(toUpper c)-ord(#"A"), for subscripting. *) (*--------------------------------------------------------------------*) val toUpperMask = Chars.notb(0wx20) fun cIndex c = Chars.toInt(Chars.andb(c,toUpperMask)-0wx41) (*--------------------------------------------------------------------*) (* are these two letters an ISO 639 code? *) (*--------------------------------------------------------------------*) fun isIso639 (c1,c2) = if !O_CHECK_ISO639 then Vector.sub(Vector.sub(iso639field,cIndex c1),cIndex c2) handle Subscript => false else isAsciiLetter c1 andalso isAsciiLetter c2 (*--------------------------------------------------------------------*) (* does this match Subcode ('-' Subcode)* ? *) (* is this a sequence of ('-' Subcode) ? *) (* Iana codes and user codes also end on ([a-z] | [A-Z])+ *) (*--------------------------------------------------------------------*) fun isSubcode' nil = false | isSubcode' (c::cs) = let fun doit nil = true | doit (c::cs) = if c=0wx2D then isSubcode' cs else isAsciiLetter c andalso doit cs in isAsciiLetter c andalso doit cs end fun isSubcode nil = true | isSubcode (c::cs) = c=0wx2D andalso isSubcode' cs val isIanaUser = isSubcode' (*--------------------------------------------------------------------*) (* Check whether a "xml:lang" attribute matches the LanguageID *) (* production. 2.12: *) (* *) (* [33] LanguageID ::= Langcode ('-' Subcode)* *) (* [34] Langcode ::= ISO639Code | IanaCode | UserCode *) (* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) *) (* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ *) (* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ *) (* [38] Subcode ::= ([a-z] | [A-Z])+ *) (* *) (* print an error and raise AttValue if the "xml:lang" attribute does *) (* not have a valid value. *) (*--------------------------------------------------------------------*) fun checkAttSpec (a,q) (aidx,cs) = if !O_CHECK_LANGID andalso aidx=xmlLangIdx then let val valid = case cs of c::0wx2D::cs' => (c=0wx49 orelse c=0wx69 orelse c=0wx58 orelse c=0wx78) andalso isIanaUser cs' | c1::c2::cs' => isIso639 (c1,c2) andalso isSubcode cs' | _ => false in if valid then a else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(cs,IT_LANG_ID)))) end else a (*--------------------------------------------------------------------*) (* Normalize an attribute value of type other than CDATA, and split *) (* it into tokens at space characters. Cf. 3.3.3: *) (* *) (* ... If the declared value is not CDATA, then the XML processor *) (* must further process the normalized attribute value by dis- *) (* carding any leading and trailing space (#x20) characters, and by *) (* replacing sequences of space (#x20) characters by a single space *) (* (#x20) character. *) (* *) (* replacement of references is already done when parsing the literal,*) (* thus we need only do whitespace normalization. we don't need to *) (* take care of the 3rd rule since replacement of sequences of #x20 *) (* and then splitting subsumes its effect. *) (* *) (* return the list of tokens as character lists and the normalized *) (* value as a char vector. *) (*--------------------------------------------------------------------*) fun splitAttValue av = let fun doOne nil = (nil,nil,nil) | doOne (c::cs) = if c=0wx20 then let val (toks,ys) = doAll true cs in (nil,toks,ys) end else let val (tok,toks,ys) = doOne cs in ((c::tok),toks,c::ys) end and doAll addS nil = (nil,nil) | doAll addS (c::cs) = if c=0wx20 then doAll addS cs else let val (tok,toks,ys) = doOne cs in ((c::tok)::toks, if addS then 0wx20::c::ys else c::ys) end val (tokens,normed) = doAll false av in (Data2Vector normed,tokens) end (*--------------------------------------------------------------------*) (* normalize an attribute value other than CDATA according to 3.3.3. *) (* *) (* return the normalized att value as a Vector. *) (*--------------------------------------------------------------------*) fun normAttValue av = let fun doOne nil = nil | doOne (c::cs) = if c=0wx20 then doAll true cs else c::doOne cs and doAll addS nil = nil | doAll addS (c::cs) = if c=0wx20 then doAll addS cs else let val ys = doOne cs in if addS then 0wx20::c::ys else c::ys end val normed = doAll false av in Data2Vector normed end (*--------------------------------------------------------------------*) (* Check whether a sequence of chars forms a name (token). *) (*--------------------------------------------------------------------*) fun isNmToken cs = List.all isName cs fun isaName nil = false | isaName (c::cs) = isNms c andalso List.all isName cs (*--------------------------------------------------------------------*) (* Check whether a list of tokens is a single what fulfilling isWhat. *) (* print an error and raise AttValue if it is not. *) (*--------------------------------------------------------------------*) fun checkOne (isWhat,what,detail) (a,q) toks = case toks of nil => raise AttValue (hookError(a,(getPos q,ERR_EXACTLY_ONE detail))) | [one] => if isWhat one then one else raise AttValue(hookError(a,(getPos q,ERR_ATT_IS_NOT(one,what)))) | more => raise AttValue(hookError(a,(getPos q,ERR_AT_MOST_ONE detail))) (*--------------------------------------------------------------------*) (* Check whether a list of tokens is non-empty and all elements ful- *) (* fil isWhat. *) (* print an error and raise AttValue if not. *) (*--------------------------------------------------------------------*) fun checkList (isWhat,what,detail) (a,q) toks = case toks of nil => raise AttValue (hookError(a,(getPos q,ERR_AT_LEAST_ONE detail))) | _ => app (fn one => if isWhat one then () else let val err = ERR_ATT_IS_NOT(one,what) in raise AttValue(hookError(a,(getPos q,err))) end) toks (*--------------------------------------------------------------------*) (* Convert a list of tokens into an ID att value. 3.3.1: *) (* *) (* Validity Constraint: ID *) (* Values of type ID must match the Name production. *) (* *) (* Validity Constraint: ID *) (* ... A name must not appear more than once in an XML document as *) (* a value of this type; i.e., ID values must uniquely identify the *) (* elements which bear them. *) (* *) (* mark the value as used, print an error and raise AttValue if it *) (* was already used. *) (* print an error and raise AttValue if it is not a name. *) (*--------------------------------------------------------------------*) fun takeId (dtd,inDtd) (a,q) toks = let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks val idx = Id2Index dtd one val _ = if inDtd then () else let val (decl,refs) = getId dtd idx in if decl then let val err = ERR_REPEATED_ID one in raise AttValue (hookError(a,(getPos q,err))) end else setId dtd (idx,(true,refs)) end in (SOME(AV_ID idx),a) end (*--------------------------------------------------------------------*) (* Convert a list of tokens into an IDREF/IDREFS att value. 3.3.1: *) (* *) (* Validity Constraint: IDREF *) (* Values of type IDREF must match the Name production. *) (* *) (* print an error an raise AttValue if it is not a (list of) name(s). *) (*--------------------------------------------------------------------*) fun setIdRef (dtd,q) idx = let val (decl,refs) = getId dtd idx in setId dtd (idx,(decl,getPos q::refs)) end fun takeIdref (dtd,_) (a,q) toks = let val one = checkOne (isaName,IT_NAME,IT_ID_NAME) (a,q) toks val idx=Id2Index dtd one val _ = setIdRef (dtd,q) idx in (SOME(AV_IDREF idx),a) end fun takeIdrefs (dtd,_) (a,q) toks = let val _ = checkList (isaName,IT_NAME,IT_ID_NAME) (a,q) toks val idxs = map (Id2Index dtd) toks val _ = app (setIdRef (dtd,q)) idxs in (SOME(AV_IDREFS idxs),a) end (*--------------------------------------------------------------------*) (* Convert a list of tokens into an ENTITY/IES att value. 3.3.1: *) (* *) (* Validity Constraint: Entity Name *) (* Values of type ENTITY must match the Name production... *) (* must match the name of an unparsed entity declared in the DTD. *) (* *) (* print an error and raise AttValue if a token is not a name. *) (* print an error and raise AttValue if an entity is undeclared or a *) (* parsed entity. *) (*--------------------------------------------------------------------*) fun checkEntity (dtd,inDtd) (a,q) name = let val idx = GenEnt2Index dtd name val (ent,_) = getGenEnt dtd idx val _ = if inDtd then () else case ent of GE_UNPARSED _ => () | GE_NULL => let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE) in raise AttValue (hookError(a,(getPos q,err))) end | _ => let val err = ERR_MUST_BE_UNPARSED(name,LOC_NONE) in raise AttValue (hookError(a,(getPos q,err))) end in idx end fun takeEntity (dtd,inDtd) (aq as (a,_)) toks = let val one = checkOne (isaName,IT_NAME,IT_ENT_NAME) aq toks val idx = checkEntity (dtd,inDtd) aq one in (SOME(AV_ENTITY idx),a) end fun takeEntities (dtd,inDtd) (aq as (a,_)) toks = let val _ = checkList (isaName,IT_NAME,IT_ENT_NAME) aq toks val idxs = map (checkEntity (dtd,inDtd) aq) toks in (SOME(AV_ENTITIES idxs),a) end (*--------------------------------------------------------------------*) (* Convert a list of tokens into a NOTATION att value. 3.3.1: *) (* *) (* Validity Constraint: Notation Attributes *) (* Values of this type must match one of the notation names *) (* included in the declaration. *) (* *) (* print an error and raise AttValue if it is not a single name. *) (* print an error and raise AttValue if the notation's index is not *) (* in the list given as 1st arg. *) (*--------------------------------------------------------------------*) fun takeNotation is (dtd,inDtd) (aq as (a,q)) toks = let val one = checkOne (isaName,IT_NAME,IT_NOT_NAME) aq toks val idx = AttNot2Index dtd one val _ = if member idx is then () else let val nots = map (Index2AttNot dtd) is val err = ERR_MUST_BE_AMONG(IT_NOT_NAME,one,nots) in raise AttValue (hookError(a,(getPos q,err))) end in (SOME(AV_NOTATION(is,idx)),a) end (*--------------------------------------------------------------------*) (* Convert a list of tokens into an enumerated att value. 3.3.1: *) (* *) (* Validity Constraint: Enumeration *) (* Values of this type must match one of the Nmtoken tokens in *) (* the declaration. *) (* *) (* print an error and raise AttValue if it is not a single name token.*) (* print an error and raise AttValue if the token's index is not *) (* in the list given as 1st arg. *) (*--------------------------------------------------------------------*) fun takeGroup is (dtd,_) (aq as (a,q)) toks = let val one = checkOne (isNmToken,IT_NMTOKEN,IT_NMTOKEN) aq toks val idx = AttNot2Index dtd one val _ = if member idx is then () else let val toks = map (Index2AttNot dtd) is val err = ERR_MUST_BE_AMONG(IT_NMTOKEN,one,toks) in raise AttValue (hookError(a,(getPos q,err))) end in (SOME(AV_GROUP(is,idx)),a) end (*--------------------------------------------------------------------*) (* Given an attribute type and a list of characters, construct the *) (* corresponding AttValue. *) (* *) (* print an error (and possibly raise AttValue) if the attribute *) (* is ill-formed. *) (*--------------------------------------------------------------------*) fun makeAttValue dtd (a,q) (aidx,attType,ext,inDtd,cs) = if attType=AT_CDATA then let val cv = Data2Vector cs in if !O_VALIDATE andalso hasDtd dtd then (cv,(SOME(AV_CDATA cv),checkAttSpec (a,q) (aidx,cs))) else (cv,(NONE,a)) end else if !O_VALIDATE andalso hasDtd dtd then let val a1 = checkAttSpec (a,q) (aidx,cs) val (cv,toks) = splitAttValue cs val a2 = if ext andalso standsAlone dtd then let val cdata = Data2Vector cs in if cdata=cv then a1 else let val err = ERR_STANDALONE_NORM(Index2AttNot dtd aidx) val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) in hookError(a1,(getPos q,err)) end end else a1 in case attType of AT_NMTOKEN => (cv,(SOME(AV_NMTOKEN(checkOne(isNmToken,IT_NMTOKEN, IT_NMTOKEN) (a2,q) toks)),a2)) | AT_NMTOKENS => (cv,(SOME(AV_NMTOKENS toks),a2)) before checkList(isNmToken,IT_NMTOKEN,IT_NMTOKEN) (a2,q) toks | AT_ID => (cv,takeId (dtd,inDtd) (a2,q) toks) | AT_IDREF => (cv,takeIdref (dtd,inDtd) (a2,q) toks) | AT_IDREFS => (cv,takeIdrefs (dtd,inDtd) (a2,q) toks) | AT_ENTITY => (cv,takeEntity (dtd,inDtd) (a2,q) toks) | AT_ENTITIES => (cv,takeEntities (dtd,inDtd) (a2,q) toks) | AT_GROUP is => (cv,takeGroup is (dtd,inDtd) (a2,q) toks) | AT_NOTATION is => (cv,takeNotation is (dtd,inDtd) (a2,q) toks) | AT_CDATA => raise InternalError(THIS_MODULE,"makeAttValue", "AT_CDATA in the innermost case") end else (normAttValue cs,(NONE,a)) (*--------------------------------------------------------------------*) (* given an attribute value literal and the attribute type, generate *) (* the AttValue, and check whether it complies with its default value.*) (* If yes, make an AttPresent value out of it. *) (* See 3.3.2: *) (* *) (* Validity Constraint: Fixed Attribute Default *) (* If an attribute has a default value declared with the #FIXED *) (* keyword, instances of that attribute must match the default *) (* value. *) (* *) (* print an error and raise AttValue if the attribute value doesn't *) (* comply. *) (* *) (* return the value as a AttPresent value. *) (*--------------------------------------------------------------------*) fun checkAttValue dtd (a,q) ((aidx,attType,defVal,ext),literal,cs) = let val (cv,(av,a1)) = makeAttValue dtd (a,q) (aidx,attType,ext,false,cs) in if !O_VALIDATE andalso hasDtd dtd then case defVal of AD_FIXED((def,cv',_),_) => if cv=cv' then (AP_PRESENT(literal,cv,av),a1) else raise AttValue (hookError(a1,(getPos q,ERR_FIXED_VALUE(Index2AttNot dtd aidx,cv,cv')))) | _ => (AP_PRESENT(literal,cv,av),a1) else (AP_PRESENT(literal,cv,av),a1) end (*--------------------------------------------------------------------*) (* check a defaulted attribute value for validity. *) (* *) (* since the lexical constraints are checked when the default is *) (* declared we only need to check whether notations are declared and *) (* entities are declared and unparsed. An ID attribute cannot be *) (* defaulted, so no need to check for duplicate ID attributes. *) (*--------------------------------------------------------------------*) fun checkDefaultValue dtd (a,q,pos) av = let fun checkEntity (idx,a) = let val (ent,_) = getGenEnt dtd idx in case ent of GE_UNPARSED _ => a | GE_NULL => hookError(a,(getPos q,ERR_UNDECLARED (IT_GEN_ENT,Index2GenEnt dtd idx, LOC_ATT_DEFAULT pos))) | _ => hookError(a,(getPos q,ERR_MUST_BE_UNPARSED (Index2GenEnt dtd idx,LOC_ATT_DEFAULT pos))) end fun checkNotation (idx,a) = if hasNotation dtd idx then a else hookError(a,(getPos q,ERR_UNDECLARED (IT_NOTATION,Index2AttNot dtd idx,LOC_ATT_DEFAULT pos))) in case av of SOME(AV_ENTITY i) => checkEntity (i,a) | SOME(AV_ENTITIES is) => foldl checkEntity a is | SOME(AV_NOTATION(_,i)) => checkNotation(i,a) | _ => a end (*--------------------------------------------------------------------*) (* Generate the attributes not specified in a start-tag, the defs of *) (* these atts and the specified atts given as argument. 3.3.2: *) (* *) (* If the declaration is neither #REQUIRED nor #IMPLIED, then the *) (* AttValue value contains the declared default value; ... If a *) (* default value is declared, when an XML processor encounters an *) (* omitted attribute, it is to behave as though the attribute were *) (* present with the declared default value. *) (* *) (* Validity Constraint: Required Attribute *) (* If the default declaration is the keyword #REQUIRED, then the *) (* attribute must be specified for all elements of the type in the *) (* attribute-list declaration. *) (* *) (* print an error if a required attribute was omitted. *) (* *) (* return the AttSpecList of all attributes for this tag. *) (*--------------------------------------------------------------------*) fun genMissingAtts dtd (a,q) (defs,specd) = let fun default a (idx,(v as (_,_,av),(pos,checked)),ext) = let val a1 = if ext andalso !O_VALIDATE andalso standsAlone dtd then let val err = ERR_STANDALONE_DEF(Index2AttNot dtd idx) val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) in hookError(a,(getPos q,err)) end else a val a2 = if !O_VALIDATE andalso not (!checked andalso !O_ERROR_MINIMIZE) then checkDefaultValue dtd (a1,q,pos) av before checked := true else a1 in (AP_DEFAULT v,a1) end fun doit a nil = (specd,a) | doit a ((idx,_,dv,ext)::rest) = let val (value,a1) = case dv of AD_DEFAULT v => default a (idx,v,ext) | AD_FIXED v => default a (idx,v,ext) | AD_IMPLIED => (AP_IMPLIED,a) | AD_REQUIRED => let val a1 = if not (!O_VALIDATE) then a else hookError(a,(getPos q, ERR_MISSING_ATT(Index2AttNot dtd idx))) in (AP_MISSING,a1) end val (other,a2) = doit a1 rest in ((idx,value,NONE)::other,a2) end in doit a defs end (*--------------------------------------------------------------------*) (* process an undeclared attribute in a start-tag. *) (* At option, an error message is generated only once for the same *) (* attribute and element. *) (* *) (* possibly print an error. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) fun handleUndeclAtt dtd (a,q) (aidx,att,eidx,elem) = if !O_ERROR_MINIMIZE then let val {decl,atts,errAtts} = getElement dtd eidx in if member aidx errAtts then a else let val a1 = if !O_VALIDATE andalso hasDtd dtd then let val err = ERR_UNDECL_ATT(att,elem) in hookError(a,(getPos q,err)) end else a val a2 = checkAttName (a1,q) att val _ = setElement dtd (eidx,{decl = decl, atts = atts, errAtts = aidx::errAtts}) in a2 end end else let val a1 = if !O_VALIDATE andalso hasDtd dtd then hookError(a,(getPos q,ERR_UNDECL_ATT(att,elem))) else a in checkAttName (a1,q) att end end (* stop of ../../Parser/Dtd/dtdAttributes.sml *) (* start of ../../Parser/Dtd/dtdManager.sml *) (*--------------------------------------------------------------------------*) (* Structure: Dtd *) (* *) (* Depends on: *) (* UniChar *) (* DtdAttributes *) (* DtdElements *) (* DtdEntities *) (* DtdNotations *) (* DtdStandalone *) (* *) (* Exceptions raised by functions in this structure: *) (* initDtdTables : none *) (* AttIdx2String : NoSuchSymbol *) (* ElemIdx2String : NoSuchIndex *) (* GenEntIdx2String : NoSuchIndex *) (* IdIdx2String : NoSuchIndex *) (* NotIdx2String : NoSuchIndex *) (* GenEntity2String : NoSuchIndex *) (* ElemInfo2String : NoSuchIndex NoSuchSymbol *) (* printGenEntTable : NoSuchIndex *) (* printElementTable : NoSuchIndex NoSuchSymbol *) (* printDtdTables : NoSuchIndex NoSuchSymbol *) (*--------------------------------------------------------------------------*) signature DtdManager = sig include Entities include Dtd exception AttValue of AppData val makeAttValue : Dtd -> AppData * State -> int * Base.AttType * bool * bool * UniChar.Data -> UniChar.Vector * (Base.AttValue option * AppData) val checkAttValue : Dtd -> AppData * State -> Base.AttDef * UniChar.Vector * UniChar.Data -> HookData.AttPresent * AppData val genMissingAtts : Dtd -> AppData * State -> Base.AttDefList * HookData.AttSpecList -> HookData.AttSpecList * AppData val handleUndeclAtt : Dtd -> AppData * State -> int * UniChar.Data * int * UniChar.Data -> AppData val handleUndeclElement : Dtd -> int -> Base.ElemInfo val checkAttName : AppData * State -> UniChar.Data -> AppData val checkElemName : AppData * State -> UniChar.Data -> AppData val checkDefinedIds : Dtd -> AppData * State -> AppData val checkMultEnum : Dtd -> AppData * State -> AppData val checkPreDefined : Dtd -> AppData * State -> AppData val checkUnparsed : Dtd -> AppData -> AppData val enterAttList : Dtd -> AppData * State -> int -> AppData val addAttribute : Dtd -> AppData * State -> int * Base.AttDef -> AppData val addElement : Dtd -> AppData * State -> int * Base.ContentSpec * bool -> AppData val addGenEnt : Dtd -> AppData * State -> int * Base.GenEntity * bool -> AppData val addNotation : Dtd -> AppData * State -> int * Base.ExternalId -> AppData val addParEnt : Dtd -> AppData * State -> int * Base.ParEntity * bool -> AppData end functor DtdManager (structure Dtd : Dtd structure Hooks : Hooks structure ParserOptions : ParserOptions) : DtdManager = struct structure Entities = Entities (structure Hooks = Hooks) structure DtdAttributes = DtdAttributes (structure Dtd = Dtd structure Entities = Entities structure ParserOptions = ParserOptions) open Dtd DtdAttributes end (* stop of ../../Parser/Dtd/dtdManager.sml *) (* start of ../../Parser/Parse/parseBase.sml *) signature ParseBase = sig include Dfa DtdManager Resolve DfaOptions ParserOptions exception NoSuchChar of AppData * State exception NoSuchEntity of AppData * State exception NotFound of UniChar.Char * AppData * State exception SyntaxError of UniChar.Char * AppData * State val expectedOrEnded : Errors.Expected * Errors.Location -> UniChar.Char -> Errors.Error val recoverXml : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val recoverETag : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val recoverSTag : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val recoverDecl : bool -> UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val useParamEnts : unit -> bool end (*--------------------------------------------------------------------------*) (* Structure: ParseBase *) (*--------------------------------------------------------------------------*) (* This structure provides exceptions for the Parse functions, and strings *) (* for error generation (these strings don't really need to reside in their *) (* own structure, but like this the code is more easier to read). *) (*--------------------------------------------------------------------------*) functor ParseBase (structure Dtd : Dtd structure Hooks : Hooks structure Resolve : Resolve structure ParserOptions : ParserOptions) : ParseBase = struct structure DfaOptions = ParserOptions.DfaOptions structure Dfa = Dfa (structure DfaOptions = DfaOptions) structure DtdManager = DtdManager (structure Dtd = Dtd structure Hooks = Hooks structure ParserOptions = ParserOptions) open Base DtdManager DfaOptions Dfa Errors ParserOptions Resolve UniChar exception NoSuchChar of AppData * State exception NoSuchEntity of AppData * State exception NotFound of UniChar.Char * AppData * State exception SyntaxError of UniChar.Char * AppData * State fun expectedOrEnded (exp,ended) c = if c=0wx00 then ERR_ENDED_BY_EE ended else ERR_EXPECTED(exp,[c]) (*--------------------------------------------------------------------*) (* Besides "?>" also recognize ">" as end delimiter, because the typo *) (* might be an omitted "?". Also stop on "<"; then the entire "?>" *) (* was omitted; the "<" may not be consumed then. *) (* Within literals dont recognize ">" and "<", but only "?>"; then *) (* the typo is an omitted quote character. *) (*--------------------------------------------------------------------*) fun recoverXml caq = let fun do_lit ch (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx3F (* #"?" *) => let val (c1,a1,q1) = getChar (a,q) in if c1=0wx3E (* #">" *) then (c1,a1,q1) else do_lit ch (c1,a1,q1) end | _ => if c=ch then (getChar (a,q)) else do_lit ch (getChar (a,q)) fun doit (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q))) | 0wx25 (* #"%" *) => (c,a,q) | 0wx26 (* #"&" *) => (c,a,q) | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q))) | 0wx3C (* #"<" *) => (c,a,q) | 0wx3E (* #">" *) => (getChar (a,q)) | _ => doit (getChar (a,q)) in doit caq end fun recoverETag caq = let fun do_lit ch (c,a,q) = case c of 0wx00 => (c,a,q) | _ => if c=ch then (getChar (a,q)) else do_lit ch (getChar (a,q)) fun doit (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q))) | 0wx26 (* #"&" *) => (c,a,q) | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q))) | 0wx3E (* #">" *) => (getChar (a,q)) | 0wx3C (* #"<" *) => (c,a,q) | _ => doit (getChar (a,q)) in doit caq end fun recoverSTag caq = let fun do_lit ch (c,a,q) = case c of 0wx00 => (c,a,q) | _ => if c=ch then (getChar (a,q)) else do_lit ch (getChar (a,q)) fun doit (c,a,q) = case c of 0wx00 => (false,(c,a,q)) | 0wx22 (* #""""*) => doit (do_lit c (getChar (a,q))) | 0wx26 (* #"&" *) => (false,(c,a,q)) | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q))) | 0wx2F (* #"/" *) => let val (c1,a1,q1) = getChar (a,q) in if c1=0wx3E (* #">" *) then (true,(c1,a1,q1)) else doit (c1,a1,q1) end | 0wx3E (* #">" *) => (false,getChar (a,q)) | 0wx3C (* #"<" *) => (false,(c,a,q)) | _ => doit (getChar (a,q)) in doit caq end fun recoverDecl hasSubset caq = let fun do_lit ch (c,a,q) = if c=0wx00 then (c,a,q) else if c=ch then getChar (a,q) else do_lit ch (getChar(a,q)) fun do_decl (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx22 (* #"\""*) => do_decl (do_lit c (getChar (a,q))) | 0wx27 (* #"'" *) => do_decl (do_lit c (getChar (a,q))) | 0wx3E (* #">" *) => getChar (a,q) | _ => do_decl (getChar (a,q)) fun do_subset (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx3C (* #"<" *) => do_subset (do_decl (getChar (a,q))) | 0wx5D (* #"]" *) => getChar (a,q) | _ => do_subset (getChar (a,q)) fun doit (c,a,q) = case c of 0wx00 => if isSpecial q then (c,a,q) else doit (getChar (a,q)) | 0wx22 (* #"\""*) => doit (do_lit c (getChar (a,q))) | 0wx25 (* #"%" *) => if hasSubset then (c,a,q) else doit (getChar (a,q)) | 0wx27 (* #"'" *) => doit (do_lit c (getChar (a,q))) | 0wx3C (* #"<" *) => (c,a,q) | 0wx3E (* #">" *) => getChar (a,q) | 0wx5B (* #"[" *) => if hasSubset then doit (do_subset (getChar (a,q))) else doit (getChar (a,q)) | _ => doit (getChar (a,q)) in doit caq end fun useParamEnts() = !O_VALIDATE orelse !O_INCLUDE_PARAM_ENTS end (* stop of ../../Parser/Parse/parseBase.sml *) (* start of ../../Parser/Parse/parseNames.sml *) signature ParseNames = sig include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseNames *) (* *) (* Exceptions raised by functions in this structure: *) (* parseEntName : none *) (* parseName : NotFound *) (* parseNmtoken : NotFound *) (*--------------------------------------------------------------------------*) functor ParseNames (structure ParseBase : ParseBase) : ParseNames = struct open Errors ParseBase UniClasses (*--------------------------------------------------------------------*) (* parse (the remainder of) a name or nmtoken. *) (* *) (* [5] Name ::= (Letter | '_' | ':') (NameChar)* *) (* *) (* raise NotFound if no name/name start character comes first. *) (* *) (* return the name as a list of characters, together with the next *) (* character and the remaining state. *) (*--------------------------------------------------------------------*) fun parseName' (c,a,q) = if isName c then let val (cs,caq1) = parseName'(getChar(a,q)) in (c::cs,caq1) end else (nil,(c,a,q)) fun parseName (c,a,q) = if isNms c then let val (cs,caq1) = parseName'(getChar(a,q)) in (c::cs,caq1) end else raise NotFound(c,a,q) fun parseNmtoken (c,a,q) = if isName c then let val (cs,caq1) = parseName'(getChar(a,q)) in (c::cs,caq1) end else raise NotFound(c,a,q) (*--------------------------------------------------------------------*) (* parse a name, additionally accumulating its characters in reverse *) (* order to the first argument. *) (* *) (* raise NotFound if no name/name start character comes first. *) (*--------------------------------------------------------------------*) fun parseNameLit cs (c,a,q) = let fun doit (cs,ns) (c,a,q) = if isName c then doit (c::cs,c::ns) (getChar(a,q)) else (cs,rev ns,(c,a,q)) in if isNms c then doit (c::cs,[c]) (getChar(a,q)) else raise NotFound(c,a,q) end (*--------------------------------------------------------------------*) (* parse a name, accumulating its reverse in the first arg text. This *) (* is useful for parsing of entity values, where entity references *) (* are parsed but bypassed, and must thus be accumulated together *) (* the other literal text. *) (* *) (* print an error if no name/name start character comes first. *) (* *) (* return a boolean indicating whether a name was found, the reverse *) (* name as a list of characters, concatenated with the text in the *) (* first arg, together with the next character and remaining state. *) (*--------------------------------------------------------------------*) fun parseEntName (lit,text) (c,a,q) = let fun doit (lit,text) (c,a,q) = if isName c then doit (c::lit,c::text) (getChar (a,q)) else (true,lit,text,(c,a,q)) in if isNms c then doit (c::lit,c::text) (getChar (a,q)) else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expAnEntName,[c]))) in (false,lit,text,(c,a1,q)) end end end (* stop of ../../Parser/Parse/parseNames.sml *) (* start of ../../Parser/Parse/parseMisc.sml *) signature ParseMisc = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseNames val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val parseSopt : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseSmay : UniChar.Data -> UniChar.Char * AppData * State -> bool * (UniChar.Data * (UniChar.Char * AppData * State)) val skipEq : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseEq : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseMisc *) (* *) (* Exceptions raised by functions in this structure: *) (* skipS : none *) (* skipSopt : none *) (* skipSmay : none *) (* skipEq : SyntaxError *) (* skipComment : none *) (* parseComment : none *) (* parseProcInstr : none *) (*--------------------------------------------------------------------------*) functor ParseMisc (structure ParseBase : ParseBase) : ParseMisc = struct structure ParseNames = ParseNames (structure ParseBase = ParseBase) open UniChar Errors ParseNames (*--------------------------------------------------------------------*) (* parse a sequence of white space. 2.3: *) (* *) (* [3] S ::= (#x20 | #x9 | #xD | #xA)+ *) (*--------------------------------------------------------------------*) (* parse optional white space. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun skipSopt (c,a,q) = case c of 0wx09 => skipSopt (getChar (a,q)) | 0wx0A => skipSopt (getChar (a,q)) | 0wx20 => skipSopt (getChar (a,q)) | _ => (c,a,q) fun parseSopt cs (c,a,q) = case c of 0wx09 => parseSopt (c::cs) (getChar (a,q)) | 0wx0A => parseSopt (c::cs) (getChar (a,q)) | 0wx20 => parseSopt (c::cs) (getChar (a,q)) | _ => (cs,(c,a,q)) (*--------------------------------------------------------------------*) (* parse optional white space. *) (*--------------------------------------------------------------------*) (* Return type: bool * (Char * AppData * State) *) (* the bool indicates whether white space was found or not. *) (*--------------------------------------------------------------------*) fun skipSmay (c,a,q) = case c of 0wx09 => (true,skipSopt (getChar (a,q))) | 0wx0A => (true,skipSopt (getChar (a,q))) | 0wx20 => (true,skipSopt (getChar (a,q))) | _ => (false,(c,a,q)) fun parseSmay cs (c,a,q) = case c of 0wx09 => (true,parseSopt (c::cs) (getChar (a,q))) | 0wx0A => (true,parseSopt (c::cs) (getChar (a,q))) | 0wx20 => (true,parseSopt (c::cs) (getChar (a,q))) | _ => (false,(cs,(c,a,q))) (*--------------------------------------------------------------------*) (* parse required white space. *) (*--------------------------------------------------------------------*) (* print an error if no white space character is found. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun skipS (c,a,q) = case c of 0wx09 => skipSopt (getChar (a,q)) | 0wx0A => skipSopt (getChar (a,q)) | 0wx20 => skipSopt (getChar (a,q)) | _ => (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q) (*--------------------------------------------------------------------*) (* parse a "=" together with surrounding white space. Cf. 28: *) (* *) (* [25] Eq ::= S? '=' S? *) (*--------------------------------------------------------------------*) (* Raises: *) (* SyntaxError if no "=" is found. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun skipEq caq = let val (c1,a1,q1) = skipSopt caq in if c1=0wx3D then skipSopt (getChar (a1,q1)) else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expEq,[c1]))) in raise SyntaxError(c1,a2,q1) end end fun parseEq caq = let val (cs1,(c1,a1,q1)) = parseSopt nil caq in if c1=0wx3D then let val (cs2,caq2)= parseSopt (c1::cs1) (getChar (a1,q1)) in (rev cs2,caq2) end else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expEq,[c1]))) in raise SyntaxError(c1,a2,q1) end end (*--------------------------------------------------------------------*) (* parse a comment, the initial "<--" already consumed. cf. 2.5: *) (* *) (* They are not part of the document's character data; an XML *) (* processor may, but need not, make it possible for an application *) (* to retrieve the text of comments. For compatibility, the string *) (* "--" (double-hyphen) must not occur within comments. *) (* *) (* [15] Comment ::= '' *) (*--------------------------------------------------------------------*) (* print an error and end the comment if an entity end is found. *) (* print an error if the comment contains "--". *) (*--------------------------------------------------------------------*) (* add the comment to the user data. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun parseComment startPos aq = let fun check_end yet (a0,q0) = let val (c,a,q) = getChar (a0,q0) in if c=0wx2D (* #"-" *) then let val (c1,a1,q1) = getChar (a,q) in if c1=0wx3E (* #">" *) then let val cs = Data2Vector(rev yet) val a2 = hookComment(a1,((startPos,getPos q1),cs)) in getChar(a2,q1) end else let val a2 = if not (!O_COMPATIBILITY) then a1 else hookError(a1,(getPos q0,ERR_FORBIDDEN_HERE (IT_DATA [c,c],LOC_COMMENT))) in doit (c::c::yet) (c1,a2,q1) end end else doit (0wx2D::yet) (c,a,q) end and doit yet (c,a,q) = if c=0wx2D (* #"-" *) then check_end yet (a,q) else if c<>0wx00 then doit (c::yet) (getChar (a,q)) else let val err = ERR_ENDED_BY_EE LOC_COMMENT val a1 = hookError(a,(getPos q,err)) val cs = Data2Vector(rev yet) val a2 = hookComment(a1,((startPos,getPos q),cs)) in (c,a2,q) end in doit nil (getChar aq) end (*--------------------------------------------------------------------*) (* check whether a name matches "xml", disregarding case, cf. 2.6: *) (* *) (* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) *) (* *) (* The target names "XML", "xml", and so on are reserved for *) (* standardization in this or future versions of this specification.*) (*--------------------------------------------------------------------*) (* print an error if it does match. *) (*--------------------------------------------------------------------*) (* Return type: AppData *) (*--------------------------------------------------------------------*) fun checkPiTarget (a,q) name = case name of [c1,c2,c3] => if ((c1=0wx58 orelse c1=0wx78) andalso (c2=0wx4D orelse c2=0wx6D) andalso (c3=0wx4C orelse c3=0wx6C)) then hookError(a,(getPos q,ERR_RESERVED(name,IT_TARGET))) else a | _ => a (*--------------------------------------------------------------------*) (* parse a processing instruction, the initial "' Char* )))? '?>'*) (* *) (* The first arg consists of the target and the (reversed) list of *) (* leading characters of the text that have been looked ahead. *) (*--------------------------------------------------------------------*) (* print an error and end the proc. instr. if an entity end is found. *) (*--------------------------------------------------------------------*) (* add the processing instruction to the user data. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun parseProcInstr' (startPos,target,txtPos,yetText) caq = let fun doit text (c1,a1,q1) = case c1 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PROC)) in (text,getPos q1,(c1,a2,q1)) end | 0wx3F => (* #"?" *) let val (c2,a2,q2) = getChar (a1,q1) in case c2 of 0wx3E => (* #">" *) (text,getPos q2,getChar(a2,q2)) | _ => doit (c1::text) (c2,a2,q2) end | _ => doit (c1::text) (getChar (a1,q1)) val (cs,endPos,(c2,a2,q2)) = doit yetText caq val text = Data2Vector(rev cs) val a3 = hookProcInst(a2,((startPos,endPos),target,txtPos,text)) in (c2,a3,q2) end (*--------------------------------------------------------------------*) (* parse a processing instruction, the initial "' Char* )))? '?>'*) (*--------------------------------------------------------------------*) (* print an error and end the proc. instr. if an entity end is found. *) (* print an error if no target name is found. *) (* print an error if no whitespace follows the target. *) (*--------------------------------------------------------------------*) (* add the processing instruction to the user data. *) (*--------------------------------------------------------------------*) (* Return type: Char * AppData * State *) (*--------------------------------------------------------------------*) fun parseProcInstr startPos (a,q) = let (* NotFound is handled after the 'in .. end' *) val (target,(c1,a1,q1)) = parseName (getChar(a,q)) val a1 = checkPiTarget (a1,q) target in case c1 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PROC)) val a3 = hookProcInst(a2,((startPos,getPos q1),target,getPos q1,nullVector)) in (c1,a3,q1) end | 0wx3F => (* #"?" *) let val (c2,a2,q2) = getChar (a1,q1) in case c2 of 0wx3E => (* #">" *) let val a3 = hookProcInst(a2,((startPos,getPos q2),target, getPos q1,nullVector)) in getChar (a3,q2) end | _ => let val a3 = hookError(a2,(getPos q1,ERR_MISSING_WHITE)) in parseProcInstr' (startPos,target,getPos q1,[c1]) (c2,a3,q2) end end | _ => let val (hadS,(c2,a2,q2)) = skipSmay (c1,a1,q1) val a3 = if hadS then a2 else hookError(a2,(getPos q2,ERR_MISSING_WHITE)) in parseProcInstr' (startPos,target,getPos q2,nil) (c2,a3,q2) end end handle NotFound(c,a,q) => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expATarget,[c]))) in parseProcInstr' (startPos,nullData,getPos q,nil) (c,a1,q) end end (* stop of ../../Parser/Parse/parseMisc.sml *) (* start of ../../Parser/Parse/parseXml.sml *) signature ParseXml = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNameLit : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val parseSopt : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseSmay : UniChar.Data -> UniChar.Char * AppData * State -> bool * (UniChar.Data * (UniChar.Char * AppData * State)) val parseEq : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseMisc val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val openSubset : Uri.Uri -> AppData -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openExtern : int * bool * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseXml *) (* *) (* Exceptions raised by functions in this structure: *) (* openDocument : NoSuchFile *) (* openExtern : none *) (* openSubset : NoSuchFile *) (*--------------------------------------------------------------------------*) functor ParseXml (structure ParseBase : ParseBase) : ParseXml = struct structure ParseMisc = ParseMisc (structure ParseBase = ParseBase) open Errors UniChar UniClasses UtilString ParseMisc fun checkVersionNum (a,q) version = if not (!O_CHECK_VERSION) orelse version="1.0" then a else hookError(a,(getPos q,ERR_VERSION version)) (*--------------------------------------------------------------------*) (* parse a version number, the quote character ("'" or '"') passed as *) (* first argument. cf. 2.8: *) (* *) (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *) (* | " VersionNum ") *) (* [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ *) (* *) (* print an error and end the literal if an entity end is found. *) (* print an error if a disallowed character is found. *) (* *) (* return the version number as a string option, together with the *) (* next character and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseVersionNum quote aq = let fun doit text (c,a,q) = if c=quote then (text,getChar (a,q)) else if isVers c then doit (c::text) (getChar (a,q)) else if c=0wx0 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_VERSION)) in (text,(c,a1,q)) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR c,LOC_VERSION) val a1 = hookError(a,(getPos q,err)) in doit text (getChar (a1,q)) end val (c1,a1,q1) = getChar aq val (text,(c2,a2,q2)) = if isVers c1 then doit [c1] (getChar (a1,q1)) else if c1=quote then let val a2 = hookError(a1,(getPos q1,ERR_EMPTY LOC_VERSION)) in (nil,getChar (a2,q1)) end else if c1=0wx00 then let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_VERSION)) val a3 = hookError(a2,(getPos q1,ERR_EMPTY LOC_VERSION)) in (nil,(c1,a3,q1)) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR c1,LOC_VERSION) val a2 = hookError(a1,(getPos q1,err)) in doit nil (getChar (a2,q1)) end val version = Latin2String (rev text) val a3 = checkVersionNum (a2,q1) version in (SOME version,(c2,a3,q2)) end (*--------------------------------------------------------------------*) (* parse a version info starting after 'version'. Cf. 2.8: *) (* *) (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *) (* | " VersionNum ") *) (* *) (* print an error and raise SyntaxState if no '=' is found. *) (* print an error and raise SyntaxState if no quote sign is found. *) (* *) (* return the version number as a string option, together with the *) (* next char and the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseVersionInfo caq = let val (c1,a1,q1) = skipEq caq in case c1 of 0wx22 (* '""' *) => parseVersionNum c1 (a1,q1) | 0wx27 (* "'" *) => parseVersionNum c1 (a1,q1) | _ => let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expLitQuote,[c1]))) in raise SyntaxError(c1,a2,q1) end end (*--------------------------------------------------------------------*) (* parse an encoding name, the quote character ("'" or '"') passed as *) (* first argument. cf. 4.3.3: *) (* *) (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' *) (* |"'" EncName "'") *) (* *) (* [81] EncName ::= [A-Za-z] /* Encoding name *) (* ([A-Za-z0-9._] | '-')* contains only Latin *) (* characters */ *) (* *) (* print an error and end the literal if an entity end is found. *) (* print an error if a disallowed character is found. *) (* *) (* return the encoding name as a string option, together with the *) (* next character and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseEncName quote aq = let fun doit text (c,a,q) = if c=quote then (text,getChar (a,q)) else if isEnc c then doit (c::text) (getChar (a,q)) else if c=0wx00 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_ENCODING)) in (text,(c,a1,q)) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR c,LOC_ENCODING) val a1 = hookError(a,(getPos q,err)) in doit text (getChar (a,q)) end val (c1,a1,q1) = getChar aq val (text,caq2) = if isEncS c1 then doit [c1] (getChar (a1,q1)) else if c1=quote then let val a2 = hookError(a1,(getPos q1,ERR_EMPTY LOC_ENCODING)) in (nil,getChar (a2,q1)) end else if c1=0wx00 then let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_ENCODING)) val a3 = hookError(a2,(getPos q1,ERR_EMPTY LOC_ENCODING)) in (nil,(c1,a3,q1)) end else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expStartEnc,[c1]))) in doit nil (getChar (a2,q1)) end val enc = toUpperString (Latin2String (rev text)) in (enc,caq2) end (*--------------------------------------------------------------------*) (* parse an encoding decl starting after 'encoding'. Cf. 4.3.3: *) (* *) (* *) (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' *) (* |"'" EncName "'") *) (* *) (* print an error and raise SyntaxState if no '=' is found. *) (* print an error and raise SyntaxState if no quote sign is found. *) (* *) (* return the encoding name as a string option, together with the *) (* next char and the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseEncodingDecl caq = let val (c1,a1,q1) = skipEq caq in case c1 of 0wx22 (* '""' *) => parseEncName c1 (a1,q1) | 0wx27 (* "'" *) => parseEncName c1 (a1,q1) | _ => let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expLitQuote,[c1]))) in raise SyntaxError(c1,a2,q1) end end (*--------------------------------------------------------------------*) (* parse a standalone declaration starting after 'standalone'. *) (* Cf. 2.9: *) (* *) (* [32] SDDecl ::= S 'standalone' Eq [ VC: Standalone *) (* ( ("'" ('yes' | 'no') "'") Document *) (* | ('"' ('yes' | 'no') '"')) Declaration ] *) (* *) (* print an error and raise SyntaxState if no '=' is found. *) (* print an error and raise SyntaxState if no literal is found. *) (* print an error and end the literal if an entity end is found. *) (* print an error if the literal is neither 'yes' nor 'no'. *) (* *) (* return the standalone status as a boolean option, together with *) (* the next character and the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseStandaloneDecl caq0 = let val (quote,a,q) = skipEq caq0 fun doit text (c,a,q) = if c=quote then (text,getChar (a,q)) else if c<>0wx0 then doit (c::text) (getChar (a,q)) else let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_LITERAL)) in (text,(c,a1,q)) end val caq1 as (_,_,q1) = case quote of 0wx22 (* '""' *) => (getChar (a,q)) | 0wx27 (* "'" *) => (getChar (a,q)) | _ => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expLitQuote,[quote]))) in raise SyntaxError(quote,a1,q) end val (text,caq2) = doit nil caq1 in case text of [0wx73,0wx65,0wx79] (* reversed "yes" *) => (SOME true,caq2) | [0wx6f,0wx6e] (* reversed "no" *) => (SOME false,caq2) | revd => let val (c2,a2,q2) = caq2 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expNoYes,revd))) in (NONE,(c2,a3,q2)) end end (*--------------------------------------------------------------------*) (* parse an xml declaration starting after 'xml ' (i.e. the first *) (* white space character is already consumed). Cf. 2.8: *) (* *) (* [23] XMLDecl ::= ''*) (* *) (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *) (* | " VersionNum ") *) (* *) (* [32] SDDecl ::= S 'standalone' Eq [ VC: Standalone *) (* ( ("'" ('yes' | 'no') "'") Document *) (* | ('"' ('yes' | 'no') '"')) Declaration ] *) (* *) (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' *) (* |"'" EncName "'") *) (* *) (* default version, encoding and standalone status to NONE. *) (* *) (* print an error if no leading white space is found. *) (* print an error whenever a wrong name is encountered. *) (* print an Error if no VersionInfo is found. *) (* print an Error if no '?>' is found at the end. *) (* print an error and raise SyntaxState if no '=' or no literal is *) (* found in VersionInfo, EncodingDecl or SDDecl. *) (* print an error if a literal does not have a correct value. *) (* *) (* return the corresponding XmlDecl option and the next char & state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseXmlDecl auto caq = let (*-----------------------------------------------------------------*) (* skip the '?>' at the end of the xml declaration. *) (* *) (* print an error and raise SyntaxState if no '?>' is found. *) (* *) (* return the info passed as first arg, and the next char & state. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun skipXmlDeclEnd enc res (c,a,q) = if c=0wx3F (* "#?" *) then let val (c1,a1,q1) = getChar (a,q) in if c1=0wx3E (* #">" *) then (enc,SOME res,getChar (a1,q1)) else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expGt,[c1]))) in raise SyntaxError (c1,a2,q1) end end else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expProcEnd,[c]))) in raise SyntaxError (c,a1,q) end (*-----------------------------------------------------------------*) (* parse the remainder after the keyword 'standalone', the version *) (* and encoding already parsed and given in the first arg. *) (* *) (* pass the version,encoding and sd status to skipXmlDeclEnd *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseXmlDeclAfterS enc (v,e) caq = let val (alone,caq1) = parseStandaloneDecl caq val caq2 = skipSopt caq1 in skipXmlDeclEnd enc (v,e,alone) caq2 end (*-----------------------------------------------------------------*) (* parse the remainder after the encoding declaration, the version *) (* and encoding already parsed and given in the first arg. *) (* *) (* print an error if a name other than 'standalone' is found. *) (* *) (* pass the version and encoding to parseXmlDeclAfterS. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseXmlDeclBeforeS enc (v,e) caq = let val (hadS,caq1 as (_,_,q1)) = skipSmay caq val (name,(c2,a2,q2)) = parseName caq1 (* NotFound handled below *) val a3 = if hadS then a2 else hookError(a2,(getPos q1,ERR_MISSING_WHITE)) in case name of [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] => (* "standalone" *) parseXmlDeclAfterS enc (v,e) (c2,a3,q2) | _ => let val a4 = hookError(a3,(getPos q1,ERR_EXPECTED(expStandOpt,name))) in parseXmlDeclAfterS enc (v,e) (c2,a4,q2) end end handle NotFound caq => (* exception raised by parseName *) skipXmlDeclEnd enc (v,e,NONE) caq (*-----------------------------------------------------------------*) (* parse the remainder after the keyword 'encoding', the version *) (* already parsed and given in the first arg. *) (* *) (* pass the version and encoding and to parseXmlDeclBeforeS *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseXmlDeclAfterE ver caq = let val (enc,(c1,a1,q1)) = parseEncodingDecl caq val (a2,q2,enc1) = changeAuto(a1,q1,enc) in parseXmlDeclBeforeS enc1 (ver,SOME enc) (c1,a2,q2) end (*-----------------------------------------------------------------*) (* parse the remainder after the version info, the version already *) (* parsed and given in the first arg. *) (* *) (* print an error if a name other than 'encoding' or 'standalone' *) (* is found. *) (* *) (* pass obtained/default values to parseXmlDeclAfter[E|S] or to *) (* skipXmlDeclEnd. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseXmlDeclBeforeE ver caq = let val (hadS,caq1 as (_,_,q1)) = skipSmay caq val (name,(c2,a2,q2)) = parseName caq1 (* NotFound handled below *) val a3 = if hadS then a2 else hookError(a2,(getPos q1,ERR_MISSING_WHITE)) in case name of [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] => (* "encoding" *) parseXmlDeclAfterE ver (c2,a3,q2) | [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] => (* "standalone" *) parseXmlDeclAfterS auto (ver,NONE) (c2,a3,q2) | _ => let val a4 = hookError(a3,(getPos q1,ERR_EXPECTED(expEncStand,name))) in parseXmlDeclAfterE ver (c2,a4,q2) end end handle NotFound caq => (* exception raised by parseName *) skipXmlDeclEnd auto (ver,NONE,NONE) caq (*-----------------------------------------------------------------*) (* do the main work. if the first name is not 'version' then it *) (* might be 'encoding' or 'standalone'. Then take the default *) (* NONE for version and - if needed - encoding and call the *) (* appropriate function. otherwise assume a typo and parse the *) (* version number, then call parseXmlDeclBeforeE. if no name is *) (* found at all, proceed with skipXmlDeclEnd. *) (* *) (* print an error and raise SyntaxState if an entity end is found. *) (* print an error and raise SyntaxState if appropriate. *) (* print an error if a name other than 'version' is found. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) val caq1 as (_,_,q1) = skipSopt caq val (name,(caq2 as (c2,a2,q2))) = parseName caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expVersion,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end in if name=[0wx76,0wx65,0wx72,0wx73,0wx69,0wx6f,0wx6e] (* "version" *) then let val (ver,caq3) = parseVersionInfo caq2 in parseXmlDeclBeforeE ver caq3 end else let val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expVersion,name))) in case name of [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] => (* "encoding" *) parseXmlDeclAfterE NONE (c2,a3,q2) | [0wx73,0wx74,0wx61,0wx6e,0wx64,0wx61,0wx6c,0wx6f,0wx6e,0wx65] => (* "standalone" *) parseXmlDeclAfterS auto (NONE,NONE) (c2,a3,q2) | _ => let val (ver,caq3) = parseVersionInfo (c2,a3,q2) in parseXmlDeclBeforeE ver caq3 end end end (*----------------------------------------------------------------*) (* catch entity end exceptions raised by subfunctions, print an *) (* error and re-raise the exception. *) (*----------------------------------------------------------------*) handle SyntaxError(c,a,q) => let val err = if c=0wx0 then ERR_ENDED_BY_EE LOC_XML_DECL else ERR_CANT_PARSE LOC_XML_DECL val a1 = hookError(a,(getPos q,err)) in (auto,NONE,recoverXml(c,a1,q)) end (*--------------------------------------------------------------------*) (* parse a text declaration starting after 'xml ' (i.e. the first *) (* white space character is already consumed). Cf. 2.8: *) (* *) (* [77] TextDecl ::= '' *) (* *) (* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' *) (* | " VersionNum ") *) (* *) (* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' *) (* |"'" EncName "'") *) (* *) (* default version and encoding to NONE. *) (* *) (* print an error if no leading white space is found. *) (* print an error whenever a wrong name is encountered. *) (* print an Error if no EncodingDecl is found. *) (* print an Error if '?>' is found at the end. *) (* print an error and raise SyntaxState if no '=' or no literal is *) (* found in VersionInfo or EncodingDecl. *) (* print an error if a literal does not have a correct value. *) (* *) (* return the corresponding TextDecl option and the next char & state.*) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseTextDecl auto caq = let (*-----------------------------------------------------------------*) (* skip the '?>' at the end of the text declaration. *) (* *) (* print an error and raise SyntaxState if no '?>' is found. *) (* *) (* return the info passed as first arg, and the next char & state. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun skipTextDeclEnd enc res (c,a,q) = if c=0wx3F (* "#?" *) then let val (c1,a1,q1) = getChar (a,q) in if c1=0wx3E (* #">" *) then (enc,SOME res,getChar (a1,q1)) else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expGt,[c1]))) in raise SyntaxError(c1,a2,q1) end end else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expProcEnd,[c]))) in raise SyntaxError(c,a1,q) end (*-----------------------------------------------------------------*) (* parse the remainder after the keyword 'encoding', the version *) (* already parsed and given in the first arg. *) (* *) (* pass the version and encoding and to skipTextDeclEnd. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseTextDeclAfterE ver caq = let val (enc,(c1,a1,q1)) = parseEncodingDecl caq val (a2,q2,enc1) = changeAuto(a1,q1,enc) val caq3 = skipSopt (c1,a2,q2) in skipTextDeclEnd enc1 (ver,SOME enc) caq3 end (*-----------------------------------------------------------------*) (* parse the remainder after the version info, the version given *) (* as first argument. *) (* *) (* print an error and raise SyntaxState is no name is found. *) (* print an error if a name other than 'encoding' is found. *) (* *) (* pass obtained/default values to parseTextDeclAfterE. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) fun parseTextDeclBeforeE ver caq = let val caq1 as (_,_,q1) = skipS caq val (name,caq2) = parseName caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expEncoding,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end in if name=[0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] (* "encoding" *) then parseTextDeclAfterE ver caq2 else let val (c2,a2,q2) = caq2 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expEncoding,name))) in parseTextDeclAfterE ver (c2,a3,q2) end end (*-----------------------------------------------------------------*) (* do the main work. if the first name is neither 'version' nor *) (* 'encoding' then assume typo of 'version'. Then parse the *) (* version number, call parseTextDeclBeforeE. if no name is found *) (* at all, proceed with skipTextDeclEnd. *) (* *) (* print an error and raise SyntaxState if appropriate. *) (* print an error if a name other than 'version' or 'encoding' is *) (* found. *) (*-----------------------------------------------------------------*) (* might raise: SyntaxState *) (*-----------------------------------------------------------------*) val caq1 as (_,_,q1) = skipSopt caq val (name,caq2) = parseName caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expEncVers,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end in case name of [0wx76,0wx65,0wx72,0wx73,0wx69,0wx6f,0wx6e] => (* "version" *) let val (ver,caq3) = parseVersionInfo caq2 in parseTextDeclBeforeE ver caq3 end | [0wx65,0wx6e,0wx63,0wx6f,0wx64,0wx69,0wx6e,0wx67] => (* "encoding" *) parseTextDeclAfterE NONE caq2 | _ => let val (c2,a2,q2) = caq2 val a3 = hookError(a2,(getPos q1,ERR_EXPECTED(expEncVers,name))) val (ver,caq3) = parseVersionInfo (c2,a3,q2) in parseTextDeclBeforeE ver caq3 end end (*----------------------------------------------------------------*) (* catch entity end exceptions raised by subfunctions, print an *) (* error and re-raise the exception. *) (*----------------------------------------------------------------*) handle SyntaxError(c,a,q) => let val err = if c=0wx0 then ERR_ENDED_BY_EE LOC_TEXT_DECL else ERR_CANT_PARSE LOC_TEXT_DECL val a1 = hookError(a,(getPos q,err)) in (auto,NONE,recoverXml(c,a1,q)) end (*--------------------------------------------------------------------*) (* check for the string " if isS c1 then (true,(a1,q1)) else (false,(a1,ungetChars(q1,rev(c1::seen)))) | c::cs => if c1=c then doit (c1::seen,cs) (a1,q1) else (false,(a1,ungetChars(q1,rev(c1::seen)))) end in doit (nil,unseen) aq end (*--------------------------------------------------------------------*) (* consume the text/xml declaration. The first parameter is a pair of *) (* the function that parses the declaration and a boolean indicating *) (* whether a warning should we produced if the declaration is missing.*) (* The second parameter is a pair (seen,auto), where auto is the *) (* auto-detected encoding, and seen is SOME cs, if auto-detection *) (* found some initial characters cs of the string " raise CantOpenFile(fmsg,a) (*--------------------------------------------------------------------*) (* open the external subset; consume its text declaration if present. *) (* See 2.8: *) (* *) (* [30] extSubset ::= TextDecl? extSubsetDecl *) (* *) (* return the optional text declaration and the first char and state. *) (*--------------------------------------------------------------------*) (* might raise: NoSuchFile *) (*--------------------------------------------------------------------*) fun openSubset uri a = let val (q,auto) = pushSpecial (EXT_SUBSET,SOME uri) in findTextDecl (parseTextDecl,false) auto (a,q) end handle NoSuchFile fmsg => raise CantOpenFile(fmsg,a) (*--------------------------------------------------------------------*) (* open the document entity; consume its xml declaration if present. *) (* See 2.8: *) (* *) (* [1] document ::= prolog element Misc* *) (* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc* )? *) (* *) (* return the optional xml declaration and the first char and state. *) (*--------------------------------------------------------------------*) (* might raise: NoSuchFile *) (*--------------------------------------------------------------------*) fun openDocument uri a = let val (q,auto) = pushSpecial (DOC_ENTITY,uri) in findTextDecl (parseXmlDecl,!O_WARN_XML_DECL) auto (a,q) end handle NoSuchFile fmsg => raise CantOpenFile(fmsg,a) end (* stop of ../../Parser/Parse/parseXml.sml *) (* start of ../../Parser/Parse/parseRefs.sml *) signature ParseRefs = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseEntName : UniChar.Data * UniChar.Data -> UniChar.Char * AppData * State -> bool * UniChar.Data * UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val parseSopt : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseSmay : UniChar.Data -> UniChar.Char * AppData * State -> bool * (UniChar.Data * (UniChar.Char * AppData * State)) val parseEq : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val openExtern : int * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val openSubset : Uri.Uri -> AppData -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseXml val parseCharRef : AppData * State -> UniChar.Char * AppData * State val parseGenRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.GenEntity) * (AppData * State) val parseParRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.ParEntity) * (AppData * State) val parseCharRefLit : UniChar.Data -> AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseGenRefLit : Dtd -> UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * ((int * Base.GenEntity) * (AppData * State)) val parseParRefLit : Dtd -> UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * ((int * Base.ParEntity) * (AppData * State)) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val skipPS : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSopt : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSmay : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val skipPSdec : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseRefs *) (* *) (* Exceptions raised by functions in this structure: *) (* parseCharRef : NoSuchChar SyntaxError *) (* parseGenRef : NoSuchEntity SyntaxState *) (* parseParRef : NoSuchEntity SyntaxState *) (* skipCharRef : none *) (* skipPS : none *) (* skipPSdec : none *) (* skipPSmay : none *) (* skipPSopt : none *) (* skipReference : none *) (*--------------------------------------------------------------------------*) functor ParseRefs (structure ParseBase : ParseBase) : ParseRefs = struct structure ParseXml = ParseXml (structure ParseBase = ParseBase) open Base Errors UniClasses ParseXml (*--------------------------------------------------------------------*) (* parse a character reference, the "&#" already read. See 4.1: *) (* *) (* [66] CharRef ::= '&#' [0-9]+ ';' *) (* | '&#x' [0-9a-fA-F]+ ';' [ WFC: Legal Character ] *) (* *) (* Well-Formedness Constraint: Legal Character *) (* Characters referred to using character references must match the *) (* production for Char. *) (* *) (* If the character reference begins with "&#x", the digits and *) (* letters up to the terminating ; provide a hexadecimal *) (* representation of the character's code point in ISO/IEC 10646. *) (* If it begins just with "&#", the digits up to the terminating ; *) (* provide a decimal representation of the character's code point. *) (* *) (* raise SyntaxError if no number or x plus hexnum is found, or if no *) (* semicolon follows it. *) (* raise NoSuchChar if the reference is to a non-XML character. *) (* *) (* return the character referred to, and the remaining state. *) (*--------------------------------------------------------------------*) fun parseCharRef aq = let (*--------------------------------------------------------------*) (* parse a (hexa)decimal number, accumulating the value in the *) (* first parameter. *) (* *) (* return the numbers value as a Char. *) (*--------------------------------------------------------------*) fun do_hex_n yet (c,a,q) = case hexValue c of NONE => (yet,(c,a,q)) | SOME v => do_hex_n (0wx10*yet+v) (getChar (a,q)) fun do_dec_n yet (c,a,q) = case decValue c of NONE => (yet,(c,a,q)) | SOME v => do_dec_n (0wx0A*yet+v) (getChar (a,q)) (*--------------------------------------------------------------*) (* Parse a (hexa)decimal number of at least one digit. *) (* *) (* raise SyntaxError if no hexdigit is found first. *) (* *) (* return the numbers value as a Char. *) (*--------------------------------------------------------------*) fun do_hex_1 (c,a,q) = case hexValue c of SOME v => do_hex_n v (getChar (a,q)) | NONE => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expHexDigit,[c]))) in raise SyntaxError(c,a1,q) end (*--------------------------------------------------------------*) (* Parse a decimal number of at least one digit, or a hexnumber *) (* if the first character is 'x'. *) (* *) (* raise SyntaxError if neither 'x' nor digit is found first. *) (* *) (* return the number's value as a Char. *) (*--------------------------------------------------------------*) fun do_dec_1 (c,a,q) = case decValue c of SOME v => do_dec_n v (getChar (a,q)) | NONE => if c=0wx78 (* #"x" *) then do_hex_1 (getChar (a,q)) else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expDigitX,[c]))) in raise SyntaxError(c,a1,q) end val (ch,(c1,a1,q1)) = do_dec_1 (getChar aq) val _ = if c1=0wx3B then () else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1]))) in raise SyntaxError(c1,a2,q1) end val _ = if isXml ch then () else let val a2 = hookError(a1,(getPos q1,ERR_NON_XML_CHARREF ch)) in raise NoSuchChar (a2,q1) end in (ch,a1,q1) end fun parseCharRefLit cs aq = let (*--------------------------------------------------------------*) (* parse a (hexa)decimal number, accumulating the value in the *) (* first parameter. *) (* *) (* return the numbers value as a Char. *) (*--------------------------------------------------------------*) fun do_hex_n (cs,yet) (c,a,q) = case hexValue c of NONE => (cs,yet,(c,a,q)) | SOME v => do_hex_n (c::cs,0wx10*yet+v) (getChar (a,q)) fun do_dec_n (cs,yet) (c,a,q) = case decValue c of NONE => (cs,yet,(c,a,q)) | SOME v => do_dec_n (c::cs,0wx0A*yet+v) (getChar (a,q)) (*--------------------------------------------------------------*) (* Parse a (hexa)decimal number of at least one digit. *) (* *) (* raise SyntaxError if no hexdigit is found first. *) (* *) (* return the numbers value as a Char. *) (*--------------------------------------------------------------*) fun do_hex_1 cs (c,a,q) = case hexValue c of SOME v => do_hex_n (c::cs,v) (getChar (a,q)) | NONE => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expHexDigit,[c]))) in raise SyntaxError(c,a1,q) end (*--------------------------------------------------------------*) (* Parse a decimal number of at least one digit, or a hexnumber *) (* if the first character is 'x'. *) (* *) (* raise SyntaxError if neither 'x' nor digit is found first. *) (* *) (* return the number's value as a Char. *) (*--------------------------------------------------------------*) fun do_dec_1 cs (c,a,q) = case decValue c of SOME v => do_dec_n (c::cs,v) (getChar (a,q)) | NONE => if c=0wx78 (* #"x" *) then do_hex_1 (c::cs) (getChar (a,q)) else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expDigitX,[c]))) in raise SyntaxError(c,a1,q) end val (cs1,ch,(c1,a1,q1)) = do_dec_1 cs (getChar aq) val _ = if c1=0wx3B then () else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1]))) in raise SyntaxError(c1,a2,q1) end val _ = if isXml ch then () else let val a2 = hookError(a1,(getPos q1,ERR_NON_XML_CHARREF ch)) in raise NoSuchChar (a2,q1) end in (c1::cs1,(ch,a1,q1)) end (*--------------------------------------------------------------------*) (* parse a general entity reference, the "&" already read. See 4.1: *) (* *) (* [68] EntityRef ::= '&' Name ';' [ WFC: Entity Declared ] *) (* [ VC: Entity Declared ] *) (* [ WFC: Parsed Entity ] *) (* [ WFC: No Recursion ] *) (* *) (* Well-Formedness Constraint: Entity Declared *) (* In a document without any DTD, a document with only an internal *) (* DTD subset which contains no parameter entity references, or a *) (* document with "standalone='yes'", the Name given in the entity *) (* reference must match that in an entity declaration, ... *) (* ... the declaration of a general entity must precede any *) (* reference to it which appears in a default value in an *) (* attribute-list declaration. *) (* *) (* Validity Constraint: Entity Declared *) (* In a document with an external subset or external parameter *) (* entities with "standalone='no'", the Name given in the entity *) (* reference must match that in an entity declaration. ... *) (* ... the declaration of a general entity must precede any *) (* reference to it which appears in a default value in an *) (* attribute-list declaration. *) (* *) (* Thus: in both cases it is an error if the entity is not declared. *) (* The only difference is the impact on well-formednes/validity. *) (* *) (* There are three contexts in which a general entity reference can *) (* appear: in content, in attribute value, in entity value. This *) (* passage states that it need not be declared prior to a reference *) (* in an entity value. But in this context, it is bypassed and not *) (* included, i.e., it need not be recognized. *) (* *) (* Well-Formedness Constraint: Parsed Entity *) (* An entity reference must not contain the name of an unparsed *) (* entity. Unparsed entities may be referred to only in attribute *) (* values ... *) (* *) (* Well-Formedness Constraint: No Recursion *) (* A parsed entity must not contain a recursive reference to *) (* itself, either directly or indirectly. *) (* *) (* print an error and raise SyntaxState if no name is found, or if no *) (* semicolon follows it. *) (* print an error and return GE_NULL if the reference is to an *) (* undeclared, unparsed or open entity. *) (* *) (* return the entity referred to, and the remaining state. *) (*--------------------------------------------------------------------*) fun parseGenRef dtd (caq as (_,_,q)) = let val (name,(c1,a1,q1)) = parseName caq handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end val _ = if c1=0wx3B then () else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1]))) in raise SyntaxError(c1,a2,q1) end val idx = GenEnt2Index dtd name val (ent,ext) = getGenEnt dtd idx val _ = (* check whether entity is undeclared/unparsed/open *) case ent of GE_NULL => if entitiesWellformed dtd then let val err = ERR_UNDEC_ENTITY(ENT_GENERAL,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else if useParamEnts() then let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () | GE_UNPARSED _ => let val err = ERR_ILLEGAL_ENTITY(ENT_UNPARSED,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end | _ => if isOpen(idx,false,q1) then let val err = ERR_RECURSIVE_ENTITY(ENT_GENERAL,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () val a2 = if ext andalso !O_VALIDATE andalso standsAlone dtd andalso inDocEntity q1 then let val _ = if !O_ERROR_MINIMIZE then setStandAlone dtd false else () in hookError(a1,(getPos q,ERR_STANDALONE_ENT(ENT_GENERAL,name))) end else a1 in ((idx,ent),(a2,q1)) end fun parseGenRefLit dtd cs (caq as (_,_,q)) = let val (cs1,name,(c1,a1,q1)) = parseNameLit cs caq handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end val _ = if c1=0wx3B then () else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expSemi,[c1]))) in raise SyntaxError(c1,a2,q1) end val idx = GenEnt2Index dtd name val (ent,ext) = getGenEnt dtd idx val _ = (* check whether entity is undeclared/unparsed/open *) case ent of GE_NULL => if entitiesWellformed dtd then let val err = ERR_UNDEC_ENTITY(ENT_GENERAL,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else if useParamEnts() then let val err = ERR_UNDECLARED(IT_GEN_ENT,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () | GE_UNPARSED _ => let val err = ERR_ILLEGAL_ENTITY(ENT_UNPARSED,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end | _ => if isOpen(idx,false,q1) then let val err = ERR_RECURSIVE_ENTITY(ENT_GENERAL,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () val a2 = if ext andalso !O_VALIDATE andalso standsAlone dtd andalso inDocEntity q1 then let val _ = if !O_ERROR_MINIMIZE then setStandAlone dtd false else () in hookError(a1,(getPos q,ERR_STANDALONE_ENT(ENT_GENERAL,name))) end else a1 in (c1::cs1,((idx,ent),(a2,q1))) end (*--------------------------------------------------------------------*) (* parse a parameter entity reference, the "%" already read. See 4.1: *) (* *) (* [69] PEReference ::= '%' Name ';' [ VC: Entity Declared ] *) (* [ WFC: No Recursion ] *) (* [ WFC: In DTD ] *) (* *) (* Well-Formedness Constraint: Entity Declared *) (* In a document without any DTD, a document with only an internal *) (* DTD subset which contains no parameter entity references, or a *) (* document with "standalone='yes'", the Name given in the entity *) (* reference must match that in an entity declaration, ... *) (* The declaration of a parameter entity must precede any reference *) (* to it... *) (* *) (* Validity Constraint: Entity Declared *) (* In a document with an external subset or external parameter *) (* entities with "standalone='no'", the Name given in the entity *) (* reference must match that in an entity declaration. ... *) (* The declaration of a parameter entity must precede any reference *) (* to it... *) (* *) (* Thus: in both cases it is an error if the entity is not declared. *) (* The only difference is the impact on well-formednes/validity. *) (* Because the thing to be parsed is a parameter entity reference, *) (* this DTD has references, and thus an undeclared entity is probably *) (* a validity and not a well-formedness error. Thus setExternal must *) (* be called before determining a possible error! *) (* *) (* Well-Formedness Constraint: No Recursion *) (* A parsed entity must not contain a recursive reference to *) (* itself, either directly or indirectly. *) (* *) (* print an error and raise SyntaxError if no name is found, or if no *) (* semicolon follows it. *) (* print an error and return PE_NULL if the reference is to an *) (* undeclared or open entity. *) (* *) (* return the entity referred to, and the remaining state. *) (*--------------------------------------------------------------------*) fun parseParRef dtd (caq as (_,_,q)) = let val (name,(c1,a1,q1)) = parseName caq handle NotFound(c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end val _ = if c1=0wx3B then () else let val err = ERR_EXPECTED(expSemi,[c1]) val a2 = hookError(a1,(getPos q1,err)) in raise SyntaxError(c1,a2,q1) end val _ = setExternal dtd; val idx = ParEnt2Index dtd name val (ent,ext) = getParEnt dtd idx val _ = (* check whether entity is declared *) case ent of PE_NULL => if entitiesWellformed dtd then let val err = ERR_UNDEC_ENTITY(ENT_PARAMETER,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else if useParamEnts() then let val err = ERR_UNDECLARED(IT_PAR_ENT,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () (* check whether the entity is already open *) | _ => if isOpen(idx,true,q1) then let val err = ERR_RECURSIVE_ENTITY(ENT_PARAMETER,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () in ((idx,ent),(a1,q1)) end fun parseParRefLit dtd cs (caq as (_,_,q)) = let val (cs1,name,(c1,a1,q1)) = parseNameLit cs caq handle NotFound(c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end val _ = if c1=0wx3B then () else let val err = ERR_EXPECTED(expSemi,[c1]) val a2 = hookError(a1,(getPos q1,err)) in raise SyntaxError(c1,a2,q1) end val _ = setExternal dtd; val idx = ParEnt2Index dtd name val (ent,ext) = getParEnt dtd idx val _ = (* check whether entity is declared *) case ent of PE_NULL => if entitiesWellformed dtd then let val err = ERR_UNDEC_ENTITY(ENT_PARAMETER,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else if useParamEnts() then let val err = ERR_UNDECLARED(IT_PAR_ENT,name,LOC_NONE) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () (* check whether the entity is already open *) | _ => if isOpen(idx,true,q1) then let val err = ERR_RECURSIVE_ENTITY(ENT_PARAMETER,name) val a2 = hookError(a1,(getPos q,err)) in raise NoSuchEntity (a2,q1) end else () in (c1::cs1,((idx,ent),(a1,q1))) end (*--------------------------------------------------------------------*) (* skip a general/parameter entity reference, the "&/%" already read. *) (* *) (* print an error if no name is found, or if no semicolon follows it. *) (* *) (* handle any SyntaxState by returning its char and state. *) (* *) (* return the remaining state. *) (*--------------------------------------------------------------------*) fun skipReference caq = let val (_,(c1,a1,q1)) = parseName caq in if c1=0wx3B then getChar (a1,q1) else let val err = ERR_EXPECTED(expSemi,[c1]) val a2 = hookError(a1,(getPos q1,err)) in (c1,a2,q1) end end handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAnEntName,[c]) val a1 = hookError(a,(getPos q,err)) in (c,a1,q) end (*--------------------------------------------------------------------*) (* skip a character reference, the "&#" already read. See 4.1: *) (* *) (* print an error if no number or x plus hexnum is found, or if no *) (* semicolon follows it. *) (* *) (* handle any SyntaxState by returning its char and state. *) (* *) (* return the remaining char and state. *) (*--------------------------------------------------------------------*) fun skipCharRef aq = let (*--------------------------------------------------------------*) (* skip a (hexa)decimal number. *) (*--------------------------------------------------------------*) fun skip_ximal isX (c,a,q) = if isX c then skip_ximal isX (getChar (a,q)) else (c,a,q) val (c1,a1,q1) = getChar aq val (c2,a2,q2) = if isDec c1 then skip_ximal isDec (getChar (a1,q1)) else if c1=0wx78 (* #"x" *) then let val (c2,a2,q2) = getChar (a1,q1) in if isHex c2 then skip_ximal isHex (getChar (a2,q2)) else let val err = ERR_EXPECTED(expHexDigit,[c2]) val a3 = hookError(a2,(getPos q2,err)) in raise SyntaxError(c2,a3,q2) end end else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expDigitX,[c1]))) in raise SyntaxError (c1,a2,q1) end in if c2=0wx3B then getChar (a2,q2) else (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expSemi,[c2]))),q2) end handle SyntaxError caq => caq (*--------------------------------------------------------------------*) (* parse a sequence of white space in markup declarations. Cf. 2.3: *) (* *) (* [3] S ::= (#x20 | #x9 | #xD | #xA)+ *) (* *) (* and 2.8 states: *) (* *) (* The markup declarations may be made up in whole or in part of *) (* the replacement text of parameter entities. The productions *) (* later in this specification for individual nonterminals *) (* (elementdecl, AttlistDecl, and so on) describe the declarations *) (* after all the parameter entities have been included. *) (* *) (* in markup declarations, we thus have to include entity references *) (* and skip entity ends, except for the document end. *) (* *) (* Well-Formedness Constraint: PEs in Internal Subset *) (* In the internal DTD subset, parameter-entity references can *) (* occur only where markup declarations can occur, not within *) (* markup declarations. (This does not apply to references that *) (* occur in external parameter entities or to the external subset.) *) (* *) (* we therefore always check whether we are in the internal subset *) (* before including a parameter entity. *) (*--------------------------------------------------------------------*) (* handle a parameter entity reference *) (*--------------------------------------------------------------------*) fun doParRef dtd (caq as (c,a,q)) = if inDocEntity q then let val err = ERR_FORBIDDEN_HERE(IT_PAR_REF,LOC_INT_DECL) val a1 = hookError(a,(getPos q,err)) in skipReference (c,a1,q) end else let val ((id,ent),(a1,q1)) = parseParRef dtd caq in case ent of PE_NULL => getChar (a1,q1) | PE_INTERN (_,rep) => getChar(a1,(pushIntern(q1,id,true,rep))) | PE_EXTERN extId => #3(openExtern(id,true,resolveExtId extId) (a1,q1)) handle CantOpenFile(fmsg,a) => let val err = ERR_NO_SUCH_FILE fmsg val a1 = hookError(a,(getPos q1,err)) in (getChar(a1,q1)) end end handle SyntaxError caq => caq | NoSuchEntity aq => getChar aq (*--------------------------------------------------------------------*) (* parse optional white space. *) (* *) (* catch SyntaxState exceptions from parameter refs. *) (* *) (* print an error if a parameter entity reference or an entity end is *) (* found inside the internal subset. *) (* *) (* return the following character and the remaining state. *) (*--------------------------------------------------------------------*) fun skipPSopt dtd caq = let fun doit (c,a,q) = case c of 0wx00 => if isSpecial q then (c,a,q) else let val a1 = if !O_VALIDATE andalso inDocEntity q then hookError(a,(getPos q,ERR_EE_INT_SUBSET)) else a in doit (getChar (a1,q)) end | 0wx09 => doit (getChar (a,q)) | 0wx0A => doit (getChar (a,q)) | 0wx20 => doit (getChar (a,q)) | 0wx25 (* #"%" *) => doit (doParRef dtd (getChar (a,q))) | _ => (c,a,q) in doit caq end (*--------------------------------------------------------------------*) (* parse optional white space. *) (* *) (* catch SyntaxState exceptions from parameter refs. *) (* *) (* print an error if a parameter entity reference or an entity end is *) (* found inside the internal subset. *) (* *) (* return a boolean whether white space was actually found, and the *) (* following character with the remaining state. *) (*--------------------------------------------------------------------*) fun skipPSmay dtd (c,a,q) = case c of 0wx00 => if isSpecial q then (false,(c,a,q)) else let val a1 = if !O_VALIDATE andalso inDocEntity q then hookError(a,(getPos q,ERR_EE_INT_SUBSET)) else a in (true,skipPSopt dtd (getChar (a1,q))) end | 0wx09 => (true,skipPSopt dtd (getChar (a,q))) | 0wx0A => (true,skipPSopt dtd (getChar (a,q))) | 0wx20 => (true,skipPSopt dtd (getChar (a,q))) | 0wx25 (* #"%" *) => (true,skipPSopt dtd (doParRef dtd (getChar (a,q)))) | _ => (false,(c,a,q)) (*--------------------------------------------------------------------*) (* parse required white space. *) (* *) (* catch SyntaxState exceptions from parameter refs. *) (* *) (* print an error and return if no white space character is found. *) (* print an error if a parameter entity reference or an entity end is *) (* found inside the internal subset. *) (* *) (* return the following character and the remaining state. *) (*--------------------------------------------------------------------*) fun skipPS dtd (c,a,q) = case c of 0wx00 => if isSpecial q then (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q) else let val a1 = if !O_VALIDATE andalso inDocEntity q then hookError(a,(getPos q,ERR_EE_INT_SUBSET)) else a in skipPSopt dtd (getChar (a1,q)) end | 0wx09 => skipPSopt dtd (getChar (a,q)) | 0wx0A => skipPSopt dtd (getChar (a,q)) | 0wx20 => skipPSopt dtd (getChar (a,q)) | 0wx25 (* #"%" *) => skipPSopt dtd (doParRef dtd (getChar (a,q))) | _ => (c,hookError(a,(getPos q,ERR_MISSING_WHITE)),q) (*--------------------------------------------------------------------*) (* parse required white space, taking care of a single '%' character. *) (* this is only needed before the entity name in an entity decl. *) (* *) (* catch SyntaxState exceptions from parameter refs. *) (* *) (* print an error if no white space character is found. *) (* print an error if a parameter entity reference or an entity end is *) (* found inside the internal subset. *) (* *) (* return a boolean whether a '%' was found, the following character *) (* and the remaining state. *) (*--------------------------------------------------------------------*) fun skipPSdec dtd caq = let fun doit req (c,a,q) = case c of 0wx00 => if isSpecial q then (false,(c,a,q)) else let val a1 = if !O_VALIDATE andalso inDocEntity q then hookError(a,(getPos q,ERR_EE_INT_SUBSET)) else a in doit false (getChar (a1,q)) end | 0wx09 => doit false (getChar (a,q)) | 0wx0A => doit false (getChar (a,q)) | 0wx20 => doit false (getChar (a,q)) | 0wx25 => (* #"%" *) let val (c1,a1,q1) = getChar (a,q) in if isNms c1 then doit false (doParRef dtd (c1,a1,q1)) else let val a2 = if req then hookError(a1,(getPos q,ERR_MISSING_WHITE)) else a1 in (true,(c1,a2,q1)) end end | _ => let val a1 = if req then hookError(a,(getPos q,ERR_MISSING_WHITE)) else a in (false,(c,a1,q)) end in doit true caq end end (* stop of ../../Parser/Parse/parseRefs.sml *) (* start of ../../Parser/Parse/parseLiterals.sml *) signature ParseLiterals = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val parseSopt : UniChar.Data -> UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseSmay : UniChar.Data -> UniChar.Char * AppData * State -> bool * (UniChar.Data * (UniChar.Char * AppData * State)) val parseEq : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val openExtern : int * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val openSubset : Uri.Uri -> AppData -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseGenRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.GenEntity) * (AppData * State) val parseParRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.ParEntity) * (AppData * State) val parseCharRefLit : UniChar.Data -> AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val skipPS : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSopt : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSmay : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val skipPSdec : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseRefs val parseSystemLiteral : UniChar.Char * AppData * State -> Uri.Uri * UniChar.Char * (UniChar.Char * AppData * State) val parsePubidLiteral : UniChar.Char * AppData * State -> string * UniChar.Char * (UniChar.Char * AppData * State) val parseAttValue : Dtd -> UniChar.Char * AppData * State -> UniChar.Vector * UniChar.Data * (UniChar.Char * AppData * State) val parseEntityValue : Dtd -> (UniChar.Vector * UniChar.Vector -> 'a) -> UniChar.Char * AppData * State -> 'a * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseLiterals *) (* *) (* Exceptions raised by functions in this structure: *) (* parseSystemLiteral : NotFound *) (* parsePubidLiteral : NotFound *) (* parseAttValue : NotFound *) (* parseEntityValue : NotFound *) (*--------------------------------------------------------------------------*) functor ParseLiterals (structure ParseBase : ParseBase) : ParseLiterals = struct structure ParseRefs = ParseRefs (structure ParseBase = ParseBase) open Base UniChar Errors UniClasses Uri ParseRefs val THIS_MODULE = "ParseLiterals" (*--------------------------------------------------------------------*) (* parse a system literal, the quote character ("'" or '"') already --*) (* read and passed as first argument. cf. 2.3: *) (* *) (* ... Note that a SystemLiteral can be parsed without scanning *) (* for markup. *) (* *) (* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") *) (* *) (* print an error and end the literal if an entity end is found. *) (* *) (* return the literal as a string together with the next character *) (* and remaining state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseSystemLiteral' quote aq = let fun doit text (c,a,q) = if c=quote then (text,getChar (a,q)) else if c=0wx0 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_SYS_LIT)) in (text,(c,a1,q)) end else if c>0wx7F andalso !O_WARN_NON_ASCII_URI then let val a1 = hookWarning(a,(getPos q,WARN_NON_ASCII_URI c)) in doit (c::text) (getChar(a1,q)) end else doit (c::text) (getChar(a,q)) val (text,caq1) = doit nil (getChar aq) in (Data2Uri(rev text),quote,caq1) end (*--------------------------------------------------------------------*) (* parse a system literal. *) (* *) (* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") *) (* *) (* raise NotFound if neither '"' nor "'" comes first. *) (* *) (* return the literal as a string together with the next character *) (* and remaining state. *) (*--------------------------------------------------------------------*) (* might raise: NotFound *) (*--------------------------------------------------------------------*) fun parseSystemLiteral (c,a,q) = if c=0wx22 (* "'" *) orelse c=0wx27 (* '"' *) then parseSystemLiteral' c (a,q) else raise NotFound (c,a,q) (*--------------------------------------------------------------------*) (* parse a pubid literal, the quote character ("'" or '"') already ---*) (* read and passed as first argument. cf. 2.3: *) (* *) (* [12] PubidLiteral ::= '"' PubidChar* '"' *) (* | "'" (PubidChar - "'")* "'" *) (* *) (* print an error and end the literal if an entity end is found. *) (* print an error if a non-pubid character is found. *) (* *) (* return the literal as a string together with the next character *) (* and remaining state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parsePubidLiteral' quote aq = let fun doit (hadSpace,atStart,text) aq = let val (c1,a1,q1) = getChar aq in case c1 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_PUB_LIT)) in (text,(c1,a2,q1)) end | 0wx0A => doit (true,atStart,text) (a1,q1) | 0wx20 => doit (true,atStart,text) (a1,q1) | _ => if c1=quote then (text,getChar (a1,q1)) else if not (isPubid c1) then let val err = ERR_FORBIDDEN_HERE(IT_CHAR c1,LOC_PUB_LIT) val a2 = hookError(a1,(getPos q1,err)) in doit (hadSpace,atStart,text) (a2,q1) end else if hadSpace andalso not atStart then doit (false,false,c1::0wx20::text) (a1,q1) else doit (false,false,c1::text) (a1,q1) end val (text,caq1) = doit (false,true,nil) aq in (Latin2String(rev text),quote,caq1) end (*--------------------------------------------------------------------*) (* parse a pubid literal. *) (* *) (* [12] PubidLiteral ::= '"' PubidChar* '"' *) (* | "'" (PubidChar - "'")* "'" *) (* *) (* raise NotFound if neither '"' nor "'" comes first. *) (* *) (* return the literal as a string together with the next character *) (* and remaining state. *) (*--------------------------------------------------------------------*) (* might raise: NotFound *) (*--------------------------------------------------------------------*) fun parsePubidLiteral (c,a,q) = if c=0wx22 (* "'" *) orelse c=0wx27 (* '"' *) then parsePubidLiteral' c (a,q) else raise NotFound (c,a,q) (*--------------------------------------------------------------------*) (* parse an entity value and the quote character ("'" or '"') passed *) (* as first argument. Cf. 2.3: *) (* *) (* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'*) (* | "'" ([^%&'] | PEReference | Reference)* "'"*) (* See also 4.4.5: *) (* *) (* When ... a parameter entity reference appears in a literal *) (* entity value, its replacement text is processed in place of the *) (* reference itself as though it were part of the document at the *) (* location the reference was recognized, except that a single or *) (* double quote character in the replacement text is always treated *) (* as a normal data character and will not terminate the literal. *) (* *) (* and 4.4.7: *) (* *) (* When a general entity reference appears in the EntityValue in an *) (* entity declaration, it is bypassed and left as is. *) (* *) (* A bypassed entity ref must, however, be checked for syntactic *) (* validity, as opposed to SGML, where it is not even recognized. *) (* *) (* print an error and end the literal if an entity end is found at *) (* the toplevel. *) (* print an error if a general entity reference is ill-formed. *) (* *) (* handle any errors in references by ignoring them syntactically. *) (* *) (* return argument con applied to the entity value as a char buffer, *) (* and the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseEntityValue' dtd (quote,con) aq = let fun doit (level,hadCr,lit,text) (c1,a1,q1) = case c1 of 0wx00 => if level=0 then let val err = ERR_ENDED_BY_EE LOC_ENT_VALUE val a2 = hookError(a1,(getPos q1,err)) in (lit,text,(c1,a2,q1)) end else doit (level-1,false,lit,text) (getChar (a1,q1)) | 0wx25 => (* #"%" *) let val (level1,lit1,caq2) = if inDocEntity q1 then let val err = ERR_FORBIDDEN_HERE(IT_PAR_REF,LOC_INT_DECL) val a2 = hookError(a1,(getPos q1,err)) in (level,lit,skipReference (getChar(a2,q1))) end else let val (lit1,((id,ent),(a2,q2))) = if level=0 then parseParRefLit dtd (c1::lit) (getChar(a1,q1)) else (lit,parseParRef dtd (getChar(a1,q1))) in case ent of PE_NULL => (level,lit1,getChar(a2,q2)) | PE_INTERN(_,rep) => let val q3 = pushIntern(q2,id,true,rep) in (level+1,lit1,getChar(a2,q3)) end | PE_EXTERN extId => let val fname = resolveExtId extId val caq3 = #3(openExtern (id,true,fname) (a2,q2)) in (level+1,lit1,caq3) end handle CantOpenFile(fmsg,a) => let val err = ERR_NO_SUCH_FILE fmsg val a1 = hookError(a,(getPos q1,err)) in (level,lit1,getChar(a1,q1)) end end (* ignore syntax errors in references *) handle SyntaxError caq => (level,lit,caq) | NoSuchEntity aq => (level,lit,getChar aq) in doit (level1,false,lit1,text) caq2 end | 0wx26 => (* #"&" *) let val (c2,a2,q2) = getChar (a1,q1) in (if c2=0wx23 (* #"#" *) (*--------------------------------------------------*) (* it's a character reference. *) (*--------------------------------------------------*) then (if level=0 then let val (lit3,(ch,a3,q3)) = parseCharRefLit (c2::c1::lit) (a2,q2) in doit (level,false,lit3,ch::text) (getChar(a3,q3)) end else let val (ch,a3,q3) = parseCharRef (a2,q2) in doit (level,false,lit,ch::text) (getChar(a3,q3)) end) (* ignore errors in char references *) handle SyntaxError caq => doit (level,false,lit,text) caq | NoSuchChar aq => doit (level,false,lit,text) (getChar aq) (*-----------------------------------------------------*) (* it's a general entity reference. *) (*-----------------------------------------------------*) else let val (fnd,lit3,text3,(c3,a3,q3)) = parseEntName (c1::lit,c1::text) (c2,a2,q2) val (lit4,text4,caq4) = if not fnd then (lit,text,(c3,a3,q3)) else if c3=0wx3B (* #";" *) then (c3::lit3,c3::text3,(getChar(a3,q3))) else let val err = ERR_EXPECTED(expSemi,[c3]) val a4 = hookError(a3,(getPos q3,err)) in (lit,text,(c3,a4,q3)) end in doit (level,false,lit4,text4) caq4 end ) end | 0wx0A => doit (level,false,if level=0 then c1::lit else lit, if hadCr then text else c1::text) (getChar (a1,q1)) | 0wx0D => doit (level,true,if level=0 then c1::lit else lit,0wx0A::text) (getChar (a1,q1)) | _ => if c1=quote andalso level=0 then (lit,text,getChar(a1,q1)) else doit (level,false,if level=0 then c1::lit else lit,c1::text) (getChar (a1,q1)) val (lit,text,caq1) = doit (0,false,nil,nil) (getChar aq) val literal = Data2Vector(quote::rev(quote::lit)) val repText = Data2Vector(rev text) in (con(literal,repText),caq1) end (*--------------------------------------------------------------------*) (* parse an entity value. *) (* *) (* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'*) (* | "'" ([^%&'] | PEReference | Reference)* "'"*) (* *) (* raise NotFound if neither '"' nor "'" comes first. *) (* *) (* return the entity value as a char buffer, and the remaining char *) (* and state. *) (*--------------------------------------------------------------------*) (* might raise: NotFound *) (*--------------------------------------------------------------------*) fun parseEntityValue dtd con (c,a,q) = if c=0wx22 (* "'" *) orelse c=0wx27 (* '"' *) then parseEntityValue' dtd (c,con) (a,q) else raise NotFound (c,a,q) (*--------------------------------------------------------------------*) (* parse and normalize an attribute value, consume the final quote *) (* character ("'" or '""') passed in the argument. Cf. 2.3: *) (* *) (* [10] AttValue ::= '"' ([^<&""] | Reference)* '"' *) (* | "'" ([^<&'] | Reference)* "'" *) (* See also 4.4.5: *) (* *) (* When an entity reference appears in an attribute value ..., *) (* its replacement text is processed in place of the reference *) (* itself as though it were part of the document at the location *) (* the reference was recognized, except that a single or double *) (* quote character in the replacement text is always treated as a *) (* normal data character and will not terminate the literal. *) (* *) (* and 3.3.3: *) (* *) (* Before the value of an attribute is passed to the application *) (* or checked for validity, the XML processor must normalize it as *) (* follows: *) (* *) (* * a character reference is processed by appending the referenced *) (* character to the attribute value *) (* * an entity reference is processed by recursively processing the *) (* replacement text of the entity *) (* * a whitespace character (#x20, #xD, #xA, #x9) is processed by *) (* appending #x20 to the normalized value, except that only a *) (* single #x20 is appended for a "#xD#xA" sequence that is part *) (* of an external parsed entity or the literal entity value of *) (* an internal parsed entity *) (* * other characters are processed by appending them to the *) (* normalized value *) (* *) (* since #xD#xA are normalized by the parseEntityValue (internal) and *) (* getChar (external entities), we don't need to care about that. *) (*--------------------------------------------------------------------*) (* print an error and end the literal if an entity end is found. *) (* print an error if a general entity reference is ill-formed. *) (* print an error if a reference to an external or unparsed entity is *) (* found. *) (* print an error if character '<' appears literally. *) (* *) (* handle any errors in references by ignoring them syntactically. *) (* raise NotFound if neither '"' nor "'" comes first. *) (* *) (* return the list of chars in the value, and the next char and state *) (*--------------------------------------------------------------------*) (* might raise: NotFound *) (*--------------------------------------------------------------------*) fun parseAttValue dtd (quote,a,q) = let fun doit (lhlt as (level,lit,text)) (c1,a1,q1) = case c1 of 0wx00 => if level=0 then let val err = ERR_ENDED_BY_EE LOC_ATT_VALUE val a2 = hookError(a1,(getPos q1,err)) in (lit,text,(c1,a2,q1)) end else doit (level-1,lit,text) (getChar (a1,q1)) | 0wx26 => (* #"&" *) let val (c2,a2,q2) = getChar (a1,q1) val ((level1,lit1,text1),caq3) = (if c2=0wx23 (* #"#" *) (*--------------------------------------------------*) (* it's a character reference. *) (*--------------------------------------------------*) then if level=0 then let val (lit3,(ch,a3,q3)) = parseCharRefLit (c2::c1::lit) (a2,q2) in ((level,lit3,ch::text),getChar(a3,q3)) end else let val (ch,a3,q3) = parseCharRef (a2,q2) in ((level,lit,ch::text),getChar (a3,q3)) end (*-----------------------------------------------------*) (* it's a general entity reference. *) (*-----------------------------------------------------*) else let val (lit3,((id,ent),(a3,q3))) = if level=0 then parseGenRefLit dtd (c1::lit) (c2,a2,q2) else (nil,parseGenRef dtd (c2,a2,q2)) in case ent of GE_NULL => ((level,lit3,text),getChar(a3,q3)) | GE_INTERN(_,rep) => let val q4 = pushIntern(q3,id,false,rep) in ((level+1,lit3,text),getChar (a3,q4)) end | GE_EXTERN _ => let val err = ERR_ILLEGAL_ENTITY (ENT_EXTERNAL,Index2GenEnt dtd id,LOC_ATT_VALUE) val a4 = hookError(a3,(getPos q2,err)) in ((level,lit,text),getChar (a4,q3)) end | GE_UNPARSED _ => raise InternalError (THIS_MODULE,"parseAttValue'", "parseGenRef returned GE_UNPARSED") end) (*------------------------------------------------------*) (* handle any errors in references by ignoring them. *) (*------------------------------------------------------*) handle SyntaxError caq => ((level,lit,text),caq) | NoSuchEntity aq => ((level,lit,text),getChar aq) | NoSuchChar aq => ((level,lit,text),getChar aq) in doit (level1,lit1,text1) caq3 end | 0wx3C => let val err = ERR_FORBIDDEN_HERE(IT_CHAR c1,LOC_ATT_VALUE) val a2 = hookError(a1,(getPos q1,err)) val lit1 = if level=0 then c1::lit else lit in doit (level,lit1,c1::text) (getChar (a2,q1)) end | _ => if isS c1 then doit (level,if level=0 then c1::lit else lit,0wx20::text) (getChar (a1,q1)) else (if c1=quote andalso level=0 then (lit,text,getChar (a1,q1)) else doit (level,if level=0 then c1::lit else lit,c1::text) (getChar (a1,q1))) val _ = if quote=0wx22 orelse quote=0wx27 (* "'",'"' *) then () else raise NotFound (quote,a,q) val (lit,text,caq1) = doit (0,nil,nil) (getChar(a,q)) in (Data2Vector(quote::rev(quote::lit)),rev text,caq1) end end (* stop of ../../Parser/Parse/parseLiterals.sml *) (* start of ../../Parser/Parse/parseTags.sml *) signature ParseTags = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseNmtoken : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val openExtern : int * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val openSubset : Uri.Uri -> AppData -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseGenRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.GenEntity) * (AppData * State) val parseParRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.ParEntity) * (AppData * State) val parseCharRefLit : UniChar.Data -> AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val skipPS : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSopt : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipPSmay : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val skipPSdec : Dtd -> UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val parseSystemLiteral : UniChar.Char * AppData * State -> Uri.Uri * UniChar.Char * (UniChar.Char * AppData * State) val parsePubidLiteral : UniChar.Char * AppData * State -> string * UniChar.Char * (UniChar.Char * AppData * State) val parseAttValue : Dtd -> UniChar.Char * AppData * State -> UniChar.Vector * UniChar.Data * (UniChar.Char * AppData * State) val parseEntityValue : Dtd -> (UniChar.Vector * UniChar.Vector -> 'a) -> UniChar.Char * AppData * State -> 'a * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseLiterals val skipTag : Errors.Location -> AppData * State -> (UniChar.Char * AppData * State) val parseETag : Dtd -> AppData * State -> int * UniChar.Data * Errors.Position * (UniChar.Char * AppData * State) val parseSTag : Dtd -> Errors.Position -> UniChar.Char * AppData * State -> (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseTags *) (* *) (* Exceptions raised by functions in this structure: *) (* skipTag : none *) (* parseETag : SyntaxState *) (* parseSTag : SyntaxState *) (*--------------------------------------------------------------------------*) functor ParseTags (structure ParseBase : ParseBase) : ParseTags = struct structure ParseLiterals = ParseLiterals (structure ParseBase = ParseBase) open UtilList Base Errors UniClasses ParseLiterals (*--------------------------------------------------------------------*) (* parse an end-tag, the "' *) (* *) (* and 3. states: *) (* *) (* Validity Constraint: Element Valid *) (* An element is valid if there is a declaration matching elementdecl *) (* where the Name matches the element type, and ... *) (* *) (* print an error, recover and raise SyntaxState if no name is found. *) (* print an error and recover if no ">" is found. *) (* print an error if the element is not declared. *) (* *) (* return the index of the element, and the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseETag dtd aq = let val caq0 as (_,_,q0) = getChar aq val (elem,(c1,a1,q1)) = parseName caq0 handle NotFound (c,a,q) => let val err = expectedOrEnded (expAName,LOC_ETAG) c val a1 = hookError(a,(getPos q,err)) val caq1 = recoverETag (c,a1,q) in raise SyntaxError caq1 end val idx = Element2Index dtd elem val elemInfo as {decl,...} = getElement dtd idx val a1' = if isSome decl then a1 else let val a2 = if not (!O_VALIDATE andalso hasDtd dtd) then a1 else let val err = ERR_UNDECLARED(IT_ELEM,elem,LOC_ETAG) val a1' = hookError(a1,(getPos q0,err)) val _ = if not (!O_ERROR_MINIMIZE) then () else ignore (handleUndeclElement dtd idx) in a1' end in checkElemName (a2,q0) elem end val (cs,(c2,a2,q2)) = parseSopt nil (c1,a1',q1) val space = rev cs in if c2=0wx3E (* #">" *) then (idx,space,getPos q2,getChar(a2,q2)) else let val err = expectedOrEnded (expGt,LOC_ETAG) c2 val a3 = hookError(a2,(getPos q2,err)) val caq3 = recoverETag(c2,a3,q2) in (idx,space,getPos q2,caq3) end end (*--------------------------------------------------------------------*) (* parse a start-tag or an empty-element-tag, the "<" already read. *) (* 3.1: *) (* *) (* [40] STag ::= '<' Name (S Attribute)* S? '>' *) (* [ WFC: Unique Att Spec ] *) (* [41] Attribute ::= Name Eq AttValue [ VC: Attribute Value Type ] *) (* *) (* Well-Formedness Constraint: Unique Att Spec *) (* No attribute name may appear more than once in the same *) (* start-tag or empty-element tag. *) (* *) (* Validity Constraint: Attribute Value Type *) (* The attribute must have been declared; the value must be of the *) (* type declared for it. *) (* *) (* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' *) (* [ WFC: Unique Att Spec ] *) (* *) (* and 3. states: *) (* *) (* Validity Constraint: Element Valid *) (* An element is valid if there is a declaration matching elementdecl *) (* where the Name matches the element type, and ... *) (* *) (* catch entity end exceptions in subfunctions by printing an error *) (* and re-raising the exception. *) (* *) (* print an error, recover and raise SyntaxState if no element name *) (* is found. *) (* print an error and recover if no ">" or "/>" is found. *) (* print an error and continue if no "=" is found after an att name. *) (* print an error and recover if no literal is found after the "=". *) (* print an error if white space is missing. *) (* print an error if the element is not declared. *) (* print an error and ignore the attribute if an attribute is *) (* specified twice. *) (* print an error if an attribute is not declared. *) (* *) (* return the index of the element, its ElemInfo, the list of *) (* AttSpecs (specified and omitted atts) and a boolean whether it was *) (* an empty-element-tag, together with the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseSTag dtd startPos (caq as (_,_,q)) = let val (elem,(c1,a1,q1)) = parseName caq handle NotFound (c,a,q) => let val err = expectedOrEnded (expAName,LOC_STAG) c val a1 = hookError(a,(getPos q,err)) val (_,caq1) = recoverSTag (c,a1,q) in raise SyntaxError (c,a1,q) end val eidx = Element2Index dtd elem val elemInfo as {atts,decl,...} = getElement dtd eidx val defs = case atts of NONE => nil | SOME (defs,_) => defs val (a1',elemInfo) = if isSome decl then (a1,elemInfo) else let val (a2,newInfo) = if not (!O_VALIDATE andalso hasDtd dtd) then (a1,elemInfo) else let val err = ERR_UNDECLARED(IT_ELEM,elem,LOC_STAG) val a1' = hookError(a1,(getPos q,err)) val newInfo = if not (!O_ERROR_MINIMIZE) then elemInfo else handleUndeclElement dtd eidx in (a1',newInfo) end in (checkElemName (a2,q) elem,newInfo) end val hscaq2 = parseSmay nil (c1,a1',q1) (*--------------------------------------------------------------*) (* yet are the indices of attributes encountered yet, old are *) (* the valid attributes specified yet, and todo are the defs of *) (* attributes yet to be specified. hadS indicates whether white *) (* space preceded. *) (*--------------------------------------------------------------*) fun doit (yet,old,todo) (hadS,(sp,(c,a,q))) = case c of 0wx3E (* #">" *) => (old,todo,sp,false,q,getChar(a,q)) | 0wx2F (* #"/" *) => let val (c1,a1,q1) = getChar(a,q) in if c1=0wx3E (* #">" *) then (old,todo,sp,true,q1,getChar(a1,q1)) else let val err = expectedOrEnded (expGt,LOC_STAG) c1 val a2 = hookError(a1,(getPos q1,err)) val (mt,caq2) = recoverSTag (c1,a2,q1) in (old,todo,sp,mt,q,caq2) end end | _ => if not (isNms c) then let val err = expectedOrEnded (expAttSTagEnd,LOC_STAG) c val a1 = hookError(a,(getPos q,err)) val (mt,caq1) = recoverSTag (c,a1,q) in (old,todo,sp,mt,q,caq1) end else let(* first parse the name of the attribute *) val (att,(c1,a1,q1)) = parseName (c,a,q) val a2 = if hadS then a1 else hookError(a1,(getPos q,ERR_MISSING_WHITE)) (* now get its index, check whether it already *) (* occurred and get its definition. *) val aidx = AttNot2Index dtd att val (hadIt,a3) = if member aidx yet then (true,hookError(a2,(getPos q,ERR_MULT_ATT_SPEC att))) else (false,a2) val (def,rest) = findAndDelete (fn (i,_,_,_) => i=aidx) todo val a4 = if isSome def orelse hadIt then a3 else handleUndeclAtt dtd (a3,q) (aidx,att,eidx,elem) (* consume the " = ", ignore errors *) val (eq,caq5 as (_,_,q5)) = parseEq (c1,a4,q1) handle SyntaxError caq => ([0wx3D],caq) (* now parse the attribute value *) val (literal,value,(c6,a6,q6)) = parseAttValue dtd caq5 (* possibly make a new AttSpec *) val space = rev sp val (new,a7) = if hadIt then (old,a6) else case def of NONE => if !O_VALIDATE andalso hasDtd dtd then (old,a6) else (let val (attVal,a7) = checkAttValue dtd (a6,q5) (defaultAttDef aidx,literal,value) in ((aidx,attVal,SOME(space,eq))::old,a7) end handle AttValue a => (old,a)) | SOME ad => let val (attVal,a7) = checkAttValue dtd (a6,q5) (ad,literal,value) in ((aidx,attVal,SOME(space,eq))::old,a7) end handle AttValue a => (old,a) val hscaq8 = parseSmay nil (c6,a7,q6) in doit (aidx::yet,new,rest) hscaq8 end handle NotFound (c,a,q) (* raised by parseAttValue above *) => let val err = expectedOrEnded (expLitQuote,LOC_STAG) c val a1 = hookError(a,(getPos q,err)) val (mt,caq1) = recoverSTag (c,a1,q) in (old,todo,sp,mt,q,caq1) end val (specd,todo,sp,empty,qe,(c3,a3,q3)) = doit (nil,nil,defs) hscaq2 val space = rev sp (* generate the defaults for unspecified attributes *) val (all,a4) = genMissingAtts dtd (a3,qe) (todo,rev specd) in ((((startPos,getPos q3),eidx,all,space,empty),elemInfo),(c3,a4,q3)) end (*--------------------------------------------------------------------*) (* skip a tag, the initial "<" or "" and *) (* "/>" if within a literal. *) (* *) (* print an error and finish if an entity end is found. *) (* *) (* return the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun skipTag loc aq = let fun do_lit ch (c,a,q) = if c=0wx00 then let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE loc)) in (c,a1,q) end else if c=ch then doit (getChar(a,q)) else do_lit ch (getChar(a,q)) and doit (c,a,q) = case c of 0wx00 => let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE loc)) in (c,a1,q) end | 0wx22 (* #"\""*) => do_lit c (getChar(a,q)) | 0wx27 (* #"'" *) => do_lit c (getChar(a,q)) | 0wx2F (* #"/" *) => (case getChar(a,q) of (0wx3E,a1,q1) (* #">" *) => getChar(a1,q1) | caq1 => doit caq1) | 0wx3E (* #">" *) => getChar(a,q) | _ => doit(getChar(a,q)) in doit (getChar aq) end end (* stop of ../../Parser/Parse/parseTags.sml *) (* start of ../../Parser/Parse/parseDecl.sml *) signature ParseDecl = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipS : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSopt : UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipSmay : UniChar.Char * AppData * State -> bool * (UniChar.Char * AppData * State) val openExtern : int * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val openSubset : Uri.Uri -> AppData -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseGenRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.GenEntity) * (AppData * State) val parseParRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.ParEntity) * (AppData * State) val parseCharRefLit : UniChar.Data -> AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val skipPSopt : Dtd -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val skipTag : Errors.Location -> AppData * State -> (UniChar.Char * AppData * State) val parseETag : Dtd -> AppData * State -> int * UniChar.Data * Errors.Position * (UniChar.Char * AppData * State) val parseSTag : Dtd -> Errors.Position -> UniChar.Char * AppData * State -> (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseTags val skipDecl : bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseExtIdSub : Dtd -> UniChar.Char * AppData * State -> Base.ExternalId * bool * (UniChar.Char * AppData * State) val parseEntityDecl : Dtd -> EntId * Errors.Position * bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseElementDecl : Dtd -> EntId * Errors.Position * bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseNotationDecl : Dtd -> EntId * Errors.Position * bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseAttListDecl : Dtd -> EntId * Errors.Position * bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State end (*--------------------------------------------------------------------------*) (* Structure: ParseDecl *) (* *) (* Exceptions raised by functions in this structure: *) (* skipDecl : none *) (* parseExtIdSub : NotFound SyntaxError *) (* parseEntityDecl : none *) (* parseElementDecl : none *) (* parseNotationDecl : none *) (* parseAttListDecl : none *) (*--------------------------------------------------------------------------*) functor ParseDecl (structure ParseBase : ParseBase) : ParseDecl = struct structure ParseTags = ParseTags (structure ParseBase = ParseBase) open UtilInt UtilList Base Errors HookData ParseTags (*--------------------------------------------------------------------*) (* skip a markup declaration, the initial "" if within a literal. yake care of internal subset if *) (* the first arg is true. *) (* *) (* print an error and finish if an entity end is found. *) (* *) (* return the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun skipDecl hasSubset caq = let fun do_lit ch (c,a,q) = if c=0wx00 then (c,a,q) else if c=ch then getChar (a,q) else do_lit ch (getChar(a,q)) fun do_decl (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx22 (* #"\""" *) => do_decl (do_lit c (getChar(a,q))) | 0wx27 (* #"'" *) => do_decl (do_lit c (getChar(a,q))) | 0wx3E (* #">" *) => getChar(a,q) | _ => do_decl (getChar(a,q)) fun do_subset (c,a,q) = case c of 0wx00 => (c,a,q) | 0wx3C (* #"<" *) => do_subset (do_decl (getChar(a,q))) | 0wx5D (* #"]" *) => getChar(a,q) | _ => do_subset (getChar(a,q)) fun doit (c,a,q) = case c of 0wx00 => (c,hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_DECL)),q) | 0wx22 (* #"\"""*) => doit (do_lit c (getChar(a,q))) | 0wx27 (* #"'" *) => doit (do_lit c (getChar(a,q))) | 0wx3E (* #">" *) => getChar(a,q) | 0wx5B (* #"[" *) => if hasSubset then doit (do_subset (getChar(a,q))) else doit (getChar(a,q)) | _ => doit (getChar(a,q)) in doit caq end (*--------------------------------------------------------------------*) (* parse an external id, or a public id if the first arg is true. *) (* Cf. 4.2.2 and 4.7: *) (* *) (* [75] ExternalID ::= 'SYSTEM' S SystemLiteral *) (* | 'PUBLIC' S PubidLiteral S SystemLiteral *) (* *) (* [83] PublicID ::= 'PUBLIC' S PubidLiteral *) (* *) (* raise NotFound if no name is found first. *) (* print an error if white space is missing. *) (* print an error and raise SyntaxState if a wrong name is found. *) (* print an Error and raise SyntaxState if a required literal is not *) (* found (depends on optSys). *) (* *) (* return the public and system identifiers as string options, *) (* a boolean, whether whit space followed the external id, *) (* and the next character and the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: NotFound SyntaxState *) (*--------------------------------------------------------------------*) fun parseExternalId dtd optSys (caq as (_,_,q))= let (* do not handle NotFound: in this case no extId was found *) val (name,caq1) = parseName caq val caq2 as (_,_,q2)= skipPS dtd caq1 in case name of [0wx50,0wx55,0wx42,0wx4c,0wx49,0wx43] => (* "PUBLIC" *) let val (pub,pquote,caq3) = parsePubidLiteral caq2 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expLitQuote,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val (hadS,caq4 as (_,_,q4)) = skipPSmay dtd caq3 in let val (sys,squote,(c5,a5,q5)) = parseSystemLiteral caq4 val base = getUri q4 val a6 = if hadS then a5 else hookError(a5,(getPos q4,ERR_MISSING_WHITE)) val (hadS6,caq6) = skipPSmay dtd (c5,a6,q5) in (EXTID(SOME(pub,pquote),SOME(base,sys,squote)),hadS6,caq6) end handle NotFound (c,a,q) => (* no system id *) if optSys then (EXTID(SOME(pub,pquote),NONE),hadS,(c,a,q)) else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expLitQuote,[c]))) in raise SyntaxError (c,a1,q) end end | [0wx53,0wx59,0wx53,0wx54,0wx45,0wx4d] => (* "SYSTEM" *) let val (sys,squote,caq3) = parseSystemLiteral caq2 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expLitQuote,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val base = getUri q2 val (hadS,caq4) = skipPSmay dtd caq3 in (EXTID(NONE,SOME(base,sys,squote)),hadS,caq4) end | _ => let val (c2,a2,q2) = caq2 val a3 = hookError(a2,(getPos q,ERR_EXPECTED(expExtId,name))) in raise SyntaxError (c2,a3,q2) end end (*--------------------------------------------------------------------*) (* parse an external id in an entity definition. Cf. 4.2.2: *) (* *) (* print an Error and raise SyntaxState if no external id is found. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseExtIdEnt dtd caq = parseExternalId dtd false caq handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expLitQuotExt,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end (*--------------------------------------------------------------------*) (* parse an external or public id in a notation declaration. *) (* *) (* print an Error and raise SyntaxState if neither external nor *) (* public id is found. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseExtIdNot dtd caq = parseExternalId dtd true caq handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expExtId,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end (*--------------------------------------------------------------------*) (* parse an external id for the external subset. *) (* *) (* raise NotFound if no external id is found. *) (*--------------------------------------------------------------------*) (* might raise: NotFound SyntaxState *) (*--------------------------------------------------------------------*) fun parseExtIdSub dtd caq = parseExternalId dtd false caq (*--------------------------------------------------------------------*) (* parse a parameter entity declaration, starting after the '%'. The *) (* unique entity id of the initial '<' is given as first arg. 4.2: *) (* *) (* [72] PEDecl ::= '' *) (* [74] PEDef ::= EntityValue | ExternalID *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* print an error if white space is missing. *) (* print an error and raise SyntaxState if neither entity value nor *) (* external identifier is found. *) (* print an error and raise SyntaxState if the closing '>' is missing.*) (* print an error if the '>' is not in the same entity as the ' let val err = ERR_EXPECTED(expAnEntName,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val idx = ParEnt2Index dtd name val caq3 = skipPS dtd caq2 val (ent,(c4,a4,q4)) = let val (ent,caq4) = parseEntityValue dtd PE_INTERN caq3 val caq5 = skipPSopt dtd caq4 in (ent,caq5) end handle NotFound caq => let val (extId,_,caq1) = parseExtIdEnt dtd caq in (PE_EXTERN extId,caq1) end val a5 = if useParamEnts() orelse not ext then addParEnt dtd (a4,q1) (idx,ent,ext) else a4 val a6 = hookDecl(a5,((startPos,getPos q4),DEC_PAR_ENT(idx,ent,ext))) in if c4<>0wx3E (* #">" *) then let val a7 = hookError(a6,(getPos q4,ERR_EXPECTED(expGt,[c4]))) in raise SyntaxError(c4,a7,q4) end else let val a7 = if not (!O_VALIDATE) orelse getEntId q4=startEnt then a6 else hookError(a6,(getPos q4,ERR_DECL_ENT_NESTING LOC_ENT_DECL)) in getChar(a7,q4) end end (*--------------------------------------------------------------------*) (* parse a general entity declaration, starting with the name. The *) (* unique entity id of the initial '<' is given as first arg. 4.2: *) (* *) (* [71] GEDecl ::= '' *) (* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) *) (* *) (* [76] NDataDecl ::= S 'NDATA' S Name [ VC: Notation *) (* Declared ] *) (* *) (* If the NDataDecl is present, this is a general unparsed entity; *) (* otherwise it is a parsed entity. *) (* *) (* Validity Constraint: Notation Declared *) (* The Name must match the declared name of a notation. *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* print an error if white space is missing. *) (* print an error and raise SyntaxState if neither entity value nor *) (* external identifier is found. *) (* print an error if name other then 'NDATA' is found after ext. id. *) (* print an error and raise SyntaxState if no name is found after the *) (* 'NDATA'. *) (* print an error if the notation is not declared. *) (* print an error and raise SyntaxState if the closing '>' is missing.*) (* print an error if the '>' is not in the same entity as the ' let val err = ERR_EXPECTED(expEntNamePero,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val idx = GenEnt2Index dtd name val caq2 = skipPS dtd caq1 val (ent,expEnd,(c3,a3,q3)) = (*-----------------------------------------------------------*) (* Try for an internal entity. Then '>' must follow. *) (*-----------------------------------------------------------*) let val (ent,caq3) = parseEntityValue dtd GE_INTERN caq2 val caq4 = skipPSopt dtd caq3 in (ent,expGt,caq4) end handle NotFound cq => (* raised by parseEntityValue *) (*-----------------------------------------------------------*) (* Must be external. First parse the external identifier. *) (*-----------------------------------------------------------*) let val (extId,hadS,caq1 as (_,_,q1)) = parseExtIdEnt dtd caq2 in let (*-----------------------------------------------------*) (* Does a name follow? Then is must be 'NDATA' and the *) (* notation name follows. Thus the entity is unparsed. *) (* Also, only '>' may come next. *) (* NotFound is handled at the end of the let. *) (*-----------------------------------------------------*) val (key,(c2,a2,q2)) = parseName caq1 val a3 = if hadS then a2 else hookError(a2,(getPos q1,ERR_MISSING_WHITE)) val a4 = if key = [0wx4e,0wx44,0wx41,0wx54,0wx41] (* "NDATA" *) then a3 else hookError(a3,(getPos q1,ERR_EXPECTED(expGtNdata,key))) val caq5 as (_,_,q5) = skipPS dtd (c2,a4,q2) val (not,caq6) = parseName caq5 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expANotName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val notIdx = AttNot2Index dtd not val caq7 = skipPSopt dtd caq6 in (GE_UNPARSED(extId,notIdx,getPos q5),expGt,caq7) end handle NotFound caq => (*--------------------------------------------------------*) (* No 'NDATA' present, so it's parsed external entity. *) (* A 'NDATA' might have followed. *) (*--------------------------------------------------------*) (GE_EXTERN extId,expGtNdata,caq) end val a4 = if useParamEnts() orelse not ext then addGenEnt dtd (a3,q) (idx,ent,ext) else a3 val a5 = hookDecl(a4,((startPos,getPos q3),DEC_GEN_ENT(idx,ent,ext))) in if c3<>0wx3E (* #">" *) then let val a6 = hookError(a5,(getPos q3,ERR_EXPECTED(expGt,[c3]))) in raise SyntaxError(c3,a6,q3) end else let val a6 = if not (!O_VALIDATE) orelse getEntId q3=startEnt then a5 else hookError(a5,(getPos q3,ERR_DECL_ENT_NESTING LOC_ENT_DECL)) in getChar(a6,q3) end end (*--------------------------------------------------------------------*) (* parse an entity declaration, the initial '' *) (* [72] PEDecl ::= '' *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* raise SyntaxState in case of a syntax error. *) (* print an error if white space is missing. *) (* *) (* print an error for entity end exceptions in subfunctions. *) (* catch syntax errors by recovering to the next possible state. *) (* *) (* pass control to parseParEntDecl or parseGenEntDecl, depending on *) (* whether the S is followed by a '%'. *) (* return the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseEntityDecl dtd pars caq = let val (hadPero,caq1) = skipPSdec dtd caq in if hadPero then parseParEntDecl dtd pars caq1 else parseGenEntDecl dtd pars caq1 end handle exn as SyntaxError (c,a,q) => let val a1 = if c=0wx00 then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_ENT_DECL)) else a in recoverDecl false (c,a1,q) end (*--------------------------------------------------------------------*) (* parse a notation declaration, the initial '' *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* print an error and raise SyntaxState if no notation name, no *) (* external/public identifier or no final '>' is found. *) (* print an error if the '>' is not in the same entity as the ' let val err = ERR_EXPECTED(expANotName,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val idx = AttNot2Index dtd name val caq3 = skipPS dtd caq2 val (extId,_,(c4,a4,q4)) = parseExtIdNot dtd caq3 val a5 = if useParamEnts() orelse not ext then addNotation dtd (a4,q1) (idx,extId) else a4 val a6 = hookDecl(a5,((startPos,getPos q4),DEC_NOTATION(idx,extId,ext))) in if c4<>0wx3E (* #">" *) then let val a7 = hookError(a6,(getPos q4,ERR_EXPECTED(expGt,[c4]))) in raise SyntaxError (c4,a7,q4) end else let val a7 = if not (!O_VALIDATE) orelse getEntId q4=startEnt then a6 else hookError(a6,(getPos q4,ERR_DECL_ENT_NESTING LOC_NOT_DECL)) in getChar(a7,q4) end end handle exn as SyntaxError(c,a,q) => let val a1 = if c=0wx00 then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_NOT_DECL)) else a in recoverDecl false (c,a1,q) end (*--------------------------------------------------------------------*) (* parse a mixed-content specification, the initial '(', S? and '#' *) (* already read. The unique id of the openening paren's entity is *) (* given as first arg. Cf. 3.2.1/2: *) (* *) (* Validity Constraint: Proper Group/PE Nesting *) (* Parameter-entity replacement text must be properly nested with *) (* parenthetized groups. That is to say, if either of the opening *) (* or closing parentheses in a choice, seq, or Mixed construct is *) (* contained in the replacement text for a parameter entity, both *) (* must be contained in the same replacement text. *) (* ... *) (* [51] Mixed ::= '(' S? '#PCDATA' [ VC: Proper Group/PE *) (* (S? '|' S? Name)* S? ')*' Nesting ] *) (* | '(' S? '#PCDATA' S? ')' [ VC: No Duplicate *) (* Types ] *) (* *) (* print an error and raise SyntaxState if no name is found first. *) (* print an error if a name other than 'PCDATA' is found. *) (* is found in the first place. *) (* print an error if element names are specified but no '*' follows. *) (* print an error if an element name is specified more than once. *) (* print an error and raise SyntaxState if neither '|' nor ')' is *) (* found after the 'PCDATA' or after an element name. *) (* print an error if the closing parenthesis is not in the same *) (* as the opening one. *) (* *) (* return the mixed-content specification, togther with the next *) (* character and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseMixed dtd lparEnt (caq as (_,_,q)) = let fun doit is (c,a,q) = case c of 0wx29 (* #")" *) => let val a1 = if not (!O_VALIDATE) orelse getEntId q=lparEnt then a else hookError(a,(getPos q,ERR_GROUP_ENT_NESTING LOC_MIXED)) in (rev is,getChar(a1,q)) end | 0wx7C (* #"|" *) => let val caq1 as (_,_,q1) = skipPSopt dtd (getChar(a,q)) val (name,(c2,a2,q2)) = parseName caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAName,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val i = Element2Index dtd name val (newis,a3) = if not (member i is) then (i::is,a2) else let val a3 = if !O_VALIDATE then hookError(a2,(getPos q1,ERR_MULT_MIXED name)) else a2 in (is,a3) end val caq3 = skipPSopt dtd (c2,a3,q2) in doit newis caq3 end | _ => let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expBarRpar,[c]))) in raise SyntaxError (c,a1,q) end val (name,(c1,a1,q1)) = parseName caq handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expPcdata,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val a2 = case name of [0wx50,0wx43,0wx44,0wx41,0wx54,0wx41] (* "PCDATA" *) => a1 | _ => hookError(a1,(getPos q,ERR_EXPECTED(expPcdata,name))) val caq2 = skipPSopt dtd (c1,a2,q1) val (is,(c3,a3,q3)) = doit nil caq2 val caq4 = if c3=0wx2A (* #"*" *) then getChar(a3,q3) else let val a4 = if null is then a3 else hookError(a3,(getPos q3,ERR_EXPECTED(expRep,[c3]))) in (c3,a4,q3) end in (CT_MIXED is,caq4) end (*--------------------------------------------------------------------*) (* parse an optional occurrence indicator afer a content particle or *) (* a content model, given as first argument. Cf. 3.2.1: *) (* *) (* [47] children ::= (choice | seq) ('?' | '*' | '+')? *) (* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? *) (* *) (* return the (possibly modified) content particle, together with the *) (* next char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseOcc cm (c,a,q) = case c of 0wx3F (* #"?" *) => (CM_OPT cm,getChar(a,q)) | 0wx2A (* #"*" *) => (CM_REP cm,getChar(a,q)) | 0wx2B (* #"+" *) => (CM_PLUS cm,getChar(a,q)) | _ => (cm,(c,a,q)) (*--------------------------------------------------------------------*) (* parse a content particle. Cf. 3.2.1: *) (* *) (* Validity Constraint: Proper Group/PE Nesting *) (* Parameter-entity replacement text must be properly nested with *) (* parenthetized groups. ... *) (* *) (* (see also parseMixed) *) (* *) (* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? *) (* [49] choice ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? '|' S? cp )* S? ')' PE Nesting ] *) (* [50] seq ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? ',' S? cp )* S? ')' PE Nesting ] *) (* *) (* print an error and raise SyntaxState if no element name or "(" is *) (* found in the first place. *) (* *) (* return the content particle together with the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseCP dtd (c,a,q) = case c of 0wx28 (* #"(" *) => let val lparEnt = getEntId q val caq1 = skipPSopt dtd (getChar (a,q)) in parseGroup dtd lparEnt caq1 end | _ => (* must be an element name *) let val (name,caq1) = parseName (c,a,q) handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expElemLpar,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val idx = Element2Index dtd name in parseOcc (CM_ELEM idx) caq1 end (*--------------------------------------------------------------------*) (* parse a seq/choice, the first content particle and the connector *) (* already parsed; the connector, the type of group and the entity id *) (* of the opening parenthesis are given in first arg. Cf. 3.2.1: *) (* *) (* Validity Constraint: Proper Group/PE Nesting *) (* Parameter-entity replacement text must be properly nested with *) (* parenthetized groups. ... *) (* *) (* (see also parseMixed) *) (* *) (* [49] choice ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? '|' S? cp )* S? ')' PE Nesting ] *) (* [50] seq ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? ',' S? cp )* S? ')' PE Nesting ] *) (* *) (* print an error and raise SyntaxState if something other than the *) (* connector or ')' is found after a content particle. *) (* print an error if the closing parenthesis of a group is not in the *) (* same entity as the opening one. *) (* *) (* return the list of content particles parsed, together with the *) (* remaining character and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) and parseGroup' dtd (con,loc,lparEnt) caq = let fun doit caq = let val caq1 = skipPSopt dtd caq val (cp,caq2) = parseCP dtd caq1 val (c3,a3,q3) = skipPSopt dtd caq2 in if c3=0wx29 (* #")" ( *) then let val a4 = if not (!O_VALIDATE) orelse getEntId q3=lparEnt then a3 else hookError(a3,(getPos q3,ERR_GROUP_ENT_NESTING loc)) in ([cp],getChar(a4,q3)) end else (if c3=con then let val (cps,caq4) = doit (getChar(a3,q3)) in (cp::cps,caq4) end else let val err = ERR_EXPECTED(expConCRpar con,[c3]) in raise SyntaxError (c3,hookError(a3,(getPos q3,err)),q3) end) end in doit caq end (*--------------------------------------------------------------------*) (* parse a seq/choice, the first content particle parsed; the entity *) (* id of the opening parenthesis are given in first arg. Cf. 3.2.1: *) (* *) (* (see also parseMixed) *) (* *) (* [49] choice ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? '|' S? cp )* S? ')' PE Nesting ] *) (* [50] seq ::= '(' S? cp [ VC: Proper Group/ *) (* ( S? ',' S? cp )* S? ')' PE Nesting ] *) (* *) (* print an error and raise SyntaxState if neither '|' nor ',' nor *) (* ')' follows the first content particle in a seq/choice. *) (* *) (* return the list of as a ContentModel, together with the remaining *) (* character and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) and parseGroup dtd lparEnt caq = let val (cp,caq1) = parseCP dtd caq val (c2,a2,q2) = skipPSopt dtd caq1 val (group,caq3) = case c2 of 0wx29 (* #")" *) => let val a3 = if not (!O_VALIDATE) orelse getEntId q2=lparEnt then a2 else hookError(a2,(getPos q2,ERR_GROUP_ENT_NESTING LOC_SEQ)) in (CM_SEQ[cp],getChar(a3,q2)) end | 0wx2C (* #"," *) => let val (cps,caq3) = parseGroup' dtd (c2,LOC_SEQ,lparEnt) (getChar(a2,q2)) in (CM_SEQ(cp::cps),caq3) end | 0wx7C (* #"|" *) => let val (cps,caq3) = parseGroup' dtd (c2,LOC_CHOICE,lparEnt) (getChar(a2,q2)) in (CM_ALT(cp::cps),caq3) end | _ => let val a3 = hookError(a2,(getPos q2,ERR_EXPECTED(expConRpar,[c2]))) in raise SyntaxError (c2,a3,q2) end in parseOcc group caq3 end (*--------------------------------------------------------------------*) (* parse a content specification. Cf. 3.2/3.2.1: *) (* *) (* Validity Constraint: Proper Group/PE Nesting *) (* Parameter-entity replacement text must be properly nested with *) (* parenthetized groups. That is to say, if either of the opening *) (* or closing parentheses in a choice, seq, or Mixed construct is *) (* contained in the replacement text for a parameter entity, both *) (* must be contained in the same replacement text. *) (* ... *) (* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children *) (* *) (* [47] children ::= (choice | seq) ('?' | '*' | '+')? *) (* *) (* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' [ VC:Proper *) (* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' Group/PE *) (* Nesting ]*) (* *) (* [51] Mixed ::= '(' S? '#PCDATA' [ VC: Proper Group/PE *) (* (S? '|' S? Name)* S? ')*' Nesting ] *) (* | '(' S? '#PCDATA' S? ')' [ VC: No Duplicate *) (* Types ] *) (* *) (* print an error and raise SyntaxState if no children, Mixed, or *) (* name is found. *) (* print an error and assume ANY if an ambiguous content model is *) (* specified. *) (* print an error and assume ANY if a name other than EMPTY or ANY *) (* is found. *) (* print an error if the closing parenthesis of a Mixed is not in the *) (* same entity as the opening one. *) (* *) (* return the parsed content specification, togther with the next *) (* character and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseContentSpec dtd curr (c,a,q) = case c of 0wx28 (* #"(" *) => let val (c1,a1,q1) = skipPSopt dtd (getChar(a,q)) val lparEnt = getEntId q in if c1=0wx23 (* #"#" *) then parseMixed dtd lparEnt (getChar(a1,q1)) else let val (cm,(c2,a2,q2)) = parseGroup dtd lparEnt (c1,a1,q1) val (dfa,a3) = (makeDfa cm,a2) handle Ambiguous(a,n1,n2) => if !O_COMPATIBILITY then let val err = ERR_AMBIGUOUS(Index2Element dtd a,n1,n2) val a3 = hookError(a2,(getPos q,err)) val dfa = makeChoiceDfa cm in (dfa,a3) end else (makeAmbiguous cm,a2) handle DfaTooLarge max => let val a3 = if !O_DFA_WARN_TOO_LARGE then hookWarning (a2,(getPos q,WARN_DFA_TOO_LARGE(curr,max))) else a2 val dfa = makeChoiceDfa cm in (dfa,a3) end in (CT_ELEMENT(cm,dfa),(c2,a3,q2)) end end | _ => (* must be ANY or EMPTY *) let val (name,caq1 as (c1,a1,q1)) = parseName (c,a,q) handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expContSpec,[c]) in raise SyntaxError(c,hookError(a,(getPos q,err)),q) end in case name of [0wx41,0wx4e,0wx59] (* "ANY" *) => (CT_ANY,caq1) | [0wx45,0wx4d,0wx50,0wx54,0wx59] (* "EMPTY" *) => (CT_EMPTY,caq1) | _ => let val a2 = hookError(a1,(getPos q,ERR_EXPECTED(expContSpec,name))) in (CT_ANY,(c1,a2,q1)) end end (*--------------------------------------------------------------------*) (* parse an element declaration, the initial '' Element Type *) (* Declaration ] *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* print an error and raise SyntaxState if no element name, no *) (* content specification, or no final '>' is found. *) (* print an error if the '>' is not in the same entity as the ' let val err = ERR_EXPECTED(expAnElemName,[c]) in raise SyntaxError(c,hookError(a,(getPos q,err)),q) end val a3 = checkElemName (a2,q1) name val idx = Element2Index dtd name val caq3 = skipPS dtd (c2,a3,q2) val (contSpec,(c4,a4,q4)) = parseContentSpec dtd name caq3 val a5 = if useParamEnts() orelse not ext then addElement dtd (a4,q1) (idx,contSpec,ext) else a4 val a5' = hookDecl(a5,((startPos,getPos q4),DEC_ELEMENT(idx,contSpec,ext))) val (c6,a6,q6) = skipPSopt dtd (c4,a5',q4) in if c6<>0wx3E (* #">" *) then let val a7 = hookError(a6,(getPos q6,ERR_EXPECTED(expGt,[c6]))) in raise SyntaxError(c6,a7,q6) end else let val a7 = if not (!O_VALIDATE) orelse getEntId q6=startEnt then a6 else hookError(a6,(getPos q6,ERR_DECL_ENT_NESTING LOC_ELEM_DECL)) in getChar(a7,q6) end end handle exn as SyntaxError (c,a,q) => let val a1 = if c=0wx00 then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_ELEM_DECL)) else a in recoverDecl false (c,a1,q) end (*--------------------------------------------------------------------*) (* parse an enumerated attribute type, the '(' already consumed. the *) (* 1st arg is a string describing the attribute (nmtoken or notation),*) (* the 2nd arg is a function that parses a single token, the 3rd arg *) (* a function for converting the token to its index. 3.3.1: *) (* *) (* [58] NotationType ::= 'NOTATION' S *) (* '(' S? Name (S? '|' S? Name)* S? ')' *) (* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' *) (* *) (* print an error and raise SyntaxState if no token is found after a *) (* '(' or '|', or if neither '|' nor ')' follows a token. *) (* *) (* return the (sorted) list of indices of the parsed tokens. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseEnumerated dtd (expWhat,parseToken,Token2Index) caq = let fun doit idxs caq = let val caq1 as (_,_,q1) = skipPSopt dtd caq val (nt,(c2,a2,q2)) = parseToken caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expWhat,[c]) in raise SyntaxError(c,hookError(a,(getPos q,err)),q) end val (idx,a3) = Token2Index dtd (a2,q1) nt val (c4,a4,q4) = skipPSopt dtd (c2,a3,q2) val newIdxs = insertInt(idx,idxs) in case c4 of 0wx7C (* #"|" *) => doit newIdxs (getChar(a4,q4)) | 0wx29 (* #")" *) => (newIdxs,getChar(a4,q4)) | _ => let val a5 = hookError(a4,(getPos q4,ERR_EXPECTED(expBarRpar,[c4]))) in raise SyntaxError (c4,a5,q4) end end in doit nil caq end (*--------------------------------------------------------------------*) (* Convert a (name) token to its index as an enumerated attribute. *) (* 3.3.1: *) (* *) (* Validity Constraint: Notation Attributes *) (* ... all notation names in the declaration must be declared. *) (* *) (* print an error if a notation is not declared. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun Token2NmtokenIndex dtd (a,_) token = (AttNot2Index dtd token,a) fun Token2NotationIndex dtd (a,q) token = let val idx = AttNot2Index dtd token val a1 = if not (!O_VALIDATE) orelse hasNotation dtd idx then a else hookError(a,(getPos q,ERR_UNDECLARED(IT_NOTATION,token,LOC_NONE))) in (idx,a1) end (*--------------------------------------------------------------------*) (* parse an attribute type, the 1st arg being the element this decl. *) (* refers to. 3.3.1: *) (* *) (* [54] AttType ::= StringType | TokenizedType | EnumeratedType *) (* *) (* [55] StringType ::= 'CDATA' *) (* [56] TokenizedType ::= 'ID' [VC: One ID per Element Type ] *) (* | 'IDREF' *) (* | 'IDREFS' *) (* | 'ENTITY' *) (* | 'ENTITIES' *) (* | 'NMTOKEN' *) (* | 'NMTOKENS' *) (* *) (* Validity Constraint: One ID per Element Type *) (* No element type may have more than one ID attribute specified. *) (* *) (* Enumerated Attribute Types *) (* *) (* [57] EnumeratedType ::= NotationType | Enumeration *) (* [58] NotationType ::= 'NOTATION' S '(' ... *) (* [59] Enumeration ::= '(' ... *) (* *) (* print an error and raise SyntaxState if no '(', or name is found *) (* in the first place, or the name does not start an attribute type, *) (* or if no '(' follows a 'NOTATION'. *) (* print an error and assume NMTOKEN instead of ID if the element *) (* already has an ID attribute. *) (* *) (* return the attribute type together with the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseAttType dtd elem (c,a,q) = if c=0wx28 (* #"(" *) then let val (idxs,caq1) = parseEnumerated dtd (expANameToken,parseNmtoken,Token2NmtokenIndex) (getChar(a,q)) in (AT_GROUP idxs,caq1) end else let val (name,caq1 as (c1,a1,q1)) = parseName (c,a,q) handle NotFound cq => let val err = ERR_EXPECTED(expAttType,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end in case name of [0wx43,0wx44,0wx41,0wx54,0wx41] (* "CDATA" *) => (AT_CDATA,caq1) | [0wx49,0wx44] (* "ID" *) => (AT_ID,caq1) | [0wx49,0wx44,0wx52,0wx45,0wx46] (* "IDREF" *) => (AT_IDREF,caq1) | [0wx49,0wx44,0wx52,0wx45,0wx46,0wx53] (* "IDREFS" *) => (AT_IDREFS,caq1) | [0wx45,0wx4e,0wx54,0wx49,0wx54,0wx59] (* "ENTITY" *) => (AT_ENTITY,caq1) | [0wx45,0wx4e,0wx54,0wx49,0wx54,0wx49,0wx45,0wx53] (* "ENTITIES" *) => (AT_ENTITIES,caq1) | [0wx4e,0wx4d,0wx54,0wx4f,0wx4b,0wx45,0wx4e] (* "NMTOKEN" *) => (AT_NMTOKEN,caq1) | [0wx4e,0wx4d,0wx54,0wx4f,0wx4b,0wx45,0wx4e,0wx53] (* "NMTOKEN" *) => (AT_NMTOKENS,caq1) | [0wx4e,0wx4f,0wx54,0wx41,0wx54,0wx49,0wx4f,0wx4e] (* "NOTATION" *) => let val (c2,a2,q2) = skipPSopt dtd caq1 in case c2 of 0wx28 (* #"(" *) => let val (idxs,caq3) = parseEnumerated dtd (expANotName,parseName,Token2NotationIndex) (getChar(a2,q2)) in (AT_NOTATION idxs,caq3) end | _ => let val err = ERR_EXPECTED(expLpar,[c2]) in raise SyntaxError(c2,hookError(a2,(getPos q2,err)),q2) end end | _ => let val a2 = hookError(a1,(getPos q,ERR_EXPECTED(expAttType,name))) in raise SyntaxError (c1,a2,q1) end end (*--------------------------------------------------------------------*) (* parse an attribute default, for an attribute whose type is given *) (* the 1st argument. Cf. 3.3.2: *) (* *) (* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' *) (* | (('#FIXED' S)? AttValue) *) (* *) (* Validity Constraint: Attribute Default Legal *) (* The declared default value must meet the lexical constraints of *) (* the declared attribute type. *) (* *) (* and 3.3.1: *) (* *) (* Validity Constraint: ID Attribute Default *) (* An ID attribute must have a declared default of #IMPLIED or *) (* #REQUIRED. *) (* *) (* print an error and raise SyntaxState if no '#' or literal is found *) (* in the first place, or no name or a wrong name is found after the *) (* '#', or if no literal follows the 'FIXED'. *) (* print an error if white space is missing. *) (* print an error and assume IMPLIED if the default for an ID attrib. *) (* is not IMPLIED or REQUIRED. *) (* *) (* return the default together with the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: SyntaxState *) (*--------------------------------------------------------------------*) fun parseDefaultDecl dtd (aidx,attType) (c,a,q) = if c=0wx23 (* #"#" *) then let val caq0 as (_,_,q0) = (getChar(a,q)) val (name,caq1) = parseName caq0 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAttDefKey,[c]) in raise SyntaxError(c,hookError(a,(getPos q,err)),q) end in case name of [0wx46,0wx49,0wx58,0wx45,0wx44] (* "FIXED" *) => let val caq2 as (_,_,q2) = skipPS dtd caq1 val (lit,text,(c3,a3,q3)) = parseAttValue dtd caq2 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expLitQuote,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end in if !O_VALIDATE andalso isIdType attType then let val a4 = hookError(a3,(getPos q,ERR_ID_DEFAULT)) in (AD_IMPLIED,(c3,a4,q3)) end else let val (cv,(av,a4)) = makeAttValue dtd (a3,q2) (aidx,attType,false,true,text) in (AD_FIXED((lit,cv,av),(getPos q2,ref false)),(c3,a4,q3)) end handle AttValue a => (AD_IMPLIED,(c3,a,q3)) end | [0wx49,0wx4d,0wx50,0wx4c,0wx49,0wx45,0wx44] (* "IMPLIED" *) => (AD_IMPLIED,caq1) | [0wx52,0wx45,0wx51,0wx55,0wx49,0wx52,0wx45,0wx44] (* "REQUIRED" *) => (AD_REQUIRED,caq1) | _ => let val (c1,a1,q1) = caq1 val a2 = hookError(a1,(getPos q0,ERR_EXPECTED(expAttDefKey,name))) in raise SyntaxError (c1,a2,q1) end end else let val (lit,text,(c1,a1,q1)) = parseAttValue dtd (c,a,q) handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expQuoteRni,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError(c,a1,q) end in if !O_VALIDATE andalso isIdType attType then let val a2 = hookError(a1,(getPos q,ERR_ID_DEFAULT)) in (AD_IMPLIED,(c1,a2,q1)) end else let val (cv,(av,a2)) = makeAttValue dtd (a1,q) (aidx,attType,false,true,text) in (AD_DEFAULT((lit,cv,av),(getPos q,ref false)),(c1,a2,q1)) end handle AttValue a => (AD_IMPLIED,(c1,a,q1)) end (*--------------------------------------------------------------------*) (* parse an attribute definition, the referred element given as 1st *) (* argument. 3.3: *) (* *) (* [53] AttDef ::= S Name S AttType S DefaultDecl *) (* *) (* raise NotFound if no name is found (and thus no attribute def.) *) (* print an error if white space is missing. *) (* *) (* enter the attribute definition into the element table. *) (* return the next character and the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: NotFound SyntaxState *) (*--------------------------------------------------------------------*) fun parseAttDef dtd (elem,ext) caq = let val (hadS,caq1 as (_,_,q1)) = skipPSmay dtd caq val (name,(c2,a2,q2)) = parseName caq1 (* NotFound falls through to the next level *) val a3 = if hadS then a2 else hookError(a2,(getPos q1,ERR_MISSING_WHITE)) val a4 = checkAttName (a3,q1) name val idx = AttNot2Index dtd name val caq5 = skipPS dtd (c2,a4,q2) val (attType,caq6) = parseAttType dtd elem caq5 val caq7 = skipPS dtd caq6 val (attDef,(c8,a8,q8)) = parseDefaultDecl dtd (idx,attType) caq7 val a9 = if useParamEnts() orelse not ext then addAttribute dtd (a8,q1) (elem,(idx,attType,attDef,ext)) else a8 in ((idx,attType,attDef),(c8,a9,q8)) end (*--------------------------------------------------------------------*) (* parse an attribute-list declaration, the initial '' *) (* *) (* (see also the comments for ParseDtd.parseMarkupDecl). *) (* *) (* check whether the element already had an attlist declaration. (cf. *) (* DtdElements.enterAttDecl) *) (* *) (* print an error and raise SyntaxState if no element name, or no *) (* final '>' is found. *) (* print an error if the '>' is not in the same entity as the ' let val err = ERR_EXPECTED(expAnElemName,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val a3 = checkElemName (a2,q1) name val idx = Element2Index dtd name val a4 = if !O_VALIDATE orelse not ext then enterAttList dtd (a3,q1) idx else a3 fun doit attDefs caq = let val (attDef,caq1) = parseAttDef dtd (idx,ext) caq handle NotFound (c,a,q) => raise NotFound (c,hookDecl(a,((startPos,getPos q),DEC_ATTLIST(idx,rev attDefs,ext))),q) | SyntaxError (c,a,q) => raise SyntaxError (c,hookDecl(a,((startPos,getPos q),DEC_ATTLIST(idx,rev attDefs,ext))),q) in doit (attDef::attDefs) caq1 end val (c5,a5,q5) = doit nil (c2,a4,q2) handle NotFound caq => caq in if c5 <> 0wx3E (* #">" *) then let val a6 = hookError(a5,(getPos q5,ERR_EXPECTED(expAttNameGt,[c5]))) in raise SyntaxError (c5,a6,q5) end else let val a6 = if not (!O_VALIDATE) orelse getEntId q5=startEnt then a5 else hookError(a5,(getPos q5,ERR_DECL_ENT_NESTING LOC_ATT_DECL)) in getChar(a6,q5) end end handle exn as SyntaxError (c,a,q) => let val a1 = if c=0wx00 then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_ATT_DECL)) else a in recoverDecl false (c,a,q) end end (* stop of ../../Parser/Parse/parseDecl.sml *) (* start of ../../Parser/Parse/parseDtd.sml *) signature ParseDtd = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val openExtern : int * Uri.Uri -> AppData * State -> Encoding.Encoding * HookData.TextDecl option * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseGenRef : Dtd -> UniChar.Char * AppData * State -> (int * Base.GenEntity) * (AppData * State) val parseCharRefLit : UniChar.Data -> AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipTag : Errors.Location -> AppData * State -> (UniChar.Char * AppData * State) val parseETag : Dtd -> AppData * State -> int * UniChar.Data * Errors.Position * (UniChar.Char * AppData * State) val parseSTag : Dtd -> Errors.Position -> UniChar.Char * AppData * State -> (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) val skipDecl : bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State ----------------------------------------------------------------------*) include ParseDecl val parseDocTypeDecl : Dtd -> (UniChar.Char * AppData * State) -> int option * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseDtd *) (* *) (* Exceptions raised by functions in this structure: *) (* parseDocTypeDecl : none *) (*--------------------------------------------------------------------------*) functor ParseDtd (structure ParseBase : ParseBase) : ParseDtd = struct structure ParseDecl = ParseDecl (structure ParseBase = ParseBase) open Base UniChar Errors ParseDecl (*--------------------------------------------------------------------*) (* parse a markup declaration other than a processing instruction, *) (* " (* #"-" *) let val (c1,a1,q1) = getChar (a,q) in if c1<>0wx2D (* #"-" *) then let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expDash,[c1]))) in recoverDecl false (c1,a2,q1) end else parseComment startPos (a1,q1) end | _ => let val (name,caq1) = parseName (c,a,q) handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expStartMarkup,[c]) val a1 = hookError(a,(getPos q,err)) in raise SyntaxError (c,a1,q) end val ext = hasExternal dtd in case name of [0wx45,0wx4c,0wx45,0wx4d,0wx45,0wx4e,0wx54] (* "ELEMENT" *) => parseElementDecl dtd (startEnt,startPos,ext) caq1 | [0wx41,0wx54,0wx54,0wx4c,0wx49,0wx53,0wx54] (* "ATTLIST" *) => parseAttListDecl dtd (startEnt,startPos,ext) caq1 | [0wx4e,0wx4f,0wx54,0wx41,0wx54,0wx49,0wx4f,0wx4e] (* "NOTATION" *) => parseNotationDecl dtd (startEnt,startPos,ext) caq1 | [0wx45,0wx4e,0wx54,0wx49,0wx54,0wx59] (* "ENTITY" *) => parseEntityDecl dtd (startEnt,startPos,ext) caq1 | _ => let val (c1,a1,q1) = caq1 val err = ERR_EXPECTED(expStartMarkup,name) val a2 = hookError(a1,(getPos q,err)) in recoverDecl false (c1,a2,q1) end end (*--------------------------------------------------------------------*) (* skip an ignored section, starting after the '". 3.4: *) (* *) (* [63] ignoreSect ::= '' *) (* [64] ignoreSectContents ::= Ignore ('' Ignore)* *) (* [65] Ignore ::= Char* - (Char* ('') Char* ) *) (* *) (* ... If the keyword of the conditional section is IGNORE, then *) (* the contents of the conditional section are not logically part *) (* of the DTD. Note that for reliable parsing, the contents of even *) (* ignored conditional sections must be read in order to detect *) (* nested conditional sections and ensure that the end of the *) (* outermost (ignored) conditional section is properly detected. *) (* If a conditional section with a keyword of INCLUDE occurs within *) (* a larger conditional section with a keyword of IGNORE, both the *) (* outer and the inner conditional sections are ignored. *) (* *) (* print an error an finish if an entity end is encountered. *) (* *) (* return the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun skipIgnored caq = let (*--------------------------------------------------------------*) (* level counts the nesting of conditional sections. *) (* if the second char after a "<" ("]") is not a "[" ("]"), it *) (* can nevertheless start another delimiter and is therefore *) (* fed into a recursive call of doit. *) (*--------------------------------------------------------------*) fun doit level (c,a,q) = case c of 0wx00 => (c,hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_IGNORED)),q) | 0wx3C (* #"<" *) => let val (c1,a1,q1) = getChar (a,q) in if c1=0wx21 (* #"!" *) then let val (c2,a2,q2) = (getChar(a1,q1)) in if c2=0wx5B (* #"[" *) then doit (level+1) (getChar(a2,q2)) else doit level (c2,a2,q2) end else doit level (c1,a1,q1) end | 0wx5D (* #"]" *) => let val (c1,a1,q1) = getChar (a,q) in if c1=0wx5D (* #"]" *) then doit' level (getChar (a1,q1)) else doit level (c1,a1,q1) end | _ => doit level (getChar (a,q)) (*--------------------------------------------------------------*) (* if the second "]" is followed by a "]", then this might be *) (* the real second "]". Therefore doit' loops as long as it *) (* finds "]"'s. *) (*--------------------------------------------------------------*) and doit' level (c,a,q) = case c of 0wx3E (* #">" *) => if level>0 then doit (level-1) (getChar (a,q)) else getChar (a,q) | 0wx5D (* #"]" *) => doit' level (getChar (a,q)) | _ => doit level (c,a,q) in doit 0 caq end (*--------------------------------------------------------------------*) (* parse the internal or external subset of the dtd. handle included *) (* sections by counting their nesting level. Cf 2.8: *) (* *) (* Validity Constraint: Proper Declaration/PE Nesting *) (* Parameter-entity replacement text must be properly nested with *) (* markup declarations. That is to say, if either the first *) (* character or the last character of a markup declaration *) (* (markupdecl above) is contained in the replacement text for a *) (* parameter-entity reference, both must be contained in the same *) (* replacement text. *) (* ... *) (* [28] doctypedecl ::= '' *) (* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl *) (* | NotationDecl | PI | Comment *) (* [30] extSubset ::= TextDecl? extSubsetDecl *) (* [31] extSubsetDecl ::= ( markupdecl | conditionalSect *) (* | PEReference | S )* *) (* and 3.4: *) (* *) (* [61] conditionalSect ::= includeSect | ignoreSect *) (* [62] includeSect ::= '' *) (* [63] ignoreSect ::= '' *) (* *) (* print an error and finish if the end of document is encountered in *) (* the internal subset. *) (* print an error and raise SyntaxState if a "<" is not followed by a *) (* "!" or a "?". *) (* print an error and raise SyntaxState if a "]" is not followed by *) (* "]>". *) (* print an error if a "" is found outside an included section. *) (* print an error an raise SyntaxState if something other than a *) (* markup declaration, parameter entity reference, white space or *) (* a conditional section is encountered. *) (* print an error and raise SyntaxState if a " (ws,(c,a,q)) | 0wx09 => doit false (c::ws) (getChar(a,q)) | 0wx0A => doit false (c::ws) (getChar(a,q)) | 0wx20 => doit false (c::ws) (getChar(a,q)) | 0wx25 => (ws,(c,a,q)) | 0wx3C => (ws,(c,a,q)) | 0wx5D => (ws,(c,a,q)) | _ => if hadError then doit true ws (getChar(a,q)) else let val err = ERR_FORBIDDEN_HERE(IT_DATA nil,LOC_SUBSET) val a1 = hookError (a,(getPos q,err)) in doit true ws (getChar(a1,q)) end val (ws,(c1,a1,q1)) = doit false nil caq val a2 = if null ws then a1 else hookWhite(a1,Data2Vector (rev ws)) in (c1,a2,q1) end fun doit cond (c,a,q) = case c of 0wx00 => if isSpecial q (*---------------------------------------------------*) (* the external subset ends at and of special entity.*) (* so does the internal subset, but with error. *) (*---------------------------------------------------*) then let val a1 = if inDocEntity q then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_INT_SUBSET)) else if cond=0 then a else hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_INCLUDED)) in (c,a1,q) end else let val a1 = hookEntEnd (a,getPos q) in doit cond (getChar(a1,q)) end (* ignore errors in parameter references -----------------*) | 0wx25 (* #"%" *) => let val caq2 = let val ((id,ent),(a1,q1)) = parseParRef dtd (getChar(a,q)) in if !O_VALIDATE orelse !O_INCLUDE_PARAM_ENTS then case ent of PE_NULL => getChar(a1,q1) | PE_INTERN(_,rep) => let val q2 = pushIntern(q1,id,true,rep) val a2 = hookParRef(a1,((getPos q,getPos q1),id,ent,true)) in getChar(a2,q2) end | PE_EXTERN extId => let val a2 = hookParRef(a1,((getPos q,getPos q1),id,ent,true)) val caq3 = #3(openExtern (id,true,resolveExtId extId) (a2,q1)) handle CantOpenFile(fmsg,a) => let val err = ERR_NO_SUCH_FILE fmsg val a1 = hookError(a,(getPos q1,err)) val a2 = hookEntEnd (a1,getPos q1) in (getChar(a2,q1)) end in caq3 end (* changed 080600: setExternal is already called by parseParRef *) else let val a2 = hookParRef(a1,((getPos q,getPos q1),id,ent,false)) in getChar(a2,q1) end end handle SyntaxError caq => caq | NoSuchEntity aq => getChar aq in doit cond caq2 end | 0wx3C (* #"<" *) => let val (c1,a1,q1) = getChar(a,q) in case c1 of 0wx3F => (* #"?" *) let val caq2 = parseProcInstr (getPos q) (a1,q1) in doit cond caq2 end | 0wx21 => (* #"!" *) let val (c2,a2,q2) = (getChar(a1,q1)) in if c2=0wx5B (* #"[" *) then do_cond cond q (a2,q2) else let val caq3 = parseMarkupDecl dtd (getEntId q,getPos q) (c2,a2,q2) in doit cond caq3 end end | _ => let val err = ERR_EXPECTED(expExclQuest,[c1]) val a2 = hookError(a1,(getPos q1,err)) val caq3 = recoverDecl false (c1,a2,q1) in doit cond caq3 end end | 0wx5D (* #"]" *) => do_brack cond q (getChar(a,q)) | _ => let val caq1 = do_data (c,a,q) in doit cond caq1 end and do_brack cond q0 (c,a,q) = if inDocEntity q then (c,a,q) else if c=0wx5D (* #"]" *) then let val (c1,a1,q1) = getChar(a,q) in if c1=0wx3E (* #">" *) (* ignore wrong "]]>"'s ------------------*) then if cond=0 then let val err = ERR_FORBIDDEN_HERE(IT_DATA [c,c,c1], LOC_OUT_COND) val a2 = hookError(a1,(getPos q0,err)) in doit cond (getChar(a2,q1)) end else doit (cond-1) (getChar(a1,q1)) (* the second "]" may start another "]]>" ---*) else let val a2 = hookError(a1,(getPos q1,ERR_EXPECTED(expGt,[c1]))) in do_brack cond q (c1,a2,q1) end end else let val a1 = hookError(a,(getPos q,ERR_EXPECTED(expRbrack,[c]))) in doit cond (c,a1,q) end and do_cond cond q0 (a,q) = let (* marked sections are forbidden in the internal subset. -*) val inInt = inDocEntity q val a1 = if inInt then hookError (a,(getPos q0,ERR_FORBIDDEN_HERE (IT_COND,LOC_INT_SUBSET))) else a val caq2 as (_,_,q2) = skipPSopt dtd (getChar(a1,q)) val (status,caq3) = let val (name,(c3,a3,q3)) = parseName caq2 (* ignore sections with bad status keyword ---------*) val (status,a4) = case name of [0wx49,0wx47,0wx4e,0wx4f,0wx52,0wx45] => (IGNORE,a3) | [0wx49,0wx4e,0wx43,0wx4c,0wx55,0wx44,0wx45] => (INCLUDE,a3) | _ => let val err = ERR_EXPECTED(expCondStatus,name) val a4 = hookError(a3,(getPos q2,err)) in (IGNORE,a4) end val (c5,a5,q5) = skipPSopt dtd (c3,a4,q3) in (* ignore sections without "[" after keyword -------*) if c5=0wx5B then (status,getChar(a5,q5)) else let val a6 = hookError(a5,(getPos q5,ERR_EXPECTED(expLbrack,[c5]))) in (IGNORE,(c5,a6,q5)) end end handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expCondStatus,[c]) val a1 = hookError(a,(getPos q,err)) in (IGNORE,(c,a1,q)) end in (* ignore sections in the internal subset ----------------*) case (status,inInt) of (INCLUDE,_) => doit (cond+1) caq3 | (_,_) => doit cond (skipIgnored caq3) end in doit 0 caq end (*--------------------------------------------------------------------*) (* parse the internal subset of the dtd. Cf 2.8: *) (* *) (* return the remaining character and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseInternalSubset dtd (a,q) = let val a1 = hookSubset (a,getPos q) in parseSubset dtd (getChar(a1,q)) end (*--------------------------------------------------------------------*) (* parse the external subset of the dtd, the filename given as first *) (* argument. handle included sections by counting their nesting level.*) (* the file is opened on its own stack, and closed at the end. *) (* Cf 2.8: *) (* *) (* print an error and do nothing if the file cannot be opened. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseExternalSubset dtd (a,q) extId = let val uri = resolveExtId extId val (enc,textDecl,(c1,a1,q1)) = openSubset uri a val a2 = hookExtSubset (a1,(uri,enc,textDecl)) val (_,a3,q3) = parseSubset dtd (c1,a2,q1) val _ = closeAll q3 in a3 end handle CantOpenFile(fmsg,a) => hookError(a,(getPos q,ERR_NO_SUCH_FILE fmsg)) (*--------------------------------------------------------------------*) (* Parse the document type declaration, the ' *) (* *) (* print an error and raise SyntaxState if no name is found. *) (* print an error and raise SyntaxState if no final ">" is found. *) (* external identifier is found. *) (* print an error if white space is missing. *) (* *) (* return nothing. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseDocTypeDecl dtd caq = let val _ = setHasDtd dtd val caq1 = skipS caq val (doc,caq2) = parseName caq1 handle NotFound (c,a,q) => let val err = ERR_EXPECTED(expAName,[c]) in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val idx = Element2Index dtd doc val (hadS,caq3 as (_,_,q3)) = skipSmay caq2 val (ext,(c4,a4,q4)) = let val (extId,_,(c4,a4,q4)) = parseExtIdSub dtd caq3 val a5 = if hadS then a4 else hookError(a4,(getPos q3,ERR_MISSING_WHITE)) in (SOME extId,(c4,a5,q4)) end handle NotFound caq => (NONE,caq) val a4' = hookDocType(a4,(idx,ext)) val (c5,a5,q5) = case c4 of 0wx5B (* #"[" *) => let val caq5 = parseInternalSubset dtd (a4',q4) in skipSopt caq5 end | _ => (c4,a4',q4) val a6 = case ext of NONE => a5 | SOME extId => let val _ = setExternal dtd in if !O_VALIDATE orelse !O_INCLUDE_PARAM_ENTS then parseExternalSubset dtd (a5,q5) extId else a5 end val a7 = checkMultEnum dtd (a6,q5) val a7'= checkPreDefined dtd (a7,q5) val a8 = checkUnparsed dtd a7' val (c9,a9,q9) = if c5=0wx3E (* #">" *) then getChar(a8,q5) else let val err = expectedOrEnded(expGt,LOC_DOC_DECL) c5 val a9 = hookError(a8,(getPos q5,err)) in recoverDecl false (c5,a9,q5) end in (SOME idx,(c9,hookEndDtd(a9,getPos q9),q9)) end handle exn as SyntaxError(c,a,q) => let val a1 = if c=0wx00 then hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_DOC_DECL)) else a val (c2,a2,q2) = recoverDecl true (c,a1,q) in (NONE,(c2,hookEndDtd(a2,getPos q2),q2)) end end (* stop of ../../Parser/Parse/parseDtd.sml *) (* start of ../../Parser/Parse/parseContent.sml *) signature ParseContent = sig (*---------------------------------------------------------------------- include ParseBase val parseName : UniChar.Char * AppData * State -> UniChar.Data * (UniChar.Char * AppData * State) val openDocument : Uri.Uri option -> AppData -> Encoding.Encoding * HookData.XmlDecl option * (UniChar.Char * AppData * State) val skipCharRef : AppData * State -> (UniChar.Char * AppData * State) val skipReference : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseComment : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val parseProcInstr : Errors.Position -> AppData * State -> (UniChar.Char * AppData * State) val skipTag : Errors.Location -> AppData * State -> (UniChar.Char * AppData * State) val parseSTag : Dtd -> Errors.Position -> UniChar.Char * AppData * State -> (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) val skipDecl : bool -> UniChar.Char * AppData * State -> UniChar.Char * AppData * State val parseDocTypeDecl : Dtd -> (UniChar.Char * AppData * State) -> int option * (UniChar.Char * AppData * State) ----------------------------------------------------------------------*) include ParseDtd val skipBadSection : UniChar.Char * AppData * State -> (UniChar.Char * AppData * State) val parseElement : Dtd * int list * State * (HookData.StartTagInfo * Base.ElemInfo) * (UniChar.Char * AppData * State) -> (int * UniChar.Data * Errors.Position * Errors.Position) option * (UniChar.Char * AppData * State) end (*--------------------------------------------------------------------------*) (* Structure: ParseContent *) (* *) (* Exceptions raised by functions in this structure: *) (* skipBadSection : none *) (* parseElement : none *) (*--------------------------------------------------------------------------*) functor ParseContent (structure ParseBase : ParseBase) : ParseContent = struct structure ParseDtd = ParseDtd (structure ParseBase = ParseBase) open Base Errors UniChar UniClasses UtilList ParseDtd val THIS_MODULE = "ParseContent" val DATA_BUFSIZE = 1024 val dataBuffer = Array.array(DATA_BUFSIZE,0w0:UniChar.Char) (*--------------------------------------------------------------------*) (* skip a cdata section, the initial "' Char* )) [[ *) (* [21] CDEnd ::= ']]>' *) (* *) (* don't care abeout whether "CDATA[" is present. just skip until the *) (* next "]]>" or entity end. *) (* *) (* return the remaining char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun skipBadSection caq = let(*--------------------------------------------------------------*) (* for a sequence of "]"s, check whether the last two are *) (* followed by a ">" *) (*--------------------------------------------------------------*) fun checkEnd aq = let val (c1,a1,q1) = getChar aq in case c1 of 0wx3E (* #">" *) => getChar(a1,q1) | 0wx5D (* #"]" *) => checkEnd(a1,q1) | _ => doit(c1,a1,q1) end and doit (c,a,q) = case c of 0wx00 => let val a1 = hookError(a,(getPos q,ERR_ENDED_BY_EE LOC_CDATA)) in (c,a1,q) end | 0wx5D (* #"]" *) => let val (c1,a1,q1) = getChar(a,q) in if c1=0wx5D (* #"]" *) then checkEnd(a1,q1) else doit (c1,a1,q1) end | _ => doit (getChar(a,q)) in doit caq end (*--------------------------------------------------------------------*) (* parse a cdata section, the initial "' Char* )) [[ *) (* [21] CDEnd ::= ']]>' *) (* *) (* print an error and finish if an entity end is found. *) (* *) (* return the data as a Vector option and the next char & state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseCDataSection' (aq as (_,q)) = let (*--------------------------------------------------------------*) (* for a sequence of "]"s, check whether the last two are *) (* followed by a ">" *) (*--------------------------------------------------------------*) fun doEnd (text,q0,q1) (a2,q2) = let val (c3,a3,q3) = getChar (a2,q2) in case c3 of 0wx00 => let val a4 = hookError(a3,(getPos q3,ERR_ENDED_BY_EE LOC_CDATA)) in (0wx5D::text,getPos q2,(c3,a4,q3)) end | 0wx3E => (* #">" *) (text,getPos q0,getChar(a3,q3)) | 0wx5D => doEnd (0wx5D::text,q1,q2) (a3,q3) | _ => doit (c3::0wx5D::0wx5D::text) (a3,q3) end and doBrack (text,q0) (a1,q1) = let val (c2,a2,q2) = getChar(a1,q1) in case c2 of 0wx00 => let val a3 = hookError(a2,(getPos q2,ERR_ENDED_BY_EE LOC_CDATA)) in (0wx5D::text,getPos q1,(c2,a3,q2)) end | 0wx5D (* #"]" *) => doEnd (text,q0,q1) (a2,q2) | _ => doit (c2::0wx5D::text) (a2,q2) end and doit text (a,q) = let val (c1,a1,q1) = getChar(a,q) in case c1 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_CDATA)) in (text,getPos q,(c1,a2,q1)) end | 0wx5D (* #"]" *) => doBrack (text,q) (a1,q1) | _ => doit (c1::text) (a1,q1) end val (c1,a1,q1) = getChar aq val startPos = getPos q1 val (cs,endPos,(c2,a2,q2)) = case c1 of 0wx00 => let val a2 = hookError(a1,(getPos q1,ERR_ENDED_BY_EE LOC_CDATA)) in (nil,getPos q,(c1,a2,q1)) end | 0wx5D (* #"]" *) => doBrack (nil,q) (a1,q1) | _ => doit [c1] (a1,q1) val text = Data2Vector(rev cs) val a3 = hookCData(a1,((startPos,endPos),text)) in (c2,a3,q2) end (*--------------------------------------------------------------------*) (* parse a cdata section, the initial "' Char* )) [[ *) (* [21] CDEnd ::= ']]>' *) (* *) (* print an error and skip the section if no name or a name other *) (* than CDATA comes first, or no '[' follows the name. *) (* *) (* return the text of the section together with the remaining state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseCDataSection startPos aq = let val caq0 as (_,_,q0) = (getChar aq) val (name,(c1,a1,q1)) = parseName caq0 handle NotFound (c,a,q) => let val err = expectedOrEnded(expCdata,LOC_CDATA) c in raise SyntaxError(c,hookError(a,(getPos q,err)),q) end val _ = if name = [0wx43,0wx44,0wx41,0wx54,0wx41] (* "CDATA" *) then () else let val err = ERR_EXPECTED(expCdata,name) in raise SyntaxError(c1,hookError(a1,(getPos q0,err)),q1) end val _ = if c1=0wx5B (* #"[" *) then () else let val err = expectedOrEnded(expLbrack,LOC_CDATA) c1 in raise SyntaxError(c1,hookError(a1,(getPos q1,err)),q1) end in parseCDataSection'(a1,q1) end handle SyntaxError caq => skipBadSection caq (*--------------------------------------------------------------------*) (* parse element or empty content. The second arg holds the unique *) (* number of the element's first characters's entity, the index of *) (* the current element, and the dfa for its content. Cf. 3: *) (* *) (* [39] element ::= EmptyElemTag *) (* | STag content ETag *) (* ... *) (* Well-Formedness Constraint: Element Type Match *) (* The Name in an element's end-tag must match the element type in *) (* the start-tag. *) (* *) (* Validity Constraint: Element Valid *) (* An element is valid if there is a declaration matching *) (* elementdecl where the Name matches the element type, and one of *) (* the following holds: *) (* *) (* 1. The declaration matches EMPTY and the element has no content. *) (* 2. The declaration matches children and the sequence of child *) (* elements belongs to the language generated by the regular *) (* expression in the content model, with optional white space *) (* (characters matching the nonterminal S) between each pair of *) (* child elements. *) (* *) (* and 3.1: *) (* *) (* [43] content ::= (element | CharData | Reference | CDSect | PI *) (* | Comment)* *) (* 2.4: *) (* The ampersand character (&) and the left angle bracket (<) may *) (* appear in their literal form only when used as markup delimiters,*) (* or within a comment, a processing instruction, or a CDATA *) (* section... If they are needed elsewhere, they must be escaped *) (* using either numeric character references or the strings "&" *) (* and "<" respectively... *) (* *) (* consume the content of the element, accumulating it via the user *) (* data functions (parameter a in subfunctions). trace the content *) (* model of the element with a dfa transitions on a dfa state (para- *) (* meter p in subfunctions). finish at the first end-tag, whether *) (* matching or not, or at the document end. *) (* *) (* handle all syntax and other recoverable errors from subfunctions *) (* and try to continue. *) (* *) (* return the accumulated user data and the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) fun parseElementContent dtd (openElems,startEnt,curr,dfa,ext,mt) caq = let (*--------------------------------------------------------------*) (* check whether the dfa allows a transition/an end tag here. *) (* print an error if not. After a transition return the new *) (* dfa state. *) (*--------------------------------------------------------------*) fun fin_elem (a,pos,dfa,p) = if dfaFinal(dfa,p) then a else hookError(a,(pos,ERR_ENDED_EARLY(Index2Element dtd curr))) fun trans_elem (a,q,dfa,p,el) = let val p1 = dfaTrans(dfa,p,el) in if p1<>dfaError then (p1,a) else let val err = ERR_BAD_ELEM(Index2Element dtd curr,Index2Element dtd el) in (p1,hookError(a,(getPos q,err))) end end (*--------------------------------------------------------------*) (* consume all white space and skip all data until the next "<" *) (* or "&". print an error for each sequence of data encountered.*) (* *) (* add the white space as data to the user data. *) (* return the next char and state. *) (*--------------------------------------------------------------*) fun do_char_elem (c0,a0,q0) = let (*--------------------------------------------------------------*) (* read data characters until the next "<", "&" or entity end. *) (* add the data to the user data when an error occurs or no *) (* more data follows. *) (* *) (* return the modified user data with the next char and state. *) (*--------------------------------------------------------------*) fun data_hook(a,q,cs) = if null cs then a else hookData(a,((getPos q0,getPos q),Data2Vector(rev cs),true)) fun after_error (caq as (c,a,q)) = case c of 0wx00 => caq | 0wx26 (* #"&" *) => caq | 0wx3C (* #"<" *) => caq | _ => after_error(getChar(a,q)) fun do_data (yet,aq as (_,q)) = let val (c1,a1,q1) = getChar aq in case c1 of 0wx00 => (c1,data_hook(a1,q,yet),q1) | 0wx26 (* #"&" *) => (c1,data_hook(a1,q,yet),q1) | 0wx3C (* #"<" *) => (c1,data_hook(a1,q,yet),q1) | _ => if isS c1 then do_data (c1::yet,(a1,q1)) else let val a2 = data_hook(a1,q,yet) val err = ERR_ELEM_CONTENT(IT_DATA nil) val a3 = hookError(a2,(getPos q1,err)) in after_error (getChar(a3,q1)) end end in if isS c0 then let val a1 = if not (ext andalso standsAlone dtd) then a0 else let val err = ERR_STANDALONE_ELEM(Index2Element dtd curr) val _ = setStandAlone dtd (not (!O_ERROR_MINIMIZE)) in hookError(a0,(getPos q0,err)) end in do_data ([c0],(a1,q0)) end else let val a1 = hookError(a0,(getPos q0,ERR_ELEM_CONTENT(IT_DATA nil))) in after_error(getChar(a1,q0)) end end (*--------------------------------------------------------------*) (* consume a reference, handling errors by ignoring them. *) (*--------------------------------------------------------------*) fun do_ref (q,(c1,a1,q1)) = if c1=0wx23 (* #"#" *) (*------------------------------------------------------*) (* it's a character reference. *) (*------------------------------------------------------*) then let val err = ERR_ELEM_CONTENT IT_CHAR_REF val a2 = hookError(a1,(getPos q,err)) in skipCharRef(a2,q1) end (*---------------------------------------------------------*) (* it's a general entity reference. *) (*---------------------------------------------------------*) else let val ((id,ent),(a2,q2)) = parseGenRef dtd (c1,a1,q1) in case ent of GE_NULL => let val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,false)) in (getChar(a3,q2)) end | GE_INTERN(_,rep) => let val q3 = pushIntern(q2,id,false,rep) val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,true)) in (getChar(a3,q3)) end | GE_EXTERN ext => if !O_VALIDATE orelse !O_INCLUDE_EXT_PARSED then let val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,true)) val caq4 = #3(openExtern (id,false,resolveExtId ext) (a3,q2)) handle CantOpenFile(fmsg,a) => let val err = ERR_NO_SUCH_FILE fmsg val a2 = hookError(a,(getPos q2,err)) val a3 = hookEntEnd(a2,getPos q2) in (getChar(a3,q2)) end in caq4 end else let val a3 = hookGenRef(a2,((getPos q,getPos q2),id,ent,false)) in getChar(a3,q2) end | GE_UNPARSED _ => raise InternalError (THIS_MODULE,"parseElementContent", "parseGenRef returned GE_UNPARSED") end (*-------------------------------------------------------*) (* handle any errors in references by ignoring them. *) (*-------------------------------------------------------*) handle SyntaxError caq => caq | NoSuchEntity aq => getChar aq (*--------------------------------------------------------------*) (* handle an end-tag. finish the element in the user data and *) (* return. *) (* *) (* print an error if the element's content is not yet finished. *) (* print an error if the end-tag is for another element. *) (* print an error if the element's first character was not in *) (* the same entity. *) (*--------------------------------------------------------------*) and do_etag (p,etag as (elem,space,startPos,endPos),(c,a,q)) = let fun checkNesting a = if getEntId q=startEnt then a else hookError(a,(startPos,ERR_ELEM_ENT_NESTING(Index2Element dtd curr))) in if elem=curr then let val a1 = fin_elem (a,startPos,dfa,p) val a2 = checkNesting a1 val a3 = hookEndTag (a2,((startPos,endPos),curr,SOME(elem,space))) in (NONE,(c,a3,q)) end else if member elem openElems then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) val a1 = hookError(a,(startPos,err)) val a2 = fin_elem (a1,startPos,dfa,p) val a3 = hookEndTag(a2,((startPos,endPos),curr,NONE)) in (SOME etag,(c,a3,q)) end else if dfaFinal(dfa,p) then let val err = ERR_ELEM_TYPE_MATCH(Index2Element dtd curr, Index2Element dtd elem) val a1 = hookError(a,(startPos,err)) val a2 = checkNesting a1 val a3 = hookEndTag(a2,((startPos,endPos),curr,SOME(elem,space))) in (NONE,(c,a3,q)) end else let val err = ERR_IGNORED_END_TAG(Index2Element dtd curr, Index2Element dtd elem) val a1 = hookError(a,(startPos,err)) in do_elem(p,(c,a1,q)) end end (*--------------------------------------------------------------*) (* handle a declaration, proc. instr or tag. *) (*--------------------------------------------------------------*) and do_lt (p,q,(c1,a1,q1)) = case c1 of 0wx21 (* #"!" *) => (*------------------------------------------------------*) (* its a declaration, cdata section or comment. *) (* Only comments are valid. *) (*------------------------------------------------------*) let val (c2,a2,q2) = getChar(a1,q1) val caq3 = case c2 of 0wx2D (* #"-" *) => let val (c3,a3,q3) = getChar(a2,q2) in if c3=0wx2D then parseComment (getPos q) (a3,q3) else let val err = ERR_EXPECTED(expDash,[c3]) val a4 = hookError(a3,(getPos q3,err)) in recoverDecl false (c3,a4,q3) end end | 0wx5B (* #"[" *) => let val a3 = hookError(a2,(getPos q2,ERR_ELEM_CONTENT IT_CDATA)) in skipBadSection (getChar(a3,q2)) end | _ => (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expDash,[c2]))),q2) in do_elem(p,caq3) end | 0wx2F (* #"/" *) => (let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) in do_etag (p,(elem,space,getPos q,endPos),caq2) end handle SyntaxError caq => do_elem(p,caq)) | 0wx3F (* #"?" *) => do_elem (p,parseProcInstr (getPos q) (a1,q1)) | _ => (*------------------------------------------------------*) (* it's a start tag. the recursive call to parseElement *) (* might return an end-tag that has to be consumed. *) (*------------------------------------------------------*) if isNms c1 then let val (p1,(opt,caq2)) = (let val (stag as ((_,elem,_,_,_),_),(c2,a2,q2)) = parseSTag dtd (getPos q) (c1,a1,q1) val (p1,a3) = trans_elem (a2,q1,dfa,p,elem) in (p1,parseElement (dtd,curr::openElems,q,stag,(c2,a3,q2))) end) handle SyntaxError caq => (p,(NONE,caq)) in case opt of NONE => do_elem (p1,caq2) | SOME etag => do_etag (p1,etag,caq2) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR 0wx3C,LOC_CONTENT) val a2 = hookError(a1,(getPos q,err)) in do_elem (p,(c1,a2,q1)) end (*--------------------------------------------------------------*) (* do element content. handle the document end by printing an *) (* error and finishing like with an end-tag. *) (*--------------------------------------------------------------*) and do_elem (p,(c,a,q)) = case c of 0wx00 => if isSpecial q then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) val a1 = hookError(a,(getPos q,err)) val pos = getPos q val a2 = fin_elem (a1,pos,dfa,p) val a3 = hookEndTag(a2,((pos,pos),curr,NONE)) in (NONE,(c,a3,q)) end else let val a1 = hookEntEnd(a,getPos q) in do_elem (p,getChar(a1,q)) end | 0wx26 (* #"&" *) => do_elem (p,do_ref (q,getChar(a,q))) | 0wx3C (* #"<" *) => do_lt (p,q,getChar(a,q)) | _ => do_elem (p,do_char_elem (c,a,q)) (*--------------------------------------------------------------*) (* do empty content. if the first thing to come is the current *) (* element's end-tag, finish it. Otherwise print an error and *) (* continue as for element content. *) (*--------------------------------------------------------------*) and do_empty (c,a,q) = if c<>0wx3C (* #"<" *) then let val a1 = hookError(a,(getPos q,ERR_NONEMPTY(Index2Element dtd curr))) in do_elem (dfaInitial,(c,a1,q)) end else let val (c1,a1,q1) = getChar(a,q) in if c1<>0wx2F (* #"/" *) then let val err = ERR_NONEMPTY(Index2Element dtd curr) val a2 = hookError(a1,(getPos q,err)) in do_lt (dfaInitial,q,(c1,a2,q1)) end else let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) in do_etag (dfaInitial,(elem,space,getPos q,endPos),caq2) end handle SyntaxError caq => do_elem (dfaInitial,caq) end in if mt then do_empty caq else do_elem (dfaInitial,caq) end (*--------------------------------------------------------------------*) (* parse mixed or any content. The second arg holds the unique number *) (* of the element's first characters's entity, the idx of the current *) (* element, and a function for validating child elements. Cf. 3: *) (* *) (* [39] element ::= EmptyElemTag *) (* | STag content ETag *) (* ... *) (* Well-Formedness Constraint: Element Type Match *) (* The Name in an element's end-tag must match the element type in *) (* the start-tag. *) (* *) (* Validity Constraint: Element Valid *) (* An element is valid if there is a declaration matching *) (* elementdecl where the Name matches the element type, and one of *) (* the following holds: *) (* ... *) (* 3. The declaration matches Mixed and the content consists of *) (* character data and child elements whose types match names in *) (* the content model. *) (* 4. The declaration matches ANY, and the types of any child *) (* elements have been declared. *) (* *) (* 3.1: *) (* *) (* [43] content ::= (element | CharData | Reference | CDSect | PI *) (* | Comment)* *) (* 2.4: *) (* The ampersand character (&) and the left angle bracket (<) may *) (* appear in their literal form only when used as markup delimiters,*) (* or within a comment, a processing instruction, or a CDATA *) (* section... If they are needed elsewhere, they must be escaped *) (* using either numeric character references or the strings "&" *) (* and "<" respectively. The right angle bracket (>) may be *) (* represented using the string ">", and must, for compatibility,*) (* be escaped using ">" or a character reference when it appears *) (* in the string "]]>" in content, when that string is not marking *) (* the end of a CDATA section. *) (* *) (* consume the content of the element, accumulating it via the user *) (* data functions (parameter a in subfunctions). for each child, *) (* check whether it was specified in the element's Mixed content *) (* specification (validate). finish at the first end-tag, whether *) (* matching or not, or at the document end. *) (* *) (* handle all syntax and other recoverable errors from subfunctions *) (* and try to continue. *) (* *) (* return the accumulated user data and the next char and state. *) (*--------------------------------------------------------------------*) (* might raise: none *) (*--------------------------------------------------------------------*) and parseMixedContent dtd (openElems,startEnt,curr,validate) caq = let (*--------------------------------------------------------------*) (* read data characters until the next "<", "&" or entity end. *) (* add the data to the user data when an error occurs or no *) (* more data follows. *) (* *) (* return the modified user data with the next char and state. *) (*--------------------------------------------------------------*) fun do_data (br,(c0,a0,q0)) = let val pos0 = ref (getPos q0) val _ = Array.update(dataBuffer,0,c0) fun data_hook (i,(a,q)) = hookData(a,((!pos0,getPos q),Array.extract(dataBuffer,0,SOME i),false)) fun takeOne (c,qE,i,aq as (a,q)) = if i (c1,data_hook(i,(a1,q)),q1) | 0wx26 (* #"&" *) => (c1,data_hook(i,(a1,q)),q1) | 0wx3C (* #"<" *) => (c1,data_hook(i,(a1,q)),q1) | 0wx5D (* #"]" *) => do_br (n+1,takeOne(c1,q,i,(a1,q1))) | 0wx3E (* #">" *) => let val a2 = if n=1 then a1 else hookError(a1,(getPos q1,ERR_MUST_ESCAPE c1)) in doit (takeOne(c1,q,i,(a2,q1))) end | _ => doit (takeOne(c1,q,i,(a1,q1))) end and doit (i,aq as (_,q)) = let val (c1,a1,q1) = getChar aq in case c1 of 0wx00 => (c1,data_hook(i,(a1,q)),q1) | 0wx26 (* #"&" *) => (c1,data_hook(i,(a1,q)),q1) | 0wx3C (* #"<" *) => (c1,data_hook(i,(a1,q)),q1) | 0wx5D (* #"]" *) => if !O_COMPATIBILITY then do_br (1,takeOne(c1,q,i,(a1,q1))) else doit (takeOne(c1,q,i,(a1,q1))) | _ => doit (takeOne(c1,q,i,(a1,q1))) end in if br then do_br (1,(1,(a0,q0))) else doit (1,(a0,q0)) end (* fun do_data (br,(c0,a0,q0)) = let fun data_hook (yet,(a,q)) = hookData(a,((getPos q0,getPos q),Data2Vector(rev yet),false)) fun do_br (n,yet,aq as (_,q)) = let val (c1,a1,q1) = getChar aq in case c1 of 0wx00 => (c1,data_hook(yet,(a1,q)),q1) | 0wx26 (* #"&" *) => (c1,data_hook(yet,(a1,q)),q1) | 0wx3C (* #"<" *) => (c1,data_hook(yet,(a1,q)),q1) | 0wx5D (* #"]" *) => do_br (n+1,c1::yet,(a1,q1)) | 0wx3E (* #">" *) => let val a2 = if n=1 then a1 else hookError(a1,(getPos q1,ERR_MUST_ESCAPE c1)) in doit (c1::yet,(a2,q1)) end | _ => doit (c1::yet,(a1,q1)) end and doit (yet,aq as (_,q)) = let val (c1,a1,q1) = getChar aq in case c1 of 0wx00 => (c1,data_hook(yet,(a1,q)),q1) | 0wx26 (* #"&" *) => (c1,data_hook(yet,(a1,q)),q1) | 0wx3C (* #"<" *) => (c1,data_hook(yet,(a1,q)),q1) | 0wx5D (* #"]" *) => if !O_COMPATIBILITY then do_br (1,c1::yet,(a1,q1)) else doit (c1::yet,(a1,q1)) | _ => doit (c1::yet,(a1,q1)) end in if br then do_br (1,[0wx5D],(a0,q0)) else doit ([c0],(a0,q0)) end *) (*--------------------------------------------------------------*) (* consume a reference, handling errors by ignoring them. *) (*--------------------------------------------------------------*) fun do_ref (q0,(c,a,q)) = if c=0wx23 (* #"#" *) (*------------------------------------------------------*) (* it's a character reference. *) (*------------------------------------------------------*) then let val (cs,(ch,a1,q1)) = parseCharRefLit [0wx23,0wx26] (a,q) val cv = Data2Vector(rev cs) val a2 = hookCharRef(a1,((getPos q0,getPos q1),ch,cv)) in getChar(a2,q1) end handle SyntaxError caq => caq | NoSuchChar aq => getChar aq (*---------------------------------------------------------*) (* it's a general entity reference. *) (*---------------------------------------------------------*) else let val ((id,ent),(a1,q1)) = parseGenRef dtd (c,a,q) in case ent of GE_NULL => let val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,false)) in getChar(a2,q1) end | GE_INTERN(_,rep) => let val q2 = pushIntern(q1,id,false,rep) val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,true)) in getChar(a2,q2) end | GE_EXTERN ext => if !O_VALIDATE orelse !O_INCLUDE_EXT_PARSED then let val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,true)) val caq3 = #3(openExtern (id,false,resolveExtId ext) (a2,q1)) handle CantOpenFile(fmsg,a) => let val err = ERR_NO_SUCH_FILE fmsg val a1 = hookError(a,(getPos q1,err)) val a2 = hookEntEnd(a1,getPos q1) in (getChar(a2,q1)) end in caq3 end else let val a2 = hookGenRef(a1,((getPos q0,getPos q1),id,ent,false)) in getChar(a2,q1) end | GE_UNPARSED _ => raise InternalError ("THIS_MODULE","parseMixedContent", "parseGenRef returned GE_UNPARSED") end (*-------------------------------------------------------*) (* handle any errors in references by ignoring them. *) (*-------------------------------------------------------*) handle SyntaxError caq => caq | NoSuchEntity aq => getChar aq (*--------------------------------------------------------------*) (* handle an end-tag. finish the element in the user data and *) (* return. *) (* *) (* print an error if the element's content is not yet finished. *) (* print an error if the end-tag is for another element. *) (* print an error if the element's first character was not in *) (* the same entity. *) (*--------------------------------------------------------------*) and do_etag (etag as (elem,space,startPos,endPos),(c,a,q)) = let fun checkNesting a = if getEntId q=startEnt then a else hookError(a,(startPos,ERR_ELEM_ENT_NESTING(Index2Element dtd curr))) in if elem=curr then let val a1 = checkNesting a val a2 = hookEndTag (a1,((startPos,endPos),curr,SOME(elem,space))) in (NONE,(c,a2,q)) end else if member elem openElems then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) val a1 = hookError(a,(startPos,err)) val a2 = hookEndTag(a1,((startPos,endPos),curr,NONE)) in (SOME etag,(c,a2,q)) end else let val err = ERR_ELEM_TYPE_MATCH(Index2Element dtd curr, Index2Element dtd elem) val a1 = hookError(a,(startPos,err)) val a2 = checkNesting a1 val a3 = hookEndTag(a2,((startPos,endPos),curr,SOME(elem,space))) in (NONE,(c,a3,q)) end end (*--------------------------------------------------------------*) (* handle a declaration, proc. instr or tag. If it is an end- *) (* tag, finish the element in the user data and return. *) (* *) (* print an error if the element's content is not yet finished. *) (* print an error if the end-tag is for another element. *) (* print an error if the element's first character was not in *) (* the same entity. *) (*--------------------------------------------------------------*) and do_lt (q,(c1,a1,q1)) = case c1 of 0wx21 (* #"!" *) => (*------------------------------------------------------*) (* its a declaration, cdata section or comment. *) (* Only comments and cdata sections are valid. *) (*------------------------------------------------------*) let val (c2,a2,q2) = getChar(a1,q1) val caq3 = case c2 of 0wx2D (* #"-" *) => let val (c3,a3,q3) = getChar(a2,q2) in if c3=0wx2D then parseComment (getPos q) (a3,q3) else let val err = ERR_EXPECTED(expDash,[c3]) val a4 = hookError(a3,(getPos q3,err)) in recoverDecl false (c3,a4,q3) end end | 0wx5B (* #"[" *) => parseCDataSection (getPos q) (a2,q2) | _ => (c2,hookError(a2,(getPos q2,ERR_EXPECTED(expDashLbrack,[c2]))),q2) in do_mixed caq3 end | 0wx2F (* #"/" *) => (let val (elem,space,endPos,caq2) = parseETag dtd (a1,q1) in do_etag ((elem,space,getPos q,endPos),caq2) end handle SyntaxError caq => do_mixed caq) | 0wx3F (* #"?" *) => do_mixed (parseProcInstr (getPos q) (a1,q1)) | _ => (*------------------------------------------------------*) (* it's a start tag. the recursive call to parseElement *) (* might return an end-tag that has to be consumed. *) (*------------------------------------------------------*) if isNms c1 then let val (opt,caq2) = (let val (stag as ((_,elem,_,_,_),_),(c2,a2,q2)) = parseSTag dtd (getPos q) (c1,a1,q1) val a3 = validate (a2,q1) elem in parseElement (dtd,curr::openElems,q,stag,(c2,a3,q2)) end handle SyntaxError caq => (NONE,caq)) in case opt of NONE => do_mixed caq2 | SOME etag => do_etag (etag,caq2) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR 0wx3C,LOC_CONTENT) val a2 = hookError(a1,(getPos q,err)) in do_mixed (c1,a2,q1) end (*--------------------------------------------------------------*) (* do mixed content. handle the document end by printing an *) (* error and finishing like with an end-tag. *) (*--------------------------------------------------------------*) and do_mixed (c,a,q) = case c of 0wx00 => if isSpecial q then let val err = ERR_OMITTED_END_TAG(Index2Element dtd curr) val a1 = hookError(a,(getPos q,err)) val pos = getPos q val a2 = hookEndTag(a1,((pos,pos),curr,NONE)) in (NONE,(c,a2,q)) end else let val a1 = hookEntEnd(a,getPos q) in do_mixed (getChar(a1,q)) end | 0wx26 (* #"&" *) => do_mixed (do_ref (q,getChar(a,q))) | 0wx3C (* #"<" *) => do_lt (q,getChar(a,q)) | 0wx5D => do_mixed (do_data (!O_COMPATIBILITY,(c,a,q))) | _ => do_mixed (do_data (false,(c,a,q))) in do_mixed caq end (*--------------------------------------------------------------------*) (* parse an element, the start tag already read. the second arg holds *) (* the number of the entity of the start-tag's first char, and the *) (* start-tag information. The 1st arg is the start value for the user *) (* data. 3: *) (* *) (* [39] element ::= EmptyElemTag *) (* | STag content ETag *) (* and 3.1: *) (* *) (* Empty-element tags may be used for any element which has no *) (* content, whether or not it is declared using the keyword EMPTY. *) (* For interoperability, the empty-element tag must be used, and *) (* can only be used, for elements which are declared EMPTY. *) (*--------------------------------------------------------------------*) and parseElement (dtd,openElems,q0,(stag as (_,curr,_,_,mt),elemInfo),(c,a,q)) = let (*--------------------------------------------------------------*) (* validate whether an element is allowed in mixed/any content. *) (*--------------------------------------------------------------*) fun trans_any (a,_) _ = a fun trans_mixed is (a,q) i = if member i is then a else let val err = ERR_BAD_ELEM(Index2Element dtd curr,Index2Element dtd i) in hookError(a,(getPos q,err)) end in (*-----------------------------------------------------------*) (* For empty-element tags, verify that the element's declar. *) (* allows empty content. *) (*-----------------------------------------------------------*) if mt then let val a1 = if not (!O_VALIDATE andalso hasDtd dtd) then a else case #decl elemInfo of (SOME(CT_EMPTY,_)) => a | (SOME(CT_ELEMENT(_,dfa),_)) => if not (dfaFinal(dfa,dfaInitial)) then hookError(a,(getPos q0,ERR_EMPTY_TAG(Index2Element dtd curr))) else if not (!O_INTEROPERABILITY) then a else hookError (a,(getPos q0,ERR_EMPTY_TAG_INTER (Index2Element dtd curr))) | _ => if not (!O_INTEROPERABILITY) then a else hookError(a,(getPos q0,ERR_EMPTY_TAG_INTER (Index2Element dtd curr))) in (NONE,(c,hookStartTag(a1,stag),q)) end (*-----------------------------------------------------------*) (* for normal start-tags, check whether the element's decl. *) (* requires an empty-element tag, or empty content, then *) (* call the appropriate function that parses the content. *) (*-----------------------------------------------------------*) else let val startEnt = getEntId q0 in if !O_VALIDATE then case getOpt(#decl elemInfo,(CT_ANY,false)) of (CT_ANY,_) => parseMixedContent dtd (openElems,startEnt,curr,trans_any) (c,hookStartTag(a,stag),q) | (CT_MIXED is,_) => parseMixedContent dtd (openElems,startEnt,curr,trans_mixed is) (c,hookStartTag(a,stag),q) | (CT_ELEMENT(_,dfa),ext) => parseElementContent dtd (openElems,startEnt,curr,dfa,ext,false) (c,hookStartTag(a,stag),q) | (CT_EMPTY,_) => let val a1 = if not (!O_INTEROPERABILITY) then a else let val err = ERR_MUST_BE_EMPTY(Index2Element dtd curr) in hookError(a,(getPos q0,err)) end val a2 = hookStartTag(a1,stag) in parseElementContent dtd (openElems,startEnt,curr,emptyDfa,false,true) (c,a2,q) end else parseMixedContent dtd (openElems,startEnt,curr,trans_any) (c,hookStartTag(a,stag),q) end end end (* stop of ../../Parser/Parse/parseContent.sml *) (* start of ../../Parser/Parse/parseDocument.sml *) (*--------------------------------------------------------------------------*) (* Structure: ParseDocument *) (* *) (* Exceptions raised by functions in this structure: *) (* parseDocTypeDecl : none *) (*--------------------------------------------------------------------------*) functor Parse (structure Dtd : Dtd structure Hooks : Hooks structure Resolve : Resolve structure ParserOptions : ParserOptions) : sig val parseDocument : Uri.Uri option -> Dtd.Dtd option -> Hooks.AppData -> Hooks.AppFinal end = struct structure ParseBase = ParseBase (structure Dtd = Dtd structure Hooks = Hooks structure Resolve = Resolve structure ParserOptions = ParserOptions) structure ParseContent = ParseContent (structure ParseBase = ParseBase) open Base UniChar Errors UniClasses Uri ParseContent val THIS_MODULE = "ParseContent" datatype Where = PROLOG | EPILOG | INSTANCE of int option fun locOf wher = case wher of PROLOG => LOC_PROLOG | INSTANCE _ => LOC_PROLOG | EPILOG => LOC_EPILOG fun checkRoot dtd (a,q) (doc,stag as ((_,elem,_,_,_),_)) = if !O_VALIDATE then case doc of NONE => a | SOME doc => if doc=elem then a else let val err = ERR_ROOT_ELEM(Index2Element dtd doc, Index2Element dtd elem) in hookError(a,(getPos q,err)) end else a fun parseDoc dtd caq = let fun do_data wher caq = let fun doit hadError ws (c,a,q) = case c of 0wx00 => (ws,(c,a,q)) | 0wx26 (* #"&" *) => (ws,(c,a,q)) | 0wx3C (* #"<" *) => (ws,(c,a,q)) | 0wx09 (* #"\t"*) => doit hadError (c::ws) (getChar(a,q)) | 0wx0A (* #"\n"*) => doit hadError (c::ws) (getChar(a,q)) | 0wx20 (* #" " *) => doit hadError (c::ws) (getChar(a,q)) | _ => let val a1 = if hadError then a else hookError(a,(getPos q,ERR_FORBIDDEN_HERE (IT_DATA nil,locOf wher))) in doit true ws (getChar(a1,q)) end val (ws,(c1,a1,q1)) = doit false nil caq val a2 = if null ws then a1 else hookWhite(a1,Data2Vector (rev ws)) in (c1,a2,q1) end fun do_decl wher q0 (c,a,q) = case c of 0wx2D (* #"-" *) => let val (c1,a1,q1) = getChar(a,q) in if c1=0wx2D then (wher,parseComment (getPos q0) (a1,q1)) else let val err = ERR_EXPECTED(expDash,[c1]) val a2 = hookError(a1,(getPos q1,err)) val caq2 = recoverDecl false (c1,a2,q1) in (wher,caq2) end end | 0wx5B (* #"[" *) => let val err = ERR_FORBIDDEN_HERE (IT_CDATA,locOf wher) val a1 = hookError(a,(getPos q0,err)) val caq2 = skipBadSection (getChar(a1,q)) in (wher,caq2) end | _ => case wher of PROLOG => (let val (name,(c1,a1,q1)) = parseName (c,a,q) handle NotFound (c,a,q) => let val err = expectedOrEnded(expDashDocLbrk,LOC_DECL) c in raise SyntaxError (c,hookError(a,(getPos q,err)),q) end val _ = if name=[0wx44,0wx4f,0wx43,0wx54,0wx59,0wx50,0wx45] (* "DOCTYPE" *) then () else let val err = ERR_EXPECTED(expDashDocLbrk,name) val a2 = hookError(a1,(getPos q,err)) in raise SyntaxError (c1,a2,q1) end val (doc,caq2) = parseDocTypeDecl dtd (c1,a1,q1) in (INSTANCE doc,caq2) end handle SyntaxError caq => (PROLOG,recoverDecl true caq)) | _ => let val loc = if wher=EPILOG then LOC_EPILOG else LOC_AFTER_DTD val err = ERR_FORBIDDEN_HERE (IT_DECL,loc) val a1 = hookError(a,(getPos q0,err)) val caq2 = skipDecl true (c,a1,q) in (wher,caq2) end and doit wher (c,a,q) = case c of 0wx00 => if isSpecial q then (wher,(a,q)) else doit wher (getChar(a,q)) (*--------------------------------------------------------------*) (* References are forbidden outside the document element *) (*--------------------------------------------------------------*) | 0wx26 (* #"&" *) => let val (c1,a1,q1) = getChar(a,q) val caq2 = if c1=0wx23 (* #"#" *) then let val err = ERR_FORBIDDEN_HERE(IT_CHAR_REF,locOf wher) val a2 = hookError(a1,(getPos q,err)) in skipCharRef (a2,q1) end else let val err = ERR_FORBIDDEN_HERE(IT_REF,locOf wher) val a2 = hookError(a1,(getPos q,err)) in skipReference (c1,a2,q1) end in doit wher caq2 end | 0wx3C (* #"<" *) => let val (c1,a1,q1) = getChar (a,q) in case c1 of 0wx21 (* #"!" *) => let val (wher1,caq2) = do_decl wher q (getChar(a1,q1)) in doit wher1 caq2 end | 0wx2F (* #"/" *) => let val err = ERR_FORBIDDEN_HERE(IT_ETAG,locOf wher) val a2 = hookError(a1,(getPos q,err)) val caq3 = skipTag LOC_ETAG (a2,q1) in doit wher caq3 end | 0wx3F (* #"?" *) => doit wher (parseProcInstr (getPos q) (a1,q1)) | _ => if isName c1 then let val wher1 = case wher of PROLOG => INSTANCE NONE | _ => wher in case wher1 of PROLOG => raise InternalError(THIS_MODULE,"parseDoc.doit","") | EPILOG => let val err = ERR_FORBIDDEN_HERE(IT_STAG,LOC_EPILOG) val a2 = hookError(a1,(getPos q,err)) val caq3 = skipTag LOC_STAG (a2,q1) in doit EPILOG caq3 end | INSTANCE doc => (let val a2 = if not (!O_VALIDATE) orelse isSome doc then a1 else hookError(a1,(getPos q,ERR_NO_DTD)) val (stag,(c3,a3,q3)) = parseSTag dtd (getPos q) (c1,a2,q1) val a4 = checkRoot dtd (a3,q1) (doc,stag) val (opt,(c5,a5,q5)) = parseElement (dtd,nil,q,stag,(c3,a4,q3)) val a6 = checkDefinedIds dtd (a5,q5) in case opt of NONE => doit EPILOG (c5,a6,q5) | SOME (_,_,startPos,_) => let val err = ERR_FORBIDDEN_HERE(IT_ETAG,LOC_EPILOG) val a7 = hookError(a6,(startPos,err)) in doit EPILOG (c5,a7,q5) end end handle SyntaxError caq => doit wher1 caq) end else let val err = ERR_FORBIDDEN_HERE(IT_CHAR 0wx3C,locOf wher) val a2 = hookError(a1,(getPos q,err)) in doit wher (c1,a2,q1) end end | _ => let val caq1 = do_data wher (c,a,q) in doit wher caq1 end in doit PROLOG caq end (* to false. (cf. 2.9) *) (* *) (* ... If ... there is no standalone document declaration, the *) (* value "no" is assumed. *) fun parseDocument uriOpt dtdOpt a = let val dtd = case dtdOpt of NONE => initDtdTables () | SOME dtd => dtd val (enc,xmlDecl,(c1,a1,q1)) = openDocument uriOpt a val uri = getUri q1 val alone = case xmlDecl of (SOME(_,_,SOME sa)) => sa | _ => false val _ = if alone then setStandAlone dtd true else () val a2 = hookXml(a1,(uri,enc,xmlDecl)) val (wher,(a3,q3)) = parseDoc dtd (c1,a2,q1) val _ = closeAll q3 val a4 = case wher of EPILOG => a3 | _ => hookError(a3,(getPos q3,ERR_ENDED_IN_PROLOG)) in hookFinish a4 end handle CantOpenFile(fmsg,a) => let val a1 = hookError(a,(nullPosition,ERR_NO_SUCH_FILE fmsg)) in hookFinish a1 end end (* stop of ../../Parser/Parse/parseDocument.sml *) (* start of ../../Catalog/catError.sml *) signature CatError = sig type Position val nullPosition : Position val Position2String : Position -> string datatype Location = LOC_CATALOG | LOC_COMMENT | LOC_NOCOMMENT | LOC_PUBID | LOC_SYSID datatype Expected = EXP_NAME | EXP_LITERAL datatype CatError = ERR_DECODE_ERROR of Decode.Error.DecodeError | ERR_NO_SUCH_FILE of string * string | ERR_ILLEGAL_HERE of UniChar.Char * Location | ERR_MISSING_WHITE | ERR_EOF of Location | ERR_EXPECTED of Expected * UniChar.Char | ERR_XML of Errors.Error | ERR_MISSING_ATT of UniChar.Data * UniChar.Data | ERR_NON_PUBID of UniChar.Data * UniChar.Data val catMessage : CatError -> string list end structure CatError : CatError = struct open Errors UtilError UtilString type Position = string * int * int val nullPosition = ("",0,0) fun Position2String (fname,l,c) = if fname="" then "" else String.concat ["[",fname,":",Int2String l,".",Int2String c,"]"] datatype Location = LOC_CATALOG | LOC_COMMENT | LOC_NOCOMMENT | LOC_PUBID | LOC_SYSID fun Location2String loc = case loc of LOC_CATALOG => "catalog file" | LOC_COMMENT => "comment" | LOC_NOCOMMENT => "something other than a comment" | LOC_PUBID => "public identifier" | LOC_SYSID => "system identifier" fun InLocation2String loc = case loc of LOC_CATALOG => "in a catalog file" | LOC_COMMENT => "in a comment" | LOC_NOCOMMENT => "outside of comments" | LOC_PUBID => "in a public identifier" | LOC_SYSID => "in a system identifier" datatype Expected = EXP_NAME | EXP_LITERAL fun Expected2String exp = case exp of EXP_NAME => "a name" | EXP_LITERAL => "a literal" datatype CatError = ERR_DECODE_ERROR of Decode.Error.DecodeError | ERR_NO_SUCH_FILE of string * string | ERR_ILLEGAL_HERE of UniChar.Char * Location | ERR_MISSING_WHITE | ERR_EOF of Location | ERR_EXPECTED of Expected * UniChar.Char | ERR_XML of Error | ERR_MISSING_ATT of UniChar.Data * UniChar.Data | ERR_NON_PUBID of UniChar.Data * UniChar.Data fun catMessage err = case err of ERR_DECODE_ERROR err => Decode.Error.decodeMessage err | ERR_NO_SUCH_FILE(f,msg) => ["Could not open file",quoteErrorString f,"("^msg^")"] | ERR_ILLEGAL_HERE (c,loc) => ["Character",quoteErrorChar c,"is not allowed",InLocation2String loc] | ERR_MISSING_WHITE => ["Missing white space"] | ERR_EOF loc => [toUpperFirst (Location2String loc),"ended by end of file"] | ERR_EXPECTED (exp,c) => ["Expected",Expected2String exp,"but found",quoteErrorChar c] | ERR_XML err => errorMessage err | ERR_MISSING_ATT(elem,att) => ["Element",quoteErrorData elem,"has no",quoteErrorData att,"attribute"] | ERR_NON_PUBID(att,cs) => ["Value specified for attribute",quoteErrorData att,"contains non-PublicId", case cs of [c] => "character"^quoteErrorChar c | cs => List2xString ("characters ",", ","") quoteErrorChar cs] end (* stop of ../../Catalog/catError.sml *) (* start of ../../Catalog/catParams.sml *) signature CatParams = sig val O_CATALOG_FILES : Uri.Uri list ref val O_PREFER_SOCAT : bool ref val O_PREFER_SYSID : bool ref val O_PREFER_CATALOG : bool ref val O_SUPPORT_REMAP : bool ref val O_CATALOG_ENC : Encoding.Encoding ref val catError : CatError.Position * CatError.CatError -> unit end (* stop of ../../Catalog/catParams.sml *) (* start of ../../Unicode/Uri/uriDict.sml *) structure KeyUri : Key = struct type Key = Uri.Uri val null = Uri.emptyUri val compare = Uri.compareUri val toString = Uri.Uri2String val hash = Uri.hashUri end structure UriDict = Dict (structure Key = KeyUri) (* stop of ../../Unicode/Uri/uriDict.sml *) (* start of ../../Catalog/catData.sml *) structure CatData = struct datatype CatEntry = E_BASE of Uri.Uri | E_DELEGATE of string * Uri.Uri | E_EXTEND of Uri.Uri | E_MAP of string * Uri.Uri | E_REMAP of Uri.Uri * Uri.Uri type Catalog = Uri.Uri * CatEntry list end (* stop of ../../Catalog/catData.sml *) (* start of ../../Catalog/catFile.sml *) signature CatFile = sig type CatFile type Position val catOpenFile : Uri.Uri -> CatFile val catCloseFile : CatFile -> unit val catGetChar : CatFile -> UniChar.Char * CatFile val catPos : CatFile -> CatError.Position end functor CatFile ( structure Params : CatParams ) : CatFile = struct open UniChar CatError Decode Params Uri UtilError (* column, line, break *) type PosInfo = int * int * bool val startPos = (0,1,false) datatype CatFile = NOFILE of string * PosInfo | DIRECT of DecFile * PosInfo fun catPos cf = case cf of NOFILE (uri,(col,line,_)) => (uri,line,col) | DIRECT (dec,(col,line,_)) => (decName dec,line,col) fun catOpenFile uri = let val dec = decOpenUni(SOME uri,!O_CATALOG_ENC) in DIRECT(dec,startPos) end handle NoSuchFile fmsg => let val _ = catError(nullPosition,ERR_NO_SUCH_FILE fmsg) in NOFILE(Uri2String uri,startPos) end fun catCloseFile cf = case cf of NOFILE _ => () | DIRECT(dec,_) => ignore (decClose dec) fun catGetChar cf = case cf of NOFILE _ => (0wx00,cf) | DIRECT(dec,(col,line,brk)) => (let val (c,dec1) = decGetChar dec in case c of 0wx09 => (c,DIRECT(dec1,(col+1,line,false))) | 0wx0A => if brk then catGetChar(DIRECT(dec1,(col,line,false))) else (c,DIRECT(dec1,(0,line+1,false))) | 0wx0D => (0wx0A,DIRECT(dec1,(0,line+1,true))) | _ => if c>=0wx20 then (c,DIRECT(dec1,(col+1,line,false))) else let val err = ERR_ILLEGAL_HERE(c,LOC_CATALOG) val _ = catError(catPos cf,err) in catGetChar(DIRECT(dec1,(col+1,line,false))) end end handle DecEof dec => (0wx00,NOFILE(decName dec,(col,line,brk))) | DecError(dec,_,err) => let val _ = catError(catPos cf,ERR_DECODE_ERROR err) in catGetChar(DIRECT(dec,(col,line,false))) end ) end (* stop of ../../Catalog/catFile.sml *) (* start of ../../Catalog/socatParse.sml *) signature SocatParse = sig val parseSoCat : Uri.Uri -> CatData.Catalog end functor SocatParse ( structure Params : CatParams ) : SocatParse = struct structure CatFile = CatFile ( structure Params = Params ) open CatData CatError CatFile Params UniChar UniClasses Uri exception SyntaxError of UniChar.Char * CatFile.CatFile exception NotFound of UniChar.Char * CatFile.CatFile val getChar = catGetChar fun parseName' (c,f) = if isName c then let val (cs,cf1) = parseName' (getChar f) in (c::cs,cf1) end else (nil,(c,f)) fun parseName (c,f) = if isNms c then let val (cs,cf1) = parseName' (getChar f) in (c::cs,cf1) end else raise NotFound (c,f) datatype Keyword = KW_BASE | KW_CATALOG | KW_DELEGATE | KW_PUBLIC | KW_SYSTEM | KW_OTHER of UniChar.Data fun parseKeyword cf = let val (name,cf1) = parseName cf val kw = case name of [0wx42,0wx41,0wx53,0wx45] => KW_BASE | [0wx43,0wx41,0wx54,0wx41,0wx4c,0wx4f,0wx47] => KW_CATALOG | [0wx44,0wx45,0wx4c,0wx45,0wx47,0wx41,0wx54,0wx45] => KW_DELEGATE | [0wx50,0wx55,0wx42,0wx4c,0wx49,0wx43] => KW_PUBLIC | [0wx53,0wx59,0wx53,0wx54,0wx45,0wx4d] => KW_SYSTEM | _ => KW_OTHER name in (kw,cf1) end fun parseSysLit' quote f = let fun doit text (c,f) = if c=quote then (text,getChar f) else if c<>0wx0 then doit (c::text) (getChar f) else let val _ = catError(catPos f,ERR_EOF LOC_SYSID) in (text,(c,f)) end val (text,cf1) = doit nil (getChar f) in (Data2Uri(rev text),cf1) end fun parseSysLit req (c,f) = if c=0wx22 orelse c=0wx27 then parseSysLit' c f else if req then let val _ = catError(catPos f,ERR_EXPECTED(EXP_LITERAL,c)) in raise SyntaxError (c,f) end else raise NotFound (c,f) fun parsePubLit' quote f = let fun doit (hadSpace,atStart,text) (c,f) = case c of 0wx0 => let val _ = catError(catPos f,ERR_EOF LOC_PUBID) in (text,(c,f)) end | 0wx0A => doit (true,atStart,text) (getChar f) | 0wx20 => doit (true,atStart,text) (getChar f) | _ => if c=quote then (text,getChar f) else if isPubid c then if hadSpace andalso not atStart then doit (false,false,c::0wx20::text) (getChar f) else doit (false,false,c::text) (getChar f) else let val _ = catError(catPos f,ERR_ILLEGAL_HERE(c,LOC_PUBID)) in doit (hadSpace,atStart,text) (getChar f) end val (text,cf1) = doit (false,true,nil) (getChar f) in (Latin2String(rev text),cf1) end fun parsePubLit (c,f) = if c=0wx22 orelse c=0wx27 then parsePubLit' c f else let val _ = catError(catPos f,ERR_EXPECTED(EXP_LITERAL,c)) in raise SyntaxError (c,f) end fun skipComment (c,f) = case c of 0wx00 => let val _ = catError(catPos f,ERR_EOF LOC_COMMENT) in (c,f) end | 0wx2D => let val (c1,f1) = getChar f in if c1 = 0wx2D then (getChar f1) else skipComment (c1,f1) end | _ => skipComment (getChar f) fun skipCopt (c,f) = case c of 0wx00 => (c,f) | 0wx2D => let val (c1,f1) = getChar f in if c1=0wx2D then skipComment (getChar f1) else let val _ = catError(catPos f,ERR_ILLEGAL_HERE(c,LOC_NOCOMMENT)) in (c1,f1) end end | _ => (c,f) fun skipScomm req0 cf = let fun endit req (c,f) = if req andalso c<>0wx00 then let val _ = catError(catPos f,ERR_MISSING_WHITE) in (c,f) end else (c,f) fun doit req (c,f) = case c of 0wx00 => endit req (c,f) | 0wx09 => doit false (getChar f) | 0wx0A => doit false (getChar f) | 0wx20 => doit false (getChar f) | 0wx22 => endit req (c,f) | 0wx27 => endit req (c,f) | 0wx2D => let val (c1,f1) = getChar f in if c1=0wx2D then let val _ = if not req then () else catError(catPos f1,ERR_MISSING_WHITE) val cf1 = skipComment (getChar f1) in doit true cf1 end else let val _ = catError(catPos f,ERR_ILLEGAL_HERE(c,LOC_NOCOMMENT)) in doit req (c1,f1) end end | _ => if isNms c then endit req (c,f) else let val _ = catError(catPos f,ERR_ILLEGAL_HERE(c,LOC_NOCOMMENT)) in doit req (getChar f) end in doit req0 cf end val skipWS = skipScomm true val skipCommWS = (skipScomm false) o skipCopt val skipWSComm = skipScomm false fun skipOther cf = let val cf1 = skipWS cf val cf2 = let val (_,cf') = parseName cf1 in skipWS cf' end handle NotFound cf => cf fun doit cf = let val (_,cf1) = parseSysLit false cf in doit (skipWS cf1) end handle NotFound(c,f) => (c,f) in (NONE,doit cf2) end fun parseBase cf = let val cf1 = skipWS cf val (lit,cf2) = parseSysLit true cf1 val cf3 = skipWS cf2 in (SOME(E_BASE lit),cf3) end fun parseExtend cf = let val cf1 = skipWS cf val (lit,cf2) = parseSysLit true cf1 val cf3 = skipWS cf2 in (SOME(E_EXTEND lit),cf3) end fun parseDelegate cf = let val cf1 = skipWS cf val (pub,cf2) = parsePubLit cf1 val cf3 = skipWS cf2 val (sys,cf4) = parseSysLit true cf3 val cf5 = skipWS cf4 in (SOME(E_DELEGATE(pub,sys)),cf5) end fun parseRemap cf = let val cf1 = skipWS cf val (sys0,cf2) = parseSysLit true cf1 val cf3 = skipWS cf2 val (sys,cf4) = parseSysLit true cf3 val cf5 = skipWS cf4 in (SOME(E_REMAP(sys0,sys)),cf5) end fun parseMap cf = let val cf1 = skipWS cf val (pub,cf2) = parsePubLit cf1 val cf3 = skipWS cf2 val (sys,cf4) = parseSysLit true cf3 val cf5 = skipWS cf4 in (SOME(E_MAP(pub,sys)),cf5) end fun recover cf = let fun do_lit q (c,f) = if c=0wx00 then (c,f) else if c=q then getChar f else do_lit q (getChar f) fun do_com (c,f) = case c of 0wx00 => (c,f) | 0wx2D => let val (c1,f1) = getChar f in if c1=0wx2D then getChar f1 else do_com (c1,f1) end | _ => do_com (getChar f) fun doit (c,f) = case c of 0wx00 => (c,f) | 0wx22 => doit (do_lit c (getChar f)) | 0wx27 => doit (do_lit c (getChar f)) | 0wx2D => let val (c1,f1) = getChar f in if c1=0wx2D then doit (do_com (getChar f1)) else doit (c1,f1) end | _ => if isNms c then (c,f) else doit (getChar f) in doit cf end fun parseEntry (cf as (c,f)) = let val (kw,cf1) = parseKeyword cf handle NotFound cf => raise SyntaxError cf in case kw of KW_BASE => parseBase cf1 | KW_CATALOG => parseExtend cf1 | KW_DELEGATE => parseDelegate cf1 | KW_SYSTEM => parseRemap cf1 | KW_PUBLIC => parseMap cf1 | KW_OTHER _ => skipOther cf1 end handle SyntaxError cf => (NONE,recover cf) fun parseDocument cf = let fun doit (c,f) = if c=0wx0 then nil before catCloseFile f else let val (opt,cf1) = parseEntry (c,f) val entries = doit cf1 in case opt of NONE => entries | SOME entry => entry::entries end val cf1 = skipCommWS cf in doit cf1 end fun parseSoCat uri = let val f = catOpenFile uri val cf1 = getChar f in (uri,parseDocument cf1) end end (* stop of ../../Catalog/socatParse.sml *) (* start of ../../Catalog/catDtd.sml *) signature CatDtd = sig type Dtd val baseIdx : int val delegateIdx : int val extendIdx : int val mapIdx : int val remapIdx : int val hrefIdx : int val pubidIdx : int val sysidIdx : int val Index2AttNot : Dtd -> int -> UniChar.Data val Index2Element : Dtd -> int -> UniChar.Data end structure CatDtd = struct open Dtd val baseGi = UniChar.String2Data "Base" val delegateGi = UniChar.String2Data "Delegate" val extendGi = UniChar.String2Data "Extend" val mapGi = UniChar.String2Data "Map" val remapGi = UniChar.String2Data "Remap" val hrefAtt = UniChar.String2Data "HRef" val pubidAtt = UniChar.String2Data "PublicId" val sysidAtt = UniChar.String2Data "SystemId" fun initDtdTables () = let val dtd = Dtd.initDtdTables() val _ = app (ignore o (Element2Index dtd)) [baseGi,delegateGi,extendGi,mapGi,remapGi] val _ = app (ignore o (AttNot2Index dtd)) [hrefAtt,pubidAtt,sysidAtt] in dtd end local val dtd = initDtdTables() in val baseIdx = Element2Index dtd baseGi val delegateIdx = Element2Index dtd delegateGi val extendIdx = Element2Index dtd extendGi val mapIdx = Element2Index dtd mapGi val remapIdx = Element2Index dtd remapGi val hrefIdx = AttNot2Index dtd hrefAtt val pubidIdx = AttNot2Index dtd pubidAtt val sysidIdx = AttNot2Index dtd sysidAtt end end (* stop of ../../Catalog/catDtd.sml *) (* start of ../../Parser/Params/ignore.sml *) structure IgnoreHooks = struct type AppData = unit type AppFinal = unit fun hookXml(a,_) = a fun hookFinish a = a fun hookError(a,_) = a fun hookWarning(a,_) = a fun hookProcInst(a,_) = a fun hookComment(a,_) = a fun hookWhite(a,_) = a fun hookDecl (a,_) = a fun hookStartTag(a,_) = a fun hookEndTag(a,_) = a fun hookCData(a,_) = a fun hookData(a,_) = a fun hookCharRef(a,_) = a fun hookGenRef(a,_) = a fun hookParRef(a,_) = a fun hookEntEnd(a,_) = a fun hookDocType(a,_) = a fun hookSubset(a,_) = a fun hookExtSubset(a,_) = a fun hookEndDtd(a,_) = a end (* stop of ../../Parser/Params/ignore.sml *) (* start of ../../Catalog/catHooks.sml *) signature CatHooks = sig type AppData = CatData.CatEntry list val initCatHooks : unit -> AppData end functor CatHooks (structure Params : CatParams structure Dtd : CatDtd ) = struct open Dtd HookData IgnoreHooks Params UniChar UniClasses Uri UtilList CatData CatError type AppData = Dtd * CatEntry list type AppFinal = CatEntry list fun initCatHooks dtd = (dtd,nil) fun hookError (a,(pos,err)) = a before catError (pos,ERR_XML err) fun getAtt dtd (pos,elem,att,trans) atts = let val cvOpt = findAndMap (fn (i,ap,_) => if i<>att then NONE else case ap of AP_DEFAULT(_,cv,_) => SOME cv | AP_PRESENT(_,cv,_) => SOME cv | _ => NONE) atts in case cvOpt of SOME cv => trans (pos,att) cv | NONE => NONE before catError (pos,ERR_MISSING_ATT(Index2Element dtd elem,Index2AttNot dtd att)) end fun makePubid dtd (pos,att) cv = let val (cs,bad) = Vector.foldr (fn (c,(cs,bad)) => if isPubid c then (Char2char c::cs,bad) else (cs,c::bad)) (nil,nil) cv in if null bad then SOME(String.implode cs) else NONE before catError(pos,ERR_NON_PUBID(Index2AttNot dtd att,bad)) end fun makeUri (pos,att) cv = SOME cv fun hookStartTag (a as (dtd,items),((_,pos),elem,atts,_,_)) = if elem=baseIdx then let val hrefOpt = getAtt dtd (pos,elem,hrefIdx,makeUri) atts in case hrefOpt of NONE => a | SOME href => (dtd,E_BASE (Vector2Uri href)::items) end else if elem=delegateIdx then let val hrefOpt = getAtt dtd (pos,elem,hrefIdx,makeUri) atts val pubidOpt = getAtt dtd (pos,elem,pubidIdx,makePubid dtd) atts in case (hrefOpt,pubidOpt) of (SOME href,SOME pubid) => (dtd,E_DELEGATE(pubid,Vector2Uri href)::items) | _ => a end else if elem=extendIdx then let val hrefOpt = getAtt dtd (pos,elem,hrefIdx,makeUri) atts in case hrefOpt of NONE => a | SOME href => (dtd,E_EXTEND (Vector2Uri href)::items) end else if elem=mapIdx then let val hrefOpt = getAtt dtd (pos,elem,hrefIdx,makeUri) atts val pubidOpt = getAtt dtd (pos,elem,pubidIdx,makePubid dtd) atts in case (hrefOpt,pubidOpt) of (SOME href,SOME pubid) => (dtd,E_MAP(pubid,Vector2Uri href)::items) | _ => a end else if elem=remapIdx then let val hrefOpt = getAtt dtd (pos,elem,hrefIdx,makeUri) atts val sysidOpt = getAtt dtd (pos,elem,sysidIdx,makeUri) atts in case (hrefOpt,sysidOpt) of (SOME href,SOME sysid) => (dtd,E_REMAP(Vector2Uri sysid,Vector2Uri href)::items) | _ => a end else a fun hookFinish (_,items) = rev items end (* stop of ../../Catalog/catHooks.sml *) (* start of ../../Catalog/catParse.sml *) signature CatParse = sig val parseCatalog : Uri.Uri -> CatData.Catalog end functor CatParse (structure Params : CatParams) : CatParse = struct structure SocatParse = SocatParse (structure Params = Params) structure ParserOptions = struct structure Options = ParserOptions() open Options local fun setDefaults() = let val _ = setParserDefaults() val _ = O_WARN_MULT_ENUM := false val _ = O_WARN_XML_DECL := false val _ = O_WARN_ATT_NO_ELEM := false val _ = O_WARN_MULT_ENT_DECL := false val _ = O_WARN_MULT_NOT_DECL := false val _ = O_WARN_MULT_ATT_DEF := false val _ = O_WARN_MULT_ATT_DECL := false val _ = O_WARN_SHOULD_DECLARE := false val _ = O_VALIDATE := false val _ = O_COMPATIBILITY := false val _ = O_INTEROPERABILITY := false val _ = O_INCLUDE_EXT_PARSED := true in () end in val setParserDefaults = setDefaults end end structure CatHooks = CatHooks (structure Params = Params structure Dtd = CatDtd) structure Parse = Parse (structure Dtd = CatDtd structure Hooks = CatHooks structure Resolve = ResolveNull structure ParserOptions = ParserOptions) open CatHooks CatDtd Parse ParserOptions SocatParse Uri fun parseXmlCat uri = let val _ = setParserDefaults() val dtd = initDtdTables() val items = parseDocument (SOME uri) (SOME dtd) (initCatHooks dtd) in (uri,items) end fun isSocatSuffix x = x="soc" orelse x="SOC" fun isXmlSuffix x = x="xml" orelse x="XML" fun parseCatalog uri = let val suffix = uriSuffix uri in if isSocatSuffix suffix then parseSoCat uri else (if isXmlSuffix suffix then parseXmlCat uri else (if !O_PREFER_SOCAT then parseSoCat uri else parseXmlCat uri)) end end (* stop of ../../Catalog/catParse.sml *) (* start of ../../Catalog/catalog.sml *) signature Catalog = sig val resolveExtId : string option * (Uri.Uri * Uri.Uri) option -> Uri.Uri option end functor Catalog ( structure Params : CatParams ) : Catalog = struct structure CatParse = CatParse ( structure Params = Params ) open CatData CatParse Params Uri UriDict val catDict = makeDict("catalog",6,NONE:Catalog option) fun getCatalog uri = let val idx = getIndex(catDict,uri) in case getByIndex(catDict,idx) of SOME cat => cat | NONE => let val cat = parseCatalog uri val _ = setByIndex(catDict,idx,SOME cat) in cat end end datatype SearchType = SYS of Uri | PUB of string datatype SearchResult = FOUND of Uri * Uri | NOTFOUND of Uri list fun searchId id = let fun searchOne (base,other) nil = NOTFOUND other | searchOne (base,other) (entry::entries) = case entry of E_BASE path => let val newBase = uriJoin(base,path) in searchOne (newBase,other) entries end | E_EXTEND path => let val fullPath = uriJoin(base,path) in searchOne (base,fullPath::other) entries end | E_DELEGATE(prefix,path) => (case id of PUB pid => if String.isPrefix prefix pid then let val fullPath = uriJoin(base,path) in searchOne (base,fullPath::other) entries end else searchOne (base,other) entries | SYS _ => searchOne (base,other) entries) | E_MAP(pubid,path) => (case id of PUB pid => if pubid=pid then FOUND (base,path) else searchOne (base,other) entries | _ => searchOne (base,other) entries) | E_REMAP(sysid,path) => (case id of SYS sid => if sysid=sid then FOUND(base,path) else searchOne (base,other) entries | _ => searchOne (base,other) entries) fun searchLevel other nil = NOTFOUND(rev other) | searchLevel other (fname::fnames) = let val (base,entries) = getCatalog fname in case searchOne (base,other) entries of FOUND bp => FOUND bp | NOTFOUND other' => searchLevel other' fnames end fun searchAll fnames = if null fnames then NONE else case searchLevel nil fnames of FOUND bp => SOME bp | NOTFOUND other => searchAll other val fnames = !O_CATALOG_FILES in case id of PUB _ => searchAll fnames | SYS _ => if !O_SUPPORT_REMAP then searchAll fnames else NONE end fun resolveExtId (pub,sys) = let fun resolvePubCat () = case pub of NONE => NONE | SOME id => case searchId (PUB id) of NONE => NONE | SOME(base,sysid) => case searchId (SYS sysid) of NONE => SOME(base,sysid) | new => new fun resolveSysCat () = case sys of NONE => NONE | SOME(base,id) => searchId (SYS id) fun resolveCat () = if !O_PREFER_SYSID then case resolveSysCat () of NONE => resolvePubCat () | found => found else case resolvePubCat () of NONE => resolveSysCat () | found => found fun resolve () = if !O_PREFER_CATALOG then case resolveCat () of NONE => (case sys of NONE => NONE | SOME(base,id) => SOME(base,id)) | found => found else case sys of NONE => resolvePubCat () | SOME(base,id) => SOME(base,id) in if null (!O_CATALOG_FILES) then case sys of NONE => NONE | SOME(base,id) => SOME (uriJoin (base,id)) else case resolve () of NONE => NONE | SOME bp => SOME (uriJoin bp) end end (* stop of ../../Catalog/catalog.sml *) (* start of ../../Catalog/catResolve.sml *) functor ResolveCatalog ( structure Params : CatParams ) : Resolve = struct structure Catalog = Catalog ( structure Params = Params ) open Base Errors fun resolveExtId (id as EXTID(pub,sys)) = let val pub1 = case pub of NONE => NONE | SOME (str,_) => SOME str val sys1 = case sys of NONE => NONE | SOME (base,file,_) => SOME(base,file) in case Catalog.resolveExtId (pub1,sys1) of NONE => raise NoSuchFile ("","Could not generate system identifier") | SOME uri => uri end end (* stop of ../../Catalog/catResolve.sml *) (* start of ../../Catalog/catOptions.sml *) signature CatOptions = sig val O_CATALOG_FILES : Uri.Uri list ref val O_PREFER_SOCAT : bool ref val O_PREFER_SYSID : bool ref val O_PREFER_CATALOG : bool ref val O_SUPPORT_REMAP : bool ref val O_CATALOG_ENC : Encoding.Encoding ref val setCatalogDefaults : unit -> unit val setCatalogOptions : Options.Option list * (string -> unit) -> Options.Option list val catalogUsage : Options.Usage end functor CatOptions () : CatOptions = struct open Encoding Options Uri val O_CATALOG_FILES = ref nil: Uri list ref val O_PREFER_SOCAT = ref false val O_PREFER_SYSID = ref false val O_PREFER_CATALOG = ref true val O_SUPPORT_REMAP = ref true val O_CATALOG_ENC = ref LATIN1 fun setCatalogDefaults() = let val _ = O_CATALOG_FILES := nil val _ = O_PREFER_SOCAT := false val _ = O_PREFER_SYSID := false val _ = O_PREFER_CATALOG := true val _ = O_SUPPORT_REMAP := true val _ = O_CATALOG_ENC := LATIN1 in () end val catalogUsage = [U_ITEM(["-C ","--catalog="],"Use catalog "), U_ITEM(["--catalog-syntax=(soc|xml)"],"Default syntax for catalogs (xml)"), U_ITEM(["--catalog-encoding="],"Default encoding for Socat catalogs (LATIN1)"), U_ITEM(["--catalog-remap=[(yes|no)]"],"Support remapping of system identifiers (yes)"), U_ITEM(["--catalog-priority=(map|remap|sys)"],"Resolving strategy in catalogs (map)") ] fun setCatalogOptions (opts,doError) = let val catalogs = ref nil:string list ref fun hasNoArg key = "option "^key^" has no argument" fun mustHave key = String.concat ["option ",key," must have an argument"] fun mustBe(key,what) = String.concat ["the argument to --",key," must be ",what] val yesNo = "'yes' or 'no'" val mapRemapSys = "'map', 'remap' or 'sys'" val encName = "'ascii', 'latin1', 'utf8' or 'utf16'" val syntaxName = "'soc' or 'xml'" fun do_catalog valOpt = case valOpt of NONE => doError(mustHave "--catalog") | SOME s => catalogs := s::(!catalogs) fun do_prio valOpt = let fun set(cat,sys) = (O_PREFER_CATALOG := cat; O_PREFER_SYSID := sys) in case valOpt of NONE => doError(mustHave "--catalog-priority") | SOME "map" => set(true,false) | SOME "remap" => set(true,true) | SOME "sys" => set(false,true) | SOME s => doError(mustBe("catalog-priority",mapRemapSys)) end fun do_enc valOpt = case valOpt of NONE => doError(mustHave "--catalog-encoding") | SOME s => case isEncoding s of NOENC => doError("unsupported encoding "^s) | enc => O_CATALOG_ENC := enc fun do_remap valOpt = case valOpt of NONE => doError(mustHave "--catalog-remap") | SOME "no" => O_SUPPORT_REMAP := false | SOME "yes" => O_SUPPORT_REMAP := true | SOME s => doError(mustBe("catalog-remap",yesNo)) fun do_syntax valOpt = case valOpt of NONE => doError(mustHave "--catalog-syntax") | SOME "soc" => O_PREFER_SOCAT := true | SOME "xml" => O_PREFER_SOCAT := false | SOME s => doError(mustBe("catalog-remap",syntaxName)) fun do_long(key,valOpt) = case key of "catalog" => true before do_catalog valOpt | "catalog-remap" => true before do_remap valOpt | "catalog-syntax" => true before do_syntax valOpt | "catalog-encoding" => true before do_enc valOpt | "catalog-priority" => true before do_prio valOpt | _ => false fun do_short cs opts = case cs of nil => doit opts | [#"C"] => (case opts of OPT_STRING s::opts1 => (catalogs := s::(!catalogs); doit opts1) | _ => let val _ = doError (mustHave "-C") in doit opts end) | cs => let val cs1 = List.filter (fn c => if #"C"<>c then true else false before doError (mustHave "-C")) cs in if null cs1 then doit opts else (OPT_SHORT cs1)::doit opts end and doit nil = nil | doit (opt::opts) = case opt of OPT_NOOPT => opts | OPT_LONG(key,value) => if do_long(key,value) then doit opts else opt::doit opts | OPT_SHORT cs => do_short cs opts | OPT_NEG cs => opt::doit opts | OPT_STRING s => opt::doit opts val opts1 = doit opts val uris = map String2Uri (!catalogs) val _ = O_CATALOG_FILES := uris in opts1 end end (* stop of ../../Catalog/catOptions.sml *) (* start of nullOptions.sml *) signature NullOptions = sig val O_SILENT : bool ref val O_ERROR_DEVICE : TextIO.outstream ref val O_ERROR_LINEWIDTH : int ref val setNullDefaults : unit -> unit val setNullOptions : Options.Option list * (string -> unit) -> bool * bool * string option * string option val nullUsage : Options.Usage end structure NullOptions : NullOptions = struct open Options val O_SILENT = ref false val O_ERROR_DEVICE = ref TextIO.stdErr val O_ERROR_LINEWIDTH = ref 80 val nullUsage = [U_ITEM(["-s","--silent"],"Suppress reporting of errors and warnings"), U_ITEM(["-e ","--error-output="],"Redirect errors to file (stderr)"), U_SEP, U_ITEM(["--version"],"Print the version number and exit"), U_ITEM(["-?","--help"],"Print this text and exit"), U_ITEM(["--"],"Do not recognize remaining arguments as options") ] fun setNullDefaults () = let val _ = O_SILENT := false val _ = O_ERROR_DEVICE := TextIO.stdErr in () end fun setNullOptions (opts,optError) = let fun onlyOne what = "at most one "^what^" may be specified" fun unknown pre opt = String.concat ["unknown option ",pre,opt] fun hasNoArg pre key = String.concat ["option ",pre,key," expects no argument"] fun mustHave pre key = String.concat ["option ",pre,key," must have an argument"] fun check_noarg(key,valOpt) = if isSome valOpt then optError (hasNoArg "--" key) else () fun do_long (pars as (v,h,e,f)) (key,valOpt) = case key of "help" => (v,true,e,f) before check_noarg(key,valOpt) | "version" => (true,h,e,f) before check_noarg(key,valOpt) | "silent" => pars before O_SILENT := true before check_noarg(key,valOpt) | "error-output" => (case valOpt of NONE => pars before optError (mustHave "--" key) | SOME s => (v,h,SOME s,f)) | _ => pars before optError(unknown "--" key) fun do_short (pars as (v,h,e,f)) (cs,opts) = case cs of nil => doit pars opts | [#"e"] => (case opts of OPT_STRING s::opts1 => doit (v,h,SOME s,f) opts1 | _ => (optError (hasNoArg "-" "e"); doit pars opts)) | cs => doit (foldr (fn (c,pars) => case c of #"e" => pars before optError (hasNoArg "-" "e") | #"s" => pars before O_SILENT := true | #"?" => (v,true,e,f) | c => pars before optError (unknown "-" (String.implode [c]))) pars cs) opts and doit pars nil = pars | doit (pars as (v,h,e,f)) (opt::opts) = case opt of OPT_LONG(key,valOpt) => doit (do_long pars (key,valOpt)) opts | OPT_SHORT cs => do_short pars (cs,opts) | OPT_STRING s => if isSome f then let val _ = optError(onlyOne "input file") in doit pars opts end else doit (v,h,e,SOME s) opts | OPT_NOOPT => doit pars opts | OPT_NEG cs => let val _ = if null cs then () else app (fn c => optError (unknown "-n" (String.implode[c]))) cs in doit pars opts end in doit (false,false,NONE,NONE) opts end end (* stop of nullOptions.sml *) (* start of nullHooks.sml *) structure NullHooks = struct open Errors IgnoreHooks NullOptions type AppData = OS.Process.status type AppFinal = AppData val nullStart = OS.Process.success fun printError(pos,err) = if !O_SILENT then () else TextIO.output (!O_ERROR_DEVICE,formatMessage (4,!O_ERROR_LINEWIDTH) (Position2String pos ::(if isFatalError err then "Fatal error:" else "Error:") ::errorMessage err)) fun printWarning(pos,warn) = if !O_SILENT then () else TextIO.output (!O_ERROR_DEVICE,formatMessage (4,!O_ERROR_LINEWIDTH) (Position2String pos^" Warning:"::warningMessage warn)) fun hookError (_,pe) = OS.Process.failure before printError pe fun hookWarning (status,pw) = status before printWarning pw end (* stop of nullHooks.sml *) (* start of null.sml *) structure Null = struct structure ParserOptions = ParserOptions () structure CatOptions = CatOptions () structure CatParams = struct open CatError CatOptions NullOptions Uri UtilError fun catError(pos,err) = if !O_SILENT then () else TextIO.output (!O_ERROR_DEVICE,formatMessage (4,!O_ERROR_LINEWIDTH) (Position2String pos^" Error in catalog:"::catMessage err)) end structure Resolve = ResolveCatalog (structure Params = CatParams) structure ParseNull = Parse (structure Dtd = Dtd structure Hooks = NullHooks structure Resolve = Resolve structure ParserOptions = ParserOptions) fun parseNull uri = ParseNull.parseDocument uri NONE NullHooks.nullStart open CatOptions NullOptions Options ParserOptions Uri val usage = List.concat [parserUsage,[U_SEP],catalogUsage,[U_SEP],nullUsage] exception Exit of OS.Process.status fun null(prog,args) = let val prog = "fxp" val hadError = ref false fun optError msg = let val _ = TextIO.output(TextIO.stdErr,msg^".\n") in hadError := true end fun exitError msg = let val _ = TextIO.output(TextIO.stdErr,msg^".\n") in raise Exit OS.Process.failure end fun exitHelp prog = let val _ = printUsage TextIO.stdOut prog usage in raise Exit OS.Process.success end fun exitVersion prog = let val _ = app print [prog," version ",Version.FXP_VERSION,"\n"] in raise Exit OS.Process.success end fun summOpt prog = "For a summary of options type "^prog^" --help" fun noFile(f,cause) = "can't open file '"^f^"': "^exnMessage cause val opts = parseOptions args val _ = setParserDefaults() val opts1 = setParserOptions (opts,optError) val _ = setCatalogDefaults() val opts2 = setCatalogOptions (opts1,optError) val _ = setNullDefaults() val (vers,help,err,file) = setNullOptions (opts2,optError) val _ = if !hadError then exitError (summOpt prog) else () val _ = if vers then exitVersion prog else () val _ = if help then exitHelp prog else () val _ = case err of SOME "-" => O_ERROR_DEVICE := TextIO.stdErr | SOME f => (O_ERROR_DEVICE := TextIO.openOut f handle IO.Io {cause,...} => exitError(noFile(f,cause))) | NONE => () val f = valOf file handle Option => "-" val uri = if f="-" then NONE else SOME(String2Uri f) val status = parseNull uri val _ = if isSome err then TextIO.closeOut (!O_ERROR_DEVICE) else () in status end handle Exit status => status | exn => let val _ = TextIO.output (TextIO.stdErr,prog^": Unexpected exception: "^exnMessage exn^".\n") in OS.Process.failure end end (* stop of null.sml *) (* start of call-null.sml *) val _ = Null.null (CommandLine.name (), CommandLine.arguments ()) (* stop of call-null.sml *)