1 ;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/04/04 21:50:16 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; This is part of ebnf2ps package.
36 ;; This package defines a parser for DTD (Document Type Definition for XML).
38 ;; See ebnf2ps.el for documentation.
45 ;; `http://www.w3.org/TR/2004/REC-xml-20040204/'
46 ;; (Extensible Markup Language (XML) 1.0 (Third Edition))
47 ;; `http://www.w3.org/TR/html40/'
48 ;; (HTML 4.01 Specification)
49 ;; `http://www.w3.org/TR/NOTE-html-970421'
50 ;; (HTML DTD with support for Style Sheets)
55 ;; document ::= prolog element Misc*
56 ;; /* Note that *only* the prolog will be parsed */
61 ;; Char ::= #x9 | #xA | #xD
62 ;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
63 ;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
67 ;; Document authors are encouraged to avoid "compatibility characters", as
68 ;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
69 ;; [Unicode3]). The characters defined in the following ranges are also
70 ;; discouraged. They are either control characters or permanently undefined
71 ;; Unicode characters:
73 ;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF],
74 ;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
75 ;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
76 ;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
77 ;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
78 ;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
79 ;; [#x10FFFE-#x10FFFF]. */
84 ;; S ::= (#x20 | #x9 | #xD | #xA)+
87 ;; /* Names and Tokens */
89 ;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
90 ;; | CombiningChar | Extender
92 ;; Name ::= (Letter | '_' | ':') (NameChar)*
94 ;; Names ::= Name (#x20 Name)*
96 ;; Nmtoken ::= (NameChar)+
98 ;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
103 ;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
104 ;; | "'" ([^%&'] | PEReference | Reference)* "'"
106 ;; AttValue ::= '"' ([^<&"] | Reference)* '"'
107 ;; | "'" ([^<&'] | Reference)* "'"
109 ;; SystemLiteral ::= ('"' [^"]* '"')
112 ;; PubidLiteral ::= '"' PubidChar* '"'
113 ;; | "'" (PubidChar - "'")* "'"
115 ;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
119 ;; Although the EntityValue production allows the definition of a general
120 ;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY
121 ;; mylt "<">), it is strongly advised to avoid this practice since any
122 ;; reference to that entity will cause a well-formedness error. */
125 ;; /* Character Data */
127 ;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
132 ;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
135 ;; /* Processing Instructions */
137 ;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
139 ;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
142 ;; /* CDATA Sections */
144 ;; CDSect ::= CDStart CData CDEnd
146 ;; CDStart ::= '<![CDATA['
148 ;; CData ::= (Char* - (Char* ']]>' Char*))
155 ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
157 ;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
159 ;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
163 ;; VersionNum ::= '1.0'
165 ;; Misc ::= Comment | PI | S
168 ;; /* Document Type Definition */
170 ;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
171 ;; ('[' intSubset ']' S?)? '>'
172 ;; [VC: Root Element Type]
173 ;; [WFC: External Subset]
175 ;; DeclSep ::= PEReference | S
176 ;; [WFC: PE Between Declarations]
178 ;; intSubset ::= (markupdecl | DeclSep)*
180 ;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
181 ;; | NotationDecl | PI | Comment
182 ;; [VC: Proper Declaration/PE Nesting]
183 ;; [WFC: PEs in Internal Subset]
186 ;; /* External Subset */
188 ;; extSubset ::= TextDecl? extSubsetDecl
190 ;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
193 ;; /* Standalone Document Declaration */
195 ;; SDDecl ::= S 'standalone' Eq
196 ;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
197 ;; [VC: Standalone Document Declaration]
202 ;; element ::= EmptyElemTag | STag content ETag
203 ;; [WFC: Element Type Match]
204 ;; [VC: Element Valid]
209 ;; STag ::= '<' Name (S Attribute)* S? '>'
210 ;; [WFC: Unique Att Spec]
212 ;; Attribute ::= Name Eq AttValue
213 ;; [VC: Attribute Value Type]
214 ;; [WFC: No External Entity References]
215 ;; [WFC: No < in Attribute Values]
220 ;; ETag ::= '</' Name S? '>'
223 ;; /* Content of Elements */
225 ;; content ::= CharData?
226 ;; ((element | Reference | CDSect | PI | Comment) CharData?)*
229 ;; /* Tags for Empty Elements */
231 ;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
232 ;; [WFC: Unique Att Spec]
235 ;; /* Element Type Declaration */
237 ;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
238 ;; [VC: Unique Element Type Declaration]
240 ;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
243 ;; /* Element-content Models */
245 ;; children ::= (choice | seq) ('?' | '*' | '+')?
247 ;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
249 ;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
250 ;; [VC: Proper Group/PE Nesting]
252 ;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
253 ;; [VC: Proper Group/PE Nesting]
256 ;; /* Mixed-content Declaration */
258 ;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
259 ;; | '(' S? '#PCDATA' S? ')'
260 ;; [VC: Proper Group/PE Nesting]
261 ;; [VC: No Duplicate Types]
264 ;; /* Attribute-list Declaration */
266 ;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
268 ;; AttDef ::= S Name S AttType S DefaultDecl
271 ;; /* Attribute Types */
273 ;; AttType ::= StringType | TokenizedType | EnumeratedType
275 ;; StringType ::= 'CDATA'
277 ;; TokenizedType ::= 'ID' [VC: ID]
278 ;; [VC: One ID per Element Type]
279 ;; [VC: ID Attribute Default]
280 ;; | 'IDREF' [VC: IDREF]
281 ;; | 'IDREFS' [VC: IDREF]
282 ;; | 'ENTITY' [VC: Entity Name]
283 ;; | 'ENTITIES' [VC: Entity Name]
284 ;; | 'NMTOKEN' [VC: Name Token]
285 ;; | 'NMTOKENS' [VC: Name Token]
288 ;; /* Enumerated Attribute Types */
290 ;; EnumeratedType ::= NotationType | Enumeration
292 ;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
293 ;; [VC: Notation Attributes]
294 ;; [VC: One Notation Per Element Type]
295 ;; [VC: No Notation on Empty Element]
296 ;; [VC: No Duplicate Tokens]
298 ;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
300 ;; [VC: No Duplicate Tokens]
303 ;; /* Attribute Defaults */
305 ;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
306 ;; | (('#FIXED' S)? AttValue)
307 ;; [VC: Required Attribute]
308 ;; [VC: Attribute Default Value Syntactically Correct]
309 ;; [WFC: No < in Attribute Values]
310 ;; [VC: Fixed Attribute Default]
313 ;; /* Conditional Section */
315 ;; conditionalSect ::= includeSect | ignoreSect
317 ;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
318 ;; [VC: Proper Conditional Section/PE Nesting]
320 ;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
321 ;; [VC: Proper Conditional Section/PE Nesting]
323 ;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
325 ;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
328 ;; /* Character Reference */
330 ;; CharRef ::= '&#' [0-9]+ ';'
331 ;; | '&#x' [0-9a-fA-F]+ ';'
332 ;; [WFC: Legal Character]
335 ;; /* Entity Reference */
337 ;; Reference ::= EntityRef | CharRef
339 ;; EntityRef ::= '&' Name ';'
340 ;; [WFC: Entity Declared]
341 ;; [VC: Entity Declared]
342 ;; [WFC: Parsed Entity]
343 ;; [WFC: No Recursion]
345 ;; PEReference ::= '%' Name ';'
346 ;; [VC: Entity Declared]
347 ;; [WFC: No Recursion]
351 ;; /* Entity Declaration */
353 ;; EntityDecl ::= GEDecl | PEDecl
355 ;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
357 ;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
359 ;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
361 ;; PEDef ::= EntityValue | ExternalID
364 ;; /* External Entity Declaration */
366 ;; ExternalID ::= 'SYSTEM' S SystemLiteral
367 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
369 ;; NDataDecl ::= S 'NDATA' S Name
370 ;; [VC: Notation Declared]
373 ;; /* Text Declaration */
375 ;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
378 ;; /* Well-Formed External Parsed Entity */
380 ;; extParsedEnt ::= TextDecl? content
383 ;; /* Encoding Declaration */
385 ;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
387 ;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
388 ;; /* Encoding name contains only Latin characters */
391 ;; /* Notation Declarations */
393 ;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
394 ;; [VC: Unique Notation Name]
396 ;; PublicID ::= 'PUBLIC' S PubidLiteral
401 ;; Letter ::= BaseChar | Ideographic
403 ;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
404 ;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
405 ;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
406 ;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
407 ;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
408 ;; | #x0386 | [#x0388-#x038A] | #x038C
409 ;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
410 ;; | #x03DA | #x03DC | #x03DE
411 ;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C]
412 ;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
413 ;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
414 ;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
415 ;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586]
416 ;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
417 ;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
418 ;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
419 ;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
420 ;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
421 ;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
422 ;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
423 ;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
424 ;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
425 ;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
426 ;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
427 ;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
428 ;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
429 ;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C]
430 ;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
431 ;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
432 ;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
433 ;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
434 ;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
435 ;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
436 ;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
437 ;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
438 ;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
439 ;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
440 ;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
441 ;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
442 ;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
443 ;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
444 ;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D
445 ;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
446 ;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB]
447 ;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
448 ;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
449 ;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
450 ;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107]
451 ;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112]
452 ;; | #x113C | #x113E | #x1140
453 ;; | #x114C | #x114E | #x1150
454 ;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161]
455 ;; | #x1163 | #x1165 | #x1167
456 ;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173]
457 ;; | #x1175 | #x119E | #x11A8
458 ;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8]
459 ;; | #x11BA | [#x11BC-#x11C2] | #x11EB
460 ;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B]
461 ;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
462 ;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
463 ;; | #x1F59 | #x1F5B | #x1F5D
464 ;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
465 ;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
466 ;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
467 ;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
468 ;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182]
469 ;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
472 ;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
474 ;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
475 ;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
476 ;; | #x05BF | [#x05C1-#x05C2] | #x05C4
477 ;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC]
478 ;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
479 ;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
480 ;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954]
481 ;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
482 ;; | #x09BE | #x09BF | [#x09C0-#x09C4]
483 ;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
484 ;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C
485 ;; | #x0A3E | #x0A3F | [#x0A40-#x0A42]
486 ;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
487 ;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5]
488 ;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
489 ;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
490 ;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
491 ;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
492 ;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
493 ;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
494 ;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
495 ;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
496 ;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
497 ;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A]
498 ;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9]
499 ;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
500 ;; | #x0F35 | #x0F37 | #x0F39
501 ;; | #x0F3E | #x0F3F | [#x0F71-#x0F84]
502 ;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
503 ;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
504 ;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
507 ;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
508 ;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
509 ;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
510 ;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
511 ;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
513 ;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
514 ;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
520 ;; At the moment, only `<!ELEMENT' generates a syntactic chart. The
521 ;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' declarations are syntactically
522 ;; checked, but they do not generate a syntactic chart.
524 ;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An
525 ;; example of a `pure' dtd file is:
527 ;; <?xml version="1.0" encoding="UTF-8"?>
531 ;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
532 ;; steps, splits?, joins?)>
534 ;; An action that can be executed (id must be unique among actions for
535 ;; the enclosing step).
538 ;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
541 ;; id CDATA #REQUIRED
542 ;; name CDATA #REQUIRED
546 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar ebnf-dtd-lex nil
  "Text of the token most recently scanned by function `ebnf-dtd-lex'.
The function returns a token symbol; when the token carries text (a
name, a processing instruction target, a `#' keyword or a string) that
text is stored in this variable so the parser can inspect it.")
558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
559 ;; Syntactic analyzer
562 ;;; document ::= prolog element Misc*
563 ;;; /* Note that *only* the prolog will be parsed */
565 (defun ebnf-dtd-parser (start)
567 (let ((total (+ (- ebnf-limit start
) 1))
570 rule-list token rule the-end
)
572 (setq token
(ebnf-dtd-lex))
573 (and (eq token
'end-of-input
)
574 (error "Empty DTD file"))
575 (setq token
(ebnf-dtd-prolog token
))
576 (unless (eq (car token
) 'end-prolog
)
577 (setq the-end
(cdr token
)
579 (while (not (eq token the-end
))
582 (/ (* (- (point) bias
) 100.0) total
))
583 (setq token
(ebnf-dtd-intsubset token
)
587 (ebnf-add-empty-rule-list rule
)
588 (setq rule-list
(cons rule rule-list
))))
589 (or (eq the-end
'end-of-input
)
590 (eq (ebnf-dtd-lex) 'end-decl
)
591 (error "Missing end of DOCTYPE"))
592 ;; adjust message, 'cause *only* prolog will be parsed
593 (ebnf-message-float "Parsing...%s%%" 100.0))
598 ;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
600 ;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
602 ;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
606 ;;; VersionNum ::= '1.0'
608 ;;; Misc ::= Comment | PI | S
610 ;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
612 ;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
613 ;;; /* Encoding name contains only Latin characters */
615 ;;; SDDecl ::= S 'standalone' Eq
616 ;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
618 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
619 ;;; ('[' intSubset ']' S?)? '>'
622 (defun ebnf-dtd-prolog (token)
623 (when (and (eq token
'begin-pi
) (string= ebnf-dtd-lex
"xml"))
625 (setq token
(ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
626 "^1\\.0$" "XML version"))
627 ;; ( encoding = "encoding name" )?
628 (setq token
(ebnf-dtd-attribute-optional
630 "^[A-Za-z][-A-Za-z0-9._]*$" "XML encoding"))
;; ( standalone = ( "yes" | "no" ) )?
;; In Emacs regexps alternation is spelled `\|' and grouping `\( \)'; a
;; bare `|' is a literal character, so the pattern must be written out
;; with escaped operators for "yes" or "no" to be accepted.
(setq token (ebnf-dtd-attribute-optional
             token 'standalone-attr
             "^\\(yes\\|no\\)$" "XML standalone"))
635 (or (eq token
'end-pi
)
636 (error "Missing end of XML processing instruction")))
637 ;; processing instructions
638 (setq token
(ebnf-dtd-pi (ebnf-dtd-lex)))
641 ((eq token
'doctype-decl
)
642 (or (eq (ebnf-dtd-lex) 'name
)
643 (error "Document type name is missing"))
644 (cons (if (eq (ebnf-dtd-externalid) 'begin-subset
)
648 ((memq token
'(element-decl attlist-decl entity-decl notation-decl
))
649 (cons token
'end-of-input
))
651 '(end-prolog . end-subset
))
655 (defun ebnf-dtd-attribute (token attr match attr-name
)
657 (error "%s attribute is missing" attr-name
))
658 (ebnf-dtd-attribute-optional token attr match attr-name
))
661 (defun ebnf-dtd-attribute-optional (token attr match attr-name
)
662 (when (eq token attr
)
;; The attribute name must be followed by `=' and a string literal whose
;; text matches MATCH.  The callers in this file pass ATTR-NAME values that
;; already start with "XML " (e.g. "XML version"), so the message does not
;; add that prefix again; this also matches the wording used by
;; `ebnf-dtd-attribute'.
(or (and (eq (ebnf-dtd-lex) 'equal)
         (eq (ebnf-dtd-lex) 'string)
         (string-match match ebnf-dtd-lex))
    (error "%s attribute is invalid" attr-name))
667 (setq token
(ebnf-dtd-lex)))
671 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
672 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
675 (defun ebnf-dtd-externalid (&optional token
)
676 (let ((must-have token
))
677 (or token
(setq token
(ebnf-dtd-lex)))
678 (cond ((eq token
'system
)
679 (ebnf-dtd-systemliteral))
681 (ebnf-dtd-pubidliteral)
682 (ebnf-dtd-systemliteral))
684 (error "Missing `SYSTEM' or `PUBLIC' in external id"))
689 ;;; SystemLiteral ::= ('"' [^"]* '"')
690 ;;; | ("'" [^']* "'")
693 (defun ebnf-dtd-systemliteral ()
694 (or (eq (ebnf-dtd-lex) 'string
)
695 (error "System identifier is invalid"))
699 ;;; PubidLiteral ::= '"' PubidChar* '"'
700 ;;; | "'" (PubidChar - "'")* "'"
702 ;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
705 (defun ebnf-dtd-pubidliteral ()
706 (or (and (eq (ebnf-dtd-lex) 'string
)
707 (string-match "^[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*$"
709 (error "Public identifier is invalid")))
712 ;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
714 ;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
717 (defun ebnf-dtd-pi (token)
718 (while (eq token
'begin-pi
)
719 (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex
)
720 (error "Processing instruction name can not be `XML'"))
721 (while (not (eq (ebnf-dtd-lex) 'end-pi
)))
722 (setq token
(ebnf-dtd-lex)))
726 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
727 ;;; ('[' intSubset ']' S?)? '>'
729 ;;; intSubset ::= (markupdecl | DeclSep)*
731 ;;; DeclSep ::= PEReference | S
733 ;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
734 ;;; | NotationDecl | PI | Comment
737 (defun ebnf-dtd-intsubset (token)
738 ;; PI - Processing Instruction
739 (and (eq token
'begin-pi
)
740 (setq token
(ebnf-dtd-pi token
)))
742 ((memq token
'(end-subset end-of-input
))
745 (cons (ebnf-dtd-lex) nil
)) ; annotation
746 ((eq token
'element-decl
)
747 (ebnf-dtd-elementdecl)) ; rule
748 ((eq token
'attlist-decl
)
749 (ebnf-dtd-attlistdecl)) ; annotation
750 ((eq token
'entity-decl
)
751 (ebnf-dtd-entitydecl)) ; annotation
752 ((eq token
'notation-decl
)
753 (ebnf-dtd-notationdecl)) ; annotation
755 (error "Invalid DOCTYPE element"))
759 ;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
761 ;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
763 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
764 ;;; | '(' S? '#PCDATA' S? ')'
766 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
768 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
770 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
772 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
775 (defun ebnf-dtd-elementdecl ()
776 (let ((action ebnf-action
)
778 (setq ebnf-action nil
)
779 (or (eq (ebnf-dtd-lex) 'name
)
780 (error "Invalid ELEMENT name"))
781 (setq name ebnf-dtd-lex
783 body
(cond ((memq token
'(empty any
))
784 (let ((term (ebnf-make-terminal ebnf-dtd-lex
)))
785 (cons (ebnf-dtd-lex) term
)))
786 ((eq token
'begin-group
)
787 (setq token
(ebnf-dtd-lex))
788 (if (eq token
'pcdata
)
790 (ebnf-dtd-children token
)))
792 (error "Invalid ELEMENT content"))
794 (or (eq (car body
) 'end-decl
)
795 (error "Missing `>' in ELEMENT declaration"))
796 (ebnf-eps-add-production name
)
798 (ebnf-make-production name
(cdr body
) action
))))
801 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
802 ;;; | '(' S? '#PCDATA' S? ')'
805 (defun ebnf-dtd-mixed ()
806 (let* ((alt (cons (ebnf-make-terminal ebnf-dtd-lex
) nil
))
807 (token (ebnf-dtd-lex))
808 (has-alternative (eq token
'alternative
)))
809 (while (eq token
'alternative
)
810 (or (eq (ebnf-dtd-lex) 'name
)
811 (error "Invalid name"))
812 (setq alt
(cons ebnf-dtd-lex alt
)
813 token
(ebnf-dtd-lex)))
814 (or (eq token
'end-group
)
815 (error "Missing `)'"))
817 (or (eq (ebnf-dtd-lex) 'zero-or-more
)
818 (error "Missing `*'")))
819 (ebnf-token-alternative alt
(cons (ebnf-dtd-lex) nil
))))
822 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
(defun ebnf-dtd-children (token)
  "Parse a `children' content model whose first inner token is TOKEN.
The group is parsed by `ebnf-dtd-choice-seq' and its result is handed
to `ebnf-dtd-operators', which deals with a trailing `?', `*' or `+'.
Return whatever `ebnf-dtd-operators' returns."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
829 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
831 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
834 (defun ebnf-dtd-choice-seq (token)
835 (setq token
(ebnf-dtd-cp token
))
839 ((eq (car token
) 'alternative
)
840 (while (eq (car token
) 'alternative
)
841 (setq elist
(cons (cdr token
) elist
)
842 token
(ebnf-dtd-cp (ebnf-dtd-lex))))
843 (setq elist
(ebnf-token-alternative elist token
)))
845 ((eq (car token
) 'comma
)
846 (while (eq (car token
) 'comma
)
847 (setq elist
(cons (cdr token
) elist
)
848 token
(ebnf-dtd-cp (ebnf-dtd-lex))))
849 (setq elist
(ebnf-token-sequence (cons (cdr token
) elist
))))
852 (setq elist
(cdr token
))))
853 (or (eq (car token
) 'end-group
)
854 (error "Missing `)' in ELEMENT content"))
858 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
861 (defun ebnf-dtd-cp (token)
862 (ebnf-dtd-operators (cond ((eq token
'name
)
863 (ebnf-make-terminal ebnf-dtd-lex
))
864 ((eq token
'begin-group
)
865 (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
867 (error "Invalid element"))
871 ;;; elm ('?' | '*' | '+')?
874 (defun ebnf-dtd-operators (elm)
875 (let ((token (ebnf-dtd-lex)))
876 (cond ((eq token
'optional
) ; ? - optional
877 (cons (ebnf-dtd-lex) (ebnf-token-optional elm
)))
878 ((eq token
'zero-or-more
) ; * - zero or more
879 (cons (ebnf-dtd-lex) (ebnf-make-zero-or-more elm
)))
880 ((eq token
'one-or-more
) ; + - one or more
881 (cons (ebnf-dtd-lex) (ebnf-make-one-or-more elm
)))
887 ;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
889 ;;; AttDef ::= S Name S AttType S DefaultDecl
891 ;;; AttType ::= StringType | TokenizedType | EnumeratedType
893 ;;; StringType ::= 'CDATA'
895 ;;; TokenizedType ::= 'ID'
903 ;;; EnumeratedType ::= NotationType | Enumeration
905 ;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
907 ;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
909 ;;; DefaultDecl ::= '#REQUIRED'
911 ;;; | (('#FIXED' S)? AttValue)
914 ;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
915 ;;; | "'" ([^<&'] | Reference)* "'"
917 ;;; Reference ::= EntityRef | CharRef
919 ;;; EntityRef ::= '&' Name ';'
921 ;;; CharRef ::= '&#' [0-9]+ ';'
922 ;;; | '&#x' [0-9a-fA-F]+ ';'
924 ;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
927 (defun ebnf-dtd-attlistdecl ()
928 (or (eq (ebnf-dtd-lex) 'name
)
929 (error "Invalid ATTLIST name"))
931 (while (eq (setq token
(ebnf-dtd-lex)) 'name
)
933 (setq token
(ebnf-dtd-lex))
935 ((eq token
'notation
)
936 (or (eq (ebnf-dtd-lex) 'begin-group
)
937 (error "Missing `(' in NOTATION type in ATTLIST declaration"))
938 (ebnf-dtd-namelist "NOTATION" '(name)))
939 ((eq token
'begin-group
)
940 (ebnf-dtd-namelist "enumeration" '(name name-char
)))
942 '(cdata id idref idrefs entity entities nmtoken nmtokens
)))
944 (error "Invalid type in ATTLIST declaration")))
946 (setq token
(ebnf-dtd-lex))
947 (unless (memq token
'(required implied
))
948 (and (eq token
'fixed
)
949 (setq token
(ebnf-dtd-lex)))
950 (or (and (eq token
'string
)
952 "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
954 (error "Invalid default value in ATTLIST declaration"))))
955 (or (eq token
'end-decl
)
956 (error "Missing `>' in end of ATTLIST"))
957 (cons (ebnf-dtd-lex) nil
)))
960 (defun ebnf-dtd-namelist (type name-list
)
963 (or (memq (ebnf-dtd-lex) name-list
)
964 (error "Invalid name in %s type in ATTLIST declaration" type
))
965 (eq (setq token
(ebnf-dtd-lex)) 'alternative
)))
966 (or (eq token
'end-group
)
967 (error "Missing `)' in %s type in ATTLIST declaration" type
))))
970 ;;; EntityDecl ::= GEDecl | PEDecl
972 ;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
974 ;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
976 ;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
978 ;;; PEDef ::= EntityValue | ExternalID
980 ;;; NDataDecl ::= S 'NDATA' S Name
983 ;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
984 ;;; | "'" ([^%&'] | PEReference | Reference)* "'"
986 ;;; PEReference ::= '%' Name ';'
988 ;;; Reference ::= EntityRef | CharRef
990 ;;; EntityRef ::= '&' Name ';'
992 ;;; CharRef ::= '&#' [0-9]+ ';'
993 ;;; | '&#x' [0-9a-fA-F]+ ';'
995 ;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
998 (defun ebnf-dtd-entitydecl ()
999 (let* ((token (ebnf-dtd-lex))
1000 (pedecl (eq token
'percent
)))
1002 (setq token
(ebnf-dtd-lex)))
1003 (or (eq token
'name
)
1004 (error "Invalid name of ENTITY"))
1005 (setq token
(ebnf-dtd-lex))
1006 (if (eq token
'string
)
1008 "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
1010 (setq token
(ebnf-dtd-lex))
1011 (error "Invalid ENTITY definition"))
1012 (setq token
(ebnf-dtd-externalid token
))
1013 (when (and (not pedecl
) (eq token
'ndata
))
1014 (or (eq (ebnf-dtd-lex) 'name
)
1015 (error "Invalid NDATA name"))
1016 (setq token
(ebnf-dtd-lex))))
1017 (or (eq token
'end-decl
)
1018 (error "Missing `>' in end of ENTITY"))
1019 (cons (ebnf-dtd-lex) nil
)))
1022 ;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1024 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
(defun ebnf-dtd-notationdecl ()
  "Check the remainder of a `<!NOTATION' declaration.
The next token must be a name, followed by an external or public
identifier (see `ebnf-dtd-externalid-or-publicid') and the closing `>'.
Signal an error otherwise.
Return a cons whose car is the token following the declaration and
whose cdr is nil."
  ;; Notation name.
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  ;; Identifier part; the helper returns the token that follows it.
  (unless (eq (ebnf-dtd-externalid-or-publicid) 'end-decl)
    (error "Missing `>' in end of NOTATION"))
  (list (ebnf-dtd-lex)))
1035 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
1036 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1038 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
1041 (defun ebnf-dtd-externalid-or-publicid ()
1042 (let ((token (ebnf-dtd-lex)))
1043 (cond ((eq token
'system
)
1044 (ebnf-dtd-systemliteral))
1046 (ebnf-dtd-pubidliteral)
1047 (and (eq (setq token
(ebnf-dtd-lex)) 'string
)
1048 (setq token
(ebnf-dtd-lex)))
1051 (error "Missing `SYSTEM' or `PUBLIC'")))))
1054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defconst ebnf-dtd-token-table
  (make-vector 256 'error)
  "Table mapping a character code (0 to 255) to its lexical token symbol.
Every slot starts out as `error'; `ebnf-dtd-initialize' stores the
actual token classes.")
1062 (defun ebnf-dtd-initialize ()
1063 "Initialize EBNF token table."
1064 ;; control character & control 8-bit character are set to `error'
1067 (while (< char ?
\072)
1068 (aset ebnf-dtd-token-table char
'name-char
)
1069 (setq char
(1+ char
)))
1070 ;; printable character: A-Z
1072 (while (< char ?
\133)
1073 (aset ebnf-dtd-token-table char
'name
)
1074 (setq char
(1+ char
)))
1075 ;; printable character: a-z
1077 (while (< char ?
\173)
1078 (aset ebnf-dtd-token-table char
'name
)
1079 (setq char
(1+ char
)))
1080 ;; European 8-bit accented characters:
1082 (while (< char ?
\400)
1083 (aset ebnf-dtd-token-table char
'name
)
1084 (setq char
(1+ char
)))
1085 ;; Override name characters:
1086 (aset ebnf-dtd-token-table ?_
'name
)
1087 (aset ebnf-dtd-token-table ?
: 'name
)
1088 (aset ebnf-dtd-token-table ?.
'name-char
)
1089 (aset ebnf-dtd-token-table ?-
'name-char
)
1090 ;; Override space characters:
1091 (aset ebnf-dtd-token-table ?
\n 'space
) ; [NL] linefeed
1092 (aset ebnf-dtd-token-table ?
\r 'space
) ; [CR] carriage return
1093 (aset ebnf-dtd-token-table ?
\t 'space
) ; [HT] horizontal tab
1094 (aset ebnf-dtd-token-table ?\
'space
) ; [SP] space
1095 ;; Override other lexical characters:
1096 (aset ebnf-dtd-token-table ?
= 'equal
)
1097 (aset ebnf-dtd-token-table ?
, 'comma
)
1098 (aset ebnf-dtd-token-table ?
* 'zero-or-more
)
1099 (aset ebnf-dtd-token-table ?
+ 'one-or-more
)
1100 (aset ebnf-dtd-token-table ?|
'alternative
)
1101 (aset ebnf-dtd-token-table ?%
'percent
)
1102 (aset ebnf-dtd-token-table ?
& 'ampersand
)
1103 (aset ebnf-dtd-token-table ?
# 'hash
)
1104 (aset ebnf-dtd-token-table ?
\? 'interrogation
)
1105 (aset ebnf-dtd-token-table ?
\" 'double-quote
)
1106 (aset ebnf-dtd-token-table ?
\' 'single-quote
)
1107 (aset ebnf-dtd-token-table ?
< 'less-than
)
1108 (aset ebnf-dtd-token-table ?
> 'end-decl
)
1109 (aset ebnf-dtd-token-table ?\
( 'begin-group
)
1110 (aset ebnf-dtd-token-table ?\
) 'end-group
)
1111 (aset ebnf-dtd-token-table ?\
[ 'begin-subset
)
1112 (aset ebnf-dtd-token-table ?\
] 'end-subset
)))
1115 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
  "Character set passed to `ebnf-buffer-substring' when scanning a name.
Computed by `ebnf-range-regexp' from the ASCII name characters and the
character range \\240-\\377 (presumably appended in a form suitable for
the running Emacs; see `ebnf-range-regexp').")
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping a declaration keyword to its token symbol.
The lexer looks the declaration name up here and signals an
\"Invalid declaration name\" error when it is absent.")
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required))
  "Alist mapping a `#'-prefixed keyword to its lexer token symbol.
`ebnf-dtd-lex' builds the key by prepending \"#\" to the scanned name
and looks it up here with `assoc'; a key that is not listed makes the
lexer signal \"Invalid element\".")
;; Alist mapping reserved names to lexer token symbols.  `ebnf-dtd-lex'
;; looks every scanned name up in this list with `assoc'.
;; NOTE(review): as written here the value has no opening quote/paren
;; before its first entry -- confirm the definition is intact before
;; relying on or extending it.
1135 (defconst ebnf-dtd-name-alist
1139 ("ENTITIES" . entities
)
1145 ("NMTOKEN" . nmtoken
)
1146 ("NMTOKENS" . nmtokens
)
1147 ("NOTATION" . notation
)
1150 ("encoding" . encoding-attr
)
1151 ("standalone" . standalone-attr
)
1152 ("version" . version-attr
)))
;; Lexer entry point for the DTD parser.  Spaces (via `skip-chars-forward')
;; and `<!--' comments (via `ebnf-dtd-skip-comment') are skipped first; the
;; character at point is then classified through `ebnf-dtd-token-table' and
;; handled by the clause that matches its token class.  Where a clause keeps
;; the scanned text, it stores it in the variable `ebnf-dtd-lex'.
;; NOTE(review): the result symbol of several clauses is not spelled out in
;; the lines below -- confirm each clause against the parser that calls this.
1155 (defun ebnf-dtd-lex ()
1156 "Lexical analyzer for DTD.
1158 Return a lexical token.
1160 See documentation for variable `ebnf-dtd-lex'."
1161 (if (>= (point) ebnf-limit
)
1164 ;; skip spaces and comments
1165 (while (if (> (following-char) 255)
1169 (setq token
(aref ebnf-dtd-token-table
(following-char)))
1172 (skip-chars-forward " \n\r\t" ebnf-limit
)
1173 (< (point) ebnf-limit
))
1174 ((and (eq token
'less-than
)
1175 (looking-at "<!--"))
1176 (ebnf-dtd-skip-comment))
1181 ((>= (point) ebnf-limit
)
1185 (error "Invalid character"))
1186 ;; beginning of declaration:
1187 ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
1188 ((eq token
'less-than
)
1190 (let ((char (following-char)))
1191 (cond ((= char ?
\?) ; <?
1193 (setq ebnf-dtd-lex
(ebnf-buffer-substring ebnf-dtd-name-chars
))
1197 (let ((decl (ebnf-buffer-substring ebnf-dtd-name-chars
)))
1198 (or (cdr (assoc decl ebnf-dtd-decl-alist
))
1199 (error "Invalid declaration name `%s'" decl
))))
1201 (error "Invalid declaration `<%c'" char
)))))
;; name / name-char: scan the name into `ebnf-dtd-lex' and look it up in
;; `ebnf-dtd-name-alist'.
1203 ((memq token
'(name name-char
))
1204 (setq ebnf-dtd-lex
(ebnf-buffer-substring ebnf-dtd-name-chars
))
1205 (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist
))
;; `?' token: tests whether `following-char' is `>'.
1208 ((eq token
'interrogation
)
1210 (if (/= (following-char) ?
>)
1214 ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
1218 (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars
)))
1219 (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist
))
1220 (error "Invalid element `%s'" ebnf-dtd-lex
)))
;; double-quoted string: its text is kept in `ebnf-dtd-lex'.
1222 ((eq token
'double-quote
)
1223 (setq ebnf-dtd-lex
(ebnf-dtd-string ?
\"))
1226 ((eq token
'single-quote
)
1227 (setq ebnf-dtd-lex
(ebnf-dtd-string ?
\'))
1230 ((eq token
'percent
)
1232 (if (looking-at "[ \n\r\t]")
1234 (setq ebnf-dtd-lex
(ebnf-dtd-name-ref "%"))
1236 ;; &#...;, &#x...;, &name;
1237 ((eq token
'ampersand
)
1239 (if (/= (following-char) ?
#)
1242 (setq ebnf-dtd-lex
(ebnf-dtd-name-ref "&"))
1246 (setq ebnf-dtd-lex
(if (/= (following-char) ?x
)
1247 (ebnf-dtd-char-ref "&#" "0-9")
1249 (ebnf-dtd-char-ref "&#x" "0-9a-fA-F")))
1251 ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
(defun ebnf-dtd-name-ref (start)
  "Scan a named reference at point and return its text.
START is the text that introduced the reference.  The work is done by
`ebnf-dtd-char-ref', called with `ebnf-dtd-name-chars' as the set of
characters allowed in the reference body."
  (ebnf-dtd-char-ref start ebnf-dtd-name-chars))
(defun ebnf-dtd-char-ref (start chars)
  "Scan a reference body at point and return the complete reference text.

START is the text that introduced the reference (this file passes
\"%\", \"&\", \"&#\" and \"&#x\").  CHARS is the character set handed
to `ebnf-buffer-substring' to collect the reference body.

The body must be followed by `;'.  If it is not, an error is signaled
whose message shows START, the body and the offending character.  On
success point is moved past the `;' and the value is START, the body
and \";\" concatenated."
  (let ((body (ebnf-buffer-substring chars)))
    (or (= (following-char) ?\;)
        (error "Invalid element `%s%s%c'" start body (following-char)))
    ;; The `;' belongs to the reference being returned, so step over it;
    ;; otherwise the next scan would start on the terminator itself.
    (forward-char)
    (format "%s%s;" start body)))
1270 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Character set allowed inside a double-quoted DTD string.
The ASCII part is tab plus the characters from space to `~' with the
double quote left out; it is passed to `ebnf-range-regexp' together
with the range bounds \\240 and \\377.  `ebnf-dtd-string' skips over
this set when its delimiter is a double quote.")
(defconst ebnf-dtd-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Character set allowed inside a single-quoted DTD string.
The ASCII part is tab plus the characters from space to `~' with the
single quote left out; it is passed to `ebnf-range-regexp' together
with the range bounds \\240 and \\377.  `ebnf-dtd-string' skips over
this set when its delimiter is not a double quote.")
;; Extract the text of a string literal delimited by DELIM, without text
;; properties.  DELIM selects the character set that is skipped over:
;; `ebnf-dtd-double-string-chars' when DELIM is a double quote,
;; `ebnf-dtd-single-string-chars' otherwise.  An error (Missing string
;; delimiter) is signaled when the character after the skipped text is not
;; DELIM.
;; NOTE(review): how the start and end positions of the substring are
;; established is not spelled out in the lines below -- confirm before
;; changing this function.
1277 (defun ebnf-dtd-string (delim)
1278 (buffer-substring-no-properties
1283 (skip-chars-forward (if (= delim ?
\")
1284 ebnf-dtd-double-string-chars
1285 ebnf-dtd-single-string-chars
)
1287 (or (= (following-char) delim
)
1288 (error "Missing string delimiter `%c'" delim
))
1294 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-dtd-comment-chars
  (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
  "Character set skipped while scanning the text of a comment.
The ASCII part is a negated set: it leaves out `-' and every control
character except tab, newline and carriage return.  It is passed to
`ebnf-range-regexp' together with the range bounds \\177 and \\237.
`ebnf-dtd-skip-comment' gives this value to `skip-chars-forward'.")
(defconst ebnf-dtd-filename-chars
  (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
  "Character set accepted while scanning a file name inside a comment.
The ASCII part is a negated set: it leaves out `-' and every control
character from \\000 to \\037.  It is passed to `ebnf-range-regexp'
together with the range bounds \\177 and \\237.
`ebnf-dtd-eps-filename' gives this value to `ebnf-buffer-substring'.")
;; Skip a comment whose `<!--' opener starts at point.  The character right
;; after the opener selects an action: while `ebnf-eps-executing' is non-nil,
;; `[' adds and `]' removes the EPS context named by `ebnf-dtd-eps-filename';
;; otherwise the character is looked up in `ebnf-comment-table' and the
;; result stored in `ebnf-action'.  Comment text is then skipped, never past
;; `ebnf-limit', and the end of the comment is validated.
;; NOTE(review): the value returned for each outcome is not spelled out in
;; the lines below; `ebnf-dtd-lex' uses the result as a loop condition, so
;; confirm it before changing the final `cond'.
1301 (defun ebnf-dtd-skip-comment ()
1302 (forward-char 4) ; <!--
;; `[' right after the opener: add an EPS context
1305 ((and ebnf-eps-executing
(= (following-char) ?\
[))
1306 (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
;; `]' right after the opener: remove an EPS context
1308 ((and ebnf-eps-executing
(= (following-char) ?\
]))
1309 (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
1310 ;; any other action in comment
1312 (setq ebnf-action
(aref ebnf-comment-table
(following-char))))
1315 (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit
)
1316 (and (< (point) ebnf-limit
)
1317 (not (looking-at "-->"))))
1318 (skip-chars-forward "-" ebnf-limit
))
1319 ;; check for a valid end of comment
1320 (cond ((>= (point) ebnf-limit
)
1326 (error "Invalid character"))
;; Collect a file name from the comment text at point into `fname'.  Pieces
;; are scanned with `ebnf-dtd-filename-chars' via `ebnf-buffer-substring'.
;; While point is before `ebnf-limit' and sits on a `-' that does not begin
;; `-->', the run of `-' is appended to `fname' as well.
;; NOTE(review): the binding of `fname' and the value returned are not
;; spelled out in the lines below -- confirm before changing this function.
1330 (defun ebnf-dtd-eps-filename ()
1336 (ebnf-buffer-substring ebnf-dtd-filename-chars
)))
1337 (and (< (point) ebnf-limit
)
1338 (= (following-char) ?-
) ; may be \n, \t, \r
1339 (not (looking-at "-->"))))
1340 (setq fname
(concat fname
(ebnf-buffer-substring "-"))))
1344 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1349 ;;; arch-tag: c21bb640-135f-4afa-8712-fa11d86301c4
1350 ;;; ebnf-dtd.el ends here