Add 2011 to FSF/AIST copyright years.
[bpt/emacs.git] / lisp / progmodes / ebnf-dtd.el
CommitLineData
728df3d9
VJL
1;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
2
5df4f04c 3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
d7a0267c 4;; Free Software Foundation, Inc.
728df3d9
VJL
5
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
728df3d9 8;; Keywords: wp, ebnf, PostScript
3ced5caa 9;; Version: 1.1
728df3d9
VJL
10
11;; This file is part of GNU Emacs.
12
b1fc2b50 13;; GNU Emacs is free software: you can redistribute it and/or modify
728df3d9 14;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
728df3d9
VJL
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
b1fc2b50 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
728df3d9
VJL
25
26;;; Commentary:
27
28;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29;;
30;;
31;; This is part of ebnf2ps package.
32;;
33;; This package defines a parser for DTD (Document Type Definition for XML).
34;;
35;; See ebnf2ps.el for documentation.
36;;
37;;
38;; DTD Syntax
39;; ----------
40;;
41;; See the URLs:
42;; `http://www.w3.org/TR/2004/REC-xml-20040204/'
43;; (Extensible Markup Language (XML) 1.0 (Third Edition))
44;; `http://www.w3.org/TR/html40/'
45;; (HTML 4.01 Specification)
46;; `http://www.w3.org/TR/NOTE-html-970421'
47;; (HTML DTD with support for Style Sheets)
48;;
49;;
50;; /* Document */
51;;
52;; document ::= prolog element Misc*
53;; /* Note that *only* the prolog will be parsed */
54;;
55;;
56;; /* Characters */
57;;
58;; Char ::= #x9 | #xA | #xD
59;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
60;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
61;;
62;; /* NOTE:
63;;
64;; Document authors are encouraged to avoid "compatibility characters", as
65;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
66;; [Unicode3]). The characters defined in the following ranges are also
67;; discouraged. They are either control characters or permanently undefined
68;; Unicode characters:
69;;
70;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF],
71;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
72;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
73;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
74;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
75;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
76;; [#x10FFFE-#x10FFFF]. */
77;;
78;;
79;; /* White Space */
80;;
81;; S ::= (#x20 | #x9 | #xD | #xA)+
82;;
83;;
84;; /* Names and Tokens */
85;;
86;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
87;; | CombiningChar | Extender
88;;
89;; Name ::= (Letter | '_' | ':') (NameChar)*
90;;
91;; Names ::= Name (#x20 Name)*
92;;
93;; Nmtoken ::= (NameChar)+
94;;
95;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
96;;
97;;
98;; /* Literals */
99;;
100;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
101;; | "'" ([^%&'] | PEReference | Reference)* "'"
102;;
103;; AttValue ::= '"' ([^<&"] | Reference)* '"'
104;; | "'" ([^<&'] | Reference)* "'"
105;;
106;; SystemLiteral ::= ('"' [^"]* '"')
107;; | ("'" [^']* "'")
108;;
109;; PubidLiteral ::= '"' PubidChar* '"'
110;; | "'" (PubidChar - "'")* "'"
111;;
112;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
113;;
114;; /* NOTE:
115;;
116;; Although the EntityValue production allows the definition of a general
117;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY
118;; mylt "<">), it is strongly advised to avoid this practice since any
119;; reference to that entity will cause a well-formedness error. */
120;;
121;;
122;; /* Character Data */
123;;
124;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
125;;
126;;
127;; /* Comments */
128;;
129;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
130;;
131;;
132;; /* Processing Instructions */
133;;
134;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
135;;
136;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
137;;
138;;
139;; /* CDATA Sections */
140;;
141;; CDSect ::= CDStart CData CDEnd
142;;
143;; CDStart ::= '<![CDATA['
144;;
145;; CData ::= (Char* - (Char* ']]>' Char*))
146;;
147;; CDEnd ::= ']]>'
148;;
149;;
150;; /* Prolog */
151;;
152;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
153;;
154;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
155;;
156;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
157;;
158;; Eq ::= S? '=' S?
159;;
160;; VersionNum ::= '1.0'
161;;
162;; Misc ::= Comment | PI | S
163;;
164;;
165;; /* Document Type Definition */
166;;
167;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
168;; ('[' intSubset ']' S?)? '>'
169;; [VC: Root Element Type]
170;; [WFC: External Subset]
171;;
172;; DeclSep ::= PEReference | S
173;; [WFC: PE Between Declarations]
174;;
175;; intSubset ::= (markupdecl | DeclSep)*
176;;
177;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
178;; | NotationDecl | PI | Comment
179;; [VC: Proper Declaration/PE Nesting]
180;; [WFC: PEs in Internal Subset]
181;;
182;;
183;; /* External Subset */
184;;
185;; extSubset ::= TextDecl? extSubsetDecl
186;;
187;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
188;;
189;;
190;; /* Standalone Document Declaration */
191;;
192;; SDDecl ::= S 'standalone' Eq
193;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
194;; [VC: Standalone Document Declaration]
195;;
196;;
197;; /* Element */
198;;
199;; element ::= EmptyElemTag | STag content ETag
200;; [WFC: Element Type Match]
201;; [VC: Element Valid]
202;;
203;;
204;; /* Start-tag */
205;;
206;; STag ::= '<' Name (S Attribute)* S? '>'
207;; [WFC: Unique Att Spec]
208;;
209;; Attribute ::= Name Eq AttValue
210;; [VC: Attribute Value Type]
211;; [WFC: No External Entity References]
212;; [WFC: No < in Attribute Values]
213;;
214;;
215;; /* End-tag */
216;;
217;; ETag ::= '</' Name S? '>'
218;;
219;;
220;; /* Content of Elements */
221;;
222;; content ::= CharData?
223;; ((element | Reference | CDSect | PI | Comment) CharData?)*
224;;
225;;
226;; /* Tags for Empty Elements */
227;;
228;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
229;; [WFC: Unique Att Spec]
230;;
231;;
232;; /* Element Type Declaration */
233;;
234;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
235;; [VC: Unique Element Type Declaration]
236;;
237;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
238;;
239;;
240;; /* Element-content Models */
241;;
242;; children ::= (choice | seq) ('?' | '*' | '+')?
243;;
244;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
245;;
246;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
247;; [VC: Proper Group/PE Nesting]
248;;
249;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
250;; [VC: Proper Group/PE Nesting]
251;;
252;;
253;; /* Mixed-content Declaration */
254;;
255;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
256;; | '(' S? '#PCDATA' S? ')'
257;; [VC: Proper Group/PE Nesting]
258;; [VC: No Duplicate Types]
259;;
260;;
261;; /* Attribute-list Declaration */
262;;
263;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
264;;
265;; AttDef ::= S Name S AttType S DefaultDecl
266;;
267;;
268;; /* Attribute Types */
269;;
270;; AttType ::= StringType | TokenizedType | EnumeratedType
271;;
272;; StringType ::= 'CDATA'
273;;
274;; TokenizedType ::= 'ID' [VC: ID]
275;; [VC: One ID per Element Type]
276;; [VC: ID Attribute Default]
277;; | 'IDREF' [VC: IDREF]
278;; | 'IDREFS' [VC: IDREF]
279;; | 'ENTITY' [VC: Entity Name]
280;; | 'ENTITIES' [VC: Entity Name]
281;; | 'NMTOKEN' [VC: Name Token]
282;; | 'NMTOKENS' [VC: Name Token]
283;;
284;;
285;; /* Enumerated Attribute Types */
286;;
287;; EnumeratedType ::= NotationType | Enumeration
288;;
289;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
290;; [VC: Notation Attributes]
291;; [VC: One Notation Per Element Type]
292;; [VC: No Notation on Empty Element]
293;; [VC: No Duplicate Tokens]
294;;
295;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
296;; [VC: Enumeration]
297;; [VC: No Duplicate Tokens]
298;;
299;;
300;; /* Attribute Defaults */
301;;
302;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
303;; | (('#FIXED' S)? AttValue)
304;; [VC: Required Attribute]
305;; [VC: Attribute Default Value Syntactically Correct]
306;; [WFC: No < in Attribute Values]
307;; [VC: Fixed Attribute Default]
308;;
309;;
310;; /* Conditional Section */
311;;
312;; conditionalSect ::= includeSect | ignoreSect
313;;
314;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
315;; [VC: Proper Conditional Section/PE Nesting]
316;;
317;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
318;; [VC: Proper Conditional Section/PE Nesting]
319;;
320;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
321;;
322;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
323;;
324;;
325;; /* Character Reference */
326;;
327;; CharRef ::= '&#' [0-9]+ ';'
328;; | '&#x' [0-9a-fA-F]+ ';'
329;; [WFC: Legal Character]
330;;
331;;
332;; /* Entity Reference */
333;;
334;; Reference ::= EntityRef | CharRef
335;;
336;; EntityRef ::= '&' Name ';'
337;; [WFC: Entity Declared]
338;; [VC: Entity Declared]
339;; [WFC: Parsed Entity]
340;; [WFC: No Recursion]
341;;
342;; PEReference ::= '%' Name ';'
343;; [VC: Entity Declared]
344;; [WFC: No Recursion]
345;; [WFC: In DTD]
346;;
347;;
348;; /* Entity Declaration */
349;;
350;; EntityDecl ::= GEDecl | PEDecl
351;;
352;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
353;;
354;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
355;;
356;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
357;;
358;; PEDef ::= EntityValue | ExternalID
359;;
360;;
361;; /* External Entity Declaration */
362;;
363;; ExternalID ::= 'SYSTEM' S SystemLiteral
364;; | 'PUBLIC' S PubidLiteral S SystemLiteral
365;;
366;; NDataDecl ::= S 'NDATA' S Name
367;; [VC: Notation Declared]
368;;
369;;
370;; /* Text Declaration */
371;;
372;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
373;;
374;;
375;; /* Well-Formed External Parsed Entity */
376;;
377;; extParsedEnt ::= TextDecl? content
378;;
379;;
380;; /* Encoding Declaration */
381;;
382;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
383;;
384;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
385;; /* Encoding name contains only Latin characters */
386;;
387;;
388;; /* Notation Declarations */
389;;
390;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
391;; [VC: Unique Notation Name]
392;;
393;; PublicID ::= 'PUBLIC' S PubidLiteral
394;;
395;;
396;; /* Characters */
397;;
398;; Letter ::= BaseChar | Ideographic
399;;
400;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
401;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
402;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
403;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
404;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
405;; | #x0386 | [#x0388-#x038A] | #x038C
406;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
407;; | #x03DA | #x03DC | #x03DE
408;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C]
409;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
410;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
411;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
412;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586]
413;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
414;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
415;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
416;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
417;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
418;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
419;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
420;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
421;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
422;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
423;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
424;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
425;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
426;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C]
427;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
428;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
429;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
430;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
431;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
432;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
433;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
434;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
435;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
436;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
437;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
438;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
439;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
440;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
441;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D
442;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
443;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB]
444;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
445;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
446;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
447;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107]
448;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112]
449;; | #x113C | #x113E | #x1140
450;; | #x114C | #x114E | #x1150
451;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161]
452;; | #x1163 | #x1165 | #x1167
453;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173]
454;; | #x1175 | #x119E | #x11A8
455;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8]
456;; | #x11BA | [#x11BC-#x11C2] | #x11EB
457;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B]
458;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
459;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
460;; | #x1F59 | #x1F5B | #x1F5D
461;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
462;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
463;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
464;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
465;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182]
466;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
467;; | [#xAC00-#xD7A3]
468;;
469;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
470;;
471;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
472;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
473;; | #x05BF | [#x05C1-#x05C2] | #x05C4
474;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC]
475;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
476;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
477;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954]
478;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
479;; | #x09BE | #x09BF | [#x09C0-#x09C4]
480;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
481;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C
482;; | #x0A3E | #x0A3F | [#x0A40-#x0A42]
483;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
484;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5]
485;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
486;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
487;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
488;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
489;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
490;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
491;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
492;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
493;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
494;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A]
495;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9]
496;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
497;; | #x0F35 | #x0F37 | #x0F39
498;; | #x0F3E | #x0F3F | [#x0F71-#x0F84]
499;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
500;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
501;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
502;; | #x3099 | #x309A
503;;
504;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
505;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
506;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
507;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
508;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
509;;
510;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
511;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
512;;
513;;
514;; NOTES
515;; -----
516;;
517;; At the moment, only `<!ELEMENT' generates a syntactic chart. The
518;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' declarations are syntactically
519;; checked but they don't generate a syntactic chart.
520;;
521;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An
522;; example of a `pure' dtd file is:
523;;
524;; <?xml version="1.0" encoding="UTF-8"?>
525;; <!--
526;; The main element.
527;; -->
528;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
529;; steps, splits?, joins?)>
530;; <!--
531;; An action that can be executed (id must be unique among actions for
532;; the enclosing step).
533;; Used in: actions
534;; -->
535;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
536;; post-functions?)>
537;; <!ATTLIST action
538;; id CDATA #REQUIRED
539;; name CDATA #REQUIRED
540;; >
541;;
542;;
543;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
544
545;;; Code:
546
547
548(require 'ebnf-otz)
549
550
(defvar ebnf-dtd-lex nil
  "Value left behind by the most recent call to function `ebnf-dtd-lex'.
The parser functions in this file read it right after a lexer call and
treat it as a string (for example, to compare or match a name or a
quoted value).")
553
554\f
555;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
556;; Syntactic analyzer
557
558
559;;; document ::= prolog element Misc*
560;;; /* Note that *only* the prolog will be parsed */
561
(defun ebnf-dtd-parser (start)
  "DTD parser.

Parse the text between buffer position START and `ebnf-limit' and return
the list of rules built from it, most recently parsed rule first.  Only
the prolog of the document is parsed.  Point is moved to START for
parsing and put back where it was before returning normally.

Signal an error when the input is empty, or when the declarations do not
end with the end marker announced by `ebnf-dtd-prolog'."
  (let ((total  (+ (- ebnf-limit start) 1))
        (bias   (1- start))
        (origin (point))
        rule-list token rule the-end)
    (goto-char start)
    (setq token (ebnf-dtd-lex))
    (and (eq token 'end-of-input)
         (error "Empty DTD file"))
    ;; `ebnf-dtd-prolog' returns (TOKEN . END-MARKER).
    (setq token (ebnf-dtd-prolog token))
    (unless (eq (car token) 'end-prolog)
      (setq the-end (cdr token)
            token   (car token))
      (while (not (eq token the-end))
        ;; `ebnf-dtd-intsubset' hands both end markers back without
        ;; reading any further input.  Meeting the marker that is *not*
        ;; THE-END would therefore repeat forever; report it instead.
        (cond ((eq token 'end-of-input)
               (error "Unexpected end of input in DOCTYPE internal subset"))
              ((eq token 'end-subset)
               (error "Unexpected end of DOCTYPE internal subset")))
        (ebnf-message-float
         "Parsing...%s%%"
         (/ (* (- (point) bias) 100.0) total))
        ;; `ebnf-dtd-intsubset' returns (NEXT-TOKEN . RULE-OR-NIL).
        (setq token (ebnf-dtd-intsubset token)
              rule  (cdr token)
              token (car token))
        (or (null rule)
            (ebnf-add-empty-rule-list rule)
            (setq rule-list (cons rule rule-list))))
      ;; A DOCTYPE internal subset must be followed by the closing `>'.
      (or (eq the-end 'end-of-input)
          (eq (ebnf-dtd-lex) 'end-decl)
          (error "Missing end of DOCTYPE"))
      ;; adjust message, 'cause *only* prolog will be parsed
      (ebnf-message-float "Parsing...%s%%" 100.0))
    (goto-char origin)
    rule-list))
593
594
595;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
596;;;
597;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
598;;;
599;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
600;;;
601;;; Eq ::= S? '=' S?
602;;;
603;;; VersionNum ::= '1.0'
604;;;
605;;; Misc ::= Comment | PI | S
606;;;
607;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
608;;;
609;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
610;;; /* Encoding name contains only Latin characters */
611;;;
612;;; SDDecl ::= S 'standalone' Eq
613;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
614;;;
615;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
616;;; ('[' intSubset ']' S?)? '>'
617
618
(defun ebnf-dtd-prolog (token)
  "Parse the prolog that starts with TOKEN, the first token of the input.

Consume the optional XML declaration and any processing instructions
that follow, then look at the next token.  Return a cons
\(TOKEN . END-MARKER):

- after a DOCTYPE with an internal subset, TOKEN is the first token of
  the subset and END-MARKER is `end-subset';
- when the input starts directly with an ELEMENT, ATTLIST, ENTITY or
  NOTATION declaration, TOKEN is that declaration's token and END-MARKER
  is `end-of-input';
- otherwise TOKEN is `end-prolog', meaning there is nothing to parse.

Signal an error if the XML declaration is malformed or the DOCTYPE name
is missing."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; The patterns are anchored with \\` and \\' (whole string) because
    ;; ^ and $ also match at line boundaries inside the value.
    ;; version = "1.0"
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "\\`1\\.0\\'" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "\\`[A-Za-z][-A-Za-z0-9._]*\\'" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; In Emacs regexps alternation is spelled \\|, a bare | is a literal.
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "\\`\\(?:yes\\|no\\)\\'" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; Step past the XML declaration.  This is done only here, so that
    ;; when there is no XML declaration the first token is not thrown
    ;; away.
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; declarations not wrapped in a DOCTYPE
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))
   ))
650
651
(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse a mandatory attribute whose token TOKEN must be ATTR.
Signal an error naming ATTR-NAME when TOKEN is not ATTR.  Otherwise hand
TOKEN, ATTR, MATCH and ATTR-NAME over to `ebnf-dtd-attribute-optional'
and return its result."
  (unless (eq token attr)
    (error "%s attribute is missing" attr-name))
  (ebnf-dtd-attribute-optional token attr match attr-name))
656
657
(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse an optional attribute.
If TOKEN is ATTR, the following tokens must be `equal' and `string', and
the string value (in variable `ebnf-dtd-lex') must match the regexp
MATCH; the token after the value is then returned.  If TOKEN is not
ATTR, nothing is read and TOKEN is returned unchanged.

ATTR-NAME is the attribute name used in the error message signaled when
the value is missing or does not match."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; No \"XML\" prefix here: the names passed in by `ebnf-dtd-prolog'
        ;; already start with it, and `ebnf-dtd-attribute' formats its
        ;; message the same way.
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)
666
667
668;;; ExternalID ::= 'SYSTEM' S SystemLiteral
669;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
670
671
(defun ebnf-dtd-externalid (&optional token)
  "Parse an external identifier and return the token that follows it.
When TOKEN is given, it is the token to examine and it must be `system'
or `public'; anything else signals an error.  When TOKEN is omitted, the
next token is read from the lexer, and if it is neither `system' nor
`public' it is returned as is."
  (let* ((required (and token t))
         (keyword  (or token (ebnf-dtd-lex))))
    (if (memq keyword '(system public))
        (progn
          ;; PUBLIC carries a public identifier before the system one.
          (when (eq keyword 'public)
            (ebnf-dtd-pubidliteral))
          (ebnf-dtd-systemliteral))
      (when required
        (error "Missing `SYSTEM' or `PUBLIC' in external id"))
      keyword)))
684
685
686;;; SystemLiteral ::= ('"' [^"]* '"')
687;;; | ("'" [^']* "'")
688
689
(defun ebnf-dtd-systemliteral ()
  "Read a system identifier and return the token that follows it.
Signal an error unless the next token is `string'."
  (unless (eq (ebnf-dtd-lex) 'string)
    (error "System identifier is invalid"))
  (ebnf-dtd-lex))
694
695
696;;; PubidLiteral ::= '"' PubidChar* '"'
697;;; | "'" (PubidChar - "'")* "'"
698;;;
699;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
700
701
(defun ebnf-dtd-pubidliteral ()
  "Read a public identifier and check its characters.
The next token must be `string' and its value (in variable
`ebnf-dtd-lex') must consist only of PubidChar characters; otherwise an
error is signaled.  Return non-nil on success."
  (or (and (eq (ebnf-dtd-lex) 'string)
           ;; The class admits newline and carriage return, so the match
           ;; is anchored to the whole string with \\` and \\'.  With ^
           ;; and $ a single valid line would be enough to accept a
           ;; value containing invalid characters on another line.
           (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                         ebnf-dtd-lex))
      (error "Public identifier is invalid")))
707
708
709;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
710;;;
711;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
712
713
(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions starting at TOKEN.
While TOKEN is `begin-pi', discard every token up to and including the
matching `end-pi' and read the next token.  Return the first token that
is not `begin-pi' (TOKEN itself when it is not one).

Signal an error if a processing instruction is named `xml' (in any
letter case) or if the input ends before its `end-pi'."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; Skip the instruction body.  Stop on `end-of-input' too, otherwise
    ;; an unterminated instruction would keep this loop running forever.
    (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
      (and (eq token 'end-of-input)
           (error "Missing end of processing instruction")))
    (setq token (ebnf-dtd-lex)))
  token)
721
722
723;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
724;;; ('[' intSubset ']' S?)? '>'
725;;;
726;;; intSubset ::= (markupdecl | DeclSep)*
727;;;
728;;; DeclSep ::= PEReference | S
729;;;
730;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
731;;; | NotationDecl | PI | Comment
732
733
(defun ebnf-dtd-intsubset (token)
  "Parse one item of the DOCTYPE internal subset starting at TOKEN.
Leading processing instructions are skipped first.  Return a cons
\(NEXT-TOKEN . RULE), where RULE is the production built for an ELEMENT
declaration as returned by `ebnf-dtd-elementdecl', and nil for a
parameter entity reference and for the end markers `end-subset' and
`end-of-input' (which are returned as NEXT-TOKEN without reading more
input).  ATTLIST, ENTITY and NOTATION declarations return whatever their
own parsers return.  Any other token signals an error."
  ;; PI - Processing Instruction
  (when (eq token 'begin-pi)
    (setq token (ebnf-dtd-pi token)))
  (cond
   ((eq token 'element-decl)            ; rule
    (ebnf-dtd-elementdecl))
   ((eq token 'attlist-decl)            ; annotation
    (ebnf-dtd-attlistdecl))
   ((eq token 'entity-decl)             ; annotation
    (ebnf-dtd-entitydecl))
   ((eq token 'notation-decl)           ; annotation
    (ebnf-dtd-notationdecl))
   ((eq token 'pe-ref)                  ; annotation
    (list (ebnf-dtd-lex)))
   ((memq token '(end-subset end-of-input))
    (list token))
   (t
    (error "Invalid DOCTYPE element"))))
754
755
756;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
757;;;
758;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
759;;;
760;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
761;;; | '(' S? '#PCDATA' S? ')'
762;;;
763;;; children ::= (choice | seq) ('?' | '*' | '+')?
764;;;
765;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
766;;;
767;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
768;;;
769;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
770
771
(defun ebnf-dtd-elementdecl ()
  "Parse an ELEMENT declaration and build its production.
Called once the `element-decl' token has been read.  Return a cons
\(NEXT-TOKEN . PRODUCTION), where PRODUCTION is made by
`ebnf-make-production' from the element name, its content specification
and the value `ebnf-action' had on entry.  `ebnf-action' is reset to nil.

Signal an error on a missing name, invalid content or missing `>'."
  (let ((saved-action ebnf-action)
        element-name spec)
    (setq ebnf-action nil)
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ELEMENT name"))
    ;; Grab the name text before the next lexer call replaces it.
    (setq element-name ebnf-dtd-lex)
    (let ((kind (ebnf-dtd-lex)))
      ;; SPEC is (TOKEN-AFTER-CONTENT . CONTENT-NODE).
      (setq spec
            (cond
             ((eq kind 'begin-group)
              (let ((content-token (ebnf-dtd-lex)))
                (if (eq content-token 'pcdata)
                    (ebnf-dtd-mixed)
                  (ebnf-dtd-children content-token))))
             ((memq kind '(empty any))
              (let ((keyword (ebnf-make-terminal ebnf-dtd-lex)))
                (cons (ebnf-dtd-lex) keyword)))
             (t
              (error "Invalid ELEMENT content")))))
    (unless (eq (car spec) 'end-decl)
      (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production element-name)
    (cons (ebnf-dtd-lex)
          (ebnf-make-production element-name (cdr spec) saved-action))))
796
797
798;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
799;;; | '(' S? '#PCDATA' S? ')'
800
801
(defun ebnf-dtd-mixed ()
  "Parse the rest of a Mixed content model, after `#PCDATA' was read.
Variable `ebnf-dtd-lex' still holds the `#PCDATA' text on entry.  Return
the result of `ebnf-token-alternative' for the collected alternatives;
its car is the token that follows the content model.

Signal an error on an invalid name, a missing `)', or a missing `*'
after a group that lists element names."
  (let* ((alt             (list (ebnf-make-terminal ebnf-dtd-lex)))
         (token           (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; Wrap each name in a terminal node, as is done for `#PCDATA'
      ;; above and for names in `ebnf-dtd-cp', so that the list holds
      ;; nodes only (it previously received the raw name string).
      (setq alt   (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond
     ;; `)*' - mandatory after alternatives, and also allowed by the
     ;; Mixed production when the name list is empty: `(#PCDATA)*'.
     ((eq token 'zero-or-more)
      (setq token (ebnf-dtd-lex)))
     (has-alternative
      (error "Missing `*'")))
    (ebnf-token-alternative alt (cons token nil))))
817
818
819;;; children ::= (choice | seq) ('?' | '*' | '+')?
820
821
(defun ebnf-dtd-children (token)
  "Parse a `children' content model whose first inner token is TOKEN.
The group is parsed by `ebnf-dtd-choice-seq' and an optional `?', `*' or
`+' after it is applied by `ebnf-dtd-operators', whose result is
returned."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
824
825
826;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
827;;;
828;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
829
830
(defun ebnf-dtd-choice-seq (token)
  "Parse the inside of a parenthesized choice or sequence.
TOKEN is the first token after the opening parenthesis.  Content
particles are read with `ebnf-dtd-cp' until the group stops being
continued by `|' (choice) or `,' (sequence).  Return the node for the
group: an alternative, a sequence, or the single particle itself.

Signal an error if the group is not closed by `)'."
  (setq token (ebnf-dtd-cp token))
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' yields a (TOKEN . NODE) pair - this is
      ;; how `ebnf-dtd-elementdecl' consumes the value coming from
      ;; `ebnf-dtd-mixed'.  Keep only NODE, so that this branch returns a
      ;; bare node like the two branches below.
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))
853
854
855;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
856
857
(defun ebnf-dtd-cp (token)
  "Parse one content particle starting at TOKEN.
A `name' token becomes a terminal made from the text in variable
`ebnf-dtd-lex'; a `begin-group' token starts a nested choice or
sequence.  Any other token signals an error.  The particle is then
passed to `ebnf-dtd-operators', whose result is returned."
  (let ((particle
         (cond ((eq token 'begin-group)
                (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
               ((eq token 'name)
                (ebnf-make-terminal ebnf-dtd-lex))
               (t
                (error "Invalid element")))))
    (ebnf-dtd-operators particle)))
866
867
868;;; elm ('?' | '*' | '+')?
869
870
(defun ebnf-dtd-operators (elm)
  "Apply an optional `?', `*' or `+' operator to ELM.
Read the next token.  If it is one of those operators, read one more
token and return it consed with ELM wrapped accordingly; otherwise
return the token just read consed with ELM unchanged."
  (let ((token (ebnf-dtd-lex)))
    (if (not (memq token '(optional zero-or-more one-or-more)))
        ;; only element
        (cons token elm)
      ;; The token after the operator is read before ELM is wrapped.
      (let ((following (ebnf-dtd-lex)))
        (cons following
              (cond ((eq token 'optional)     ; ? - optional
                     (ebnf-token-optional elm))
                    ((eq token 'zero-or-more) ; * - zero or more
                     (ebnf-make-zero-or-more elm))
                    (t                        ; + - one or more
                     (ebnf-make-one-or-more elm))))))))
882
883
884;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
885;;;
886;;; AttDef ::= S Name S AttType S DefaultDecl
887;;;
888;;; AttType ::= StringType | TokenizedType | EnumeratedType
889;;;
890;;; StringType ::= 'CDATA'
891;;;
892;;; TokenizedType ::= 'ID'
893;;; | 'IDREF'
894;;; | 'IDREFS'
895;;; | 'ENTITY'
896;;; | 'ENTITIES'
897;;; | 'NMTOKEN'
898;;; | 'NMTOKENS'
899;;;
900;;; EnumeratedType ::= NotationType | Enumeration
901;;;
902;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
903;;;
904;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
905;;;
906;;; DefaultDecl ::= '#REQUIRED'
907;;; | '#IMPLIED'
908;;; | (('#FIXED' S)? AttValue)
909;;;
910;;;
911;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
912;;; | "'" ([^<&'] | Reference)* "'"
913;;;
914;;; Reference ::= EntityRef | CharRef
915;;;
916;;; EntityRef ::= '&' Name ';'
917;;;
918;;; CharRef ::= '&#' [0-9]+ ';'
919;;; | '&#x' [0-9a-fA-F]+ ';'
920
921;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
922
923
(defun ebnf-dtd-attlistdecl ()
  "Check the syntax of an ATTLIST declaration.
Called once the `attlist-decl' token has been read.  Each attribute
definition is a name, a type and a default declaration.  Return a cons
\(NEXT-TOKEN . nil); no rule is produced.

Signal an error on an invalid element name, attribute type or default
value, or when the closing `>' is missing."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
  (let (token)
    (while (eq (setq token (ebnf-dtd-lex)) 'name)
      ;; type
      (setq token (ebnf-dtd-lex))
      (cond
       ((eq token 'notation)
        (or (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
        (ebnf-dtd-namelist "NOTATION" '(name)))
       ((eq token 'begin-group)
        (ebnf-dtd-namelist "enumeration" '(name name-char)))
       ((memq token
              '(cdata id idref idrefs entity entities nmtoken nmtokens)))
       (t
        (error "Invalid type in ATTLIST declaration")))
      ;; default value
      (setq token (ebnf-dtd-lex))
      (unless (memq token '(required implied))
        (and (eq token 'fixed)
             (setq token (ebnf-dtd-lex)))
        (or (and (eq token 'string)
                 ;; AttValue.  Anchored to the whole string with \\` and
                 ;; \\': [^<&] also matches newlines, and ^ and $ match
                 ;; at line boundaries, so one valid line would be enough
                 ;; to accept a multi-line value.
                 (string-match
                  "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"
                  ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
    (cons (ebnf-dtd-lex) nil)))
955
956
(defun ebnf-dtd-namelist (type name-list)
  "Check a `|'-separated list of names closed by `)'.
Each item must be a token that is a member of NAME-LIST.  TYPE is the
attribute type name inserted in the error messages.  Return non-nil
when the list is well formed, otherwise signal an error."
  (let ((separator 'alternative))
    ;; At least one name is required; go on while `|' follows a name.
    (while (eq separator 'alternative)
      (unless (memq (ebnf-dtd-lex) name-list)
        (error "Invalid name in %s type in ATTLIST declaration" type))
      (setq separator (ebnf-dtd-lex)))
    (or (eq separator 'end-group)
        (error "Missing `)' in %s type in ATTLIST declaration" type))))
965
966
967;;; EntityDecl ::= GEDecl | PEDecl
968;;;
969;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
970;;;
971;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
972;;;
973;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
974;;;
975;;; PEDef ::= EntityValue | ExternalID
976;;;
977;;; NDataDecl ::= S 'NDATA' S Name
978;;;
979;;;
980;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
981;;; | "'" ([^%&'] | PEReference | Reference)* "'"
982;;;
983;;; PEReference ::= '%' Name ';'
984;;;
985;;; Reference ::= EntityRef | CharRef
986;;;
987;;; EntityRef ::= '&' Name ';'
988;;;
989;;; CharRef ::= '&#' [0-9]+ ';'
990;;; | '&#x' [0-9a-fA-F]+ ';'
991
992;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
993
994
(defun ebnf-dtd-entitydecl ()
  "Check the syntax of an ENTITY declaration.
Called once the `entity-decl' token has been read.  A leading `percent'
token marks a parameter entity declaration.  The definition is either a
string value or an external identifier; for a general entity, the
external identifier may be followed by an `ndata' token and a name.
Return a cons (NEXT-TOKEN . nil); no rule is produced.

Signal an error on an invalid name or definition, or when the closing
`>' is missing."
  (let* ((token  (ebnf-dtd-lex))
         (pedecl (eq token 'percent)))
    (and pedecl
         (setq token (ebnf-dtd-lex)))
    (or (eq token 'name)
        (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (if (eq token 'string)
        ;; EntityValue.  Anchored to the whole string with \\` and \\':
        ;; [^%&] also matches newlines, and ^ and $ match at line
        ;; boundaries, so one valid line would be enough to accept a
        ;; multi-line value.
        (if (string-match
             "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"
             ebnf-dtd-lex)
            (setq token (ebnf-dtd-lex))
          (error "Invalid ENTITY definition"))
      ;; Passing TOKEN makes the external identifier mandatory.
      (setq token (ebnf-dtd-externalid token))
      (when (and (not pedecl) (eq token 'ndata))
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
    (cons (ebnf-dtd-lex) nil)))
1017
1018
1019;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1020;;;
1021;;; PublicID ::= 'PUBLIC' S PubidLiteral
1022
1023
(defun ebnf-dtd-notationdecl ()
  "Check the syntax of a NOTATION declaration.
Called once the `notation-decl' token has been read.  The declaration
is a name followed by an external or public identifier and the closing
`>'.  Return a cons (NEXT-TOKEN . nil); no rule is produced."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  (let ((closing (ebnf-dtd-externalid-or-publicid)))
    (unless (eq closing 'end-decl)
      (error "Missing `>' in end of NOTATION")))
  (list (ebnf-dtd-lex)))
1030
1031
1032;;; ExternalID ::= 'SYSTEM' S SystemLiteral
1033;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1034;;;
1035;;; PublicID ::= 'PUBLIC' S PubidLiteral
1036
1037
(defun ebnf-dtd-externalid-or-publicid ()
  "Parse an ExternalID or a PublicID.

After `SYSTEM', return whatever `ebnf-dtd-systemliteral' returns.
After `PUBLIC', call `ebnf-dtd-pubidliteral', then accept an
optional string (the system literal) and return the first token
that is not part of the identifier.  Signal an error when neither
keyword is found."
  (let ((keyword (ebnf-dtd-lex)))
    (cond
     ((eq keyword 'system)
      (ebnf-dtd-systemliteral))
     ((eq keyword 'public)
      (ebnf-dtd-pubidliteral)
      (let ((next (ebnf-dtd-lex)))
        ;; A string here is the optional SystemLiteral: step over it.
        (if (eq next 'string)
            (ebnf-dtd-lex)
          next)))
     (t
      (error "Missing `SYSTEM' or `PUBLIC'")))))
1049
1050\f
1051;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1052;; Lexical analyzer
1053
1054
(defconst ebnf-dtd-token-table
  (make-vector 256 'error)
  "Vector used to map characters to a lexical token.
It has one slot for each character code from 0 to 255, and every
slot initially holds the symbol `error'.")
1057
1058
(defun ebnf-dtd-initialize ()
  "Initialize EBNF token table."
  ;; Slots that are not assigned here (control characters and 8-bit
  ;; control characters among them) keep whatever value they had.
  ;;
  ;; Inclusive character ranges: digits may continue a name, ASCII
  ;; letters and the 8-bit characters \240-\377 may start one.
  (dolist (range '((?0    ?9    name-char)
                   (?A    ?Z    name)
                   (?a    ?z    name)
                   (?\240 ?\377 name)))
    (let ((code  (nth 0 range))
          (last  (nth 1 range))
          (class (nth 2 range)))
      (while (<= code last)
        (aset ebnf-dtd-token-table code class)
        (setq code (1+ code)))))
  ;; Individual characters, set after the ranges.
  (dolist (entry '(;; name characters
                   (?_  . name)
                   (?:  . name)
                   (?.  . name-char)
                   (?-  . name-char)
                   ;; space characters
                   (?\n . space)        ; [NL] linefeed
                   (?\r . space)        ; [CR] carriage return
                   (?\t . space)        ; [HT] horizontal tab
                   (?\s . space)        ; [SP] space
                   ;; other lexical characters
                   (?=  . equal)
                   (?,  . comma)
                   (?*  . zero-or-more)
                   (?+  . one-or-more)
                   (?|  . alternative)
                   (?%  . percent)
                   (?&  . ampersand)
                   (?#  . hash)
                   (?\? . interrogation)
                   (?\" . double-quote)
                   (?\' . single-quote)
                   (?<  . less-than)
                   (?>  . end-decl)
                   (?\( . begin-group)
                   (?\) . end-group)
                   (?\[ . begin-subset)))
    (aset ebnf-dtd-token-table (car entry) (cdr entry)))
  ;; Kept as the final form so the function's value stays `end-subset'.
  (aset ebnf-dtd-token-table ?\] 'end-subset))
1110
1111
1112;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
  "Set of characters scanned as part of a DTD name.
It covers `-', `.', `_', `:', the digits and the ASCII letters,
extended by `ebnf-range-regexp' with the range \\240-\\377.  The
lexer passes it to `ebnf-buffer-substring'.")
1115
1116
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping the keyword that follows `<!' to its token symbol.
`ebnf-dtd-lex' looks the keyword up here and signals an error for
any keyword that is missing.")
1123
1124
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required))
  "Alist mapping a `#' keyword to its token symbol.
`ebnf-dtd-lex' looks the keyword up here and signals an error for
any keyword that is missing.")
1130
1131
(defconst ebnf-dtd-name-alist
  '(("ANY"        . any)
    ("CDATA"      . cdata)
    ("EMPTY"      . empty)
    ("ENTITIES"   . entities)
    ("ENTITY"     . entity)
    ("ID"         . id)
    ("IDREF"      . idref)
    ("IDREFS"     . idrefs)
    ("NDATA"      . ndata)
    ("NMTOKEN"    . nmtoken)
    ("NMTOKENS"   . nmtokens)
    ("NOTATION"   . notation)
    ("PUBLIC"     . public)
    ("SYSTEM"     . system)
    ("encoding"   . encoding-attr)
    ("standalone" . standalone-attr)
    ("version"    . version-attr))
  "Alist mapping a reserved name to its token symbol.
When `ebnf-dtd-lex' scans a name that is listed here it returns
the associated symbol in place of the generic name token.")
1150
1151
(defun ebnf-dtd-lex ()
  "Read the next DTD token at point and return the symbol naming it.

Whitespace and `<!-- ... -->' comments in front of the token are
skipped.  Return `end-of-input' when point is at or beyond
`ebnf-limit'.  Tokens that carry text (processing instruction
targets, names, `#' keywords, strings and references) store that
text in the variable `ebnf-dtd-lex'.  Signal an error on an
invalid character or on a malformed token.

See documentation for variable `ebnf-dtd-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let ((class nil)
          (skipping t))
      ;; Step over blanks and comments.  When the loop ends, CLASS is
      ;; the table entry of the character the scan stopped on.
      (while skipping
        (let ((ch (following-char)))
          (if (> ch 255)
              ;; No slot in the token table for this character.
              (setq class 'error
                    skipping nil)
            (setq class (aref ebnf-dtd-token-table ch)
                  skipping
                  (cond
                   ((eq class 'space)
                    (skip-chars-forward " \n\r\t" ebnf-limit)
                    (< (point) ebnf-limit))
                   ((and (eq class 'less-than)
                         (looking-at "<!--"))
                    (ebnf-dtd-skip-comment))
                   (t nil))))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; character without lexical meaning
       ((eq class 'error)
        (error "Invalid character"))
       ;; beginning of declaration:
       ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
       ((eq class 'less-than)
        (forward-char)
        (let ((next (following-char)))
          (cond
           ((= next ?\?)                ; <?
            (forward-char)
            (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
            'begin-pi)
           ((= next ?!)                 ; <!
            (forward-char)
            (let* ((keyword (ebnf-buffer-substring ebnf-dtd-name-chars))
                   (decl-token (cdr (assoc keyword ebnf-dtd-decl-alist))))
              (unless decl-token
                (error "Invalid declaration name `%s'" keyword))
              decl-token))
           (t                           ; <x
            (error "Invalid declaration `<%c'" next)))))
       ;; name, namechar: reserved names yield their own token
       ((memq class '(name name-char))
        (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist))
            class))
       ;; ?, ?>
       ((eq class 'interrogation)
        (forward-char)
        (cond ((= (following-char) ?>)
               (forward-char)
               'end-pi)
              (t
               'optional)))
       ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
       ((eq class 'hash)
        (forward-char)
        (setq ebnf-dtd-lex
              (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars)))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist))
            (error "Invalid element `%s'" ebnf-dtd-lex)))
       ;; "string", 'string'
       ((memq class '(double-quote single-quote))
        (setq ebnf-dtd-lex
              (ebnf-dtd-string (if (eq class 'double-quote) ?\" ?\')))
        'string)
       ;; %, %name;
       ((eq class 'percent)
        (forward-char)
        (cond ((looking-at "[ \n\r\t]")
               'percent)
              (t
               (setq ebnf-dtd-lex (ebnf-dtd-name-ref "%"))
               'pe-ref)))
       ;; &#...;, &#x...;, &name;
       ((eq class 'ampersand)
        (forward-char)
        (cond
         ((/= (following-char) ?#)
          ;; &name;
          (setq ebnf-dtd-lex (ebnf-dtd-name-ref "&"))
          'entity-ref)
         (t
          ;; &#...;, &#x...;
          (forward-char)
          (setq ebnf-dtd-lex
                (if (= (following-char) ?x)
                    (progn
                      (forward-char)
                      (ebnf-dtd-char-ref "&#x" "0-9a-fA-F"))
                  (ebnf-dtd-char-ref "&#" "0-9")))
          'char-ref)))
       ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
       (t
        (forward-char)
        class)))))
1253
1254
(defun ebnf-dtd-name-ref (start)
  "Scan a name reference whose introducer START was already consumed.
This is `ebnf-dtd-char-ref' applied to START with the name
characters in `ebnf-dtd-name-chars'; its value is returned
unchanged."
  (let ((name-set ebnf-dtd-name-chars))
    (ebnf-dtd-char-ref start name-set)))
1257
1258
(defun ebnf-dtd-char-ref (start chars)
  "Scan the body of a reference at point and return the full reference.

START is the introducer that the caller already consumed (such
as \"&\", \"%\", \"&#\" or \"&#x\").  CHARS is the set of characters
allowed in the body, handed to `ebnf-buffer-substring'.  The body
must contain at least one character and must be followed by `;',
which is consumed too.

Return the string START + body + \";\".  Signal an error when the
body is empty or the terminating `;' is missing."
  (let ((body (ebnf-buffer-substring chars)))
    ;; The grammar requires a Name (or at least one digit) between
    ;; the introducer and `;', so things like `&;' or `&#x;' are
    ;; rejected as well as a missing terminator.
    (when (or (equal body "")
              (/= (following-char) ?\;))
      (error "Invalid element `%s%s%c'" start body (following-char)))
    (forward-char)
    (format "%s%s;" start body)))
1265
1266
1267;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Set of characters allowed inside a double-quoted string.
It covers tab and the printable ASCII characters other than the
double quote, extended by `ebnf-range-regexp' with the range
\\240-\\377.  Used by `ebnf-dtd-string' with `skip-chars-forward'.")

(defconst ebnf-dtd-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Set of characters allowed inside a single-quoted string.
It covers tab and the printable ASCII characters other than the
single quote, extended by `ebnf-range-regexp' with the range
\\240-\\377.  Used by `ebnf-dtd-string' with `skip-chars-forward'.")
1272
1273
(defun ebnf-dtd-string (delim)
  "Scan a quoted string at point and return its contents.

Point must be on the opening delimiter DELIM, which is either the
double quote or the single quote character.  Point is left after
the closing delimiter.  The returned text carries no text
properties and excludes both delimiters.  Signal an error when
the scan does not end on DELIM."
  ;; Step over the opening delimiter.
  (forward-char)
  (let ((start (point))
        (allowed (if (= delim ?\")
                     ebnf-dtd-double-string-chars
                   ebnf-dtd-single-string-chars))
        end)
    (skip-chars-forward allowed ebnf-limit)
    (unless (= (following-char) delim)
      (error "Missing string delimiter `%c'" delim))
    (setq end (point))
    ;; Step over the closing delimiter.
    (forward-char)
    (buffer-substring-no-properties start end)))
1289
1290
1291;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-dtd-comment-chars
  (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
  "Negated set of characters for scanning the text of a comment.
The scan stops at `-' and at the control characters other than
tab, linefeed and carriage return; `ebnf-range-regexp' adds the
range \\177-\\237 to the excluded characters.  Used by
`ebnf-dtd-skip-comment' with `skip-chars-forward'.")

(defconst ebnf-dtd-filename-chars
  (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
  "Negated set of characters for scanning a file name in a comment.
The scan stops at `-' and at every control character \\000-\\037;
`ebnf-range-regexp' adds the range \\177-\\237 to the excluded
characters.  Used by `ebnf-dtd-eps-filename'.")
1296
1297
(defun ebnf-dtd-skip-comment ()
  "Skip the comment that starts at point; point must be at `<!--'.

The first character after `<!--' selects an action.  When
`ebnf-eps-executing' is non-nil, `[' opens an EPS file, `]' closes
one, `H' sets the EPS header and `F' sets the EPS footer, each
taking the text read by `ebnf-dtd-eps-filename'.  Otherwise
`ebnf-action' is set from `ebnf-comment-table'.

Return t when the closing `-->' was found and skipped, or nil
when `ebnf-limit' was reached first.  Signal an error when the
comment contains a character that is not allowed in it."
  (forward-char 4)                      ; <!--
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
   ;; EPS header
   ((and ebnf-eps-executing (= (following-char) ?H))
    (ebnf-eps-header-comment (ebnf-dtd-eps-filename)))
   ;; EPS footer
   ((and ebnf-eps-executing (= (following-char) ?F))
    (ebnf-eps-footer-comment (ebnf-dtd-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char)))))
  ;; Scan the comment text.  The scan stops on a `-' or on a
  ;; character that is not allowed in a comment.  Only a `-' that
  ;; does not start `-->' may be stepped over; on any other stopping
  ;; character the loop must end, otherwise nothing would advance
  ;; point and the loop would never terminate.
  (while (progn
           (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit)
           (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->"))))
    (skip-chars-forward "-" ebnf-limit))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((looking-at "-->")
         (forward-char 3)
         t)
        (t
         (error "Invalid character"))))
1331
1332
(defun ebnf-dtd-eps-filename ()
  "Read the text that follows an action character inside a comment.

Point must be on the action character, which is skipped.  The
text is collected up to, but not including, the closing `-->',
the first character excluded by `ebnf-dtd-filename-chars' other
than `-', or `ebnf-limit'.  Runs of `-' that do not start `-->'
are part of the text.  Return the collected string."
  (forward-char)
  (let ((pieces (list (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    ;; The scan may also have stopped on \n, \t or \r, so only carry
    ;; on when it stopped on a dash that is not the start of `-->'.
    (while (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->")))
      (push (ebnf-buffer-substring "-") pieces)
      (push (ebnf-buffer-substring ebnf-dtd-filename-chars) pieces))
    (apply #'concat (nreverse pieces))))
1345
1346\f
1347;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1348
1349
1350(provide 'ebnf-dtd)
1351
cbee283d 1352;; arch-tag: c21bb640-135f-4afa-8712-fa11d86301c4
728df3d9 1353;;; ebnf-dtd.el ends here