Sync to HEAD
[bpt/emacs.git] / lisp / progmodes / ebnf-dtd.el
1 ;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
2
3 ;; Copyright (C) 2004 Free Software Foundation, Inc.
4
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Time-stamp: <2004/04/04 21:50:16 vinicius>
8 ;; Keywords: wp, ebnf, PostScript
9 ;; Version: 1.0
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
27
28 ;;; Commentary:
29
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;;
32 ;;
33 ;; This is part of ebnf2ps package.
34 ;;
35 ;; This package defines a parser for DTD (Document Type Definition for XML).
36 ;;
37 ;; See ebnf2ps.el for documentation.
38 ;;
39 ;;
40 ;; DTD Syntax
41 ;; ----------
42 ;;
43 ;; See the URLs:
44 ;; `http://www.w3.org/TR/2004/REC-xml-20040204/'
45 ;; (Extensible Markup Language (XML) 1.0 (Third Edition))
46 ;; `http://www.w3.org/TR/html40/'
47 ;; (HTML 4.01 Specification)
48 ;; `http://www.w3.org/TR/NOTE-html-970421'
49 ;; (HTML DTD with support for Style Sheets)
50 ;;
51 ;;
52 ;; /* Document */
53 ;;
54 ;; document ::= prolog element Misc*
55 ;; /* Note that *only* the prolog will be parsed */
56 ;;
57 ;;
58 ;; /* Characters */
59 ;;
60 ;; Char ::= #x9 | #xA | #xD
61 ;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
62 ;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
63 ;;
64 ;; /* NOTE:
65 ;;
66 ;; Document authors are encouraged to avoid "compatibility characters", as
67 ;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
68 ;; [Unicode3]). The characters defined in the following ranges are also
69 ;; discouraged. They are either control characters or permanently undefined
70 ;; Unicode characters:
71 ;;
72 ;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF],
73 ;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
74 ;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
75 ;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
76 ;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
77 ;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
78 ;; [#x10FFFE-#x10FFFF]. */
79 ;;
80 ;;
81 ;; /* White Space */
82 ;;
83 ;; S ::= (#x20 | #x9 | #xD | #xA)+
84 ;;
85 ;;
86 ;; /* Names and Tokens */
87 ;;
88 ;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
89 ;; | CombiningChar | Extender
90 ;;
91 ;; Name ::= (Letter | '_' | ':') (NameChar)*
92 ;;
93 ;; Names ::= Name (#x20 Name)*
94 ;;
95 ;; Nmtoken ::= (NameChar)+
96 ;;
97 ;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
98 ;;
99 ;;
100 ;; /* Literals */
101 ;;
102 ;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
103 ;; | "'" ([^%&'] | PEReference | Reference)* "'"
104 ;;
105 ;; AttValue ::= '"' ([^<&"] | Reference)* '"'
106 ;; | "'" ([^<&'] | Reference)* "'"
107 ;;
108 ;; SystemLiteral ::= ('"' [^"]* '"')
109 ;; | ("'" [^']* "'")
110 ;;
111 ;; PubidLiteral ::= '"' PubidChar* '"'
112 ;; | "'" (PubidChar - "'")* "'"
113 ;;
114 ;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
115 ;;
116 ;; /* NOTE:
117 ;;
118 ;; Although the EntityValue production allows the definition of a general
119 ;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY
120 ;; mylt "<">), it is strongly advised to avoid this practice since any
121 ;; reference to that entity will cause a well-formedness error. */
122 ;;
123 ;;
124 ;; /* Character Data */
125 ;;
126 ;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
127 ;;
128 ;;
129 ;; /* Comments */
130 ;;
131 ;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
132 ;;
133 ;;
134 ;; /* Processing Instructions */
135 ;;
136 ;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
137 ;;
138 ;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
139 ;;
140 ;;
141 ;; /* CDATA Sections */
142 ;;
143 ;; CDSect ::= CDStart CData CDEnd
144 ;;
145 ;; CDStart ::= '<![CDATA['
146 ;;
147 ;; CData ::= (Char* - (Char* ']]>' Char*))
148 ;;
149 ;; CDEnd ::= ']]>'
150 ;;
151 ;;
152 ;; /* Prolog */
153 ;;
154 ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
155 ;;
156 ;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
157 ;;
158 ;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
159 ;;
160 ;; Eq ::= S? '=' S?
161 ;;
162 ;; VersionNum ::= '1.0'
163 ;;
164 ;; Misc ::= Comment | PI | S
165 ;;
166 ;;
167 ;; /* Document Type Definition */
168 ;;
169 ;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
170 ;; ('[' intSubset ']' S?)? '>'
171 ;; [VC: Root Element Type]
172 ;; [WFC: External Subset]
173 ;;
174 ;; DeclSep ::= PEReference | S
175 ;; [WFC: PE Between Declarations]
176 ;;
177 ;; intSubset ::= (markupdecl | DeclSep)*
178 ;;
179 ;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
180 ;; | NotationDecl | PI | Comment
181 ;; [VC: Proper Declaration/PE Nesting]
182 ;; [WFC: PEs in Internal Subset]
183 ;;
184 ;;
185 ;; /* External Subset */
186 ;;
187 ;; extSubset ::= TextDecl? extSubsetDecl
188 ;;
189 ;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
190 ;;
191 ;;
192 ;; /* Standalone Document Declaration */
193 ;;
194 ;; SDDecl ::= S 'standalone' Eq
195 ;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
196 ;; [VC: Standalone Document Declaration]
197 ;;
198 ;;
199 ;; /* Element */
200 ;;
201 ;; element ::= EmptyElemTag | STag content ETag
202 ;; [WFC: Element Type Match]
203 ;; [VC: Element Valid]
204 ;;
205 ;;
206 ;; /* Start-tag */
207 ;;
208 ;; STag ::= '<' Name (S Attribute)* S? '>'
209 ;; [WFC: Unique Att Spec]
210 ;;
211 ;; Attribute ::= Name Eq AttValue
212 ;; [VC: Attribute Value Type]
213 ;; [WFC: No External Entity References]
214 ;; [WFC: No < in Attribute Values]
215 ;;
216 ;;
217 ;; /* End-tag */
218 ;;
219 ;; ETag ::= '</' Name S? '>'
220 ;;
221 ;;
222 ;; /* Content of Elements */
223 ;;
224 ;; content ::= CharData?
225 ;; ((element | Reference | CDSect | PI | Comment) CharData?)*
226 ;;
227 ;;
228 ;; /* Tags for Empty Elements */
229 ;;
230 ;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
231 ;; [WFC: Unique Att Spec]
232 ;;
233 ;;
234 ;; /* Element Type Declaration */
235 ;;
236 ;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
237 ;; [VC: Unique Element Type Declaration]
238 ;;
239 ;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
240 ;;
241 ;;
242 ;; /* Element-content Models */
243 ;;
244 ;; children ::= (choice | seq) ('?' | '*' | '+')?
245 ;;
246 ;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
247 ;;
248 ;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
249 ;; [VC: Proper Group/PE Nesting]
250 ;;
251 ;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
252 ;; [VC: Proper Group/PE Nesting]
253 ;;
254 ;;
255 ;; /* Mixed-content Declaration */
256 ;;
257 ;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
258 ;; | '(' S? '#PCDATA' S? ')'
259 ;; [VC: Proper Group/PE Nesting]
260 ;; [VC: No Duplicate Types]
261 ;;
262 ;;
263 ;; /* Attribute-list Declaration */
264 ;;
265 ;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
266 ;;
267 ;; AttDef ::= S Name S AttType S DefaultDecl
268 ;;
269 ;;
270 ;; /* Attribute Types */
271 ;;
272 ;; AttType ::= StringType | TokenizedType | EnumeratedType
273 ;;
274 ;; StringType ::= 'CDATA'
275 ;;
276 ;; TokenizedType ::= 'ID' [VC: ID]
277 ;; [VC: One ID per Element Type]
278 ;; [VC: ID Attribute Default]
279 ;; | 'IDREF' [VC: IDREF]
280 ;; | 'IDREFS' [VC: IDREF]
281 ;; | 'ENTITY' [VC: Entity Name]
282 ;; | 'ENTITIES' [VC: Entity Name]
283 ;; | 'NMTOKEN' [VC: Name Token]
284 ;; | 'NMTOKENS' [VC: Name Token]
285 ;;
286 ;;
287 ;; /* Enumerated Attribute Types */
288 ;;
289 ;; EnumeratedType ::= NotationType | Enumeration
290 ;;
291 ;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
292 ;; [VC: Notation Attributes]
293 ;; [VC: One Notation Per Element Type]
294 ;; [VC: No Notation on Empty Element]
295 ;; [VC: No Duplicate Tokens]
296 ;;
297 ;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
298 ;; [VC: Enumeration]
299 ;; [VC: No Duplicate Tokens]
300 ;;
301 ;;
302 ;; /* Attribute Defaults */
303 ;;
304 ;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
305 ;; | (('#FIXED' S)? AttValue)
306 ;; [VC: Required Attribute]
307 ;; [VC: Attribute Default Value Syntactically Correct]
308 ;; [WFC: No < in Attribute Values]
309 ;; [VC: Fixed Attribute Default]
310 ;;
311 ;;
312 ;; /* Conditional Section */
313 ;;
314 ;; conditionalSect ::= includeSect | ignoreSect
315 ;;
316 ;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
317 ;; [VC: Proper Conditional Section/PE Nesting]
318 ;;
319 ;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
320 ;; [VC: Proper Conditional Section/PE Nesting]
321 ;;
322 ;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
323 ;;
324 ;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
325 ;;
326 ;;
327 ;; /* Character Reference */
328 ;;
329 ;; CharRef ::= '&#' [0-9]+ ';'
330 ;; | '&#x' [0-9a-fA-F]+ ';'
331 ;; [WFC: Legal Character]
332 ;;
333 ;;
334 ;; /* Entity Reference */
335 ;;
336 ;; Reference ::= EntityRef | CharRef
337 ;;
338 ;; EntityRef ::= '&' Name ';'
339 ;; [WFC: Entity Declared]
340 ;; [VC: Entity Declared]
341 ;; [WFC: Parsed Entity]
342 ;; [WFC: No Recursion]
343 ;;
344 ;; PEReference ::= '%' Name ';'
345 ;; [VC: Entity Declared]
346 ;; [WFC: No Recursion]
347 ;; [WFC: In DTD]
348 ;;
349 ;;
350 ;; /* Entity Declaration */
351 ;;
352 ;; EntityDecl ::= GEDecl | PEDecl
353 ;;
354 ;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
355 ;;
356 ;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
357 ;;
358 ;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
359 ;;
360 ;; PEDef ::= EntityValue | ExternalID
361 ;;
362 ;;
363 ;; /* External Entity Declaration */
364 ;;
365 ;; ExternalID ::= 'SYSTEM' S SystemLiteral
366 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
367 ;;
368 ;; NDataDecl ::= S 'NDATA' S Name
369 ;; [VC: Notation Declared]
370 ;;
371 ;;
372 ;; /* Text Declaration */
373 ;;
374 ;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
375 ;;
376 ;;
377 ;; /* Well-Formed External Parsed Entity */
378 ;;
379 ;; extParsedEnt ::= TextDecl? content
380 ;;
381 ;;
382 ;; /* Encoding Declaration */
383 ;;
384 ;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
385 ;;
386 ;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
387 ;; /* Encoding name contains only Latin characters */
388 ;;
389 ;;
390 ;; /* Notation Declarations */
391 ;;
392 ;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
393 ;; [VC: Unique Notation Name]
394 ;;
395 ;; PublicID ::= 'PUBLIC' S PubidLiteral
396 ;;
397 ;;
398 ;; /* Characters */
399 ;;
400 ;; Letter ::= BaseChar | Ideographic
401 ;;
402 ;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
403 ;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
404 ;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
405 ;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
406 ;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
407 ;; | #x0386 | [#x0388-#x038A] | #x038C
408 ;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
409 ;; | #x03DA | #x03DC | #x03DE
410 ;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C]
411 ;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
412 ;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
413 ;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
414 ;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586]
415 ;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
416 ;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
417 ;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
418 ;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
419 ;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
420 ;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
421 ;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
422 ;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
423 ;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
424 ;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
425 ;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
426 ;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
427 ;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
428 ;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C]
429 ;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
430 ;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
431 ;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
432 ;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
433 ;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
434 ;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
435 ;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
436 ;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
437 ;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
438 ;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
439 ;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
440 ;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
441 ;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
442 ;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
443 ;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D
444 ;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
445 ;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB]
446 ;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
447 ;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
448 ;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
449 ;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107]
450 ;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112]
451 ;; | #x113C | #x113E | #x1140
452 ;; | #x114C | #x114E | #x1150
453 ;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161]
454 ;; | #x1163 | #x1165 | #x1167
455 ;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173]
456 ;; | #x1175 | #x119E | #x11A8
457 ;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8]
458 ;; | #x11BA | [#x11BC-#x11C2] | #x11EB
459 ;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B]
460 ;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
461 ;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
462 ;; | #x1F59 | #x1F5B | #x1F5D
463 ;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
464 ;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
465 ;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
466 ;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
467 ;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182]
468 ;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
469 ;; | [#xAC00-#xD7A3]
470 ;;
471 ;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
472 ;;
473 ;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
474 ;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
475 ;; | #x05BF | [#x05C1-#x05C2] | #x05C4
476 ;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC]
477 ;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
478 ;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
479 ;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954]
480 ;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
481 ;; | #x09BE | #x09BF | [#x09C0-#x09C4]
482 ;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
483 ;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C
484 ;; | #x0A3E | #x0A3F | [#x0A40-#x0A42]
485 ;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
486 ;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5]
487 ;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
488 ;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
489 ;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
490 ;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
491 ;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
492 ;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
493 ;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
494 ;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
495 ;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
496 ;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A]
497 ;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9]
498 ;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
499 ;; | #x0F35 | #x0F37 | #x0F39
500 ;; | #x0F3E | #x0F3F | [#x0F71-#x0F84]
501 ;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
502 ;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
503 ;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
504 ;; | #x3099 | #x309A
505 ;;
506 ;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
507 ;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
508 ;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
509 ;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
510 ;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
511 ;;
512 ;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
513 ;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
514 ;;
515 ;;
516 ;; NOTES
517 ;; -----
518 ;;
519 ;; At the moment, only `<!ELEMENT' generates a syntactic chart. The
520 ;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' declarations are syntactically
521 ;; checked, but they don't generate a syntactic chart.
522 ;;
523 ;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An
524 ;; example of a `pure' dtd file is:
525 ;;
526 ;; <?xml version="1.0" encoding="UTF-8"?>
527 ;; <!--
528 ;; The main element.
529 ;; -->
530 ;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
531 ;; steps, splits?, joins?)>
532 ;; <!--
533 ;; An action that can be executed (id must be unique among actions for
534 ;; the enclosing step).
535 ;; Used in: actions
536 ;; -->
537 ;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
538 ;; post-functions?)>
539 ;; <!ATTLIST action
540 ;; id CDATA #REQUIRED
541 ;; name CDATA #REQUIRED
542 ;; >
543 ;;
544 ;;
545 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
546
547 ;;; Code:
548
549
550 (require 'ebnf-otz)
551
552
(defvar ebnf-dtd-lex nil
  "Text attached to the token last returned by function `ebnf-dtd-lex'.
The parser reads it after tokens such as `name', `string' and `begin-pi'.")
555
556 \f
557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
558 ;; Syntactic analyzer
559
560
561 ;;; document ::= prolog element Misc*
562 ;;; /* Note that *only* the prolog will be parsed */
563
(defun ebnf-dtd-parser (start)
  "DTD parser.

Parse the current buffer beginning at position START and return the list
of rules collected from the declarations; the most recently parsed rule
comes first.  `ebnf-limit' is used together with START to compute the
progress percentage that is displayed while parsing.

Only the prolog is parsed.  Point is moved back to where it was on entry
when parsing succeeds.  Signal an error if the input is empty, if a
DOCTYPE internal subset is not closed, or if a declaration is invalid."
  (let ((total (+ (- ebnf-limit start) 1))
        (bias (1- start))
        (origin (point))
        rule-list token rule the-end)
    (goto-char start)
    (setq token (ebnf-dtd-lex))
    (and (eq token 'end-of-input)
         (error "Empty DTD file"))
    (setq token (ebnf-dtd-prolog token))
    (unless (eq (car token) 'end-prolog)
      ;; THE-END is the token that closes the declarations: `end-subset'
      ;; inside `<!DOCTYPE ... [ ... ]>', `end-of-input' for a pure DTD.
      (setq the-end (cdr token)
            token (car token))
      (while (not (eq token the-end))
        ;; `ebnf-dtd-intsubset' hands `end-subset' and `end-of-input' back
        ;; without consuming any input, so meeting the *wrong* terminator
        ;; here would keep this loop spinning forever.  Report it instead.
        (cond ((eq token 'end-of-input)
               (error "Missing end of DOCTYPE"))
              ((eq token 'end-subset)
               (error "Invalid DOCTYPE element")))
        (ebnf-message-float
         "Parsing...%s%%"
         (/ (* (- (point) bias) 100.0) total))
        (setq token (ebnf-dtd-intsubset token)
              rule (cdr token)
              token (car token))
        ;; Declarations that produce no chart return a nil rule.
        (or (null rule)
            (ebnf-add-empty-rule-list rule)
            (setq rule-list (cons rule rule-list))))
      (or (eq the-end 'end-of-input)
          (eq (ebnf-dtd-lex) 'end-decl)
          (error "Missing end of DOCTYPE"))
      ;; adjust message, 'cause *only* prolog will be parsed
      (ebnf-message-float "Parsing...%s%%" 100.0))
    (goto-char origin)
    rule-list))
595
596
597 ;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
598 ;;;
599 ;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
600 ;;;
601 ;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
602 ;;;
603 ;;; Eq ::= S? '=' S?
604 ;;;
605 ;;; VersionNum ::= '1.0'
606 ;;;
607 ;;; Misc ::= Comment | PI | S
608 ;;;
609 ;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
610 ;;;
611 ;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
612 ;;; /* Encoding name contains only Latin characters */
613 ;;;
614 ;;; SDDecl ::= S 'standalone' Eq
615 ;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
616 ;;;
617 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
618 ;;; ('[' intSubset ']' S?)? '>'
619
620
(defun ebnf-dtd-prolog (token)
  "Parse the DTD prolog; TOKEN is the first token of the input.

An optional XML declaration (`<?xml ... ?>') is validated first, then any
further processing instructions are skipped.

Return a cons (NEXT . END), where NEXT is the token the caller continues
with and END is the token that terminates the declarations:

  (NEXT . end-subset)        after `<!DOCTYPE' with an internal subset;
                             NEXT is the first token of the subset.
  (NEXT . end-of-input)      for a `pure' DTD; NEXT is the token of an
                             ELEMENT, ATTLIST, ENTITY or NOTATION
                             declaration.
  (end-prolog . end-subset)  when there is nothing more to parse."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; version = "1.0"
    ;; The regexps use \\` and \\' because ^ and $ also match next to a
    ;; newline inside the string.
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "\\`1\\.0\\'" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "\\`[A-Za-z][-A-Za-z0-9._]*\\'" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; Alternation is spelled \\| in Emacs regexps; a bare | is literal.
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "\\`\\(yes\\|no\\)\\'" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; The XML declaration is consumed: fetch what follows it.  Without a
    ;; declaration TOKEN is still unprocessed and must not be thrown away.
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; `pure' DTD file: declarations start right away
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))
   ))
652
653
(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse a mandatory attribute of the XML declaration.
TOKEN must be the attribute token ATTR, otherwise an error naming
ATTR-NAME is signaled.  The value is then checked against the regexp
MATCH by `ebnf-dtd-attribute-optional', whose result is returned."
  (if (eq token attr)
      (ebnf-dtd-attribute-optional token attr match attr-name)
    (error "%s attribute is missing" attr-name)))
658
659
(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse an optional attribute of the XML declaration.
If TOKEN is the attribute token ATTR, the input must continue with `='
and a string matching the regexp MATCH; otherwise an error naming
ATTR-NAME is signaled.  Return the token following the attribute, or
TOKEN itself when the attribute is absent."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; ATTR-NAME already carries the \"XML\" prefix (e.g. \"XML
        ;; version\"), exactly as in the message of `ebnf-dtd-attribute'.
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)
668
669
670 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
671 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
672
673
(defun ebnf-dtd-externalid (&optional token)
  "Parse an optional or mandatory external identifier.
When TOKEN is given, it is the token to examine and an external id is
mandatory: anything but `system' or `public' signals an error.  When
TOKEN is omitted, the next token is read and simply returned if it does
not start an external id.  After an external id, return the token that
follows it."
  (let ((required token)
        (current (or token (ebnf-dtd-lex))))
    (cond ((eq current 'system)
           (ebnf-dtd-systemliteral))
          ((eq current 'public)
           (ebnf-dtd-pubidliteral)
           (ebnf-dtd-systemliteral))
          (required
           (error "Missing `SYSTEM' or `PUBLIC' in external id"))
          (t
           current))))
686
687
688 ;;; SystemLiteral ::= ('"' [^"]* '"')
689 ;;; | ("'" [^']* "'")
690
691
(defun ebnf-dtd-systemliteral ()
  "Read a system literal, which must be a `string' token.
Return the token that follows it; signal an error otherwise."
  (if (eq (ebnf-dtd-lex) 'string)
      (ebnf-dtd-lex)
    (error "System identifier is invalid")))
696
697
698 ;;; PubidLiteral ::= '"' PubidChar* '"'
699 ;;; | "'" (PubidChar - "'")* "'"
700 ;;;
701 ;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
702
703
(defun ebnf-dtd-pubidliteral ()
  "Read a public identifier literal and validate its characters.
The next token must be a `string' made only of PubidChar characters;
signal an error otherwise.  No token after the literal is read."
  (or (and (eq (ebnf-dtd-lex) 'string)
           ;; Anchor with \\` and \\': the class contains newline, and ^/$
           ;; would accept a literal as soon as one of its lines is clean.
           (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                         ebnf-dtd-lex))
      (error "Public identifier is invalid")))
709
710
711 ;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
712 ;;;
713 ;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
714
715
(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions starting at TOKEN.
While TOKEN is `begin-pi', check that the instruction target is not
`XML' (in any letter case), discard everything up to `end-pi' and read
the next token.  Return the first token that does not start a processing
instruction.  Signal an error if the input ends inside an instruction."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; Discard the instruction body.  Running out of input before `?>'
    ;; must stop the scan, otherwise this loop would never terminate.
    (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
      (and (eq token 'end-of-input)
           (error "Missing end of processing instruction")))
    (setq token (ebnf-dtd-lex)))
  token)
723
724
725 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
726 ;;; ('[' intSubset ']' S?)? '>'
727 ;;;
728 ;;; intSubset ::= (markupdecl | DeclSep)*
729 ;;;
730 ;;; DeclSep ::= PEReference | S
731 ;;;
732 ;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
733 ;;; | NotationDecl | PI | Comment
734
735
(defun ebnf-dtd-intsubset (token)
  "Parse one item of the DOCTYPE internal subset starting at TOKEN.
Leading processing instructions are skipped first.  Return a cons
\(NEXT . RULE): NEXT is the token to continue with and RULE is the
production built for an ELEMENT declaration, or nil for every other
item.  The terminators `end-subset' and `end-of-input' are returned
unchanged, without reading further input."
  ;; PI - Processing Instruction
  (when (eq token 'begin-pi)
    (setq token (ebnf-dtd-pi token)))
  (cond
   ((eq token 'element-decl)            ; rule
    (ebnf-dtd-elementdecl))
   ((eq token 'attlist-decl)            ; annotation
    (ebnf-dtd-attlistdecl))
   ((eq token 'entity-decl)             ; annotation
    (ebnf-dtd-entitydecl))
   ((eq token 'notation-decl)           ; annotation
    (ebnf-dtd-notationdecl))
   ((eq token 'pe-ref)                  ; annotation
    (list (ebnf-dtd-lex)))
   ((or (eq token 'end-subset)
        (eq token 'end-of-input))
    (list token))
   (t
    (error "Invalid DOCTYPE element"))))
756
757
758 ;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
759 ;;;
760 ;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
761 ;;;
762 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
763 ;;; | '(' S? '#PCDATA' S? ')'
764 ;;;
765 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
766 ;;;
767 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
768 ;;;
769 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
770 ;;;
771 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
772
773
(defun ebnf-dtd-elementdecl ()
  "Parse an `<!ELEMENT' declaration whose opening token is already read.
The value of `ebnf-action' on entry is attached to the production and
the variable is reset to nil.  Return a cons (NEXT . PRODUCTION), where
NEXT is the token following the closing `>'."
  (let ((saved-action ebnf-action)
        element-name content)
    (setq ebnf-action nil)
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ELEMENT name"))
    (setq element-name ebnf-dtd-lex)
    ;; CONTENT is (TOKEN . NODE): the token after the content
    ;; specification and the node describing that content.
    (let ((kind (ebnf-dtd-lex)))
      (setq content
            (cond
             ((eq kind 'begin-group)
              (let ((first (ebnf-dtd-lex)))
                (if (eq first 'pcdata)
                    (ebnf-dtd-mixed)
                  (ebnf-dtd-children first))))
             ((memq kind '(empty any))
              ;; build the terminal before reading the next token
              (let ((terminal (ebnf-make-terminal ebnf-dtd-lex)))
                (cons (ebnf-dtd-lex) terminal)))
             (t
              (error "Invalid ELEMENT content")))))
    (unless (eq (car content) 'end-decl)
      (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production element-name)
    (cons (ebnf-dtd-lex)
          (ebnf-make-production element-name (cdr content) saved-action))))
798
799
800 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
801 ;;; | '(' S? '#PCDATA' S? ')'
802
803
(defun ebnf-dtd-mixed ()
  "Parse mixed content; `(' and `#PCDATA' are already read.
Accept `(#PCDATA)', `(#PCDATA)*' and `(#PCDATA | Name | ...)*'.  Return a
cons (NEXT . NODE) as built by `ebnf-token-alternative', where NEXT is
the token following the content specification.  Signal an error on a
malformed group, or when a group with names lacks the trailing `*'."
  (let* ((alt (list (ebnf-make-terminal ebnf-dtd-lex)))
         (token (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; Every alternative must be a node, like the `#PCDATA' terminal
      ;; above and the names handled by `ebnf-dtd-cp'; a bare string is
      ;; not a node.
      (setq alt (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond ((eq token 'zero-or-more)
           ;; `)*' is mandatory after names and also legal for a lone
           ;; `#PCDATA'; step over it.
           (setq token (ebnf-dtd-lex)))
          (has-alternative
           (error "Missing `*'")))
    (ebnf-token-alternative alt (cons token nil))))
819
820
821 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
822
823
(defun ebnf-dtd-children (token)
  "Parse element content starting at TOKEN, the first token after `('.
The group is parsed by `ebnf-dtd-choice-seq' and an optional `?', `*' or
`+' suffix by `ebnf-dtd-operators', whose result is returned."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
826
827
828 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
829 ;;;
830 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
831
832
(defun ebnf-dtd-choice-seq (token)
  "Parse the inside of a choice or sequence group starting at TOKEN.
TOKEN is the first token after `('.  Content particles are read with
`ebnf-dtd-cp' until the group ends.  Return the node for the group: an
alternative for `a | b', a sequence for `a , b', or the particle itself
when the group holds a single one.  Signal an error if the group is not
closed by `)'."
  (setq token (ebnf-dtd-cp token))
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' answers (TOKEN . NODE); only the node
      ;; belongs in the result, as in the two branches below.
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))
855
856
857 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
858
859
(defun ebnf-dtd-cp (token)
  "Parse one content particle starting at TOKEN.
A `name' becomes a terminal node and `begin-group' opens a nested choice
or sequence; any other token signals an error.  The particle is then
handed to `ebnf-dtd-operators', whose result is returned."
  (let ((particle
         (if (eq token 'name)
             (ebnf-make-terminal ebnf-dtd-lex)
           (if (eq token 'begin-group)
               (ebnf-dtd-choice-seq (ebnf-dtd-lex))
             (error "Invalid element")))))
    (ebnf-dtd-operators particle)))
868
869
870 ;;; elm ('?' | '*' | '+')?
871
872
(defun ebnf-dtd-operators (elm)
  "Apply an optional `?', `*' or `+' suffix to the node ELM.
Read the next token.  If it is an occurrence operator, read one more
token and wrap ELM accordingly.  Return a cons (NEXT . NODE), where NEXT
is the first token that is not part of the particle."
  (let ((operator (ebnf-dtd-lex)))
    (if (memq operator '(optional zero-or-more one-or-more))
        ;; the token after the operator is read before ELM is wrapped
        (let ((next (ebnf-dtd-lex)))
          (cons next
                (cond ((eq operator 'optional)     ; ? - optional
                       (ebnf-token-optional elm))
                      ((eq operator 'zero-or-more) ; * - zero or more
                       (ebnf-make-zero-or-more elm))
                      (t                           ; + - one or more
                       (ebnf-make-one-or-more elm)))))
      ;; only element
      (cons operator elm))))
884
885
886 ;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
887 ;;;
888 ;;; AttDef ::= S Name S AttType S DefaultDecl
889 ;;;
890 ;;; AttType ::= StringType | TokenizedType | EnumeratedType
891 ;;;
892 ;;; StringType ::= 'CDATA'
893 ;;;
894 ;;; TokenizedType ::= 'ID'
895 ;;; | 'IDREF'
896 ;;; | 'IDREFS'
897 ;;; | 'ENTITY'
898 ;;; | 'ENTITIES'
899 ;;; | 'NMTOKEN'
900 ;;; | 'NMTOKENS'
901 ;;;
902 ;;; EnumeratedType ::= NotationType | Enumeration
903 ;;;
904 ;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
905 ;;;
906 ;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
907 ;;;
908 ;;; DefaultDecl ::= '#REQUIRED'
909 ;;; | '#IMPLIED'
910 ;;; | (('#FIXED' S)? AttValue)
911 ;;;
912 ;;;
913 ;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
914 ;;; | "'" ([^<&'] | Reference)* "'"
915 ;;;
916 ;;; Reference ::= EntityRef | CharRef
917 ;;;
918 ;;; EntityRef ::= '&' Name ';'
919 ;;;
920 ;;; CharRef ::= '&#' [0-9]+ ';'
921 ;;; | '&#x' [0-9a-fA-F]+ ';'
922
923 ;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
924
925
(defun ebnf-dtd-attlistdecl ()
  "Check an `<!ATTLIST' declaration whose opening token is already read.
Each attribute definition is a name, a type and a default declaration.
No chart is generated: return (NEXT), a one-element list holding the
token that follows the closing `>'.  Signal an error on invalid syntax."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
  (let (token)
    (while (eq (setq token (ebnf-dtd-lex)) 'name)
      ;; type
      (setq token (ebnf-dtd-lex))
      (cond
       ((eq token 'notation)
        (or (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
        (ebnf-dtd-namelist "NOTATION" '(name)))
       ((eq token 'begin-group)
        (ebnf-dtd-namelist "enumeration" '(name name-char)))
       ((memq token
              '(cdata id idref idrefs entity entities nmtoken nmtokens)))
       (t
        (error "Invalid type in ATTLIST declaration")))
      ;; default value
      (setq token (ebnf-dtd-lex))
      (unless (memq token '(required implied))
        (and (eq token 'fixed)
             (setq token (ebnf-dtd-lex)))
        (or (and (eq token 'string)
                 ;; \\` and \\' anchor the whole value.  With ^ and $ a
                 ;; value spanning several lines would be accepted as soon
                 ;; as one of its lines is valid.
                 (string-match
                  "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"
                  ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
    (cons (ebnf-dtd-lex) nil)))
957
958
(defun ebnf-dtd-namelist (type name-list)
  "Check a `|'-separated list of names closed by `)'.
Each item must be a token found in NAME-LIST.  TYPE is the text naming
the attribute type in error messages.  Return t when the list is well
formed; signal an error otherwise."
  (let ((separator 'alternative))
    (while (eq separator 'alternative)
      (unless (memq (ebnf-dtd-lex) name-list)
        (error "Invalid name in %s type in ATTLIST declaration" type))
      (setq separator (ebnf-dtd-lex)))
    (if (eq separator 'end-group)
        t
      (error "Missing `)' in %s type in ATTLIST declaration" type))))
967
968
969 ;;; EntityDecl ::= GEDecl | PEDecl
970 ;;;
971 ;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
972 ;;;
973 ;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
974 ;;;
975 ;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
976 ;;;
977 ;;; PEDef ::= EntityValue | ExternalID
978 ;;;
979 ;;; NDataDecl ::= S 'NDATA' S Name
980 ;;;
981 ;;;
982 ;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
983 ;;; | "'" ([^%&'] | PEReference | Reference)* "'"
984 ;;;
985 ;;; PEReference ::= '%' Name ';'
986 ;;;
987 ;;; Reference ::= EntityRef | CharRef
988 ;;;
989 ;;; EntityRef ::= '&' Name ';'
990 ;;;
991 ;;; CharRef ::= '&#' [0-9]+ ';'
992 ;;; | '&#x' [0-9a-fA-F]+ ';'
993
994 ;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
995
996
(defun ebnf-dtd-entitydecl ()
  "Check an `<!ENTITY' declaration whose opening token is already read.
Handle both general entities and parameter entities (introduced by `%').
The definition is either an entity value string or an external id; for a
general entity the external id may be followed by `NDATA' and a name.
No chart is generated: return (NEXT), a one-element list holding the
token that follows the closing `>'.  Signal an error on invalid syntax."
  (let* ((token (ebnf-dtd-lex))
         (pedecl (eq token 'percent)))
    (and pedecl
         (setq token (ebnf-dtd-lex)))
    (or (eq token 'name)
        (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (if (eq token 'string)
        ;; \\` and \\' anchor the whole value.  With ^ and $ a value
        ;; spanning several lines would be accepted as soon as one of its
        ;; lines is valid.
        (if (string-match
             "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"
             ebnf-dtd-lex)
            (setq token (ebnf-dtd-lex))
          (error "Invalid ENTITY definition"))
      ;; passing TOKEN makes the external id mandatory
      (setq token (ebnf-dtd-externalid token))
      (when (and (not pedecl) (eq token 'ndata))
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
    (cons (ebnf-dtd-lex) nil)))
1019
1020
1021 ;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1022 ;;;
1023 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
1024
1025
(defun ebnf-dtd-notationdecl ()
  "Check a NOTATION declaration: a name, an external or public id, `>'.
Signal an error if the first token is not `name' or if
`ebnf-dtd-externalid-or-publicid' does not return `end-decl'.
Return a one-element list holding the token read after the `>'."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  (unless (eq (ebnf-dtd-externalid-or-publicid) 'end-decl)
    (error "Missing `>' in end of NOTATION"))
  (list (ebnf-dtd-lex)))
1032
1033
1034 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
1035 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1036 ;;;
1037 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
1038
1039
(defun ebnf-dtd-externalid-or-publicid ()
  "Check an external identifier or a public identifier.
After `SYSTEM', return the value of `ebnf-dtd-systemliteral'.
After `PUBLIC', call `ebnf-dtd-pubidliteral', skip one following
`string' token if there is one, and return the token read next.
Signal an error if the first token is neither `system' nor `public'."
  (let ((keyword (ebnf-dtd-lex)))
    (cond
     ((eq keyword 'system)
      (ebnf-dtd-systemliteral))
     ((eq keyword 'public)
      (ebnf-dtd-pubidliteral)
      ;; The system literal after the public id is optional here.
      (let ((next (ebnf-dtd-lex)))
        (if (eq next 'string)
            (ebnf-dtd-lex)
          next)))
     (t
      (error "Missing `SYSTEM' or `PUBLIC'")))))
1051
1052 \f
1053 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1054 ;; Lexical analyzer
1055
1056
(defconst ebnf-dtd-token-table
  (make-vector 256 'error)
  "Vector giving the lexical token of each character code from 0 to 255.
Every entry is created as `error'; `ebnf-dtd-initialize' stores the
other tokens and the function `ebnf-dtd-lex' reads them.")
1059
1060
(defun ebnf-dtd-initialize ()
  "Initialize EBNF token table.
Store in `ebnf-dtd-token-table' the token of every character handled by
the DTD lexer.  Entries not listed here (control characters and 8-bit
control characters) are left as they are."
  (let (result)
    ;; Character ranges; both ends are inclusive.
    (dolist (range '((?0    ?9    name-char) ; digits
                     (?A    ?Z    name)      ; upper-case letters
                     (?a    ?z    name)      ; lower-case letters
                     (?\240 ?\377 name)))    ; European 8-bit accented chars
      (let ((code  (car range))
            (last  (nth 1 range))
            (token (nth 2 range)))
        (while (<= code last)
          (aset ebnf-dtd-token-table code token)
          (setq code (1+ code)))))
    ;; Single characters, stored after the ranges.
    (dolist (entry '(;; name characters
                     (?_  . name)
                     (?:  . name)
                     (?\. . name-char)
                     (?-  . name-char)
                     ;; space characters
                     (?\n . space)      ; [NL] linefeed
                     (?\r . space)      ; [CR] carriage return
                     (?\t . space)      ; [HT] horizontal tab
                     (?\s . space)      ; [SP] space
                     ;; other lexical characters
                     (?=  . equal)
                     (?\, . comma)
                     (?*  . zero-or-more)
                     (?+  . one-or-more)
                     (?|  . alternative)
                     (?%  . percent)
                     (?&  . ampersand)
                     (?\# . hash)
                     (?\? . interrogation)
                     (?\" . double-quote)
                     (?\' . single-quote)
                     (?<  . less-than)
                     (?>  . end-decl)
                     (?\( . begin-group)
                     (?\) . end-group)
                     (?\[ . begin-subset)
                     (?\] . end-subset))
                   ;; Value of the last `aset'.
                   result)
      (setq result (aset ebnf-dtd-token-table (car entry) (cdr entry))))))
1112
1113
1114 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
  "Character set given to `ebnf-buffer-substring' to read a name.
It is built by `ebnf-range-regexp' from `-', `.', `_', `:', digits and
ASCII letters, plus the range from \\240 to \\377.")
1117
1118
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping a declaration keyword to its lexical token.
The function `ebnf-dtd-lex' looks up here the name found after `<!'.")
1125
1126
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required))
  "Alist mapping a keyword starting with `#' to its lexical token.
The function `ebnf-dtd-lex' signals an error for a `#' word that is not
listed here.")
1132
1133
(defconst ebnf-dtd-name-alist
  '(("ANY"        . any)
    ("CDATA"      . cdata)
    ("EMPTY"      . empty)
    ("ENTITIES"   . entities)
    ("ENTITY"     . entity)
    ("ID"         . id)
    ("IDREF"      . idref)
    ("IDREFS"     . idrefs)
    ("NDATA"      . ndata)
    ("NMTOKEN"    . nmtoken)
    ("NMTOKENS"   . nmtokens)
    ("NOTATION"   . notation)
    ("PUBLIC"     . public)
    ("SYSTEM"     . system)
    ("encoding"   . encoding-attr)
    ("standalone" . standalone-attr)
    ("version"    . version-attr))
  "Alist mapping a reserved name to its lexical token.
The function `ebnf-dtd-lex' returns the token found here for a name; a
name that is not listed is returned as `name' or `name-char'.")
1152
1153
(defun ebnf-dtd-lex ()
  "Lexical analyser for DTD.

Skip spaces and comments, then return the next lexical token as a
symbol.  Return `end-of-input' when point is at or beyond `ebnf-limit'.
For `<?' targets, names, `#' keywords, strings and references, the text
read is stored in the variable `ebnf-dtd-lex'.
Signal an error on an illegal character or an invalid construct.

See documentation for variable `ebnf-dtd-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let ((kind nil)
          (skipping t))
      ;; Skip spaces and comments.  KIND ends up as the table entry of
      ;; the first character that is neither, or as `error' for a
      ;; character code above 255 (the table has no entry for it).
      (while skipping
        (let ((next (following-char)))
          (if (> next 255)
              (setq kind 'error
                    skipping nil)
            (setq kind (aref ebnf-dtd-token-table next))
            (setq skipping
                  (cond
                   ((eq kind 'space)
                    (skip-chars-forward " \n\r\t" ebnf-limit)
                    (< (point) ebnf-limit))
                   ((and (eq kind 'less-than)
                         (looking-at "<!--"))
                    (ebnf-dtd-skip-comment))
                   (t nil))))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq kind 'error)
        (error "Illegal character"))
       ;; beginning of declaration:
       ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
       ((eq kind 'less-than)
        (forward-char)
        (let ((marker (following-char)))
          (cond
           ((= marker ?\?)              ; <?
            (forward-char)
            (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
            'begin-pi)
           ((= marker ?!)               ; <!
            (forward-char)
            (let* ((keyword (ebnf-buffer-substring ebnf-dtd-name-chars))
                   (found (assoc keyword ebnf-dtd-decl-alist)))
              (or (cdr found)
                  (error "Invalid declaration name `%s'" keyword))))
           (t                           ; <x
            (error "Invalid declaration `<%c'" marker)))))
       ;; name, namechar
       ((memq kind '(name name-char))
        (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist))
            kind))
       ;; ?, ?>
       ((eq kind 'interrogation)
        (forward-char)
        (cond ((= (following-char) ?>)
               (forward-char)
               'end-pi)
              (t
               'optional)))
       ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
       ((eq kind 'hash)
        (forward-char)
        (setq ebnf-dtd-lex
              (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars)))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist))
            (error "Invalid element `%s'" ebnf-dtd-lex)))
       ;; "string", 'string'
       ((memq kind '(double-quote single-quote))
        (setq ebnf-dtd-lex
              (ebnf-dtd-string (if (eq kind 'double-quote) ?\" ?\')))
        'string)
       ;; %, %name;
       ((eq kind 'percent)
        (forward-char)
        (cond ((looking-at "[ \n\r\t]")
               'percent)
              (t
               (setq ebnf-dtd-lex (ebnf-dtd-name-ref "%"))
               'pe-ref)))
       ;; &#...;, &#x...;, &name;
       ((eq kind 'ampersand)
        (forward-char)
        (cond
         ;; &name;
         ((/= (following-char) ?#)
          (setq ebnf-dtd-lex (ebnf-dtd-name-ref "&"))
          'entity-ref)
         ;; &#...;, &#x...;
         (t
          (forward-char)
          (setq ebnf-dtd-lex
                (cond ((= (following-char) ?x)
                       (forward-char)
                       (ebnf-dtd-char-ref "&#x" "0-9a-fA-F"))
                      (t
                       (ebnf-dtd-char-ref "&#" "0-9"))))
          'char-ref)))
       ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
       (t
        (forward-char)
        kind)))))
1255
1256
(defun ebnf-dtd-name-ref (start)
  "Read a reference to a name and return its text.
START is the prefix string already read by the caller.  The work is
done by `ebnf-dtd-char-ref' with `ebnf-dtd-name-chars' as the set of
characters allowed in the name."
  (let ((allowed ebnf-dtd-name-chars))
    (ebnf-dtd-char-ref start allowed)))
1259
1260
(defun ebnf-dtd-char-ref (start chars)
  "Read the body of a reference at point and return the reference text.
START is the prefix string already read by the caller; it is used only
to build the result and the error message.  CHARS is the character set,
as given to `ebnf-buffer-substring', that the body may contain.
Signal an error if the body is empty or is not followed by `;'.
Otherwise move point after the `;' and return the concatenation of
START, the body and `;'."
  (let ((char (ebnf-buffer-substring chars)))
    ;; The grammar requires at least one character between the prefix
    ;; and `;' (a Name, [0-9]+ or [0-9a-fA-F]+), so a reference with
    ;; an empty body is rejected as well.
    (or (and (> (length char) 0)
             (= (following-char) ?\;))
        (error "Invalid element `%s%s%c'" start char (following-char)))
    (forward-char)
    (format "%s%s;" start char)))
1267
1268
1269 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Character set `ebnf-dtd-string' skips inside a double-quoted string.
It is built by `ebnf-range-regexp' from TAB and the printable ASCII
characters except the double quote, plus the range from \\240 to \\377.")
(defconst ebnf-dtd-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Character set `ebnf-dtd-string' skips inside a single-quoted string.
It is built by `ebnf-range-regexp' from TAB and the printable ASCII
characters except the single quote, plus the range from \\240 to \\377.")
1274
1275
(defun ebnf-dtd-string (delim)
  "Read the string starting at point and return its contents.
DELIM is the delimiter character; point must be on the opening one.
The contents are scanned with `ebnf-dtd-double-string-chars' when DELIM
is a double quote and with `ebnf-dtd-single-string-chars' otherwise,
never going beyond `ebnf-limit'.
Signal an error if the scan does not stop on DELIM.  Otherwise leave
point after the closing delimiter and return the text between the
delimiters, without text properties."
  (forward-char)                        ; opening delimiter
  (let ((start (point))
        (allowed (if (= delim ?\")
                     ebnf-dtd-double-string-chars
                   ebnf-dtd-single-string-chars)))
    (skip-chars-forward allowed ebnf-limit)
    (unless (= (following-char) delim)
      (error "Missing string delimiter `%c'" delim))
    (let ((end (point)))
      (forward-char)                    ; closing delimiter
      (buffer-substring-no-properties start end))))
1291
1292
1293 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-dtd-comment-chars
  (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
  "Character set `ebnf-dtd-skip-comment' skips inside a comment.
It is a negated set built by `ebnf-range-regexp': the scan stops on
`-', on the control characters listed in the prefix and on the range
from \\177 to \\237.")
(defconst ebnf-dtd-filename-chars
  (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
  "Character set `ebnf-dtd-eps-filename' uses to read a file name.
It is a negated set built by `ebnf-range-regexp': the scan stops on
`-', on the characters from \\000 to \\037 and on the range from \\177
to \\237.")
1298
1299
(defun ebnf-dtd-skip-comment ()
  "Skip the comment starting at point; point must be on its `<!--'.
The character following `<!--' selects an action: when
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes the EPS
context named by `ebnf-dtd-eps-filename'; otherwise `ebnf-action' is
set from `ebnf-comment-table'.
Return t with point after the closing `-->', or nil if `ebnf-limit' is
reached first.  Signal an error if the comment contains a character
excluded by `ebnf-dtd-comment-chars' other than `-'."
  (forward-char 4)                      ; <!--
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char))))
   )
  ;; Skip the comment text.  The scan stops on `-' and on characters
  ;; that are not allowed in a comment.  Only a `-' that does not start
  ;; `-->' is stepped over; stopping on any other character must leave
  ;; the loop, because the loop body could not move point past it.
  (while (progn
           (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit)
           (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->"))))
    (skip-chars-forward "-" ebnf-limit))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((looking-at "-->")
         (forward-char 3)
         t)
        (t
         (error "Illegal character"))
        ))
1327
1328
(defun ebnf-dtd-eps-filename ()
  "Read the file name following the character at point and return it.
Point is first moved over one character.  The name is made of runs of
characters read with `ebnf-dtd-filename-chars', joined by runs of `-';
reading stops at `ebnf-limit', at `-->' or at any other character that
`ebnf-dtd-filename-chars' does not accept."
  (forward-char)
  (let ((fname (concat (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    ;; A `-' that does not start `-->' is part of the name: take the run
    ;; of dashes, then the next run of file name characters.
    (while (and (< (point) ebnf-limit)
                (= (following-char) ?-) ; may be \n, \t, \r
                (not (looking-at "-->")))
      (setq fname (concat fname
                          (ebnf-buffer-substring "-")
                          (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    fname))
1341
1342 \f
1343 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1344
1345
1346 (provide 'ebnf-dtd)
1347
1348 ;;; arch-tag: c21bb640-135f-4afa-8712-fa11d86301c4
1349 ;;; ebnf-dtd.el ends here