Cleanup uses of "-hooks".
[bpt/emacs.git] / lisp / cedet / semantic / lex.el
CommitLineData
b90caf50 1;;; semantic/lex.el --- Lexical Analyzer builder
57e622d9 2
acaf905b 3;; Copyright (C) 1999-2012 Free Software Foundation, Inc.
57e622d9
CY
4
5;; Author: Eric M. Ludlam <zappo@gnu.org>
6
7;; This file is part of GNU Emacs.
8
9;; GNU Emacs is free software: you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21
22;;; Commentary:
23;;
24;; This file handles the creation of lexical analyzers for different
25;; languages in Emacs Lisp. The purpose of a lexical analyzer is to
26;; convert a buffer into a list of lexical tokens. Each token
27;; contains the token class (such as 'number, 'symbol, 'IF, etc) and
28;; the location in the buffer it was found. Optionally, a token also
29;; contains a string representing what is at the designated buffer
30;; location.
31;;
32;; Tokens are pushed onto a token stream, which is basically a list of
33;; all the lexical tokens from the analyzed region. The token stream
34;; is then handed to the grammar which parses the file.
35;;
36;;; How it works
37;;
38;; Each analyzer specifies a condition and forms. These conditions
39;; and forms are assembled into a function by `define-lex' that does
40;; the lexical analysis.
41;;
42;; In the lexical analyzer created with `define-lex', each condition
9bf6c65c 43;; is tested for a given point. When the condition is true, the forms
57e622d9
CY
44;; run.
45;;
46;; The forms can push a lexical token onto the token stream. The
47;; analyzer forms also must move the current analyzer point. If the
9bf6c65c 48;; analyzer point is moved without pushing a token, then the matched
57e622d9
CY
49;; syntax is effectively ignored, or skipped.
50;;
51;; Thus, starting at the beginning of a region to be analyzed, each
52;; condition is tested. One will match, and a lexical token might be
53;; pushed, and the point is moved to the end of the lexical token
54;; identified. At the new position, the process occurs again until
55;; the end of the specified region is reached.
56;;
57;;; How to use semantic-lex
58;;
59;; To create a lexer for a language, use the `define-lex' macro.
60;;
61;; The `define-lex' macro accepts a list of lexical analyzers. Each
62;; analyzer is created with `define-lex-analyzer', or one of the
9bf6c65c 63;; derivative macros. A single analyzer defines a regular expression
57e622d9
CY
64;; to match text in a buffer, and a short segment of code to create
65;; one lexical token.
66;;
67;; Each analyzer has a NAME, DOC, a CONDITION, and possibly some
68;; FORMS. The NAME is the name used in `define-lex'. The DOC
69;; describes what the analyzer should do.
70;;
71;; The CONDITION evaluates the text at the current point in the
72;; current buffer. If CONDITION is true, then the FORMS will be
73;; executed.
74;;
75;; The purpose of the FORMS is to push new lexical tokens onto the
76;; list of tokens for the current buffer, and to move point after the
77;; matched text.
78;;
79;; Some macros for creating one analyzer are:
80;;
81;; define-lex-analyzer - A generic analyzer associating any style of
82;; condition to forms.
83;; define-lex-regex-analyzer - Matches a regular expression.
84;; define-lex-simple-regex-analyzer - Matches a regular expressions,
85;; and pushes the match.
86;; define-lex-block-analyzer - Matches list syntax, and defines
87;; handles open/close delimiters.
88;;
89;; These macros are used by the grammar compiler when lexical
90;; information is specified in a grammar:
91;; define-lex- * -type-analyzer - Matches syntax specified in
92;; a grammar, and pushes one token for it. The * would
93;; be `sexp' for things like lists or strings, and
94;; `string' for things that need to match some special
95;; string, such as "\\." where a literal match is needed.
96;;
97;;; Lexical Tables
98;;
99;; There are tables of different symbols managed in semantic-lex.el.
100;; They are:
101;;
102;; Lexical keyword table - A Table of symbols declared in a grammar
103;; file with the %keyword declaration.
104;; Keywords are used by `semantic-lex-symbol-or-keyword'
105;; to create lexical tokens based on the keyword.
106;;
9bf6c65c 107;; Lexical type table - A table of symbols declared in a grammar
57e622d9
CY
108;; file with the %type declaration.
109;; The grammar compiler uses the type table to create new
110;; lexical analyzers. These analyzers are then used when
111;; a new lexical analyzer is made for a language.
112;;
113;;; Lexical Types
114;;
115;; A lexical type defines a kind of lexical analyzer that will be
116;; automatically generated from a grammar file based on some
9bf6c65c 117;; predetermined attributes. For now these two attributes are
57e622d9
CY
118;; recognized :
119;;
9bf6c65c 120;; * matchdatatype : define the kind of lexical analyzer. That is :
57e622d9
CY
121;;
122;; - regexp : define a regexp analyzer (see
123;; `define-lex-regex-type-analyzer')
124;;
125;; - string : define a string analyzer (see
126;; `define-lex-string-type-analyzer')
127;;
128;; - block : define a block type analyzer (see
129;; `define-lex-block-type-analyzer')
130;;
131;; - sexp : define a sexp analyzer (see
132;; `define-lex-sexp-type-analyzer')
133;;
134;; - keyword : define a keyword analyzer (see
135;; `define-lex-keyword-type-analyzer')
136;;
137;; * syntax : define the syntax that matches a syntactic
9bf6c65c 138;; expression. When syntax is matched the corresponding type
57e622d9
CY
139;; analyzer is entered and the resulting match data will be
140;; interpreted based on the kind of analyzer (see matchdatatype
141;; above).
142;;
143;; The following lexical types are predefined :
144;;
145;; +-------------+---------------+--------------------------------+
146;; | type | matchdatatype | syntax |
147;; +-------------+---------------+--------------------------------+
148;; | punctuation | string | "\\(\\s.\\|\\s$\\|\\s'\\)+" |
149;; | keyword | keyword | "\\(\\sw\\|\\s_\\)+" |
150;; | symbol | regexp | "\\(\\sw\\|\\s_\\)+" |
151;; | string | sexp | "\\s\"" |
152;; | number | regexp | semantic-lex-number-expression |
153;; | block | block | "\\s(\\|\\s)" |
154;; +-------------+---------------+--------------------------------+
155;;
156;; In a grammar you must use a %type expression to automatically generate
157;; the corresponding analyzers of that type.
158;;
159;; Here is an example to auto-generate punctuation analyzers
160;; with 'matchdatatype and 'syntax predefined (see table above)
161;;
162;; %type <punctuation> ;; will auto-generate this kind of analyzers
163;;
164;; It is equivalent to write :
165;;
166;; %type <punctuation> syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
167;;
e4920bc9 168;; ;; Some punctuation based on the type defines above
57e622d9
CY
169;;
170;; %token <punctuation> NOT "!"
171;; %token <punctuation> NOTEQ "!="
172;; %token <punctuation> MOD "%"
173;; %token <punctuation> MODEQ "%="
174;;
175
176;;; On the Semantic 1.x lexer
177;;
178;; In semantic 1.x, the lexical analyzer was an all purpose routine.
179;; To boost efficiency, the analyzer is now a series of routines that
180;; are constructed at build time into a single routine. This will
181;; eliminate unneeded if statements to speed the lexer.
182
183(require 'semantic/fw)
b90caf50 184
57e622d9
CY
185;;; Code:
186
57e622d9
CY
187;;; Semantic 2.x lexical analysis
188;;
(defun semantic-lex-map-symbols (fun table &optional property)
  "Call function FUN on every symbol in TABLE.
If optional PROPERTY is non-nil, call FUN only on symbols that have a
non-nil PROPERTY value.  FUN receives a symbol as its sole argument.
When TABLE is not an obarray (array), do nothing."
  (when (arrayp table)
    (mapatoms (lambda (sym)
                (when (or (null property) (get sym property))
                  (funcall fun sym)))
              table)))
199
200;;; Lexical keyword table handling.
201;;
202;; These keywords are keywords defined for using in a grammar with the
203;; %keyword declaration, and are not keywords used in Emacs Lisp.
204
;; Keyword obarray: one interned symbol per %keyword declared in a
;; grammar; the symbol's value is the lexical token symbol (see
;; `semantic-lex-keyword-set' below).
(defvar semantic-flex-keywords-obarray nil
  "Buffer local keyword obarray for the lexical analyzer.
These keywords are matched explicitly, and converted into special symbols.")
(make-variable-buffer-local 'semantic-flex-keywords-obarray)

(defmacro semantic-lex-keyword-invalid (name)
  "Signal that NAME is an invalid keyword name.
Raises a `wrong-type-argument' error naming `semantic-lex-keyword-p'
as the failed predicate."
  `(signal 'wrong-type-argument '(semantic-lex-keyword-p ,name)))
213
(defsubst semantic-lex-keyword-symbol (name)
  "Return keyword symbol with NAME, or nil if not found.
Returns nil when NAME is not a string or when
`semantic-flex-keywords-obarray' is not a valid obarray."
  (when (and (arrayp semantic-flex-keywords-obarray)
             (stringp name))
    (intern-soft name semantic-flex-keywords-obarray)))
219
(defsubst semantic-lex-keyword-p (name)
  "Return non-nil if a keyword with NAME exists in the keyword table.
Return nil otherwise.  The non-nil result is the keyword's stored
value; note a keyword whose stored value is nil is thus reported as
absent."
  (let ((sym (semantic-lex-keyword-symbol name)))
    (and sym (symbol-value sym))))
225
(defsubst semantic-lex-keyword-set (name value)
  "Set value of keyword with NAME to VALUE and return VALUE.
Interns NAME in `semantic-flex-keywords-obarray', creating the
keyword if it does not already exist."
  (set (intern name semantic-flex-keywords-obarray) value))
229
(defsubst semantic-lex-keyword-value (name)
  "Return value of keyword with NAME.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    (if (null keyword)
        (semantic-lex-keyword-invalid name)
      (symbol-value keyword))))
237
(defsubst semantic-lex-keyword-put (name property value)
  "For keyword with NAME, set its PROPERTY to VALUE.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    (if (null keyword)
        (semantic-lex-keyword-invalid name)
      (put keyword property value))))
244
(defsubst semantic-lex-keyword-get (name property)
  "For keyword with NAME, return its PROPERTY value.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    (if (null keyword)
        (semantic-lex-keyword-invalid name)
      (get keyword property))))
251
(defun semantic-lex-make-keyword-table (specs &optional propspecs)
  "Convert keyword SPECS into an obarray and return it.
SPECS must be a list of (NAME . TOKSYM) elements, where:

  NAME is the name of the keyword symbol to define.
  TOKSYM is the lexical token symbol of that keyword.

If optional argument PROPSPECS is non-nil, then interpret it, and
apply those properties.
PROPSPECS must be a list of (NAME PROPERTY VALUE) elements."
  ;; Dynamically rebind the obarray so the keyword helpers below
  ;; operate on the fresh table rather than the buffer-local one.
  (let ((semantic-flex-keywords-obarray (make-vector 13 0)))
    ;; Define each keyword.
    (dolist (spec specs)
      (semantic-lex-keyword-set (car spec) (cdr spec)))
    ;; Attach any requested properties.
    (dolist (spec propspecs)
      (semantic-lex-keyword-put (car spec) (nth 1 spec) (nth 2 spec)))
    semantic-flex-keywords-obarray))
276
(defsubst semantic-lex-map-keywords (fun &optional property)
  "Call function FUN on every lexical keyword.
If optional PROPERTY is non-nil, call FUN only on every keyword which
has a PROPERTY value.  FUN receives a lexical keyword as argument."
  (semantic-lex-map-symbols
   fun semantic-flex-keywords-obarray property))
283
(defun semantic-lex-keywords (&optional property)
  "Return a list of lexical keywords.
If optional PROPERTY is non-nil, return only keywords which have a
PROPERTY set."
  (let ((keywords nil))
    (semantic-lex-map-keywords
     (lambda (symbol) (push symbol keywords))
     property)
    keywords))
9573e58b
CY
293
;;; Inline functions:

;; Forward declarations for variables defined later in this file.
;; They keep the byte-compiler from warning about free variables in
;; the defsubsts below.
(defvar semantic-lex-unterminated-syntax-end-function)
(defvar semantic-lex-analysis-bounds)
(defvar semantic-lex-end-point)
299
(defsubst semantic-lex-token-bounds (token)
  "Fetch the start and end locations of the lexical token TOKEN.
Return a pair (START . END).
TOKEN is either (CLASS START . END) or (CLASS VALUE START . END);
a numeric second element distinguishes the first shape."
  (if (numberp (car (cdr token)))
      (cdr token)
    (cdr (cdr token))))
306
(defsubst semantic-lex-token-start (token)
  "Fetch the start position of the lexical token TOKEN.
The start is the `car' of the (START . END) pair returned by
`semantic-lex-token-bounds'.
See also the function `semantic-lex-token'."
  (car (semantic-lex-token-bounds token)))
311
(defsubst semantic-lex-token-end (token)
  "Fetch the end position of the lexical token TOKEN.
The end is the `cdr' of the (START . END) pair returned by
`semantic-lex-token-bounds'.
See also the function `semantic-lex-token'."
  (cdr (semantic-lex-token-bounds token)))
316
(defsubst semantic-lex-unterminated-syntax-detected (syntax)
  "Inside a lexical analyzer, use this when unterminated syntax was found.
Argument SYNTAX indicates the type of syntax that is unterminated.
The job of this function is to move (point) to a new logical location
so that analysis can continue, if possible.
The new location is computed by calling
`semantic-lex-unterminated-syntax-end-function' with SYNTAX and the
bounds of the current analysis; that function may instead abort the
analysis via `throw' or `signal'.  `semantic-lex-end-point' is kept
in sync with the new position."
  (goto-char
   (funcall semantic-lex-unterminated-syntax-end-function
            syntax
            (car semantic-lex-analysis-bounds)
            (cdr semantic-lex-analysis-bounds)
            ))
  (setq semantic-lex-end-point (point)))
57e622d9
CY
329\f
330;;; Type table handling.
331;;
332;; The lexical type table manages types that occur in a grammar file
333;; with the %type declaration. Types represent different syntaxes.
334;; See code for `semantic-lex-preset-default-types' for the classic
335;; types of syntax.
;; Type obarray: one interned symbol per %type declared in a grammar.
;; The symbol's value holds the token specs (see
;; `semantic-lex-make-type-table'); its plist holds properties such as
;; `syntax' and `matchdatatype'.
(defvar semantic-lex-types-obarray nil
  "Buffer local types obarray for the lexical analyzer.")
(make-variable-buffer-local 'semantic-lex-types-obarray)

(defmacro semantic-lex-type-invalid (type)
  "Signal that TYPE is an invalid lexical type name.
Raises a `wrong-type-argument' error naming `semantic-lex-type-p'
as the failed predicate."
  `(signal 'wrong-type-argument '(semantic-lex-type-p ,type)))
343
(defsubst semantic-lex-type-symbol (type)
  "Return symbol with TYPE name, or nil if not found.
Returns nil when TYPE is not a string or when
`semantic-lex-types-obarray' is not a valid obarray."
  (when (and (arrayp semantic-lex-types-obarray)
             (stringp type))
    (intern-soft type semantic-lex-types-obarray)))
349
(defsubst semantic-lex-type-p (type)
  "Return non-nil if a symbol with TYPE name exists.
The non-nil result is the symbol's stored value; a type whose stored
value is nil is thus reported as absent."
  (let ((sym (semantic-lex-type-symbol type)))
    (and sym (symbol-value sym))))
354
(defsubst semantic-lex-type-set (type value)
  "Set value of symbol with TYPE name to VALUE and return VALUE.
Interns TYPE in `semantic-lex-types-obarray', creating the symbol if
it does not already exist."
  (set (intern type semantic-lex-types-obarray) value))
358
(defsubst semantic-lex-type-value (type &optional noerror)
  "Return value of symbol with TYPE name.
If optional argument NOERROR is non-nil return nil if a symbol with
TYPE name does not exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    (cond (sym (symbol-value sym))
          (noerror nil)
          (t (semantic-lex-type-invalid type)))))
368
(defsubst semantic-lex-type-put (type property value &optional add)
  "For symbol with TYPE name, set its PROPERTY to VALUE.
If optional argument ADD is non-nil, create a new symbol with TYPE
name if it does not already exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    (when (null sym)
      (unless add (semantic-lex-type-invalid type))
      ;; Create the symbol with a nil value, then fetch it back.
      (semantic-lex-type-set type nil)
      (setq sym (semantic-lex-type-symbol type)))
    (put sym property value)))
379
(defsubst semantic-lex-type-get (type property &optional noerror)
  "For symbol with TYPE name, return its PROPERTY value.
If optional argument NOERROR is non-nil return nil if a symbol with
TYPE name does not exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    (cond (sym (get sym property))
          (noerror nil)
          (t (semantic-lex-type-invalid type)))))
389
(defun semantic-lex-preset-default-types ()
  "Install useful default properties for well known types.
Each entry below supplies a type name, its `matchdatatype' and its
`syntax' property (see the type table in the Commentary)."
  (dolist (spec '(("punctuation" string  "\\(\\s.\\|\\s$\\|\\s'\\)+")
                  ("keyword"     keyword "\\(\\sw\\|\\s_\\)+")
                  ("symbol"      regexp  "\\(\\sw\\|\\s_\\)+")
                  ("string"      sexp    "\\s\"")
                  ;; `number' syntax is the symbol naming the regexp
                  ;; variable, resolved at match time.
                  ("number"      regexp  semantic-lex-number-expression)
                  ("block"       block   "\\s(\\|\\s)")))
    ;; Creating via `matchdatatype' (ADD set) guarantees the symbol
    ;; exists before `syntax' is attached.
    (semantic-lex-type-put (nth 0 spec) 'matchdatatype (nth 1 spec) t)
    (semantic-lex-type-put (nth 0 spec) 'syntax (nth 2 spec))))
405
(defun semantic-lex-make-type-table (specs &optional propspecs)
  "Convert type SPECS into an obarray and return it.
SPECS must be a list of (TYPE . TOKENS) elements, where:

  TYPE is the name of the type symbol to define.
  TOKENS is an list of (TOKSYM . MATCHER) elements, where:

    TOKSYM is any lexical token symbol.
    MATCHER is a string or regexp a text must match to be such a
    lexical token.

If optional argument PROPSPECS is non-nil, then interpret it, and
apply those properties.
PROPSPECS must be a list of (TYPE PROPERTY VALUE)."
  ;; Create the symbol hash table and rebind it dynamically so the
  ;; type helpers operate on the fresh table.
  (let ((semantic-lex-types-obarray (make-vector 13 0))
        default alist)
    ;; Fill it with stuff.
    (dolist (spec specs)
      (let ((type (car spec)))
        (setq default nil
              alist nil)
        (dolist (token (cdr spec))
          (if (cdr token)
              (push token alist)
            ;; A spec with no matcher names the default token.
            (setq token (car token))
            (when default
              ;; Bug fix: the NEW default goes with "changed to" and
              ;; the OLD one with "was"; the arguments were swapped,
              ;; printing the old value as the new one.
              (message
               "*Warning* default value of <%s> tokens changed to %S, was %S"
               type token default))
            (setq default token)))
        ;; Ensure the default matching spec is the first one.
        (semantic-lex-type-set type (cons default (nreverse alist)))))
    ;; Install useful default types & properties.
    (semantic-lex-preset-default-types)
    ;; Apply all properties, creating each type if necessary (ADD=t).
    (dolist (spec propspecs)
      (semantic-lex-type-put (car spec) (nth 1 spec) (nth 2 spec) t))
    semantic-lex-types-obarray))
453
(defsubst semantic-lex-map-types (fun &optional property)
  "Call function FUN on every lexical type.
If optional PROPERTY is non-nil, call FUN only on every type symbol
which has a PROPERTY value.  FUN receives a type symbol as argument."
  (semantic-lex-map-symbols
   fun semantic-lex-types-obarray property))
460
(defun semantic-lex-types (&optional property)
  "Return a list of lexical type symbols.
If optional PROPERTY is non-nil, return only type symbols which have
PROPERTY set."
  (let ((types nil))
    (semantic-lex-map-types
     (lambda (symbol) (push symbol types))
     property)
    types))
470\f
471;;; Lexical Analyzer framework settings
472;;
473
(defvar semantic-lex-analyzer 'semantic-flex
  "The lexical analyzer used for a given buffer.
See `semantic-lex' for documentation.
For compatibility with Semantic 1.x it defaults to `semantic-flex'.")
(make-variable-buffer-local 'semantic-lex-analyzer)

(defvar semantic-lex-tokens
  '(
    (bol)
    (charquote)
    (close-paren)
    (comment)
    (newline)
    (open-paren)
    (punctuation)
    (semantic-list)
    (string)
    (symbol)
    (whitespace)
    )
  "An alist of semantic token types.
As of December 2001 (semantic 1.4beta13), this variable is not used in
any code.  The only use is to refer to the doc-string from elsewhere.

The key to this alist is the symbol representing token type that
\\[semantic-flex] returns.  These are

  - bol:           Empty string matching a beginning of line.
                   This token is produced with
                   `semantic-lex-beginning-of-line'.

  - charquote:     String sequences that match `\\s\\+' regexp.
                   This token is produced with `semantic-lex-charquote'.

  - close-paren:   Characters that match `\\s)' regexp.
                   These are typically `)', `}', `]', etc.
                   This token is produced with
                   `semantic-lex-close-paren'.

  - comment:       A comment chunk.  These token types are not
                   produced by default.
                   This token is produced with `semantic-lex-comments'.
                   Comments are ignored with `semantic-lex-ignore-comments'.
                   Comments are treated as whitespace with
                   `semantic-lex-comments-as-whitespace'.

  - newline        Characters matching `\\s-*\\(\n\\|\\s>\\)' regexp.
                   This token is produced with `semantic-lex-newline'.

  - open-paren:    Characters that match `\\s(' regexp.
                   These are typically `(', `{', `[', etc.
                   If `semantic-lex-paren-or-list' is used,
                   then `open-paren' is not usually generated unless
                   the `depth' argument to \\[semantic-lex] is
                   greater than 0.
                   This token is always produced if the analyzer
                   `semantic-lex-open-paren' is used.

  - punctuation:   Characters matching `\\(\\s.\\|\\s$\\|\\s'\\)'
                   regexp.
                   This token is produced with `semantic-lex-punctuation'.
                   Always specify this analyzer after the comment
                   analyzer.

  - semantic-list: String delimited by matching parenthesis, braces,
                   etc.  that the lexer skipped over, because the
                   `depth' parameter to \\[semantic-flex] was not high
                   enough.
                   This token is produced with `semantic-lex-paren-or-list'.

  - string:        Quoted strings, i.e., string sequences that start
                   and end with characters matching `\\s\"'
                   regexp.  The lexer relies on `forward-sexp' to
                   find the matching end.
                   This token is produced with `semantic-lex-string'.

  - symbol:        String sequences that match `\\(\\sw\\|\\s_\\)+'
                   regexp.
                   This token is produced with
                   `semantic-lex-symbol-or-keyword'.  Always add this analyzer
                   after `semantic-lex-number', or other analyzers that
                   match its regular expression.

  - whitespace:    Characters that match `\\s-+' regexp.
                   This token is produced with `semantic-lex-whitespace'.")

(defvar semantic-lex-syntax-modifications nil
  "Changes to the syntax table for this buffer.
These changes are active only while the buffer is being flexed.
This is a list where each element has the form:
  (CHAR CLASS)
CHAR is the char passed to `modify-syntax-entry',
and CLASS is the string also passed to `modify-syntax-entry' to define
what syntax class CHAR has.")
(make-variable-buffer-local 'semantic-lex-syntax-modifications)

(defvar semantic-lex-syntax-table nil
  "Syntax table used by lexical analysis.
See also `semantic-lex-syntax-modifications'.")
(make-variable-buffer-local 'semantic-lex-syntax-table)

(defvar semantic-lex-comment-regex nil
  "Regular expression for identifying comment start during lexical analysis.
This may be automatically set when semantic initializes in a mode, but
may need to be overridden for some special languages.")
(make-variable-buffer-local 'semantic-lex-comment-regex)

(defvar semantic-lex-number-expression
  ;; This expression was written by David Ponce for Java, and copied
  ;; here for C and any other similar language.
  (eval-when-compile
    (concat "\\("
            "\\<[0-9]+[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][fFdD]\\>"
            "\\|"
            "\\<[0-9]+[.]"
            "\\|"
            "[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<0[xX][0-9a-fA-F]+[lL]?\\>"
            "\\|"
            "\\<[0-9]+[lLfFdD]?\\>"
            "\\)"
            ))
  "Regular expression for matching a number.
If this value is nil, no number extraction is done during lex.
This expression tries to match C and Java like numbers.

DECIMAL_LITERAL:
    [1-9][0-9]*
  ;
HEX_LITERAL:
    0[xX][0-9a-fA-F]+
  ;
OCTAL_LITERAL:
    0[0-7]*
  ;
INTEGER_LITERAL:
    <DECIMAL_LITERAL>[lL]?
  | <HEX_LITERAL>[lL]?
  | <OCTAL_LITERAL>[lL]?
  ;
EXPONENT:
    [eE][+-]?[09]+
  ;
FLOATING_POINT_LITERAL:
    [0-9]+[.][0-9]*<EXPONENT>?[fFdD]?
  | [.][0-9]+<EXPONENT>?[fFdD]?
  | [0-9]+<EXPONENT>[fFdD]?
  | [0-9]+<EXPONENT>?[fFdD]
  ;")
(make-variable-buffer-local 'semantic-lex-number-expression)

(defvar semantic-lex-depth 0
  "Default lexing depth.
This specifies how many lists to create tokens in.")
(make-variable-buffer-local 'semantic-lex-depth)

(defvar semantic-lex-unterminated-syntax-end-function
  (lambda (syntax syntax-start lex-end) lex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
LEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or to exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.
The default value simply continues to LEX-END.")
647
648;;; Interactive testing commands
649
55b522b2
CY
650(declare-function semantic-elapsed-time "semantic")
651
57e622d9
CY
(defun semantic-lex-test (arg)
  "Test the semantic lexer in the current buffer.
If universal argument ARG, then try the whole buffer."
  (interactive "P")
  (require 'semantic)
  (let ((started (current-time))
        (stream (semantic-lex (if arg (point-min) (point))
                              (point-max))))
    (message "Elapsed Time: %.2f seconds."
             (semantic-elapsed-time started (current-time)))
    ;; Pretty-print the token stream into a dedicated buffer.
    (pop-to-buffer "*Lexer Output*")
    (require 'pp)
    (erase-buffer)
    (insert (pp-to-string stream))
    (goto-char (point-min))))
670
57e622d9
CY
(defvar semantic-lex-debug nil
  "When non-nil, debug the local lexical analyzer.
Checked by `semantic-lex-debug-break' to pause after each token.")
673
(defun semantic-lex-debug (arg)
  "Debug the semantic lexer in the current buffer.
Argument ARG specifies whether to analyze the whole buffer, or start
at point.  While engaged, each token identified by the lexer will be
highlighted in the target buffer.  A description of the current token
will be displayed in the minibuffer.  Press SPC to move to the next
lexical token."
  (interactive "P")
  (require 'semantic/debug)
  ;; Dynamically enable debugging just for this run.
  (let ((semantic-lex-debug t))
    (semantic-lex-test arg)))
684
(defun semantic-lex-highlight-token (token)
  "Highlight the lexical TOKEN.
TOKEN is a lexical token with a START and END position.
Return the overlay."
  (let ((overlay (semantic-make-overlay (semantic-lex-token-start token)
                                        (semantic-lex-token-end token))))
    (semantic-overlay-put overlay 'face 'highlight)
    overlay))
693
57e622d9
CY
694;;; Lexical analyzer creation
695;;
696;; Code for creating a lex function from lists of analyzers.
697;;
698;; A lexical analyzer is created from a list of individual analyzers.
699;; Each individual analyzer specifies a single match, and code that
700;; goes with it.
701;;
702;; Creation of an analyzer assembles these analyzers into a new function
703;; with the behaviors of all the individual analyzers.
704;;
(defmacro semantic-lex-one-token (analyzers)
  "Calculate one token from the current buffer at point.
Uses locally bound variables from `define-lex'.
Argument ANALYZERS is the list of analyzers being used.
Expands into a `cond' whose clauses are the compile-time values of
the analyzer symbols, so the first analyzer whose condition matches
runs its forms."
  (cons 'cond (mapcar #'symbol-value analyzers)))
710
;; The following variables hold per-run lexer state.  `define-lex'
;; rebinds them with `let*' for each analysis pass.
(defvar semantic-lex-end-point nil
  "The end point as tracked through lexical functions.")

(defvar semantic-lex-current-depth nil
  "The current depth as tracked through lexical functions.")

(defvar semantic-lex-maximum-depth nil
  "The maximum depth of parenthesis as tracked through lexical functions.")

(defvar semantic-lex-token-stream nil
  "The current token stream we are collecting.
Collected in reverse order; `define-lex' reverses it before return.")

(defvar semantic-lex-analysis-bounds nil
  "The bounds of the current analysis.
A pair (START . END) of buffer positions.")

(defvar semantic-lex-block-streams nil
  "Streams of tokens inside collapsed blocks.
This is an alist of (ANCHOR . STREAM) elements where ANCHOR is the
start position of the block, and STREAM is the list of tokens in that
block.")

(define-obsolete-variable-alias 'semantic-lex-reset-hooks
  'semantic-lex-reset-functions "24.3")
(defvar semantic-lex-reset-functions nil
  "Abnormal hook used by major-modes to reset lexical analyzers.
Hook functions are called with START and END values for the
current lexical pass.  Should be set with `add-hook', specifying
a LOCAL option.")

;; Stack of nested blocks.
(defvar semantic-lex-block-stack nil)
;;(defvar semantic-lex-timeout 5
;;  "*Number of sections of lexing before giving up.")
744
62a81506
CY
(defsubst semantic-lex-debug-break (token)
  "Break during lexical analysis at TOKEN.
No-op unless `semantic-lex-debug' is non-nil.  Highlights TOKEN,
waits for a key press, and always removes the highlight overlay
afterwards (the `unwind-protect' guarantees cleanup even if the
user quits)."
  (when semantic-lex-debug
    (let ((o nil))
      (unwind-protect
          (progn
            (when token
              (setq o (semantic-lex-highlight-token token)))
            (semantic-read-event
             (format "%S :: Depth: %d :: SPC - continue" token semantic-lex-current-depth))
            )
        (when o
          (semantic-overlay-delete o))))))
758
57e622d9
CY
(defmacro define-lex (name doc &rest analyzers)
  "Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
ANALYZERS are small code snippets of analyzers to use when
building the new NAMED analyzer.  Only use analyzers which
are written to be used in `define-lex'.
Each analyzer should be an analyzer created with `define-lex-analyzer'.
Note: The order in which analyzers are listed is important.
If two analyzers can match the same text, it is important to order the
analyzers so that the one you want to match first occurs first.  For
example, it is good to put a number analyzer in front of a symbol
analyzer which might mistake a number for a symbol."
  `(defun ,name (start end &optional depth length)
     ,(concat doc "\nSee `semantic-lex' for more information.")
     ;; Make sure the state of block parsing starts over.
     (setq semantic-lex-block-streams nil)
     ;; Allow specialty reset items.
     (run-hook-with-args 'semantic-lex-reset-functions start end)
     ;; Lexing state.
     (let* (;(starttime (current-time))
	    (starting-position (point))
	    (semantic-lex-token-stream nil)
	    (semantic-lex-block-stack nil)
	    (tmp-start start)
	    (semantic-lex-end-point start)
	    (semantic-lex-current-depth 0)
	    ;; Use the default depth when not specified.
	    (semantic-lex-maximum-depth
	     (or depth semantic-lex-depth))
	    ;; Bounds needed for unterminated syntax
	    (semantic-lex-analysis-bounds (cons start end))
	    ;; This entry prevents text properties from
	    ;; confusing our lexical analysis.  See Emacs 22 (CVS)
	    ;; version of C++ mode with template hack text properties.
	    (parse-sexp-lookup-properties nil)
	    )
       ;; Maybe REMOVE THIS LATER.
       ;; Trying to find incremental parser bug.
       (when (> end (point-max))
         (error ,(format "%s: end (%%d) > point-max (%%d)" name)
                end (point-max)))
       (with-syntax-table semantic-lex-syntax-table
         (goto-char start)
         ;; Run analyzers until the region is consumed or LENGTH
         ;; tokens have been collected.
         (while (and (< (point) end)
                     (or (not length)
			 (<= (length semantic-lex-token-stream) length)))
           (semantic-lex-one-token ,analyzers)
           ;; An analyzer that neither moved point nor pushed a token
           ;; would spin forever; detect that explicitly.
           (when (eq semantic-lex-end-point tmp-start)
             (error ,(format "%s: endless loop at %%d, after %%S" name)
                    tmp-start (car semantic-lex-token-stream)))
           (setq tmp-start semantic-lex-end-point)
           (goto-char semantic-lex-end-point)
	   ;;(when (> (semantic-elapsed-time starttime (current-time))
	   ;;	    semantic-lex-timeout)
	   ;;  (error "Timeout during lex at char %d" (point)))
	   (semantic-throw-on-input 'lex)
	   (semantic-lex-debug-break (car semantic-lex-token-stream))
	   ))
       ;; Check that there is no unterminated block.
       (when semantic-lex-block-stack
         (let* ((last (pop semantic-lex-block-stack))
                (blk last))
           (while blk
             (message
              ,(format "%s: `%%s' block from %%S is unterminated" name)
              (car blk) (cadr blk))
             (setq blk (pop semantic-lex-block-stack)))
           (semantic-lex-unterminated-syntax-detected (car last))))
       ;; Return to where we started.
       ;; Do not wrap in protective stuff so that if there is an error
       ;; thrown, the user knows where.
       (goto-char starting-position)
       ;; Return the token stream
       (nreverse semantic-lex-token-stream))))
833\f
834;;; Collapsed block tokens delimited by any tokens.
835;;
836(defun semantic-lex-start-block (syntax)
837 "Mark the last read token as the beginning of a SYNTAX block."
838 (if (or (not semantic-lex-maximum-depth)
839 (< semantic-lex-current-depth semantic-lex-maximum-depth))
840 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
841 (push (list syntax (car semantic-lex-token-stream))
842 semantic-lex-block-stack)))
843
844(defun semantic-lex-end-block (syntax)
845 "Process the end of a previously marked SYNTAX block.
846That is, collapse the tokens inside that block, including the
847beginning and end of block tokens, into a high level block token of
848class SYNTAX.
849The token at beginning of block is the one marked by a previous call
850to `semantic-lex-start-block'. The current token is the end of block.
851The collapsed tokens are saved in `semantic-lex-block-streams'."
852 (if (null semantic-lex-block-stack)
853 (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
854 (let* ((stream semantic-lex-token-stream)
855 (blk (pop semantic-lex-block-stack))
856 (bstream (cdr blk))
857 (first (car bstream))
858 (last (pop stream)) ;; The current token mark the EOBLK
859 tok)
860 (if (not (eq (car blk) syntax))
861 ;; SYNTAX doesn't match the syntax of the current block in
862 ;; the stack. So we encountered the end of the SYNTAX block
863 ;; before the end of the current one in the stack which is
864 ;; signaled unterminated.
865 (semantic-lex-unterminated-syntax-detected (car blk))
866 ;; Move tokens found inside the block from the main stream
867 ;; into a separate block stream.
868 (while (and stream (not (eq (setq tok (pop stream)) first)))
869 (push tok bstream))
870 ;; The token marked as beginning of block was not encountered.
871 ;; This should not happen!
872 (or (eq tok first)
873 (error "Token %S not found at beginning of block `%s'"
874 first syntax))
875 ;; Save the block stream for future reuse, to avoid to redo
876 ;; the lexical analysis of the block content!
877 ;; Anchor the block stream with its start position, so we can
878 ;; use: (cdr (assq start semantic-lex-block-streams)) to
879 ;; quickly retrieve the lexical stream associated to a block.
880 (setcar blk (semantic-lex-token-start first))
881 (setcdr blk (nreverse bstream))
882 (push blk semantic-lex-block-streams)
883 ;; In the main stream, replace the tokens inside the block by
884 ;; a high level block token of class SYNTAX.
885 (setq semantic-lex-token-stream stream)
886 (semantic-lex-push-token
887 (semantic-lex-token
888 syntax (car blk) (semantic-lex-token-end last)))
889 ))))
890\f
891;;; Lexical token API
892;;
893;; Functions for accessing parts of a token. Use these functions
894;; instead of accessing the list structure directly because the
895;; contents of the lexical may change.
896;;
897(defmacro semantic-lex-token (symbol start end &optional str)
898 "Create a lexical token.
899SYMBOL is a symbol representing the class of syntax found.
900START and END define the bounds of the token in the current buffer.
a30e71ae
JB
901Optional STR is the string for the token only if the bounds in
902the buffer do not cover the string they represent. (As from
57e622d9 903macro expansion.)"
5a89f0a7 904 ;; This if statement checks the existence of a STR argument at
57e622d9
CY
905 ;; compile time, where STR is some symbol or constant. If the
 906 ;; variable STR (runtime) is nil, this will make an incorrect decision.
907 ;;
908 ;; It is like this to maintain the original speed of the compiled
909 ;; code.
910 (if str
911 `(cons ,symbol (cons ,str (cons ,start ,end)))
912 `(cons ,symbol (cons ,start ,end))))
913
914(defun semantic-lex-token-p (thing)
915 "Return non-nil if THING is a semantic lex token.
916This is an exhaustively robust check."
917 (and (consp thing)
918 (symbolp (car thing))
919 (or (and (numberp (nth 1 thing))
920 (numberp (nthcdr 2 thing)))
921 (and (stringp (nth 1 thing))
922 (numberp (nth 2 thing))
923 (numberp (nthcdr 3 thing)))
924 ))
925 )
926
927(defun semantic-lex-token-with-text-p (thing)
928 "Return non-nil if THING is a semantic lex token.
929This is an exhaustively robust check."
930 (and (consp thing)
931 (symbolp (car thing))
932 (= (length thing) 4)
933 (stringp (nth 1 thing))
934 (numberp (nth 2 thing))
935 (numberp (nth 3 thing)))
936 )
937
938(defun semantic-lex-token-without-text-p (thing)
939 "Return non-nil if THING is a semantic lex token.
940This is an exhaustively robust check."
941 (and (consp thing)
942 (symbolp (car thing))
943 (= (length thing) 3)
944 (numberp (nth 1 thing))
945 (numberp (nth 2 thing)))
946 )
947
55b522b2
CY
948(eval-and-compile
949
57e622d9
CY
950(defun semantic-lex-expand-block-specs (specs)
951 "Expand block specifications SPECS into a Lisp form.
952SPECS is a list of (BLOCK BEGIN END) elements where BLOCK, BEGIN, and
953END are token class symbols that indicate to produce one collapsed
954BLOCK token from tokens found between BEGIN and END ones.
955BLOCK must be a non-nil symbol, and at least one of the BEGIN or END
956symbols must be non-nil too.
957When BEGIN is non-nil, generate a call to `semantic-lex-start-block'
958when a BEGIN token class is encountered.
959When END is non-nil, generate a call to `semantic-lex-end-block' when
960an END token class is encountered."
961 (let ((class (make-symbol "class"))
962 (form nil))
963 (dolist (spec specs)
964 (when (car spec)
965 (when (nth 1 spec)
966 (push `((eq ',(nth 1 spec) ,class)
967 (semantic-lex-start-block ',(car spec)))
968 form))
969 (when (nth 2 spec)
970 (push `((eq ',(nth 2 spec) ,class)
971 (semantic-lex-end-block ',(car spec)))
972 form))))
973 (when form
974 `((let ((,class (semantic-lex-token-class
975 (car semantic-lex-token-stream))))
976 (cond ,@(nreverse form))))
977 )))
55b522b2 978)
57e622d9
CY
979
980(defmacro semantic-lex-push-token (token &rest blockspecs)
981 "Push TOKEN in the lexical analyzer token stream.
982Return the lexical analysis current end point.
983If optional arguments BLOCKSPECS is non-nil, it specifies to process
984collapsed block tokens. See `semantic-lex-expand-block-specs' for
985more details.
986This macro should only be called within the bounds of
987`define-lex-analyzer'. It changes the values of the lexical analyzer
988variables `token-stream' and `semantic-lex-end-point'. If you need to
989move `semantic-lex-end-point' somewhere else, just modify this
990variable after calling `semantic-lex-push-token'."
991 `(progn
992 (push ,token semantic-lex-token-stream)
993 ,@(semantic-lex-expand-block-specs blockspecs)
994 (setq semantic-lex-end-point
995 (semantic-lex-token-end (car semantic-lex-token-stream)))
996 ))
997
998(defsubst semantic-lex-token-class (token)
999 "Fetch the class of the lexical token TOKEN.
1000See also the function `semantic-lex-token'."
1001 (car token))
1002
57e622d9
CY
1003(defsubst semantic-lex-token-text (token)
1004 "Fetch the text associated with the lexical token TOKEN.
1005See also the function `semantic-lex-token'."
1006 (if (stringp (car (cdr token)))
1007 (car (cdr token))
1008 (buffer-substring-no-properties
1009 (semantic-lex-token-start token)
1010 (semantic-lex-token-end token))))
1011
1012(defun semantic-lex-init ()
1013 "Initialize any lexical state for this buffer."
1014 (unless semantic-lex-comment-regex
1015 (setq semantic-lex-comment-regex
1016 (if comment-start-skip
1017 (concat "\\(\\s<\\|" comment-start-skip "\\)")
1018 "\\(\\s<\\)")))
1019 ;; Setup the lexer syntax-table
1020 (setq semantic-lex-syntax-table (copy-syntax-table (syntax-table)))
1021 (dolist (mod semantic-lex-syntax-modifications)
1022 (modify-syntax-entry
1023 (car mod) (nth 1 mod) semantic-lex-syntax-table)))
1024
55b522b2 1025;;;###autoload
57e622d9
CY
1026(define-overloadable-function semantic-lex (start end &optional depth length)
1027 "Lexically analyze text in the current buffer between START and END.
1028Optional argument DEPTH indicates at what level to scan over entire
1029lists. The last argument, LENGTH specifies that `semantic-lex'
1030should only return LENGTH tokens. The return value is a token stream.
1031Each element is a list of the form
1032 (symbol start-expression . end-expression)
1033where SYMBOL denotes the token type.
1034See `semantic-lex-tokens' variable for details on token types. END
1035does not mark the end of the text scanned, only the end of the
1036beginning of text scanned. Thus, if a string extends past END, the
1037end of the return token will be larger than END. To truly restrict
1038scanning, use `narrow-to-region'."
1039 (funcall semantic-lex-analyzer start end depth length))
1040
1041(defsubst semantic-lex-buffer (&optional depth)
1042 "Lex the current buffer.
1043Optional argument DEPTH is the depth to scan into lists."
1044 (semantic-lex (point-min) (point-max) depth))
1045
1046(defsubst semantic-lex-list (semlist depth)
1047 "Lex the body of SEMLIST to DEPTH."
1048 (semantic-lex (semantic-lex-token-start semlist)
1049 (semantic-lex-token-end semlist)
1050 depth))
1051\f
1052;;; Analyzer creation macros
1053;;
1054;; An individual analyzer is a condition and code that goes with it.
1055;;
1056;; Created analyzers become variables with the code associated with them
1057;; as the symbol value. These analyzers are assembled into a lexer
1058;; to create new lexical analyzers.
57e622d9
CY
1059
1060(defcustom semantic-lex-debug-analyzers nil
1061 "Non-nil means to debug analyzers with syntax protection.
1062Only in effect if `debug-on-error' is also non-nil."
1063 :group 'semantic
1064 :type 'boolean)
1065
1066(defmacro semantic-lex-unterminated-syntax-protection (syntax &rest forms)
1067 "For SYNTAX, execute FORMS with protection for unterminated syntax.
1068If FORMS throws an error, treat this as a syntax problem, and
1069execute the unterminated syntax code. FORMS should return a position.
9bf6c65c 1070Regardless of an error, the cursor should be moved to the end of
57e622d9
CY
1071the desired syntax, and a position returned.
1072If `debug-on-error' is set, errors are not caught, so that you can
1073debug them.
1074Avoid using a large FORMS since it is duplicated."
1075 `(if (and debug-on-error semantic-lex-debug-analyzers)
1076 (progn ,@forms)
1077 (condition-case nil
1078 (progn ,@forms)
1079 (error
1080 (semantic-lex-unterminated-syntax-detected ,syntax)))))
1081(put 'semantic-lex-unterminated-syntax-protection
1082 'lisp-indent-function 1)
1083
1084(defmacro define-lex-analyzer (name doc condition &rest forms)
1085 "Create a single lexical analyzer NAME with DOC.
1086When an analyzer is called, the current buffer and point are
1087positioned in a buffer at the location to be analyzed.
1088CONDITION is an expression which returns t if FORMS should be run.
1089Within the bounds of CONDITION and FORMS, the use of backquote
1090can be used to evaluate expressions at compile time.
1091While forms are running, the following variables will be locally bound:
1092 `semantic-lex-analysis-bounds' - The bounds of the current analysis.
1093 of the form (START . END)
1094 `semantic-lex-maximum-depth' - The maximum depth of semantic-list
1095 for the current analysis.
1096 `semantic-lex-current-depth' - The current depth of `semantic-list' that has
9bf6c65c 1097 been descended.
57e622d9
CY
1098 `semantic-lex-end-point' - End Point after match.
1099 Analyzers should set this to a buffer location if their
1100 match string does not represent the end of the matched text.
1101 `semantic-lex-token-stream' - The token list being collected.
1102 Add new lexical tokens to this list.
1103Proper action in FORMS is to move the value of `semantic-lex-end-point' to
1104after the location of the analyzed entry, and to add any discovered tokens
1105at the beginning of `semantic-lex-token-stream'.
1106This can be done by using `semantic-lex-push-token'."
1107 `(eval-and-compile
1108 (defvar ,name nil ,doc)
1109 (defun ,name nil)
1110 ;; Do this part separately so that re-evaluation rebuilds this code.
1111 (setq ,name '(,condition ,@forms))
1112 ;; Build a single lexical analyzer function, so the doc for
1113 ;; function help is automatically provided, and perhaps the
1114 ;; function could be useful for testing and debugging one
1115 ;; analyzer.
1116 (fset ',name (lambda () ,doc
1117 (let ((semantic-lex-token-stream nil)
1118 (semantic-lex-end-point (point))
1119 (semantic-lex-analysis-bounds
1120 (cons (point) (point-max)))
1121 (semantic-lex-current-depth 0)
1122 (semantic-lex-maximum-depth
1123 semantic-lex-depth)
1124 )
1125 (when ,condition ,@forms)
1126 semantic-lex-token-stream)))
1127 ))
1128
1129(defmacro define-lex-regex-analyzer (name doc regexp &rest forms)
1130 "Create a lexical analyzer with NAME and DOC that will match REGEXP.
1131FORMS are evaluated upon a successful match.
1132See `define-lex-analyzer' for more about analyzers."
1133 `(define-lex-analyzer ,name
1134 ,doc
1135 (looking-at ,regexp)
1136 ,@forms
1137 ))
1138
1139(defmacro define-lex-simple-regex-analyzer (name doc regexp toksym
1140 &optional index
1141 &rest forms)
1142 "Create a lexical analyzer with NAME and DOC that match REGEXP.
1143TOKSYM is the symbol to use when creating a semantic lexical token.
1144INDEX is the index into the match that defines the bounds of the token.
1145Index should be a plain integer, and not specified in the macro as an
1146expression.
1147FORMS are evaluated upon a successful match BEFORE the new token is
1148created. It is valid to ignore FORMS.
1149See `define-lex-analyzer' for more about analyzers."
1150 `(define-lex-analyzer ,name
1151 ,doc
1152 (looking-at ,regexp)
1153 ,@forms
1154 (semantic-lex-push-token
1155 (semantic-lex-token ,toksym
1156 (match-beginning ,(or index 0))
1157 (match-end ,(or index 0))))
1158 ))
1159
1160(defmacro define-lex-block-analyzer (name doc spec1 &rest specs)
1161 "Create a lexical analyzer NAME for paired delimiters blocks.
1162It detects a paired delimiters block or the corresponding open or
1163close delimiter depending on the value of the variable
1164`semantic-lex-current-depth'. DOC is the documentation string of the lexical
1165analyzer. SPEC1 and SPECS specify the token symbols and open, close
1166delimiters used. Each SPEC has the form:
1167
1168\(BLOCK-SYM (OPEN-DELIM OPEN-SYM) (CLOSE-DELIM CLOSE-SYM))
1169
1170where BLOCK-SYM is the symbol returned in a block token. OPEN-DELIM
1171and CLOSE-DELIM are respectively the open and close delimiters
1172identifying a block. OPEN-SYM and CLOSE-SYM are respectively the
1173symbols returned in open and close tokens."
1174 (let ((specs (cons spec1 specs))
1175 spec open olist clist)
1176 (while specs
1177 (setq spec (car specs)
1178 specs (cdr specs)
1179 open (nth 1 spec)
1180 ;; build alist ((OPEN-DELIM OPEN-SYM BLOCK-SYM) ...)
1181 olist (cons (list (car open) (cadr open) (car spec)) olist)
1182 ;; build alist ((CLOSE-DELIM CLOSE-SYM) ...)
1183 clist (cons (nth 2 spec) clist)))
1184 `(define-lex-analyzer ,name
1185 ,doc
1186 (and
1187 (looking-at "\\(\\s(\\|\\s)\\)")
1188 (let ((text (match-string 0)) match)
1189 (cond
1190 ((setq match (assoc text ',olist))
1191 (if (or (not semantic-lex-maximum-depth)
1192 (< semantic-lex-current-depth semantic-lex-maximum-depth))
1193 (progn
1194 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
1195 (semantic-lex-push-token
1196 (semantic-lex-token
1197 (nth 1 match)
1198 (match-beginning 0) (match-end 0))))
1199 (semantic-lex-push-token
1200 (semantic-lex-token
1201 (nth 2 match)
1202 (match-beginning 0)
1203 (save-excursion
1204 (semantic-lex-unterminated-syntax-protection (nth 2 match)
1205 (forward-list 1)
1206 (point)))
1207 ))
1208 ))
1209 ((setq match (assoc text ',clist))
62a81506
CY
1210 (if (> semantic-lex-current-depth 0)
1211 (progn
1212 (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
1213 (semantic-lex-push-token
1214 (semantic-lex-token
1215 (nth 1 match)
1216 (match-beginning 0) (match-end 0)))))))))
57e622d9
CY
1217 )))
1218\f
1219;;; Analyzers
1220;;
1221;; Pre-defined common analyzers.
1222;;
1223(define-lex-analyzer semantic-lex-default-action
1224 "The default action when no other lexical actions match text.
1225This action will just throw an error."
1226 t
1227 (error "Unmatched Text during Lexical Analysis"))
1228
1229(define-lex-analyzer semantic-lex-beginning-of-line
1230 "Detect and create a beginning of line token (BOL)."
1231 (and (bolp)
1232 ;; Just insert a (bol N . N) token in the token stream,
1233 ;; without moving the point. N is the point at the
1234 ;; beginning of line.
1235 (semantic-lex-push-token (semantic-lex-token 'bol (point) (point)))
1236 nil) ;; CONTINUE
1237 ;; We identify and add the BOL token onto the stream, but since
1238 ;; semantic-lex-end-point doesn't move, we always fail CONDITION, and have no
1239 ;; FORMS body.
1240 nil)
1241
1242(define-lex-simple-regex-analyzer semantic-lex-newline
1243 "Detect and create newline tokens."
1244 "\\s-*\\(\n\\|\\s>\\)" 'newline 1)
1245
1246(define-lex-regex-analyzer semantic-lex-newline-as-whitespace
1247 "Detect and create newline tokens.
1248Use this ONLY if newlines are not whitespace characters (such as when
1249they are comment end characters) AND when you want whitespace tokens."
1250 "\\s-*\\(\n\\|\\s>\\)"
1251 ;; Language wants whitespaces. Create a token for it.
1252 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
1253 'whitespace)
1254 ;; Merge whitespace tokens together if they are adjacent. Two
c80e3b4a 1255 ;; whitespace tokens may be separated by a comment which is not in
57e622d9
CY
1256 ;; the token stream.
1257 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
1258 (match-end 0))
1259 (semantic-lex-push-token
1260 (semantic-lex-token
1261 'whitespace (match-beginning 0) (match-end 0)))))
1262
1263(define-lex-regex-analyzer semantic-lex-ignore-newline
1264 "Detect and ignore newline tokens.
1265Use this ONLY if newlines are not whitespace characters (such as when
1266they are comment end characters)."
1267 "\\s-*\\(\n\\|\\s>\\)"
1268 (setq semantic-lex-end-point (match-end 0)))
1269
1270(define-lex-regex-analyzer semantic-lex-whitespace
1271 "Detect and create whitespace tokens."
1272 ;; catch whitespace when needed
1273 "\\s-+"
1274 ;; Language wants whitespaces. Create a token for it.
1275 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
1276 'whitespace)
1277 ;; Merge whitespace tokens together if they are adjacent. Two
c80e3b4a 1278 ;; whitespace tokens may be separated by a comment which is not in
57e622d9
CY
1279 ;; the token stream.
1280 (progn
1281 (setq semantic-lex-end-point (match-end 0))
1282 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
1283 semantic-lex-end-point))
1284 (semantic-lex-push-token
1285 (semantic-lex-token
1286 'whitespace (match-beginning 0) (match-end 0)))))
1287
1288(define-lex-regex-analyzer semantic-lex-ignore-whitespace
1289 "Detect and skip over whitespace tokens."
1290 ;; catch whitespace when needed
1291 "\\s-+"
1292 ;; Skip over the detected whitespace, do not create a token for it.
1293 (setq semantic-lex-end-point (match-end 0)))
1294
1295(define-lex-simple-regex-analyzer semantic-lex-number
1296 "Detect and create number tokens.
1297See `semantic-lex-number-expression' for details on matching numbers,
1298and number formats."
1299 semantic-lex-number-expression 'number)
1300
1301(define-lex-regex-analyzer semantic-lex-symbol-or-keyword
1302 "Detect and create symbol and keyword tokens."
1303 "\\(\\sw\\|\\s_\\)+"
1304 (semantic-lex-push-token
1305 (semantic-lex-token
1306 (or (semantic-lex-keyword-p (match-string 0)) 'symbol)
1307 (match-beginning 0) (match-end 0))))
1308
1309(define-lex-simple-regex-analyzer semantic-lex-charquote
1310 "Detect and create charquote tokens."
1311 ;; Character quoting characters (ie, \n as newline)
1312 "\\s\\+" 'charquote)
1313
1314(define-lex-simple-regex-analyzer semantic-lex-punctuation
1315 "Detect and create punctuation tokens."
1316 "\\(\\s.\\|\\s$\\|\\s'\\)" 'punctuation)
1317
1318(define-lex-analyzer semantic-lex-punctuation-type
1319 "Detect and create a punctuation type token.
e4920bc9 1320Recognized punctuation is defined in the current table of lexical
57e622d9
CY
1321types, as the value of the `punctuation' token type."
1322 (and (looking-at "\\(\\s.\\|\\s$\\|\\s'\\)+")
1323 (let* ((key (match-string 0))
1324 (pos (match-beginning 0))
1325 (end (match-end 0))
1326 (len (- end pos))
1327 (lst (semantic-lex-type-value "punctuation" t))
1328 (def (car lst)) ;; default lexical symbol or nil
1329 (lst (cdr lst)) ;; alist of (LEX-SYM . PUNCT-STRING)
1330 (elt nil))
1331 (if lst
1332 ;; Starting with the longest one, search if the
1333 ;; punctuation string is defined for this language.
1334 (while (and (> len 0) (not (setq elt (rassoc key lst))))
1335 (setq len (1- len)
1336 key (substring key 0 len))))
1337 (if elt ;; Return the punctuation token found
1338 (semantic-lex-push-token
1339 (semantic-lex-token (car elt) pos (+ pos len)))
1340 (if def ;; Return a default generic token
1341 (semantic-lex-push-token
1342 (semantic-lex-token def pos end))
1343 ;; Nothing matched
1344 )))))
1345
1346(define-lex-regex-analyzer semantic-lex-paren-or-list
1347 "Detect open parenthesis.
1348Return either a paren token or a semantic list token depending on
1349`semantic-lex-current-depth'."
1350 "\\s("
1351 (if (or (not semantic-lex-maximum-depth)
1352 (< semantic-lex-current-depth semantic-lex-maximum-depth))
1353 (progn
1354 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
1355 (semantic-lex-push-token
1356 (semantic-lex-token
1357 'open-paren (match-beginning 0) (match-end 0))))
1358 (semantic-lex-push-token
1359 (semantic-lex-token
1360 'semantic-list (match-beginning 0)
1361 (save-excursion
1362 (semantic-lex-unterminated-syntax-protection 'semantic-list
1363 (forward-list 1)
1364 (point))
1365 )))
1366 ))
1367
1368(define-lex-simple-regex-analyzer semantic-lex-open-paren
e1dbe924 1369 "Detect and create an open parenthesis token."
57e622d9
CY
1370 "\\s(" 'open-paren 0 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth)))
1371
1372(define-lex-simple-regex-analyzer semantic-lex-close-paren
e1dbe924 1373 "Detect and create a close parenthesis token."
57e622d9
CY
1374 "\\s)" 'close-paren 0 (setq semantic-lex-current-depth (1- semantic-lex-current-depth)))
1375
1376(define-lex-regex-analyzer semantic-lex-string
1377 "Detect and create a string token."
1378 "\\s\""
1379 ;; Zing to the end of this string.
1380 (semantic-lex-push-token
1381 (semantic-lex-token
1382 'string (point)
1383 (save-excursion
1384 (semantic-lex-unterminated-syntax-protection 'string
1385 (forward-sexp 1)
1386 (point))
1387 ))))
1388
1389(define-lex-regex-analyzer semantic-lex-comments
1390 "Detect and create a comment token."
1391 semantic-lex-comment-regex
1392 (save-excursion
1393 (forward-comment 1)
1394 ;; Generate newline token if enabled
1395 (if (bolp) (backward-char 1))
1396 (setq semantic-lex-end-point (point))
1397 ;; Language wants comments or want them as whitespaces,
1398 ;; link them together.
1399 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'comment)
1400 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
1401 semantic-lex-end-point)
1402 (semantic-lex-push-token
1403 (semantic-lex-token
1404 'comment (match-beginning 0) semantic-lex-end-point)))))
1405
1406(define-lex-regex-analyzer semantic-lex-comments-as-whitespace
1407 "Detect comments and create a whitespace token."
1408 semantic-lex-comment-regex
1409 (save-excursion
1410 (forward-comment 1)
1411 ;; Generate newline token if enabled
1412 (if (bolp) (backward-char 1))
1413 (setq semantic-lex-end-point (point))
1414 ;; Language wants comments or want them as whitespaces,
1415 ;; link them together.
1416 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'whitespace)
1417 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
1418 semantic-lex-end-point)
1419 (semantic-lex-push-token
1420 (semantic-lex-token
1421 'whitespace (match-beginning 0) semantic-lex-end-point)))))
1422
1423(define-lex-regex-analyzer semantic-lex-ignore-comments
1424 "Detect and create a comment token."
1425 semantic-lex-comment-regex
1426 (let ((comment-start-point (point)))
1427 (forward-comment 1)
1428 (if (eq (point) comment-start-point)
1429 ;; In this case our start-skip string failed
1430 ;; to work properly. Lets try and move over
1431 ;; whatever white space we matched to begin
1432 ;; with.
9b026d9f 1433 (skip-syntax-forward "-.'" (point-at-eol))
57e622d9
CY
1434 ;; We may need to back up so newlines or whitespace is generated.
1435 (if (bolp)
1436 (backward-char 1)))
1437 (if (eq (point) comment-start-point)
1438 (error "Strange comment syntax prevents lexical analysis"))
1439 (setq semantic-lex-end-point (point))))
1440\f
1441;;; Comment lexer
1442;;
1443;; Predefined lexers that could be used instead of creating new
91af3942 1444;; analyzers.
57e622d9
CY
1445
1446(define-lex semantic-comment-lexer
1447 "A simple lexical analyzer that handles comments.
1448This lexer will only return comment tokens. It is the default lexer
1449used by `semantic-find-doc-snarf-comment' to snarf up the comment at
1450point."
1451 semantic-lex-ignore-whitespace
1452 semantic-lex-ignore-newline
1453 semantic-lex-comments
1454 semantic-lex-default-action)
1455
1456;;; Test Lexer
1457;;
1458(define-lex semantic-simple-lexer
1459 "A simple lexical analyzer that handles simple buffers.
1460This lexer ignores comments and whitespace, and will return
1461syntax as specified by the syntax table."
1462 semantic-lex-ignore-whitespace
1463 semantic-lex-ignore-newline
1464 semantic-lex-number
1465 semantic-lex-symbol-or-keyword
1466 semantic-lex-charquote
1467 semantic-lex-paren-or-list
1468 semantic-lex-close-paren
1469 semantic-lex-string
1470 semantic-lex-ignore-comments
1471 semantic-lex-punctuation
1472 semantic-lex-default-action)
1473\f
1474;;; Analyzers generated from grammar.
1475;;
1476;; Some analyzers are hand written. Analyzers created with these
1477;; functions are generated from the grammar files.
1478
1479(defmacro define-lex-keyword-type-analyzer (name doc syntax)
1480 "Define a keyword type analyzer NAME with DOC string.
1481SYNTAX is the regexp that matches a keyword syntactic expression."
1482 (let ((key (make-symbol "key")))
1483 `(define-lex-analyzer ,name
1484 ,doc
1485 (and (looking-at ,syntax)
1486 (let ((,key (semantic-lex-keyword-p (match-string 0))))
1487 (when ,key
1488 (semantic-lex-push-token
1489 (semantic-lex-token
1490 ,key (match-beginning 0) (match-end 0)))))))
1491 ))
1492
1493(defmacro define-lex-sexp-type-analyzer (name doc syntax token)
1494 "Define a sexp type analyzer NAME with DOC string.
1495SYNTAX is the regexp that matches the beginning of the s-expression.
1496TOKEN is the lexical token returned when SYNTAX matches."
1497 `(define-lex-regex-analyzer ,name
1498 ,doc
1499 ,syntax
1500 (semantic-lex-push-token
1501 (semantic-lex-token
1502 ,token (point)
1503 (save-excursion
1504 (semantic-lex-unterminated-syntax-protection ,token
1505 (forward-sexp 1)
1506 (point))))))
1507 )
1508
1509(defmacro define-lex-regex-type-analyzer (name doc syntax matches default)
1510 "Define a regexp type analyzer NAME with DOC string.
1511SYNTAX is the regexp that matches a syntactic expression.
1512MATCHES is an alist of lexical elements used to refine the syntactic
1513expression.
1514DEFAULT is the default lexical token returned when no MATCHES."
1515 (if matches
1516 (let* ((val (make-symbol "val"))
1517 (lst (make-symbol "lst"))
1518 (elt (make-symbol "elt"))
1519 (pos (make-symbol "pos"))
1520 (end (make-symbol "end")))
1521 `(define-lex-analyzer ,name
1522 ,doc
1523 (and (looking-at ,syntax)
1524 (let* ((,val (match-string 0))
1525 (,pos (match-beginning 0))
1526 (,end (match-end 0))
1527 (,lst ,matches)
1528 ,elt)
1529 (while (and ,lst (not ,elt))
1530 (if (string-match (cdar ,lst) ,val)
1531 (setq ,elt (caar ,lst))
1532 (setq ,lst (cdr ,lst))))
1533 (semantic-lex-push-token
1534 (semantic-lex-token (or ,elt ,default) ,pos ,end))))
1535 ))
1536 `(define-lex-simple-regex-analyzer ,name
1537 ,doc
1538 ,syntax ,default)
1539 ))
1540
1541(defmacro define-lex-string-type-analyzer (name doc syntax matches default)
1542 "Define a string type analyzer NAME with DOC string.
1543SYNTAX is the regexp that matches a syntactic expression.
1544MATCHES is an alist of lexical elements used to refine the syntactic
1545expression.
1546DEFAULT is the default lexical token returned when no MATCHES."
1547 (if matches
1548 (let* ((val (make-symbol "val"))
1549 (lst (make-symbol "lst"))
1550 (elt (make-symbol "elt"))
1551 (pos (make-symbol "pos"))
1552 (end (make-symbol "end"))
1553 (len (make-symbol "len")))
1554 `(define-lex-analyzer ,name
1555 ,doc
1556 (and (looking-at ,syntax)
1557 (let* ((,val (match-string 0))
1558 (,pos (match-beginning 0))
1559 (,end (match-end 0))
1560 (,len (- ,end ,pos))
1561 (,lst ,matches)
1562 ,elt)
1563 ;; Starting with the longest one, search if a lexical
1564 ;; value matches a token defined for this language.
1565 (while (and (> ,len 0) (not (setq ,elt (rassoc ,val ,lst))))
1566 (setq ,len (1- ,len)
1567 ,val (substring ,val 0 ,len)))
1568 (when ,elt ;; Adjust token end position.
1569 (setq ,elt (car ,elt)
1570 ,end (+ ,pos ,len)))
1571 (semantic-lex-push-token
1572 (semantic-lex-token (or ,elt ,default) ,pos ,end))))
1573 ))
1574 `(define-lex-simple-regex-analyzer ,name
1575 ,doc
1576 ,syntax ,default)
1577 ))
1578
(defmacro define-lex-block-type-analyzer (name doc syntax matches)
  "Define a block type analyzer NAME with DOC string.

SYNTAX is the regexp that matches block delimiters, typically the
open (`\\\\s(') and close (`\\\\s)') parenthesis syntax classes.

MATCHES is a pair (OPEN-SPECS . CLOSE-SPECS) that defines blocks.

  OPEN-SPECS is a list of (OPEN-DELIM OPEN-TOKEN BLOCK-TOKEN) elements
  where:

    OPEN-DELIM is a string: the block open delimiter character.

    OPEN-TOKEN is the lexical token class associated to the OPEN-DELIM
    delimiter.

    BLOCK-TOKEN is the lexical token class associated to the block
    that starts at the OPEN-DELIM delimiter.

  CLOSE-SPECS is a list of (CLOSE-DELIM CLOSE-TOKEN) elements where:

    CLOSE-DELIM is a string: the block end delimiter character.

    CLOSE-TOKEN is the lexical token class associated to the
    CLOSE-DELIM delimiter.

Each element in OPEN-SPECS must have a corresponding element in
CLOSE-SPECS.

The lexer will return a BLOCK-TOKEN token when the value of
`semantic-lex-current-depth' is greater than or equal to the maximum
depth of parenthesis tracking (see also the function `semantic-lex').
Otherwise it will return OPEN-TOKEN and CLOSE-TOKEN tokens.

TO DO: Put the following in the developer's guide and just put a
reference here.

In the grammar:

The value of a block token must be a string that contains a readable
sexp of the form:

  \"(OPEN-TOKEN CLOSE-TOKEN)\"

OPEN-TOKEN and CLOSE-TOKEN represent the block delimiters, and must be
lexical tokens of respectively `open-paren' and `close-paren' types.
Their value is the corresponding delimiter character as a string.

Here is a small example to analyze a parenthesis block:

  %token <block> PAREN_BLOCK \"(LPAREN RPAREN)\"
  %token <open-paren> LPAREN \"(\"
  %token <close-paren> RPAREN \")\"

When the lexer encounters the open-paren delimiter \"(\":

 - If the maximum depth of parenthesis tracking is not reached (that
   is, current depth < max depth), it returns a (LPAREN start . end)
   token, then continue analysis inside the block.  Later, when the
   corresponding close-paren delimiter \")\" will be encountered, it
   will return a (RPAREN start . end) token.

 - If the maximum depth of parenthesis tracking is reached (current
   depth >= max depth), it returns the whole parenthesis block as
   a (PAREN_BLOCK start . end) token."
  ;; Use uninterned symbols for the expansion's locals so the
  ;; generated analyzer cannot capture bindings referenced by the
  ;; SYNTAX or MATCHES forms.
  (let* ((val (make-symbol "val"))
         (lst (make-symbol "lst"))
         (elt (make-symbol "elt")))
    `(define-lex-analyzer ,name
       ,doc
       (and
        (looking-at ,syntax) ;; "\\(\\s(\\|\\s)\\)"
        (let ((,val (match-string 0))
              (,lst ,matches)
              ,elt)
          (cond
           ;; The matched delimiter is an open delimiter (found in
           ;; OPEN-SPECS, the car of MATCHES).
           ((setq ,elt (assoc ,val (car ,lst)))
            (if (or (not semantic-lex-maximum-depth)
                    (< semantic-lex-current-depth semantic-lex-maximum-depth))
                ;; Below the depth limit: emit OPEN-TOKEN (nth 1) and
                ;; keep lexing inside the block.
                (progn
                  (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
                  (semantic-lex-push-token
                   (semantic-lex-token
                    (nth 1 ,elt)
                    (match-beginning 0) (match-end 0))))
              ;; Depth limit reached: emit the whole balanced group as
              ;; a single BLOCK-TOKEN (nth 2), skipping over it with
              ;; `forward-list', protected against unbalanced syntax.
              (semantic-lex-push-token
               (semantic-lex-token
                (nth 2 ,elt)
                (match-beginning 0)
                (save-excursion
                  (semantic-lex-unterminated-syntax-protection (nth 2 ,elt)
                    (forward-list 1)
                    (point)))))))
           ;; The matched delimiter is a close delimiter (found in
           ;; CLOSE-SPECS, the cdr of MATCHES): emit CLOSE-TOKEN and
           ;; pop one tracking level.
           ((setq ,elt (assoc ,val (cdr ,lst)))
            (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
            (semantic-lex-push-token
             (semantic-lex-token
              (nth 1 ,elt)
              (match-beginning 0) (match-end 0))))
           ))))
    ))
1680\f
1681;;; Lexical Safety
1682;;
1683;; The semantic lexers, unlike other lexers, can throw errors on
da6062e6 1684;; unbalanced syntax. Since editing is all about changing text
57e622d9
CY
1685;; we need to provide a convenient way to protect against syntactic
1686;; inequalities.
1687
(defmacro semantic-lex-catch-errors (symbol &rest forms)
  "Using SYMBOL, execute FORMS catching lexical errors.
If FORMS results in a call to the parser that throws a lexical error,
the error will be caught here without the buffer's cache being thrown
out of date.
If there is an error, the syntax that failed is returned.
If there is no error, then the last value of FORMS is returned."
  ;; Uninterned symbols keep the expansion from capturing any
  ;; variables free in FORMS.
  (let ((ret (make-symbol "ret"))
        (syntax (make-symbol "syntax"))
        (start (make-symbol "start"))
        (end (make-symbol "end")))
    ;; Dynamically rebind the unterminated-syntax handler so that a
    ;; lexical failure during FORMS throws to SYMBOL instead of
    ;; signaling an error.
    `(let* ((semantic-lex-unterminated-syntax-end-function
             (lambda (,syntax ,start ,end)
               (throw ',symbol ,syntax)))
            ;; Delete the below when semantic-flex is fully retired.
            (semantic-flex-unterminated-syntax-end-function
             semantic-lex-unterminated-syntax-end-function)
            ;; RET is nil on success (FORMS returns through the final
            ;; nil), or the failing syntax class if thrown to.
            (,ret (catch ',symbol
                    (save-excursion
                      ,@forms
                      nil))))
       ;; Great Sadness.  Assume that FORMS execute within the
       ;; confines of the current buffer only!  Mark this thing
       ;; unparsable iff the special symbol was thrown.  This
       ;; will prevent future calls from parsing, but will allow
       ;; them to still return the cache.
       (when ,ret
         ;; Leave this message off.  If an APP using this fcn wants
         ;; a message, they can do it themselves.  This cleans up
         ;; problems with the idle scheduler obscuring useful data.
         ;;(message "Buffer not currently parsable (%S)." ,ret)
         (semantic-parse-tree-unparseable))
       ,ret)))
;; Indent the body of `semantic-lex-catch-errors' like a form with one
;; distinguished argument (SYMBOL).
(put 'semantic-lex-catch-errors 'lisp-indent-function 1)
1722
1723\f
1724;;; Interfacing with edebug
1725;;
;; Register edebug instrumentation specs for the macros defined in
;; this file, deferred until edebug itself is loaded.
(add-hook
 'edebug-setup-hook
 (lambda ()
   (def-edebug-spec define-lex
     (&define name stringp (&rest symbolp)))
   (def-edebug-spec define-lex-analyzer
     (&define name stringp form def-body))
   (def-edebug-spec define-lex-regex-analyzer
     (&define name stringp form def-body))
   (def-edebug-spec define-lex-simple-regex-analyzer
     (&define name stringp form symbolp [ &optional form ] def-body))
   (def-edebug-spec define-lex-block-analyzer
     (&define name stringp form (&rest form)))
   (def-edebug-spec semantic-lex-catch-errors
     (symbolp def-body))))
1750\f
;;; Compatibility with Semantic 1.x lexical analysis
;;
;; NOTE: DELETE THIS SOMEDAY SOON

;; Forward the old "semantic-flex-*" names to the current
;; "semantic-lex-*" API, marking each old name obsolete since
;; Emacs 23.2.
(semantic-alias-obsolete 'semantic-flex-start 'semantic-lex-token-start "23.2")
(semantic-alias-obsolete 'semantic-flex-end 'semantic-lex-token-end "23.2")
(semantic-alias-obsolete 'semantic-flex-text 'semantic-lex-token-text "23.2")
(semantic-alias-obsolete 'semantic-flex-make-keyword-table 'semantic-lex-make-keyword-table "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-p 'semantic-lex-keyword-p "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-put 'semantic-lex-keyword-put "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-get 'semantic-lex-keyword-get "23.2")
(semantic-alias-obsolete 'semantic-flex-map-keywords 'semantic-lex-map-keywords "23.2")
(semantic-alias-obsolete 'semantic-flex-keywords 'semantic-lex-keywords "23.2")
(semantic-alias-obsolete 'semantic-flex-buffer 'semantic-lex-buffer "23.2")
(semantic-alias-obsolete 'semantic-flex-list 'semantic-lex-list "23.2")
1766
;; This simple scanner uses the syntax table to generate a stream of
;; simple tokens of the form:
;;
;;  (SYMBOL START . END)
;;
;; Where symbol is the type of thing it is.  START and END mark that
;; objects boundary.

;; All of the variables below are buffer-local configuration for the
;; obsolete `semantic-flex' scanner defined further down.

(defvar semantic-flex-tokens semantic-lex-tokens
  "An alist of semantic token types.
See variable `semantic-lex-tokens'.")

(defvar semantic-flex-unterminated-syntax-end-function
  (lambda (syntax syntax-start flex-end) flex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
FLEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or the exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.")

(defvar semantic-flex-extensions nil
  "Buffer local extensions to the lexical analyzer.
This should contain an alist with a key of a regex and a data element of
a function.  The function should both move point, and return a lexical
token of the form:
  ( TYPE START .  END)
nil is also a valid return value.
TYPE can be any type of symbol, as long as it doesn't occur as a
nonterminal in the language definition.")
(make-variable-buffer-local 'semantic-flex-extensions)

(defvar semantic-flex-syntax-modifications nil
  "Changes to the syntax table for this buffer.
These changes are active only while the buffer is being flexed.
This is a list where each element has the form:
  (CHAR CLASS)
CHAR is the char passed to `modify-syntax-entry',
and CLASS is the string also passed to `modify-syntax-entry' to define
what syntax class CHAR has.")
(make-variable-buffer-local 'semantic-flex-syntax-modifications)

(defvar semantic-ignore-comments t
  "Default comment handling.
The value t means to strip comments when flexing; nil means
to keep comments as part of the token stream.")
(make-variable-buffer-local 'semantic-ignore-comments)

(defvar semantic-flex-enable-newlines nil
  "When flexing, report 'newlines as syntactic elements.
Useful for languages where the newline is a special case terminator.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-newlines)

(defvar semantic-flex-enable-whitespace nil
  "When flexing, report 'whitespace as syntactic elements.
Useful for languages where the syntax is whitespace dependent.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-whitespace)

(defvar semantic-flex-enable-bol nil
  "When flexing, report beginning of lines as syntactic elements.
Useful for languages like python which are indentation sensitive.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-bol)

(defvar semantic-number-expression semantic-lex-number-expression
  "See variable `semantic-lex-number-expression'.")
(make-variable-buffer-local 'semantic-number-expression)

(defvar semantic-flex-depth 0
  "Default flexing depth.
This specifies how many lists to create tokens in.")
(make-variable-buffer-local 'semantic-flex-depth)
1843
(defun semantic-flex (start end &optional depth length)
  "Using the syntax table, do something roughly equivalent to flex.
Semantically check between START and END.  Optional argument DEPTH
indicates at what level to scan over entire lists.
The return value is a token stream.  Each element is a list, such of
the form (symbol start-expression .  end-expression) where SYMBOL
denotes the token type.
See `semantic-flex-tokens' variable for details on token types.
END does not mark the end of the text scanned, only the end of the
beginning of text scanned.  Thus, if a string extends past END, the
end of the return token will be larger than END.  To truly restrict
scanning, use `narrow-to-region'.
The last argument, LENGTH specifies that `semantic-flex' should only
return LENGTH tokens."
  (message "`semantic-flex' is an obsolete function.  Use `define-lex' to create lexers.")
  (if (not semantic-flex-keywords-obarray)
      (setq semantic-flex-keywords-obarray [ nil ]))
  ;; Tokens are consed onto TS in reverse order and `nreverse'd at the
  ;; very end.  EP, when non-nil, overrides (match-end 0) as the place
  ;; point moves to after handling the current token.
  (let ((ts nil)
        (pos (point))
        (ep nil)
        (curdepth 0)
        ;; Comment matcher: the comment-start syntax class, plus the
        ;; mode's `comment-start-skip' when one is defined.
        (cs (if comment-start-skip
                (concat "\\(\\s<\\|" comment-start-skip "\\)")
              (concat "\\(\\s<\\)")))
        (newsyntax (copy-syntax-table (syntax-table)))
        (mods semantic-flex-syntax-modifications)
        ;; Use the default depth if it is not specified.
        (depth (or depth semantic-flex-depth)))
    ;; Update the syntax table
    (while mods
      (modify-syntax-entry (car (car mods)) (car (cdr (car mods))) newsyntax)
      (setq mods (cdr mods)))
    (with-syntax-table newsyntax
      (goto-char start)
      (while (and (< (point) end) (or (not length) (<= (length ts) length)))
        (cond
         ;; catch beginning of lines when needed.
         ;; Must be done before catching any other tokens!
         ((and semantic-flex-enable-bol
               (bolp)
               ;; Just insert a (bol N . N) token in the token stream,
               ;; without moving the point.  N is the point at the
               ;; beginning of line.
               (setq ts (cons (cons 'bol (cons (point) (point))) ts))
               nil)) ;; CONTINUE
         ;; special extensions, includes whitespace, nl, etc.
         ((and semantic-flex-extensions
               (let ((fe semantic-flex-extensions)
                     (r nil))
                 (while fe
                   (if (looking-at (car (car fe)))
                       (setq ts (cons (funcall (cdr (car fe))) ts)
                             r t
                             fe nil
                             ep (point)))
                   (setq fe (cdr fe)))
                 ;; An extension may return nil; drop that entry.
                 (if (and r (not (car ts))) (setq ts (cdr ts)))
                 r)))
         ;; catch newlines when needed
         ((looking-at "\\s-*\\(\n\\|\\s>\\)")
          (if semantic-flex-enable-newlines
              (setq ep (match-end 1)
                    ts (cons (cons 'newline
                                   (cons (match-beginning 1) ep))
                             ts))))
         ;; catch whitespace when needed
         ((looking-at "\\s-+")
          (if semantic-flex-enable-whitespace
              ;; Language wants whitespaces, link them together.
              (if (eq (car (car ts)) 'whitespace)
                  (setcdr (cdr (car ts)) (match-end 0))
                (setq ts (cons (cons 'whitespace
                                     (cons (match-beginning 0)
                                           (match-end 0)))
                               ts)))))
         ;; numbers
         ((and semantic-number-expression
               (looking-at semantic-number-expression))
          (setq ts (cons (cons 'number
                               (cons (match-beginning 0)
                                     (match-end 0)))
                         ts)))
         ;; symbols
         ((looking-at "\\(\\sw\\|\\s_\\)+")
          (setq ts (cons (cons
                          ;; Get info on if this is a keyword or not
                          (or (semantic-lex-keyword-p (match-string 0))
                              'symbol)
                          (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; Character quoting characters (ie, \n as newline)
         ((looking-at "\\s\\+")
          (setq ts (cons (cons 'charquote
                               (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; Open parens, or semantic-lists.
         ((looking-at "\\s(")
          (if (or (not depth) (< curdepth depth))
              ;; Still below DEPTH: report the delimiter itself and
              ;; keep scanning inside the list.
              (progn
                (setq curdepth (1+ curdepth))
                (setq ts (cons (cons 'open-paren
                                     (cons (match-beginning 0) (match-end 0)))
                               ts)))
            ;; At or past DEPTH: report the whole balanced list as one
            ;; semantic-list token, skipping its contents.
            (setq ts (cons
                      (cons 'semantic-list
                            (cons (match-beginning 0)
                                  (save-excursion
                                    (condition-case nil
                                        (forward-list 1)
                                      ;; This case makes flex robust
                                      ;; to broken lists.
                                      (error
                                       (goto-char
                                        (funcall
                                         semantic-flex-unterminated-syntax-end-function
                                         'semantic-list
                                         start end))))
                                    (setq ep (point)))))
                      ts))))
         ;; Close parens
         ((looking-at "\\s)")
          (setq ts (cons (cons 'close-paren
                               (cons (match-beginning 0) (match-end 0)))
                         ts))
          (setq curdepth (1- curdepth)))
         ;; String initiators
         ((looking-at "\\s\"")
          ;; Zing to the end of this string.
          (setq ts (cons (cons 'string
                               (cons (match-beginning 0)
                                     (save-excursion
                                       (condition-case nil
                                           (forward-sexp 1)
                                         ;; This case makes flex
                                         ;; robust to broken strings.
                                         (error
                                          (goto-char
                                           (funcall
                                            semantic-flex-unterminated-syntax-end-function
                                            'string
                                            start end))))
                                       (setq ep (point)))))
                         ts)))
         ;; comments
         ((looking-at cs)
          (if (and semantic-ignore-comments
                   (not semantic-flex-enable-whitespace))
              ;; If the language doesn't deal with comments nor
              ;; whitespaces, ignore them here.
              (let ((comment-start-point (point)))
                (forward-comment 1)
                (if (eq (point) comment-start-point)
                    ;; In this case our start-skip string failed
                    ;; to work properly.  Lets try and move over
                    ;; whatever white space we matched to begin
                    ;; with.
                    (skip-syntax-forward "-.'" (point-at-eol))
                  ;;(forward-comment 1)
                  ;; Generate newline token if enabled
                  (if (and semantic-flex-enable-newlines
                           (bolp))
                      (backward-char 1)))
                (if (eq (point) comment-start-point)
                    (error "Strange comment syntax prevents lexical analysis"))
                (setq ep (point)))
            (let ((tk (if semantic-ignore-comments 'whitespace 'comment)))
              (save-excursion
                (forward-comment 1)
                ;; Generate newline token if enabled
                (if (and semantic-flex-enable-newlines
                         (bolp))
                    (backward-char 1))
                (setq ep (point)))
              ;; Language wants comments or want them as whitespaces,
              ;; link them together.
              (if (eq (car (car ts)) tk)
                  (setcdr (cdr (car ts)) ep)
                (setq ts (cons (cons tk (cons (match-beginning 0) ep))
                               ts))))))
         ;; punctuation
         ((looking-at "\\(\\s.\\|\\s$\\|\\s'\\)")
          (setq ts (cons (cons 'punctuation
                               (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; unknown token
         (t
          (error "What is that?")))
        ;; Advance past the token just handled; EP wins over the match
        ;; end when a handler moved beyond it (strings, lists, comments).
        (goto-char (or ep (match-end 0)))
        (setq ep nil)))
    ;; maybe catch the last beginning of line when needed
    (and semantic-flex-enable-bol
         (= (point) end)
         (bolp)
         (setq ts (cons (cons 'bol (cons (point) (point))) ts)))
    ;; Restore point; scanning must not move it for the caller.
    (goto-char pos)
    ;;(message "Flexing muscles...done")
    (nreverse ts)))
2041
;; Make this library loadable as the feature `semantic/lex'.
(provide 'semantic/lex)
2043
55b522b2
CY
2044;; Local variables:
2045;; generated-autoload-file: "loaddefs.el"
996bc9bf 2046;; generated-autoload-load-name: "semantic/lex"
55b522b2
CY
2047;; End:
2048
b90caf50 2049;;; semantic/lex.el ends here