1 ;;; lex.el --- Lexical Analyzer builder
3 ;;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;; 2007, 2008, 2009 Free Software Foundation, Inc.
6 ;; Author: Eric M. Ludlam <zappo@gnu.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; This file handles the creation of lexical analyzers for different
26 ;; languages in Emacs Lisp. The purpose of a lexical analyzer is to
27 ;; convert a buffer into a list of lexical tokens. Each token
28 ;; contains the token class (such as 'number, 'symbol, 'IF, etc) and
29 ;; the location in the buffer it was found. Optionally, a token also
30 ;; contains a string representing what is at the designated buffer
33 ;; Tokens are pushed onto a token stream, which is basically a list of
34 ;; all the lexical tokens from the analyzed region. The token stream
35 ;; is then handed to the grammar which parses the file.
39 ;; Each analyzer specifies a condition and forms. These conditions
40 ;; and forms are assembled into a function by `define-lex' that does
41 ;; the lexical analysis.
43 ;; In the lexical analyzer created with `define-lex', each condition
44 ;; is tested for a given point.  When the condition is true, the forms
47 ;; The forms can push a lexical token onto the token stream. The
48 ;; analyzer forms also must move the current analyzer point. If the
49 ;; analyzer point is moved without pushing a token, then the matched
50 ;; syntax is effectively ignored, or skipped.
52 ;; Thus, starting at the beginning of a region to be analyzed, each
53 ;; condition is tested. One will match, and a lexical token might be
54 ;; pushed, and the point is moved to the end of the lexical token
55 ;; identified. At the new position, the process occurs again until
56 ;; the end of the specified region is reached.
58 ;;; How to use semantic-lex
60 ;; To create a lexer for a language, use the `define-lex' macro.
62 ;; The `define-lex' macro accepts a list of lexical analyzers. Each
63 ;; analyzer is created with `define-lex-analyzer', or one of the
64 ;; derivative macros.  A single analyzer defines a regular expression
65 ;; to match text in a buffer, and a short segment of code to create
68 ;; Each analyzer has a NAME, DOC, a CONDITION, and possibly some
69 ;; FORMS. The NAME is the name used in `define-lex'. The DOC
70 ;; describes what the analyzer should do.
72 ;; The CONDITION evaluates the text at the current point in the
73 ;; current buffer. If CONDITION is true, then the FORMS will be
76 ;; The purpose of the FORMS is to push new lexical tokens onto the
77 ;; list of tokens for the current buffer, and to move point after the
80 ;; Some macros for creating one analyzer are:
82 ;; define-lex-analyzer - A generic analyzer associating any style of
83 ;; condition to forms.
84 ;; define-lex-regex-analyzer - Matches a regular expression.
85 ;; define-lex-simple-regex-analyzer - Matches a regular expressions,
86 ;; and pushes the match.
87 ;; define-lex-block-analyzer - Matches list syntax, and defines
88 ;; handles open/close delimiters.
90 ;; These macros are used by the grammar compiler when lexical
91 ;; information is specified in a grammar:
92 ;; define-lex- * -type-analyzer - Matches syntax specified in
93 ;; a grammar, and pushes one token for it. The * would
94 ;; be `sexp' for things like lists or strings, and
95 ;; `string' for things that need to match some special
96 ;; string, such as "\\." where a literal match is needed.
100 ;; There are tables of different symbols managed in semantic-lex.el.
103 ;; Lexical keyword table - A Table of symbols declared in a grammar
104 ;; file with the %keyword declaration.
105 ;; Keywords are used by `semantic-lex-symbol-or-keyword'
106 ;; to create lexical tokens based on the keyword.
108 ;; Lexical type table - A table of symbols declared in a grammar
109 ;; file with the %type declaration.
110 ;; The grammar compiler uses the type table to create new
111 ;; lexical analyzers. These analyzers are then used to when
112 ;; a new lexical analyzer is made for a language.
116 ;; A lexical type defines a kind of lexical analyzer that will be
117 ;; automatically generated from a grammar file based on some
118 ;; predetermined attributes. For now these two attributes are
121 ;; * matchdatatype : define the kind of lexical analyzer. That is :
123 ;; - regexp : define a regexp analyzer (see
124 ;; `define-lex-regex-type-analyzer')
126 ;; - string : define a string analyzer (see
127 ;; `define-lex-string-type-analyzer')
129 ;; - block : define a block type analyzer (see
130 ;; `define-lex-block-type-analyzer')
132 ;; - sexp : define a sexp analyzer (see
133 ;; `define-lex-sexp-type-analyzer')
135 ;; - keyword : define a keyword analyzer (see
136 ;; `define-lex-keyword-type-analyzer')
138 ;; * syntax : define the syntax that matches a syntactic
139 ;; expression. When syntax is matched the corresponding type
140 ;; analyzer is entered and the resulting match data will be
141 ;; interpreted based on the kind of analyzer (see matchdatatype
144 ;; The following lexical types are predefined :
146 ;; +-------------+---------------+--------------------------------+
147 ;; | type | matchdatatype | syntax |
148 ;; +-------------+---------------+--------------------------------+
149 ;; | punctuation | string | "\\(\\s.\\|\\s$\\|\\s'\\)+" |
150 ;; | keyword | keyword | "\\(\\sw\\|\\s_\\)+" |
151 ;; | symbol | regexp | "\\(\\sw\\|\\s_\\)+" |
152 ;; | string | sexp | "\\s\"" |
153 ;; | number | regexp | semantic-lex-number-expression |
154 ;; | block | block | "\\s(\\|\\s)" |
155 ;; +-------------+---------------+--------------------------------+
157 ;; In a grammar you must use a %type expression to automatically generate
158 ;; the corresponding analyzers of that type.
160 ;; Here is an example to auto-generate punctuation analyzers
161 ;; with 'matchdatatype and 'syntax predefined (see table above)
163 ;; %type <punctuation> ;; will auto-generate this kind of analyzers
165 ;; It is equivalent to write :
167 ;; %type <punctuation> syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
169 ;; ;; Some punctuations based on the type defines above
171 ;; %token <punctuation> NOT "!"
172 ;; %token <punctuation> NOTEQ "!="
173 ;; %token <punctuation> MOD "%"
174 ;; %token <punctuation> MODEQ "%="
177 ;;; On the Semantic 1.x lexer
179 ;; In semantic 1.x, the lexical analyzer was an all purpose routine.
180 ;; To boost efficiency, the analyzer is now a series of routines that
181 ;; are constructed at build time into a single routine. This will
182 ;; eliminate unneeded if statements to speed the lexer.
184 (require 'semantic
/fw
)
;; Compatibility: define `with-syntax-table' only when the running Emacs
;; does not already provide it.  The extraction of this file dropped the
;; `unwind-protect'/`progn' scaffolding; restored here.
(if (not (fboundp 'with-syntax-table))

    ;; Copied from Emacs 21 for compatibility with released Emacses.
    (defmacro with-syntax-table (table &rest body)
      "With syntax table of current buffer set to a copy of TABLE, evaluate BODY.
The syntax table of the current buffer is saved, BODY is evaluated, and the
saved table is restored, even in case of an abnormal exit.
Value is what BODY returns."
      ;; Uninterned symbols avoid capturing variables used inside BODY.
      (let ((old-table (make-symbol "table"))
            (old-buffer (make-symbol "buffer")))
        `(let ((,old-table (syntax-table))
               (,old-buffer (current-buffer)))
           (unwind-protect
               (progn
                 (set-syntax-table (copy-syntax-table ,table))
                 ,@body)
             ;; Restore the original buffer and its syntax table even if
             ;; BODY exits non-locally.
             (set-buffer ,old-buffer)
             (set-syntax-table ,old-table))))))
212 ;;; Semantic 2.x lexical analysis
(defun semantic-lex-map-symbols (fun table &optional property)
  "Call function FUN on every symbol in TABLE.
If optional PROPERTY is non-nil, call FUN only on every symbol which
has a PROPERTY value.  FUN receives a symbol as argument."
  ;; The extraction dropped the `arrayp'/`mapatoms' head; restored here.
  (if (arrayp table)
      (mapatoms
       #'(lambda (symbol)
           (if (or (null property) (get symbol property))
               (funcall fun symbol)))
       table)))
225 ;;; Lexical keyword table handling.
227 ;; These keywords are keywords defined for using in a grammar with the
228 ;; %keyword declaration, and are not keywords used in Emacs Lisp.
;; Keyword table: one interned symbol per %keyword declared in a grammar.
;; The symbol's value is the lexical token symbol for that keyword (see
;; `semantic-lex-make-keyword-table').
(defvar semantic-flex-keywords-obarray nil
  "Buffer local keyword obarray for the lexical analyzer.
These keywords are matched explicitly, and converted into special symbols.")
;; Keywords differ per language, so the table is buffer-local.
(make-variable-buffer-local 'semantic-flex-keywords-obarray)
(defmacro semantic-lex-keyword-invalid (name)
  "Signal that NAME is an invalid keyword name.
Expands to a `signal' form raising `wrong-type-argument'."
  ;; Build the expansion with explicit list construction.
  (list 'signal ''wrong-type-argument
        (list 'quote (list 'semantic-lex-keyword-p name))))
(defsubst semantic-lex-keyword-symbol (name)
  "Return keyword symbol with NAME or nil if not found.
Looks NAME up in `semantic-flex-keywords-obarray'; returns nil when no
keyword table is installed for this buffer."
  (when (arrayp semantic-flex-keywords-obarray)
    (intern-soft name semantic-flex-keywords-obarray)))
(defsubst semantic-lex-keyword-p (name)
  "Return non-nil if a keyword with NAME exists in the keyword table.
Return nil otherwise."
  ;; A keyword exists when its symbol is interned AND carries a value.
  (let ((sym (semantic-lex-keyword-symbol name)))
    (and sym (symbol-value sym))))
(defsubst semantic-lex-keyword-set (name value)
  "Set value of keyword with NAME to VALUE and return VALUE.
Interns NAME in `semantic-flex-keywords-obarray' if needed."
  (let ((sym (intern name semantic-flex-keywords-obarray)))
    ;; `set' returns VALUE, preserving the original contract.
    (set sym value)))
(defsubst semantic-lex-keyword-value (name)
  "Return value of keyword with NAME.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    ;; The `(if keyword' line was dropped during extraction (the trailing
    ;; paren count confirms it); restored here.
    (if keyword
        (symbol-value keyword)
      (semantic-lex-keyword-invalid name))))
(defsubst semantic-lex-keyword-put (name property value)
  "For keyword with NAME, set its PROPERTY to VALUE.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    ;; Restored dropped `(if keyword' conditional.
    (if keyword
        (put keyword property value)
      (semantic-lex-keyword-invalid name))))
(defsubst semantic-lex-keyword-get (name property)
  "For keyword with NAME, return its PROPERTY value.
Signal an error if a keyword with NAME does not exist."
  (let ((keyword (semantic-lex-keyword-symbol name)))
    ;; Restored dropped `(if keyword' conditional.
    (if keyword
        (get keyword property)
      (semantic-lex-keyword-invalid name))))
(defun semantic-lex-make-keyword-table (specs &optional propspecs)
  "Convert keyword SPECS into an obarray and return it.
SPECS must be a list of (NAME . TOKSYM) elements, where:

  NAME is the name of the keyword symbol to define.
  TOKSYM is the lexical token symbol of that keyword.

If optional argument PROPSPECS is non nil, then interpret it, and
apply those properties.
PROPSPECS must be a list of (NAME PROPERTY VALUE) elements."
  ;; Create the symbol hash table.  The extraction dropped the `spec'
  ;; binding and both `while' loop heads; restored here.
  (let ((semantic-flex-keywords-obarray (make-vector 13 0))
        spec)
    ;; fill it with stuff
    (while specs
      (setq spec  (car specs)
            specs (cdr specs))
      (semantic-lex-keyword-set (car spec) (cdr spec)))
    ;; Apply all properties
    (while propspecs
      (setq spec (car propspecs)
            propspecs (cdr propspecs))
      (semantic-lex-keyword-put (car spec) (nth 1 spec) (nth 2 spec)))
    semantic-flex-keywords-obarray))
(defsubst semantic-lex-map-keywords (fun &optional property)
  "Call function FUN on every lexical keyword.
If optional PROPERTY is non-nil, call FUN only on keywords that have
a PROPERTY value.  FUN receives a lexical keyword as argument."
  ;; Delegate to the generic obarray walker over the keyword table.
  (semantic-lex-map-symbols fun semantic-flex-keywords-obarray property))
(defun semantic-lex-keywords (&optional property)
  "Return a list of lexical keywords.
If optional PROPERTY is non-nil, return only keywords which have a
PROPERTY set."
  ;; Restored the dropped `(let (keywords)' accumulator, the PROPERTY
  ;; argument, and the returned result.
  (let (keywords)
    (semantic-lex-map-keywords
     #'(lambda (symbol) (setq keywords (cons symbol keywords)))
     property)
    keywords))
319 ;;; Inline functions:
;; Forward declarations for the dynamically bound variables used by the
;; inline functions below.  The full definitions, with values and
;; docstrings, appear later in this file.
(defvar semantic-lex-unterminated-syntax-end-function)
(defvar semantic-lex-analysis-bounds)
(defvar semantic-lex-end-point)
(defsubst semantic-lex-token-bounds (token)
  "Fetch the start and end locations of the lexical token TOKEN.
Return a pair (START . END).
TOKEN is either (CLASS START . END) or (CLASS TEXT START . END)."
  ;; The two result branches were dropped during extraction; restored.
  (if (not (numberp (car (cdr token))))
      (cdr (cdr token))                 ; (CLASS TEXT START . END)
    (cdr token)))                       ; (CLASS START . END)
(defsubst semantic-lex-token-start (token)
  "Fetch the start position of the lexical token TOKEN.
See also the function `semantic-lex-token'."
  (let ((bounds (semantic-lex-token-bounds token)))
    (car bounds)))
(defsubst semantic-lex-token-end (token)
  "Fetch the end position of the lexical token TOKEN.
See also the function `semantic-lex-token'."
  (let ((bounds (semantic-lex-token-bounds token)))
    (cdr bounds)))
(defsubst semantic-lex-unterminated-syntax-detected (syntax)
  "Inside a lexical analyzer, use this when unterminated syntax was found.
Argument SYNTAX indicates the type of syntax that is unterminated.
The job of this function is to move (point) to a new logical location
so that analysis can continue, if possible."
  ;; Restored dropped `(goto-char' wrapper and the SYNTAX argument line.
  (goto-char
   (funcall semantic-lex-unterminated-syntax-end-function
            syntax
            (car semantic-lex-analysis-bounds)
            (cdr semantic-lex-analysis-bounds)))
  (setq semantic-lex-end-point (point)))
355 ;;; Type table handling.
357 ;; The lexical type table manages types that occur in a grammar file
358 ;; with the %type declaration. Types represent different syntaxes.
359 ;; See code for `semantic-lex-preset-default-types' for the classic
;; Type table: one interned symbol per %type declared in a grammar file.
(defvar semantic-lex-types-obarray nil
  "Buffer local types obarray for the lexical analyzer.")
;; Types differ per language, so the table is buffer-local.
(make-variable-buffer-local 'semantic-lex-types-obarray)
(defmacro semantic-lex-type-invalid (type)
  "Signal that TYPE is an invalid lexical type name.
Expands to a `signal' form raising `wrong-type-argument'."
  ;; Build the expansion with explicit list construction.
  (list 'signal ''wrong-type-argument
        (list 'quote (list 'semantic-lex-type-p type))))
(defsubst semantic-lex-type-symbol (type)
  "Return symbol with TYPE or nil if not found.
Looks TYPE up in `semantic-lex-types-obarray'; returns nil when no type
table is installed for this buffer."
  (when (arrayp semantic-lex-types-obarray)
    (intern-soft type semantic-lex-types-obarray)))
(defsubst semantic-lex-type-p (type)
  "Return non-nil if a symbol with TYPE name exists."
  ;; A type exists when its symbol is interned AND carries a value.
  (let ((sym (semantic-lex-type-symbol type)))
    (and sym (symbol-value sym))))
(defsubst semantic-lex-type-set (type value)
  "Set value of symbol with TYPE name to VALUE and return VALUE.
Interns TYPE in `semantic-lex-types-obarray' if needed."
  (let ((sym (intern type semantic-lex-types-obarray)))
    ;; `set' returns VALUE, preserving the original contract.
    (set sym value)))
(defsubst semantic-lex-type-value (type &optional noerror)
  "Return value of symbol with TYPE name.
If optional argument NOERROR is non-nil return nil if a symbol with
TYPE name does not exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    ;; Restored the dropped `(if sym'/`(unless noerror' lines (the
    ;; trailing paren count confirms this structure).
    (if sym
        (symbol-value sym)
      (unless noerror
        (semantic-lex-type-invalid type)))))
(defsubst semantic-lex-type-put (type property value &optional add)
  "For symbol with TYPE name, set its PROPERTY to VALUE.
If optional argument ADD is non-nil, create a new symbol with TYPE
name if it does not already exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    ;; Restored the dropped `(unless sym' guard that creates the type
    ;; on demand when ADD is non-nil.
    (unless sym
      (or add (semantic-lex-type-invalid type))
      (semantic-lex-type-set type nil)
      (setq sym (semantic-lex-type-symbol type)))
    (put sym property value)))
(defsubst semantic-lex-type-get (type property &optional noerror)
  "For symbol with TYPE name, return its PROPERTY value.
If optional argument NOERROR is non-nil return nil if a symbol with
TYPE name does not exist.  Otherwise signal an error."
  (let ((sym (semantic-lex-type-symbol type)))
    ;; Restored the dropped `(if sym'/`(unless noerror' lines.
    (if sym
        (get sym property)
      (unless noerror
        (semantic-lex-type-invalid type)))))
(defun semantic-lex-preset-default-types ()
  "Install useful default properties for well known types.
For each predefined type, sets `matchdatatype' (the kind of analyzer to
generate) and `syntax' (the expression it matches).  The trailing `t'
asks `semantic-lex-type-put' to create the type when missing."
  (semantic-lex-type-put "punctuation" 'matchdatatype 'string t)
  (semantic-lex-type-put "punctuation" 'syntax "\\(\\s.\\|\\s$\\|\\s'\\)+")
  (semantic-lex-type-put "keyword" 'matchdatatype 'keyword t)
  (semantic-lex-type-put "keyword" 'syntax "\\(\\sw\\|\\s_\\)+")
  (semantic-lex-type-put "symbol" 'matchdatatype 'regexp t)
  (semantic-lex-type-put "symbol" 'syntax "\\(\\sw\\|\\s_\\)+")
  (semantic-lex-type-put "string" 'matchdatatype 'sexp t)
  (semantic-lex-type-put "string" 'syntax "\\s\"")
  (semantic-lex-type-put "number" 'matchdatatype 'regexp t)
  (semantic-lex-type-put "number" 'syntax 'semantic-lex-number-expression)
  (semantic-lex-type-put "block" 'matchdatatype 'block t)
  ;; The defun's closing paren was dropped during extraction; restored.
  (semantic-lex-type-put "block" 'syntax "\\s(\\|\\s)"))
(defun semantic-lex-make-type-table (specs &optional propspecs)
  "Convert type SPECS into an obarray and return it.
SPECS must be a list of (TYPE . TOKENS) elements, where:

  TYPE is the name of the type symbol to define.
  TOKENS is an list of (TOKSYM . MATCHER) elements, where:

    TOKSYM is any lexical token symbol.
    MATCHER is a string or regexp a text must match to be a such
    token.

If optional argument PROPSPECS is non nil, then interpret it, and
apply those properties.
PROPSPECS must be a list of (TYPE PROPERTY VALUE)."
  ;; Create the symbol hash table.  Both `while' loop heads and the
  ;; per-spec `setq' chain were dropped during extraction; restored.
  (let* ((semantic-lex-types-obarray (make-vector 13 0))
         spec type tokens token alist default)
    ;; fill it with stuff
    (while specs
      (setq spec    (car specs)
            specs   (cdr specs)
            type    (car spec)
            tokens  (cdr spec)
            default nil
            alist   nil)
      (while tokens
        (setq token  (car tokens)
              tokens (cdr tokens))
        (if (cdr token)
            ;; (TOKSYM . MATCHER): a normal matching spec.
            (setq alist (cons token alist))
          ;; (TOKSYM): this token is the type's default.
          (setq token (car token))
          (if default
              (message
               "*Warning* default value of <%s> tokens changed to %S, was %S"
               type token default))
          (setq default token)))
      ;; Ensure the default matching spec is the first one.
      (semantic-lex-type-set type (cons default (nreverse alist))))
    ;; Install useful default types & properties
    (semantic-lex-preset-default-types)
    ;; Apply all properties
    (while propspecs
      (setq spec (car propspecs)
            propspecs (cdr propspecs))
      ;; Create the type if necessary.
      (semantic-lex-type-put (car spec) (nth 1 spec) (nth 2 spec) t))
    semantic-lex-types-obarray))
(defsubst semantic-lex-map-types (fun &optional property)
  "Call function FUN on every lexical type.
If optional PROPERTY is non-nil, call FUN only on type symbols that
have a PROPERTY value.  FUN receives a type symbol as argument."
  ;; Delegate to the generic obarray walker over the type table.
  (semantic-lex-map-symbols fun semantic-lex-types-obarray property))
(defun semantic-lex-types (&optional property)
  "Return a list of lexical type symbols.
If optional PROPERTY is non-nil, return only type symbols which have
PROPERTY set."
  ;; Restored the dropped `(let (types)' accumulator, the PROPERTY
  ;; argument, and the returned result.
  (let (types)
    (semantic-lex-map-types
     #'(lambda (symbol) (setq types (cons symbol types)))
     property)
    types))
496 ;;; Lexical Analyzer framework settings
(defvar semantic-lex-analyzer 'semantic-flex
  "The lexical analyzer used for a given buffer.
See `semantic-lex' for documentation.
For compatibility with Semantic 1.x it defaults to `semantic-flex'.")
;; Each buffer selects its own analyzer function.
(make-variable-buffer-local 'semantic-lex-analyzer)
;; NOTE(review): the quoted alist value of this defvar was dropped during
;; extraction, which would have made the docstring become the variable's
;; value.  Restored from the upstream file; per its own docstring the
;; value is documentation-only and unused by code.
(defvar semantic-lex-tokens
  '((bol)
    (charquote)
    (close-paren)
    (comment)
    (newline)
    (open-paren)
    (punctuation)
    (semantic-list)
    (string)
    (symbol)
    (whitespace))
  "An alist of semantic token types.
As of December 2001 (semantic 1.4beta13), this variable is not used in
any code.  The only use is to refer to the doc-string from elsewhere.

The key to this alist is the symbol representing token type that
\\[semantic-flex] returns.  These are

  - bol:           Empty string matching a beginning of line.
                   This token is produced with
                   `semantic-lex-beginning-of-line'.

  - charquote:     String sequences that match `\\s\\+' regexp.
                   This token is produced with `semantic-lex-charquote'.

  - close-paren:   Characters that match `\\s)' regexp.
                   These are typically `)', `}', `]', etc.
                   This token is produced with
                   `semantic-lex-close-paren'.

  - comment:       A comment chunk.  These token types are not
                   produced by default.
                   This token is produced with `semantic-lex-comments'.
                   Comments are ignored with `semantic-lex-ignore-comments'.
                   Comments are treated as whitespace with
                   `semantic-lex-comments-as-whitespace'.

  - newline        Characters matching `\\s-*\\(\n\\|\\s>\\)' regexp.
                   This token is produced with `semantic-lex-newline'.

  - open-paren:    Characters that match `\\s(' regexp.
                   These are typically `(', `{', `[', etc.
                   If `semantic-lex-paren-or-list' is used,
                   then `open-paren' is not usually generated unless
                   the `depth' argument to \\[semantic-lex] is
                   greater than 0.
                   This token is always produced if the analyzer
                   `semantic-lex-open-paren' is used.

  - punctuation:   Characters matching `{\\(\\s.\\|\\s$\\|\\s'\\)'
                   regexp.
                   This token is produced with `semantic-lex-punctuation'.
                   Always specify this analyzer after the comment
                   analyzer.

  - semantic-list: String delimited by matching parenthesis, braces,
                   etc.  that the lexer skipped over, because the
                   `depth' parameter to \\[semantic-flex] was not high
                   enough.
                   This token is produced with `semantic-lex-paren-or-list'.

  - string:        Quoted strings, i.e., string sequences that start
                   and end with characters matching `\\s\"'
                   regexp.  The lexer relies on @code{forward-sexp} to
                   find the matching end.
                   This token is produced with `semantic-lex-string'.

  - symbol:        String sequences that match `\\(\\sw\\|\\s_\\)+'
                   regexp.
                   This token is produced with
                   `semantic-lex-symbol-or-keyword'.  Always add this analyzer
                   after `semantic-lex-number', or other analyzers that
                   match its regular expression.

  - whitespace:    Characters that match `\\s-+' regexp.
                   This token is produced with `semantic-lex-whitespace'.")
(defvar semantic-lex-syntax-modifications nil
  "Changes to the syntax table for this buffer.
These changes are active only while the buffer is being flexed.
This is a list where each element has the form:
  (CHAR CLASS)
CHAR is the char passed to `modify-syntax-entry',
and CLASS is the string also passed to `modify-syntax-entry' to define
what syntax class CHAR has.")
;; Per-language tweaks, so keep the list buffer-local.
(make-variable-buffer-local 'semantic-lex-syntax-modifications)
;; Syntax table that `define-lex'-generated analyzers install while lexing.
(defvar semantic-lex-syntax-table nil
  "Syntax table used by lexical analysis.
See also `semantic-lex-syntax-modifications'.")
(make-variable-buffer-local 'semantic-lex-syntax-table)
(defvar semantic-lex-comment-regex nil
  "Regular expression for identifying comment start during lexical analysis.
This may be automatically set when semantic initializes in a mode, but
may need to be overridden for some special languages.")
(make-variable-buffer-local 'semantic-lex-comment-regex)
(defvar semantic-lex-number-expression
  ;; This expression was written by David Ponce for Java, and copied
  ;; here for C and any other similar language.
  ;; The `eval-when-compile'/`concat' glue and the "\\|" separators were
  ;; dropped during extraction; restored here.
  (eval-when-compile
    (concat "\\("
            "\\<[0-9]+[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][fFdD]\\>"
            "\\|"
            "\\<[0-9]+[.]"
            "\\|"
            "[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<0[xX][0-9a-fA-F]+[lL]?\\>"
            "\\|"
            "\\<[0-9]+[lLfFdD]?\\>"
            "\\)"))
  "Regular expression for matching a number.
If this value is nil, no number extraction is done during lex.
This expression tries to match C and Java like numbers.

DECIMAL_LITERAL:
    [1-9][0-9]*
HEX_LITERAL:
    0[xX][0-9a-fA-F]+
OCTAL_LITERAL:
    0[0-7]*
INTEGER_LITERAL:
    <DECIMAL_LITERAL>[lL]?
  | <HEX_LITERAL>[lL]?
  | <OCTAL_LITERAL>[lL]?
EXPONENT:
    [eE][+-]?[0-9]+
FLOATING_POINT_LITERAL:
    [0-9]+[.][0-9]*<EXPONENT>?[fFdD]?
  | [.][0-9]+<EXPONENT>?[fFdD]?
  | [0-9]+<EXPONENT>[fFdD]?
  | [0-9]+<EXPONENT>?[fFdD]")
(make-variable-buffer-local 'semantic-lex-number-expression)
(defvar semantic-lex-depth 0
  "Default lexing depth.
This specifies how many lists to create tokens in.")
;; Used by `define-lex' when the caller passes no explicit DEPTH.
(make-variable-buffer-local 'semantic-lex-depth)
(defvar semantic-lex-unterminated-syntax-end-function
  ;; Default: give up and treat the requested end of the lex as the end
  ;; of the broken syntax.
  (lambda (syntax syntax-start lex-end) lex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
LEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or to exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.")
673 ;;; Interactive testing commands
(defun semantic-lex-test (arg)
  "Test the semantic lexer in the current buffer.
If universal argument ARG, then try the whole buffer."
  ;; Restored dropped `(interactive "P")', `(point-max)' argument,
  ;; `(require 'pp)' and `(erase-buffer)' lines.
  (interactive "P")
  (let* ((start (current-time))
         (result (semantic-lex
                  (if arg (point-min) (point))
                  (point-max)))
         (end (current-time)))
    (message "Elapsed Time: %.2f seconds."
             (semantic-elapsed-time start end))
    (pop-to-buffer "*Lexer Output*")
    (require 'pp)
    (erase-buffer)
    (insert (pp-to-string result))
    (goto-char (point-min))))
(defun semantic-lex-test-full-depth (arg)
  "Test the semantic lexer in the current buffer parsing through lists.
Usually the lexer parses lists only to `semantic-lex-depth'; this
command lexes with a very large depth so list contents are tokenized.
If universal argument ARG, then try the whole buffer."
  ;; Restored dropped `(interactive "P")', `(point-max)', depth
  ;; argument, `(require 'pp)' and `(erase-buffer)' lines.
  (interactive "P")
  (let* ((start (current-time))
         (result (semantic-lex
                  (if arg (point-min) (point))
                  (point-max)
                  100))
         (end (current-time)))
    (message "Elapsed Time: %.2f seconds."
             (semantic-elapsed-time start end))
    (pop-to-buffer "*Lexer Output*")
    (require 'pp)
    (erase-buffer)
    (insert (pp-to-string result))
    (goto-char (point-min))))
(defun semantic-lex-test-region (beg end)
  "Test the semantic lexer in the current buffer.
Analyze the area between BEG and END."
  ;; Restored dropped `(interactive "r")', `(require 'pp)' and
  ;; `(erase-buffer)' lines.
  (interactive "r")
  (let ((result (semantic-lex beg end)))
    (pop-to-buffer "*Lexer Output*")
    (require 'pp)
    (erase-buffer)
    (insert (pp-to-string result))
    (goto-char (point-min))))
;; Note: a command `semantic-lex-debug' is also defined below; this
;; variable gates the per-token debug pauses in `semantic-lex-debug-break'.
(defvar semantic-lex-debug nil
  "When non-nil, debug the local lexical analyzer.")
(defun semantic-lex-debug (arg)
  "Debug the semantic lexer in the current buffer.
Argument ARG specifies whether to analyze the whole buffer, or start
at point.  While engaged, each token identified by the lexer will be
highlighted in the target buffer.  A description of the current token
will be displayed in the minibuffer.  Press SPC to move to the next
lexical token."
  ;; Restored dropped `(interactive "P")'.
  (interactive "P")
  (require 'semantic/debug)
  (let ((semantic-lex-debug t))
    (semantic-lex-test arg)))
(defun semantic-lex-highlight-token (token)
  "Highlight the lexical TOKEN.
TOKEN is a lexical token with a START And END position.
Return the overlay."
  (let ((o (semantic-make-overlay (semantic-lex-token-start token)
                                  (semantic-lex-token-end token))))
    (semantic-overlay-put o 'face 'highlight)
    ;; Restored the dropped return of the overlay, which
    ;; `semantic-lex-debug-break' relies on for cleanup.
    o))
(defsubst semantic-lex-debug-break (token)
  "Break during lexical analysis at TOKEN.
Highlights TOKEN, waits for a key press, then removes the highlight.
No-op unless `semantic-lex-debug' is non-nil."
  ;; Restored the dropped `let'/`unwind-protect'/`semantic-read-event'
  ;; scaffolding around the visible highlight/format/delete calls.
  (when semantic-lex-debug
    (let ((o nil))
      (unwind-protect
          (progn
            (when token
              (setq o (semantic-lex-highlight-token token)))
            (semantic-read-event
             (format "%S :: SPC - continue" token)))
        ;; Always remove the temporary highlight.
        (when o
          (semantic-overlay-delete o))))))
762 ;;; Lexical analyzer creation
764 ;; Code for creating a lex function from lists of analyzers.
766 ;; A lexical analyzer is created from a list of individual analyzers.
767 ;; Each individual analyzer specifies a single match, and code that
770 ;; Creation of an analyzer assembles these analyzers into a new function
771 ;; with the behaviors of all the individual analyzers.
(defmacro semantic-lex-one-token (analyzers)
  "Calculate one token from the current buffer at point.
Uses locally bound variables from `define-lex'.
Argument ANALYZERS is the list of analyzers being used."
  ;; Splice each analyzer's stored (CONDITION FORMS...) clause into a
  ;; single `cond' at macro-expansion time.
  `(cond ,@(mapcar #'symbol-value analyzers)))
;; Dynamic state shared between `define-lex'-generated analyzers and the
;; individual analyzer snippets while a lex pass is running.
(defvar semantic-lex-end-point nil
  "The end point as tracked through lexical functions.")

(defvar semantic-lex-current-depth nil
  "The current depth as tracked through lexical functions.")

(defvar semantic-lex-maximum-depth nil
  "The maximum depth of parenthesis as tracked through lexical functions.")

(defvar semantic-lex-token-stream nil
  "The current token stream we are collecting.")

(defvar semantic-lex-analysis-bounds nil
  "The bounds of the current analysis.")

(defvar semantic-lex-block-streams nil
  "Streams of tokens inside collapsed blocks.
This is an alist of (ANCHOR . STREAM) elements where ANCHOR is the
start position of the block, and STREAM is the list of tokens in that
block.")

(defvar semantic-lex-reset-hooks nil
  "List of hooks major-modes use to reset lexical analyzers.
Hooks are called with START and END values for the current lexical pass.
Should be set with `add-hook' specifying a LOCAL option.")

;; Stack of nested blocks.
(defvar semantic-lex-block-stack nil)
;;(defvar semantic-lex-timeout 5
;;  "*Number of sections of lexing before giving up.")
(defmacro define-lex (name doc &rest analyzers)
  "Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
ANALYZERS are small code snippets of analyzers to use when
building the new NAMED analyzer.  Only use analyzers which
are written to be used in `define-lex'.
Each analyzer should be an analyzer created with `define-lex-analyzer'.
Note: The order in which analyzers are listed is important.
If two analyzers can match the same text, it is important to order the
analyzers so that the one you want to match first occurs first.  For
example, it is good to put a number analyzer in front of a symbol
analyzer which might mistake a number for a symbol."
  ;; Several lines of this macro were dropped during extraction
  ;; (`tmp-start' binding, `(goto-char start)', the LENGTH guard of the
  ;; main loop, error arguments, and the unterminated-block walker);
  ;; restored to the canonical structure.
  `(defun ,name (start end &optional depth length)
     ,(concat doc "\nSee `semantic-lex' for more information.")
     ;; Make sure the state of block parsing starts over.
     (setq semantic-lex-block-streams nil)
     ;; Allow specialty reset items.
     (run-hook-with-args 'semantic-lex-reset-hooks start end)
     ;; Lexing state.
     (let* (;(starttime (current-time))
            (starting-position (point))
            (semantic-lex-token-stream nil)
            (semantic-lex-block-stack nil)
            (tmp-start start)
            (semantic-lex-end-point start)
            (semantic-lex-current-depth 0)
            ;; Use the default depth when not specified.
            (semantic-lex-maximum-depth
             (or depth semantic-lex-depth))
            ;; Bounds needed for unterminated syntax
            (semantic-lex-analysis-bounds (cons start end))
            ;; This entry prevents text properties from
            ;; confusing our lexical analysis.  See Emacs 22 (CVS)
            ;; version of C++ mode with template hack text properties.
            (parse-sexp-lookup-properties nil))
       ;; Maybe REMOVE THIS LATER.
       ;; Trying to find incremental parser bug.
       (when (> end (point-max))
         (error ,(format "%s: end (%%d) > point-max (%%d)" name)
                end (point-max)))
       (with-syntax-table semantic-lex-syntax-table
         (goto-char start)
         ;; Lex until END, or until LENGTH tokens have been collected.
         (while (and (< (point) end)
                     (or (not length)
                         (<= (length semantic-lex-token-stream) length)))
           ;; Try each analyzer in order; one must move
           ;; `semantic-lex-end-point' forward.
           (semantic-lex-one-token ,analyzers)
           ;; An analyzer that never moves point would loop forever.
           (when (eq semantic-lex-end-point tmp-start)
             (error ,(format "%s: endless loop at %%d, after %%S" name)
                    tmp-start (car semantic-lex-token-stream)))
           (setq tmp-start semantic-lex-end-point)
           (goto-char semantic-lex-end-point)
           ;;(when (> (semantic-elapsed-time starttime (current-time))
           ;;         semantic-lex-timeout)
           ;;  (error "Timeout during lex at char %d" (point)))
           (semantic-throw-on-input 'lex)
           (semantic-lex-debug-break (car semantic-lex-token-stream))))
       ;; Check that there is no unterminated block.
       (when semantic-lex-block-stack
         (let* ((last (pop semantic-lex-block-stack))
                (blk last))
           (while blk
             (message
              ,(format "%s: `%%s' block from %%S is unterminated" name)
              (car blk) (cadr blk))
             (setq blk (pop semantic-lex-block-stack)))
           (semantic-lex-unterminated-syntax-detected (car last))))
       ;; Return to where we started.
       ;; Do not wrap in protective stuff so that if there is an error
       ;; thrown, the user knows where.
       (goto-char starting-position)
       ;; Return the token stream
       (nreverse semantic-lex-token-stream))))
885 ;;; Collapsed block tokens delimited by any tokens.
(defun semantic-lex-start-block (syntax)
  "Mark the last read token as the beginning of a SYNTAX block."
  (cond
   ;; Still within the depth we tokenize into: just descend.
   ((or (null semantic-lex-maximum-depth)
        (< semantic-lex-current-depth semantic-lex-maximum-depth))
    (setq semantic-lex-current-depth (1+ semantic-lex-current-depth)))
   ;; Too deep: remember the block-open token so the whole block can be
   ;; collapsed later by `semantic-lex-end-block'.
   (t
    (push (list syntax (car semantic-lex-token-stream))
          semantic-lex-block-stack))))
(defun semantic-lex-end-block (syntax)
  "Process the end of a previously marked SYNTAX block.
That is, collapse the tokens inside that block, including the
beginning and end of block tokens, into a high level block token of
class SYNTAX.
The token at beginning of block is the one marked by a previous call
to `semantic-lex-start-block'.  The current token is the end of block.
The collapsed tokens are saved in `semantic-lex-block-streams'."
  ;; Several lines were dropped during extraction (`bstream'/`tok'
  ;; bindings, the token-moving `push', the sanity-check `or', and the
  ;; `semantic-lex-token' wrapper); restored here.
  (if (null semantic-lex-block-stack)
      ;; No collapsed block in progress: just track the depth change.
      (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
    (let* ((stream semantic-lex-token-stream)
           (blk (pop semantic-lex-block-stack))
           (bstream (cdr blk))
           (first (car bstream))
           (last (pop stream)) ;; The current token mark the EOBLK
           tok)
      (if (not (eq (car blk) syntax))
          ;; SYNTAX doesn't match the syntax of the current block in
          ;; the stack.  So we encountered the end of the SYNTAX block
          ;; before the end of the current one in the stack which is
          ;; signaled unterminated.
          (semantic-lex-unterminated-syntax-detected (car blk))
        ;; Move tokens found inside the block from the main stream
        ;; into a separate block stream.
        (while (and stream (not (eq (setq tok (pop stream)) first)))
          (push tok bstream))
        ;; The token marked as beginning of block was not encountered.
        ;; This should not happen!
        (or stream
            (error "Token %S not found at beginning of block `%s'"
                   first syntax))
        ;; Save the block stream for future reuse, to avoid to redo
        ;; the lexical analysis of the block content!
        ;; Anchor the block stream with its start position, so we can
        ;; use: (cdr (assq start semantic-lex-block-streams)) to
        ;; quickly retrieve the lexical stream associated to a block.
        (setcar blk (semantic-lex-token-start first))
        (setcdr blk (nreverse bstream))
        (push blk semantic-lex-block-streams)
        ;; In the main stream, replace the tokens inside the block by
        ;; a high level block token of class SYNTAX.
        (setq semantic-lex-token-stream stream)
        (semantic-lex-push-token
         (semantic-lex-token
          syntax (car blk) (semantic-lex-token-end last)))))))
942 ;;; Lexical token API
944 ;; Functions for accessing parts of a token. Use these functions
945 ;; instead of accessing the list structure directly because the
946 ;; contents of the lexical token may change.
;; Build a lexical token cons cell at compile time.  With STR the shape
;; is (SYMBOL STR START . END); without it, (SYMBOL START . END).
;; NOTE(review): the `(if str ...)' selector line between the two
;; backquoted forms is missing from this extraction — code left as-is.
948 (defmacro semantic-lex-token
(symbol start end
&optional str
)
949 "Create a lexical token.
950 SYMBOL is a symbol representing the class of syntax found.
951 START and END define the bounds of the token in the current buffer.
952 Optional STR is the string for the token iff the bounds
953 in the buffer do not cover the string they represent. (As from
955 ;; This if statement checks the existence of a STR argument at
956 ;; compile time, where STR is some symbol or constant. If the
957 ;; variable STR (runtime) is nil, this will make an incorrect decision.
959 ;; It is like this to maintain the original speed of the compiled
962 `(cons ,symbol
(cons ,str
(cons ,start
,end
)))
963 `(cons ,symbol
(cons ,start
,end
))))
;; Structural predicate: accepts both token shapes, with text
;; (SYMBOL STR START . END) and without (SYMBOL START . END).
;; NOTE(review): the leading `(and (consp thing)' line appears to be
;; missing from this extraction — code left byte-identical.
965 (defun semantic-lex-token-p (thing)
966 "Return non-nil if THING is a semantic lex token.
967 This is an exhaustively robust check."
969 (symbolp (car thing
))
970 (or (and (numberp (nth 1 thing
))
971 (numberp (nthcdr 2 thing
)))
972 (and (stringp (nth 1 thing
))
973 (numberp (nth 2 thing
))
974 (numberp (nthcdr 3 thing
)))
;; Predicate for the 4-slot token shape (SYMBOL STR START END-ish):
;; slot 1 must be a string.  NOTE(review): source text garbled
;; (wrapped/missing lines); code left byte-identical.
978 (defun semantic-lex-token-with-text-p (thing)
979 "Return non-nil if THING is a semantic lex token.
980 This is an exhaustively robust check."
982 (symbolp (car thing
))
984 (stringp (nth 1 thing
))
985 (numberp (nth 2 thing
))
986 (numberp (nth 3 thing
)))
;; Predicate for the plain token shape (SYMBOL START . END): slots 1
;; and 2 are numbers, no token text.  NOTE(review): source text garbled
;; (wrapped/missing lines); code left byte-identical.
989 (defun semantic-lex-token-without-text-p (thing)
990 "Return non-nil if THING is a semantic lex token.
991 This is an exhaustively robust check."
993 (symbolp (car thing
))
995 (numberp (nth 1 thing
))
996 (numberp (nth 2 thing
)))
;; Compile-time helper: turn (BLOCK BEGIN END) specs into a `cond'
;; form dispatching on the class of the last pushed token, calling
;; `semantic-lex-start-block' / `semantic-lex-end-block' as needed.
;; NOTE(review): source text garbled by extraction (the `form'
;; accumulator binding is missing); code left byte-identical.
999 (defun semantic-lex-expand-block-specs (specs)
1000 "Expand block specifications SPECS into a Lisp form.
1001 SPECS is a list of (BLOCK BEGIN END) elements where BLOCK, BEGIN, and
1002 END are token class symbols that indicate to produce one collapsed
1003 BLOCK token from tokens found between BEGIN and END ones.
1004 BLOCK must be a non-nil symbol, and at least one of the BEGIN or END
1005 symbols must be non-nil too.
1006 When BEGIN is non-nil, generate a call to `semantic-lex-start-block'
1007 when a BEGIN token class is encountered.
1008 When END is non-nil, generate a call to `semantic-lex-end-block' when
1009 an END token class is encountered."
1010 (let ((class (make-symbol "class"))
1012 (dolist (spec specs
)
1015 (push `((eq ',(nth 1 spec
) ,class
)
1016 (semantic-lex-start-block ',(car spec
)))
1019 (push `((eq ',(nth 2 spec
) ,class
)
1020 (semantic-lex-end-block ',(car spec
)))
1023 `((let ((,class
(semantic-lex-token-class
1024 (car semantic-lex-token-stream
))))
1025 (cond ,@(nreverse form
))))
;; Expansion: push TOKEN onto `semantic-lex-token-stream', run any
;; expanded block specs, then advance `semantic-lex-end-point' to the
;; end of the token now at the head of the stream.
;; NOTE(review): the enclosing `(progn' and closing lines are missing
;; from this extraction; code left byte-identical.
1028 (defmacro semantic-lex-push-token
(token &rest blockspecs
)
1029 "Push TOKEN in the lexical analyzer token stream.
1030 Return the lexical analysis current end point.
1031 If optional arguments BLOCKSPECS is non-nil, it specifies to process
1032 collapsed block tokens. See `semantic-lex-expand-block-specs' for
1034 This macro should only be called within the bounds of
1035 `define-lex-analyzer'. It changes the values of the lexical analyzer
1036 variables `token-stream' and `semantic-lex-end-point'. If you need to
1037 move `semantic-lex-end-point' somewhere else, just modify this
1038 variable after calling `semantic-lex-push-token'."
1040 (push ,token semantic-lex-token-stream
)
1041 ,@(semantic-lex-expand-block-specs blockspecs
)
1042 (setq semantic-lex-end-point
1043 (semantic-lex-token-end (car semantic-lex-token-stream
)))
;; Accessor for the token class (first slot of a token cons).
;; NOTE(review): the body line (presumably `(car token)') is missing
;; from this extraction; remaining text left byte-identical.
1046 (defsubst semantic-lex-token-class
(token)
1047 "Fetch the class of the lexical token TOKEN.
1048 See also the function `semantic-lex-token'."
;; Accessor for token text: returns the embedded string when the token
;; carries one, otherwise extracts the buffer text between the token's
;; start and end.  NOTE(review): the then-branch line is missing from
;; this extraction; code left byte-identical.
1051 (defsubst semantic-lex-token-text
(token)
1052 "Fetch the text associated with the lexical token TOKEN.
1053 See also the function `semantic-lex-token'."
1054 (if (stringp (car (cdr token
)))
1056 (buffer-substring-no-properties
1057 (semantic-lex-token-start token
)
1058 (semantic-lex-token-end token
))))
;; Per-buffer setup: derive `semantic-lex-comment-regex' from the
;; mode's `comment-start-skip' when unset, then build the lexer's
;; private syntax table and apply `semantic-lex-syntax-modifications'.
;; NOTE(review): the fallback-regex line of the `if' is missing from
;; this extraction; code left byte-identical.
1060 (defun semantic-lex-init ()
1061 "Initialize any lexical state for this buffer."
1062 (unless semantic-lex-comment-regex
1063 (setq semantic-lex-comment-regex
1064 (if comment-start-skip
1065 (concat "\\(\\s<\\|" comment-start-skip
"\\)")
1067 ;; Setup the lexer syntax-table
1068 (setq semantic-lex-syntax-table
(copy-syntax-table (syntax-table)))
1069 (dolist (mod semantic-lex-syntax-modifications
)
1070 (modify-syntax-entry
1071 (car mod
) (nth 1 mod
) semantic-lex-syntax-table
)))
;; Overloadable entry point: delegates to the analyzer function stored
;; in the buffer-local variable `semantic-lex-analyzer'.
;; NOTE(review): source text garbled by extraction; code left
;; byte-identical.
1073 (define-overloadable-function semantic-lex
(start end
&optional depth length
)
1074 "Lexically analyze text in the current buffer between START and END.
1075 Optional argument DEPTH indicates at what level to scan over entire
1076 lists. The last argument, LENGTH specifies that `semantic-lex'
1077 should only return LENGTH tokens. The return value is a token stream.
1078 Each element is a list, such of the form
1079 (symbol start-expression . end-expression)
1080 where SYMBOL denotes the token type.
1081 See `semantic-lex-tokens' variable for details on token types. END
1082 does not mark the end of the text scanned, only the end of the
1083 beginning of text scanned. Thus, if a string extends past END, the
1084 end of the return token will be larger than END. To truly restrict
1085 scanning, use `narrow-to-region'."
1086 (funcall semantic-lex-analyzer start end depth length
))
(defsubst semantic-lex-buffer (&optional depth)
  "Lex the current buffer.
Optional argument DEPTH is the depth to scan into lists."
  ;; Cover the entire accessible portion of the buffer.
  (let ((begin (point-min))
        (finish (point-max)))
    (semantic-lex begin finish depth)))
;; Re-lex the buffer region covered by the token SEMLIST, scanning
;; nested lists down to DEPTH.  NOTE(review): the trailing
;; `depth))' line is missing from this extraction; code left
;; byte-identical.
1093 (defsubst semantic-lex-list
(semlist depth
)
1094 "Lex the body of SEMLIST to DEPTH."
1095 (semantic-lex (semantic-lex-token-start semlist
)
1096 (semantic-lex-token-end semlist
)
1099 ;;; Analyzer creation macros
1101 ;; An individual analyzer is a condition and code that goes with it.
1103 ;; Created analyzers become variables with the code associated with them
1104 ;; as the symbol value. These analyzers are assembled into a lexer
1105 ;; to create new lexical analyzers.
;; User option consulted by `semantic-lex-unterminated-syntax-protection'.
;; NOTE(review): the `:group'/`:type' keywords are missing from this
;; extraction; code left byte-identical.
1107 (defcustom semantic-lex-debug-analyzers nil
1108 "Non-nil means to debug analyzers with syntax protection.
1109 Only in effect if `debug-on-error' is also non-nil."
;; Wrap analyzer FORMS: normally errors are converted into a call to
;; `semantic-lex-unterminated-syntax-detected'; when both
;; `debug-on-error' and `semantic-lex-debug-analyzers' are set, FORMS
;; run unprotected so the debugger can catch the real error.
;; NOTE(review): the unprotected branch and `condition-case' lines are
;; missing from this extraction; code left byte-identical.
1113 (defmacro semantic-lex-unterminated-syntax-protection
(syntax &rest forms
)
1114 "For SYNTAX, execute FORMS with protection for unterminated syntax.
1115 If FORMS throws an error, treat this as a syntax problem, and
1116 execute the unterminated syntax code. FORMS should return a position.
1117 Regardless of an error, the cursor should be moved to the end of
1118 the desired syntax, and a position returned.
1119 If `debug-on-error' is set, errors are not caught, so that you can
1121 Avoid using a large FORMS since it is duplicated."
1122 `(if (and debug-on-error semantic-lex-debug-analyzers
)
1127 (semantic-lex-unterminated-syntax-detected ,syntax
)))))
;; Indent the macro's body like a body form with one leading argument.
1128 (put 'semantic-lex-unterminated-syntax-protection
1129 'lisp-indent-function
1)
;; Core analyzer-defining macro: creates a variable NAME holding
;; (CONDITION FORMS...) for `define-lex' to splice together, and a
;; same-named function for interactive testing of one analyzer.
;; NOTE(review): source text garbled by extraction (e.g. the `(eval-and-compile'
;; / closing wrappers appear missing); code left byte-identical.
1131 (defmacro define-lex-analyzer
(name doc condition
&rest forms
)
1132 "Create a single lexical analyzer NAME with DOC.
1133 When an analyzer is called, the current buffer and point are
1134 positioned in a buffer at the location to be analyzed.
1135 CONDITION is an expression which returns t if FORMS should be run.
1136 Within the bounds of CONDITION and FORMS, the use of backquote
1137 can be used to evaluate expressions at compile time.
1138 While forms are running, the following variables will be locally bound:
1139 `semantic-lex-analysis-bounds' - The bounds of the current analysis.
1140 of the form (START . END)
1141 `semantic-lex-maximum-depth' - The maximum depth of semantic-list
1142 for the current analysis.
1143 `semantic-lex-current-depth' - The current depth of `semantic-list' that has
1145 `semantic-lex-end-point' - End Point after match.
1146 Analyzers should set this to a buffer location if their
1147 match string does not represent the end of the matched text.
1148 `semantic-lex-token-stream' - The token list being collected.
1149 Add new lexical tokens to this list.
1150 Proper action in FORMS is to move the value of `semantic-lex-end-point' to
1151 after the location of the analyzed entry, and to add any discovered tokens
1152 at the beginning of `semantic-lex-token-stream'.
1153 This can be done by using `semantic-lex-push-token'."
1155 (defvar ,name nil
,doc
)
1157 ;; Do this part separately so that re-evaluation rebuilds this code.
1158 (setq ,name
'(,condition
,@forms
))
1159 ;; Build a single lexical analyzer function, so the doc for
1160 ;; function help is automatically provided, and perhaps the
1161 ;; function could be useful for testing and debugging one
1163 (fset ',name
(lambda () ,doc
1164 (let ((semantic-lex-token-stream nil
)
1165 (semantic-lex-end-point (point))
1166 (semantic-lex-analysis-bounds
1167 (cons (point) (point-max)))
1168 (semantic-lex-current-depth 0)
1169 (semantic-lex-maximum-depth
1172 (when ,condition
,@forms
)
1173 semantic-lex-token-stream
)))
;; Convenience wrapper over `define-lex-analyzer' with a `looking-at'
;; condition.  NOTE(review): the FORMS splice and closing lines are
;; missing from this extraction; code left byte-identical.
1176 (defmacro define-lex-regex-analyzer
(name doc regexp
&rest forms
)
1177 "Create a lexical analyzer with NAME and DOC that will match REGEXP.
1178 FORMS are evaluated upon a successful match.
1179 See `define-lex-analyzer' for more about analyzers."
1180 `(define-lex-analyzer ,name
1182 (looking-at ,regexp
)
;; Regex analyzer that pushes exactly one token of class TOKSYM over
;; match group INDEX (default 0).  NOTE(review): the
;; `&optional index &rest forms' tail of the argument list is missing
;; from this extraction; code left byte-identical.
1186 (defmacro define-lex-simple-regex-analyzer
(name doc regexp toksym
1189 "Create a lexical analyzer with NAME and DOC that match REGEXP.
1190 TOKSYM is the symbol to use when creating a semantic lexical token.
1191 INDEX is the index into the match that defines the bounds of the token.
1192 Index should be a plain integer, and not specified in the macro as an
1194 FORMS are evaluated upon a successful match BEFORE the new token is
1195 created. It is valid to ignore FORMS.
1196 See `define-lex-analyzer' for more about analyzers."
1197 `(define-lex-analyzer ,name
1199 (looking-at ,regexp
)
1201 (semantic-lex-push-token
1202 (semantic-lex-token ,toksym
1203 (match-beginning ,(or index
0))
1204 (match-end ,(or index
0))))
;; Build an analyzer for paired-delimiter blocks: below the maximum
;; depth it emits OPEN-SYM/CLOSE-SYM tokens; at the limit it collapses
;; the whole region into a BLOCK-SYM token via
;; `semantic-lex-unterminated-syntax-protection'.
;; NOTE(review): source text is garbled by extraction (the spec loop
;; header, `forward-list'/token-construction lines are missing); code
;; left byte-identical.
1207 (defmacro define-lex-block-analyzer
(name doc spec1
&rest specs
)
1208 "Create a lexical analyzer NAME for paired delimiters blocks.
1209 It detects a paired delimiters block or the corresponding open or
1210 close delimiter depending on the value of the variable
1211 `semantic-lex-current-depth'. DOC is the documentation string of the lexical
1212 analyzer. SPEC1 and SPECS specify the token symbols and open, close
1213 delimiters used. Each SPEC has the form:
1215 \(BLOCK-SYM (OPEN-DELIM OPEN-SYM) (CLOSE-DELIM CLOSE-SYM))
1217 where BLOCK-SYM is the symbol returned in a block token. OPEN-DELIM
1218 and CLOSE-DELIM are respectively the open and close delimiters
1219 identifying a block. OPEN-SYM and CLOSE-SYM are respectively the
1220 symbols returned in open and close tokens."
1221 (let ((specs (cons spec1 specs
))
1222 spec open olist clist
)
1224 (setq spec
(car specs
)
1227 ;; build alist ((OPEN-DELIM OPEN-SYM BLOCK-SYM) ...)
1228 olist
(cons (list (car open
) (cadr open
) (car spec
)) olist
)
1229 ;; build alist ((CLOSE-DELIM CLOSE-SYM) ...)
1230 clist
(cons (nth 2 spec
) clist
)))
1231 `(define-lex-analyzer ,name
1234 (looking-at "\\(\\s(\\|\\s)\\)")
1235 (let ((text (match-string 0)) match
)
1237 ((setq match
(assoc text
',olist
))
1238 (if (or (not semantic-lex-maximum-depth
)
1239 (< semantic-lex-current-depth semantic-lex-maximum-depth
))
1241 (setq semantic-lex-current-depth
(1+ semantic-lex-current-depth
))
1242 (semantic-lex-push-token
1245 (match-beginning 0) (match-end 0))))
1246 (semantic-lex-push-token
1251 (semantic-lex-unterminated-syntax-protection (nth 2 match
)
1256 ((setq match
(assoc text
',clist
))
1257 (setq semantic-lex-current-depth
(1- semantic-lex-current-depth
))
1258 (semantic-lex-push-token
1261 (match-beginning 0) (match-end 0)))))))
1266 ;; Pre-defined common analyzers.
;; Catch-all analyzer, placed last in a lexer: any text reaching it is
;; a lexer-coverage bug, so it signals.  NOTE(review): the condition
;; line (presumably `t') is missing from this extraction.
1268 (define-lex-analyzer semantic-lex-default-action
1269 "The default action when no other lexical actions match text.
1270 This action will just throw an error."
1272 (error "Unmatched Text during Lexical Analysis"))
;; Emits a zero-width (bol N . N) token at line starts without moving
;; point.  NOTE(review): the condition expression and trailing lines
;; are missing from this extraction; code left byte-identical.
1274 (define-lex-analyzer semantic-lex-beginning-of-line
1275 "Detect and create a beginning of line token (BOL)."
1277 ;; Just insert a (bol N . N) token in the token stream,
1278 ;; without moving the point. N is the point at the
1279 ;; beginning of line.
1280 (semantic-lex-push-token (semantic-lex-token 'bol
(point) (point)))
1282 ;; We identify and add the BOL token onto the stream, but since
1283 ;; semantic-lex-end-point doesn't move, we always fail CONDITION, and have no
;; Match trailing whitespace plus a newline (or comment-end syntax)
;; and emit a `newline' token over match group 1.
1287 (define-lex-simple-regex-analyzer semantic-lex-newline
1288 "Detect and create newline tokens."
1289 "\\s-*\\(\n\\|\\s>\\)" 'newline
1)
;; Emit newlines as `whitespace' tokens, merging with an immediately
;; preceding whitespace token.  NOTE(review): the class comparison
;; target and merge-bound lines are missing from this extraction.
1291 (define-lex-regex-analyzer semantic-lex-newline-as-whitespace
1292 "Detect and create newline tokens.
1293 Use this ONLY if newlines are not whitespace characters (such as when
1294 they are comment end characters) AND when you want whitespace tokens."
1295 "\\s-*\\(\n\\|\\s>\\)"
1296 ;; Language wants whitespaces. Create a token for it.
1297 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream
))
1299 ;; Merge whitespace tokens together if they are adjacent. Two
1300 ;; whitespace tokens may be separated by a comment which is not in
1301 ;; the token stream.
1302 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream
))
1304 (semantic-lex-push-token
1306 'whitespace
(match-beginning 0) (match-end 0)))))
;; Skip newlines without producing a token: just advance
;; `semantic-lex-end-point' past the match.
1308 (define-lex-regex-analyzer semantic-lex-ignore-newline
1309 "Detect and ignore newline tokens.
1310 Use this ONLY if newlines are not whitespace characters (such as when
1311 they are comment end characters)."
1312 "\\s-*\\(\n\\|\\s>\\)"
1313 (setq semantic-lex-end-point
(match-end 0)))
;; Emit `whitespace' tokens, extending an immediately preceding
;; whitespace token instead of creating a new one.  NOTE(review): the
;; whitespace regexp line and the class comparison target are missing
;; from this extraction; code left byte-identical.
1315 (define-lex-regex-analyzer semantic-lex-whitespace
1316 "Detect and create whitespace tokens."
1317 ;; catch whitespace when needed
1319 ;; Language wants whitespaces. Create a token for it.
1320 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream
))
1322 ;; Merge whitespace tokens together if they are adjacent. Two
1323 ;; whitespace tokens may be separated by a comment which is not in
1324 ;; the token stream.
1326 (setq semantic-lex-end-point
(match-end 0))
1327 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream
))
1328 semantic-lex-end-point
))
1329 (semantic-lex-push-token
1331 'whitespace
(match-beginning 0) (match-end 0)))))
;; Skip whitespace without producing a token.  NOTE(review): the
;; whitespace regexp line is missing from this extraction.
1333 (define-lex-regex-analyzer semantic-lex-ignore-whitespace
1334 "Detect and skip over whitespace tokens."
1335 ;; catch whitespace when needed
1337 ;; Skip over the detected whitespace, do not create a token for it.
1338 (setq semantic-lex-end-point
(match-end 0)))
;; Emit `number' tokens using the language-configurable regexp in
;; `semantic-lex-number-expression'.
1340 (define-lex-simple-regex-analyzer semantic-lex-number
1341 "Detect and create number tokens.
1342 See `semantic-lex-number-expression' for details on matching numbers,
1343 and number formats."
1344 semantic-lex-number-expression
'number
)
;; Match word/symbol syntax; the token class is the keyword symbol
;; from `semantic-lex-keyword-p' when known, else `symbol'.
;; NOTE(review): the inner `semantic-lex-token' line is missing from
;; this extraction; code left byte-identical.
1346 (define-lex-regex-analyzer semantic-lex-symbol-or-keyword
1347 "Detect and create symbol and keyword tokens."
1348 "\\(\\sw\\|\\s_\\)+"
1349 (semantic-lex-push-token
1351 (or (semantic-lex-keyword-p (match-string 0)) 'symbol
)
1352 (match-beginning 0) (match-end 0))))
;; Match character-quote syntax (class `\\') and emit a `charquote'
;; token.
1354 (define-lex-simple-regex-analyzer semantic-lex-charquote
1355 "Detect and create charquote tokens."
1356 ;; Character quoting characters (i.e., \n as newline)
1357 "\\s\\+" 'charquote
)
;; Match one punctuation/paired/expression-prefix syntax char and emit
;; a generic `punctuation' token.
1359 (define-lex-simple-regex-analyzer semantic-lex-punctuation
1360 "Detect and create punctuation tokens."
1361 "\\(\\s.\\|\\s$\\|\\s'\\)" 'punctuation
)
;; Longest-match punctuation lookup against the grammar's
;; `punctuation' lexical-type table; falls back to the table's default
;; class.  NOTE(review): source text garbled by extraction (the `len'
;; and `end' bindings, decrement step, and closing lines are missing);
;; code left byte-identical.
1363 (define-lex-analyzer semantic-lex-punctuation-type
1364 "Detect and create a punctuation type token.
1365 Recognized punctuations are defined in the current table of lexical
1366 types, as the value of the `punctuation' token type."
1367 (and (looking-at "\\(\\s.\\|\\s$\\|\\s'\\)+")
1368 (let* ((key (match-string 0))
1369 (pos (match-beginning 0))
1372 (lst (semantic-lex-type-value "punctuation" t
))
1373 (def (car lst
)) ;; default lexical symbol or nil
1374 (lst (cdr lst
)) ;; alist of (LEX-SYM . PUNCT-STRING)
1377 ;; Starting with the longest one, search if the
1378 ;; punctuation string is defined for this language.
1379 (while (and (> len
0) (not (setq elt
(rassoc key lst
))))
1381 key
(substring key
0 len
))))
1382 (if elt
;; Return the punctuation token found
1383 (semantic-lex-push-token
1384 (semantic-lex-token (car elt
) pos
(+ pos len
)))
1385 (if def
;; Return a default generic token
1386 (semantic-lex-push-token
1387 (semantic-lex-token def pos end
))
;; Below the depth limit: emit `open-paren' and descend.  At the
;; limit: collapse the whole balanced expression into one
;; `semantic-list' token under unterminated-syntax protection.
;; NOTE(review): the open-paren regexp line, the `semantic-lex-token'
;; wrappers, and the `forward-list' body are missing from this
;; extraction; code left byte-identical.
1391 (define-lex-regex-analyzer semantic-lex-paren-or-list
1392 "Detect open parenthesis.
1393 Return either a paren token or a semantic list token depending on
1394 `semantic-lex-current-depth'."
1396 (if (or (not semantic-lex-maximum-depth
)
1397 (< semantic-lex-current-depth semantic-lex-maximum-depth
))
1399 (setq semantic-lex-current-depth
(1+ semantic-lex-current-depth
))
1400 (semantic-lex-push-token
1402 'open-paren
(match-beginning 0) (match-end 0))))
1403 (semantic-lex-push-token
1405 'semantic-list
(match-beginning 0)
1407 (semantic-lex-unterminated-syntax-protection 'semantic-list
;; Emit `open-paren' on open-paren syntax and increment the tracked
;; nesting depth as a side effect.
1413 (define-lex-simple-regex-analyzer semantic-lex-open-paren
1414 "Detect and create an open parenthesis token."
1415 "\\s(" 'open-paren
0 (setq semantic-lex-current-depth
(1+ semantic-lex-current-depth
)))
;; Emit `close-paren' on close-paren syntax and decrement the tracked
;; nesting depth as a side effect.
1417 (define-lex-simple-regex-analyzer semantic-lex-close-paren
1418 "Detect and create a close paren token."
1419 "\\s)" 'close-paren
0 (setq semantic-lex-current-depth
(1- semantic-lex-current-depth
)))
;; Collapse an entire string literal into one `string' token, under
;; unterminated-syntax protection.  NOTE(review): the string regexp,
;; token construction, and `forward-sexp' lines are missing from this
;; extraction; code left byte-identical.
1421 (define-lex-regex-analyzer semantic-lex-string
1422 "Detect and create a string token."
1424 ;; Zing to the end of this string.
1425 (semantic-lex-push-token
1429 (semantic-lex-unterminated-syntax-protection 'string
;; Emit `comment' tokens, extending the previous token when it is also
;; a comment so consecutive comments coalesce.  NOTE(review): the
;; `forward-comment'/skip lines between the regexp and the `bolp'
;; check are missing from this extraction; code left byte-identical.
1434 (define-lex-regex-analyzer semantic-lex-comments
1435 "Detect and create a comment token."
1436 semantic-lex-comment-regex
1439 ;; Generate newline token if enabled
1440 (if (bolp) (backward-char 1))
1441 (setq semantic-lex-end-point
(point))
1442 ;; Language wants comments or want them as whitespaces,
1443 ;; link them together.
1444 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream
)) 'comment
)
1445 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream
))
1446 semantic-lex-end-point
)
1447 (semantic-lex-push-token
1449 'comment
(match-beginning 0) semantic-lex-end-point
)))))
;; Like `semantic-lex-comments' but emits the comment region as a
;; `whitespace' token, merging with a preceding whitespace token.
;; NOTE(review): comment-skipping lines are missing from this
;; extraction; code left byte-identical.
1451 (define-lex-regex-analyzer semantic-lex-comments-as-whitespace
1452 "Detect comments and create a whitespace token."
1453 semantic-lex-comment-regex
1456 ;; Generate newline token if enabled
1457 (if (bolp) (backward-char 1))
1458 (setq semantic-lex-end-point
(point))
1459 ;; Language wants comments or want them as whitespaces,
1460 ;; link them together.
1461 (if (eq (semantic-lex-token-class (car semantic-lex-token-stream
)) 'whitespace
)
1462 (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream
))
1463 semantic-lex-end-point
)
1464 (semantic-lex-push-token
1466 'whitespace
(match-beginning 0) semantic-lex-end-point
)))))
;; Skip over a comment without emitting a token; includes fallbacks
;; for modes whose comment-skip machinery fails to advance point.
;; NOTE(review): the `forward-comment' call and backup lines are
;; missing from this extraction; code left byte-identical.
1468 (define-lex-regex-analyzer semantic-lex-ignore-comments
1469 "Detect and create a comment token."
1470 semantic-lex-comment-regex
1471 (let ((comment-start-point (point)))
1473 (if (eq (point) comment-start-point
)
1474 ;; In this case our start-skip string failed
1475 ;; to work properly. Let's try and move over
1476 ;; whatever white space we matched to begin
1478 (skip-syntax-forward "-.'"
1482 ;; We may need to back up so newlines or whitespace is generated.
1485 (if (eq (point) comment-start-point
)
1486 (error "Strange comment syntax prevents lexical analysis"))
1487 (setq semantic-lex-end-point
(point))))
1491 ;; Predefined lexers that could be used instead of creating new
;; Minimal lexer: only comments produce tokens; whitespace and
;; newlines are skipped, anything else errors via the default action.
1494 (define-lex semantic-comment-lexer
1495 "A simple lexical analyzer that handles comments.
1496 This lexer will only return comment tokens. It is the default lexer
1497 used by `semantic-find-doc-snarf-comment' to snarf up the comment at
1499 semantic-lex-ignore-whitespace
1500 semantic-lex-ignore-newline
1501 semantic-lex-comments
1502 semantic-lex-default-action
)
;; General-purpose syntax-table-driven lexer; analyzer order matters:
;; earlier analyzers win, with the error-raising default action last.
;; NOTE(review): some analyzer entries (e.g. number, string,
;; open-paren) appear to be missing from this extraction.
1506 (define-lex semantic-simple-lexer
1507 "A simple lexical analyzer that handles simple buffers.
1508 This lexer ignores comments and whitespace, and will return
1509 syntax as specified by the syntax table."
1510 semantic-lex-ignore-whitespace
1511 semantic-lex-ignore-newline
1513 semantic-lex-symbol-or-keyword
1514 semantic-lex-charquote
1515 semantic-lex-paren-or-list
1516 semantic-lex-close-paren
1518 semantic-lex-ignore-comments
1519 semantic-lex-punctuation
1520 semantic-lex-default-action
)
1522 ;;; Analyzers generated from grammar.
1524 ;; Some analyzers are hand written. Analyzers created with these
1525 ;; functions are generated from the grammar files.
;; Grammar-generated analyzer: match SYNTAX, then only emit a token
;; when the matched text is a registered keyword (class = the keyword
;; symbol).  Uses an uninterned symbol to avoid capture in expansion.
1527 (defmacro define-lex-keyword-type-analyzer
(name doc syntax
)
1528 "Define a keyword type analyzer NAME with DOC string.
1529 SYNTAX is the regexp that matches a keyword syntactic expression."
1530 (let ((key (make-symbol "key")))
1531 `(define-lex-analyzer ,name
1533 (and (looking-at ,syntax
)
1534 (let ((,key
(semantic-lex-keyword-p (match-string 0))))
1536 (semantic-lex-push-token
1538 ,key
(match-beginning 0) (match-end 0)))))))
;; Grammar-generated analyzer: collapse a whole s-expression starting
;; at SYNTAX into one TOKEN, under unterminated-syntax protection.
;; NOTE(review): token construction and `forward-sexp' lines are
;; missing from this extraction; code left byte-identical.
1541 (defmacro define-lex-sexp-type-analyzer
(name doc syntax token
)
1542 "Define a sexp type analyzer NAME with DOC string.
1543 SYNTAX is the regexp that matches the beginning of the s-expression.
1544 TOKEN is the lexical token returned when SYNTAX matches."
1545 `(define-lex-regex-analyzer ,name
1548 (semantic-lex-push-token
1552 (semantic-lex-unterminated-syntax-protection ,token
;; Grammar-generated analyzer: with MATCHES, scan the alist and emit
;; the first class whose regexp matches the text (else DEFAULT);
;; without MATCHES, degenerate to a simple regex analyzer.
;; Uninterned symbols avoid variable capture in the expansion.
;; NOTE(review): the `lst'/`elt' bindings inside the expansion and the
;; fallback arm's tail are missing from this extraction.
1557 (defmacro define-lex-regex-type-analyzer
(name doc syntax matches default
)
1558 "Define a regexp type analyzer NAME with DOC string.
1559 SYNTAX is the regexp that matches a syntactic expression.
1560 MATCHES is an alist of lexical elements used to refine the syntactic
1562 DEFAULT is the default lexical token returned when no MATCHES."
1564 (let* ((val (make-symbol "val"))
1565 (lst (make-symbol "lst"))
1566 (elt (make-symbol "elt"))
1567 (pos (make-symbol "pos"))
1568 (end (make-symbol "end")))
1569 `(define-lex-analyzer ,name
1571 (and (looking-at ,syntax
)
1572 (let* ((,val
(match-string 0))
1573 (,pos
(match-beginning 0))
1574 (,end
(match-end 0))
1577 (while (and ,lst
(not ,elt
))
1578 (if (string-match (cdar ,lst
) ,val
)
1579 (setq ,elt
(caar ,lst
))
1580 (setq ,lst
(cdr ,lst
))))
1581 (semantic-lex-push-token
1582 (semantic-lex-token (or ,elt
,default
) ,pos
,end
))))
1584 `(define-lex-simple-regex-analyzer ,name
;; Grammar-generated analyzer: longest-prefix string lookup — shorten
;; the matched text until it `rassoc's in MATCHES, then emit that
;; class (adjusting the token end), else DEFAULT; without MATCHES,
;; degenerate to a simple regex analyzer.
;; NOTE(review): the `lst' initialization, the `while' body's first
;; line, and the fallback arm's tail are missing from this extraction.
1589 (defmacro define-lex-string-type-analyzer
(name doc syntax matches default
)
1590 "Define a string type analyzer NAME with DOC string.
1591 SYNTAX is the regexp that matches a syntactic expression.
1592 MATCHES is an alist of lexical elements used to refine the syntactic
1594 DEFAULT is the default lexical token returned when no MATCHES."
1596 (let* ((val (make-symbol "val"))
1597 (lst (make-symbol "lst"))
1598 (elt (make-symbol "elt"))
1599 (pos (make-symbol "pos"))
1600 (end (make-symbol "end"))
1601 (len (make-symbol "len")))
1602 `(define-lex-analyzer ,name
1604 (and (looking-at ,syntax
)
1605 (let* ((,val
(match-string 0))
1606 (,pos
(match-beginning 0))
1607 (,end
(match-end 0))
1608 (,len
(- ,end
,pos
))
1611 ;; Starting with the longest one, search if a lexical
1612 ;; value match a token defined for this language.
1613 (while (and (> ,len
0) (not (setq ,elt
(rassoc ,val
,lst
))))
1614 (setq ,len
(1- ,len
)
1615 ,val
(substring ,val
0 ,len
)))
1616 (when ,elt
;; Adjust token end position.
1617 (setq ,elt
(car ,elt
)
1618 ,end
(+ ,pos
,len
)))
1619 (semantic-lex-push-token
1620 (semantic-lex-token (or ,elt
,default
) ,pos
,end
))))
1622 `(define-lex-simple-regex-analyzer ,name
;; Grammar-generated counterpart of `define-lex-block-analyzer':
;; MATCHES is (OPEN-SPECS . CLOSE-SPECS); below the depth limit emit
;; open/close tokens, at the limit collapse into a block token under
;; unterminated-syntax protection.  NOTE(review): source text garbled
;; by extraction (the `lst' binding, token constructions, and
;; `forward-list' lines are missing); code left byte-identical.
1627 (defmacro define-lex-block-type-analyzer
(name doc syntax matches
)
1628 "Define a block type analyzer NAME with DOC string.
1630 SYNTAX is the regexp that matches block delimiters, typically the
1631 open (`\\\\s(') and close (`\\\\s)') parenthesis syntax classes.
1633 MATCHES is a pair (OPEN-SPECS . CLOSE-SPECS) that defines blocks.
1635 OPEN-SPECS is a list of (OPEN-DELIM OPEN-TOKEN BLOCK-TOKEN) elements
1638 OPEN-DELIM is a string: the block open delimiter character.
1640 OPEN-TOKEN is the lexical token class associated to the OPEN-DELIM
1643 BLOCK-TOKEN is the lexical token class associated to the block
1644 that starts at the OPEN-DELIM delimiter.
1646 CLOSE-SPECS is a list of (CLOSE-DELIM CLOSE-TOKEN) elements where:
1648 CLOSE-DELIM is a string: the block end delimiter character.
1650 CLOSE-TOKEN is the lexical token class associated to the
1651 CLOSE-DELIM delimiter.
1653 Each element in OPEN-SPECS must have a corresponding element in
1656 The lexer will return a BLOCK-TOKEN token when the value of
1657 `semantic-lex-current-depth' is greater than or equal to the maximum
1658 depth of parenthesis tracking (see also the function `semantic-lex').
1659 Otherwise it will return OPEN-TOKEN and CLOSE-TOKEN tokens.
1661 TO DO: Put the following in the developer's guide and just put a
1666 The value of a block token must be a string that contains a readable
1669 \"(OPEN-TOKEN CLOSE-TOKEN)\"
1671 OPEN-TOKEN and CLOSE-TOKEN represent the block delimiters, and must be
1672 lexical tokens of respectively `open-paren' and `close-paren' types.
1673 Their value is the corresponding delimiter character as a string.
1675 Here is a small example to analyze a parenthesis block:
1677 %token <block> PAREN_BLOCK \"(LPAREN RPAREN)\"
1678 %token <open-paren> LPAREN \"(\"
1679 %token <close-paren> RPAREN \")\"
1681 When the lexer encounters the open-paren delimiter \"(\":
1683 - If the maximum depth of parenthesis tracking is not reached (that
1684 is, current depth < max depth), it returns a (LPAREN start . end)
1685 token, then continue analysis inside the block. Later, when the
1686 corresponding close-paren delimiter \")\" will be encountered, it
1687 will return a (RPAREN start . end) token.
1689 - If the maximum depth of parenthesis tracking is reached (current
1690 depth >= max depth), it returns the whole parenthesis block as
1691 a (PAREN_BLOCK start . end) token."
1692 (let* ((val (make-symbol "val"))
1693 (lst (make-symbol "lst"))
1694 (elt (make-symbol "elt")))
1695 `(define-lex-analyzer ,name
1698 (looking-at ,syntax
) ;; "\\(\\s(\\|\\s)\\)"
1699 (let ((,val
(match-string 0))
1703 ((setq ,elt
(assoc ,val
(car ,lst
)))
1704 (if (or (not semantic-lex-maximum-depth
)
1705 (< semantic-lex-current-depth semantic-lex-maximum-depth
))
1707 (setq semantic-lex-current-depth
(1+ semantic-lex-current-depth
))
1708 (semantic-lex-push-token
1711 (match-beginning 0) (match-end 0))))
1712 (semantic-lex-push-token
1717 (semantic-lex-unterminated-syntax-protection (nth 2 ,elt
)
1720 ((setq ,elt
(assoc ,val
(cdr ,lst
)))
1721 (setq semantic-lex-current-depth
(1- semantic-lex-current-depth
))
1722 (semantic-lex-push-token
1725 (match-beginning 0) (match-end 0))))
1731 ;; The semantic lexers, unlike other lexers, can throw errors on
1732 ;; unbalanced syntax. Since editing is all about changing text
1733 ;; we need to provide a convenient way to protect against syntactic
;; Run FORMS with `semantic-lex-unterminated-syntax-end-function'
;; rebound to `throw' SYMBOL, so an unterminated-syntax condition
;; unwinds here instead of signaling; on catch, mark the parse tree
;; unparseable and return the failing syntax symbol.
;; NOTE(review): the catch body's FORMS splice and the final `if'
;; around the result are missing from this extraction; code left
;; byte-identical.
1736 (defmacro semantic-lex-catch-errors
(symbol &rest forms
)
1737 "Using SYMBOL, execute FORMS catching lexical errors.
1738 If FORMS results in a call to the parser that throws a lexical error,
1739 the error will be caught here without the buffer's cache being thrown
1741 If there is an error, the syntax that failed is returned.
1742 If there is no error, then the last value of FORMS is returned."
1743 (let ((ret (make-symbol "ret"))
1744 (syntax (make-symbol "syntax"))
1745 (start (make-symbol "start"))
1746 (end (make-symbol "end")))
1747 `(let* ((semantic-lex-unterminated-syntax-end-function
1748 (lambda (,syntax
,start
,end
)
1749 (throw ',symbol
,syntax
)))
1750 ;; Delete the below when semantic-flex is fully retired.
1751 (semantic-flex-unterminated-syntax-end-function
1752 semantic-lex-unterminated-syntax-end-function
)
1753 (,ret
(catch ',symbol
1757 ;; Great Sadness. Assume that FORMS execute within the
1758 ;; confines of the current buffer only! Mark this thing
1759 ;; unparseable iff the special symbol was thrown. This
1760 ;; will prevent future calls from parsing, but will allow
1761 ;; them to still return the cache.
1763 ;; Leave this message off. If an APP using this fcn wants
1764 ;; a message, they can do it themselves. This cleans up
1765 ;; problems with the idle scheduler obscuring useful data.
1766 ;;(message "Buffer not currently parsable (%S)." ,ret)
1767 (semantic-parse-tree-unparseable))
;; Indent the macro's body like a body form with one leading argument.
1769 (put 'semantic-lex-catch-errors
'lisp-indent-function
1)
1772 ;;; Interfacing with edebug
;; Edebug instrumentation specs for the analyzer-defining macros so
;; their CONDITION/FORMS arguments can be stepped through.
;; NOTE(review): the spec body for `semantic-lex-catch-errors' is
;; missing from this extraction; code left byte-identical.
1778 (def-edebug-spec define-lex
1779 (&define name stringp
(&rest symbolp
))
1781 (def-edebug-spec define-lex-analyzer
1782 (&define name stringp form def-body
)
1784 (def-edebug-spec define-lex-regex-analyzer
1785 (&define name stringp form def-body
)
1787 (def-edebug-spec define-lex-simple-regex-analyzer
1788 (&define name stringp form symbolp
[ &optional form
] def-body
)
1790 (def-edebug-spec define-lex-block-analyzer
1791 (&define name stringp form
(&rest form
))
1793 (def-edebug-spec semantic-lex-catch-errors
1799 ;;; Compatibility with Semantic 1.x lexical analysis
1801 ;; NOTE: DELETE THIS SOMEDAY SOON
;; Back-compat shims: map the deprecated Semantic 1.x `semantic-flex-*'
;; names onto their `semantic-lex-*' replacements.
1803 (semantic-alias-obsolete 'semantic-flex-start
'semantic-lex-token-start
)
1804 (semantic-alias-obsolete 'semantic-flex-end
'semantic-lex-token-end
)
1805 (semantic-alias-obsolete 'semantic-flex-text
'semantic-lex-token-text
)
1806 (semantic-alias-obsolete 'semantic-flex-make-keyword-table
'semantic-lex-make-keyword-table
)
1807 (semantic-alias-obsolete 'semantic-flex-keyword-p
'semantic-lex-keyword-p
)
1808 (semantic-alias-obsolete 'semantic-flex-keyword-put
'semantic-lex-keyword-put
)
1809 (semantic-alias-obsolete 'semantic-flex-keyword-get
'semantic-lex-keyword-get
)
1810 (semantic-alias-obsolete 'semantic-flex-map-keywords
'semantic-lex-map-keywords
)
1811 (semantic-alias-obsolete 'semantic-flex-keywords
'semantic-lex-keywords
)
1812 (semantic-alias-obsolete 'semantic-flex-buffer
'semantic-lex-buffer
)
1813 (semantic-alias-obsolete 'semantic-flex-list
'semantic-lex-list
)
1815 ;; This simple scanner uses the syntax table to generate a stream of
1816 ;; simple tokens of the form:
1818 ;; (SYMBOL START . END)
1820 ;; Where symbol is the type of thing it is. START and END mark that
1821 ;; object's boundary.
;; Obsolete name for the token-type alist; kept for Semantic 1.x code.
(defvar semantic-flex-tokens semantic-lex-tokens
  "An alist of semantic token types.
See variable `semantic-lex-tokens'.")
;; Hook point for recovering from unterminated strings/comments during
;; flexing.  The default simply returns FLEX-END, i.e. "pretend the
;; construct ends where the scan was asked to end".
(defvar semantic-flex-unterminated-syntax-end-function
  (lambda (syntax syntax-start flex-end) flex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
FLEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or to exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.")
;; Per-buffer alist of (REGEX . FUNCTION) extensions to the old flexer.
;; NOTE(review): the token-form lines of this docstring were lost in the
;; damaged text; restored from the surrounding description — confirm
;; against upstream semantic/lex.el.
(defvar semantic-flex-extensions nil
  "Buffer local extensions to the lexical analyzer.
This should contain an alist with a key of a regex and a data element of
a function.  The function should both move point, and return a lexical
token of the form:
  ( TYPE START .  END)
nil is also a valid return value.
TYPE can be any type of symbol, as long as it doesn't occur as a
nonterminal in the language definition.")
(make-variable-buffer-local 'semantic-flex-extensions)
;; Temporary syntax-table tweaks applied only while `semantic-flex' runs.
;; NOTE(review): the element-form line of this docstring was lost in the
;; damaged text; restored as (CHAR CLASS) to match the description below —
;; confirm against upstream semantic/lex.el.
(defvar semantic-flex-syntax-modifications nil
  "Changes to the syntax table for this buffer.
These changes are active only while the buffer is being flexed.
This is a list where each element has the form:
  (CHAR CLASS)
CHAR is the char passed to `modify-syntax-entry',
and CLASS is the string also passed to `modify-syntax-entry' to define
what syntax class CHAR has.")
(make-variable-buffer-local 'semantic-flex-syntax-modifications)
;; Whether the old flexer drops comments from the token stream.
(defvar semantic-ignore-comments t
  "Default comment handling.
A value of t means to strip comments when flexing; nil means to keep
comments as part of the token stream.")
(make-variable-buffer-local 'semantic-ignore-comments)
;; Per-mode switch: emit 'newline tokens from the old flexer.
(defvar semantic-flex-enable-newlines nil
  "When flexing, report 'newlines as syntactic elements.
Useful for languages in which the newline is a special-case terminator.
Only set this on a per-mode basis, never globally.")
(make-variable-buffer-local 'semantic-flex-enable-newlines)
;; Per-mode switch: emit 'whitespace tokens from the old flexer.
(defvar semantic-flex-enable-whitespace nil
  "When flexing, report 'whitespace as syntactic elements.
Useful for languages whose syntax is whitespace dependent.
Only set this on a per-mode basis, never globally.")
(make-variable-buffer-local 'semantic-flex-enable-whitespace)
;; Per-mode switch: emit beginning-of-line tokens from the old flexer.
(defvar semantic-flex-enable-bol nil
  "When flexing, report beginning of lines as syntactic elements.
Useful for indentation-sensitive languages such as Python.
Only set this on a per-mode basis, never globally.")
(make-variable-buffer-local 'semantic-flex-enable-bol)
;; Obsolete name for the number-matching regexp used by the old flexer.
(defvar semantic-number-expression semantic-lex-number-expression
  "See variable `semantic-lex-number-expression'.")
(make-variable-buffer-local 'semantic-number-expression)
;; Default nesting depth at which the old flexer descends into lists;
;; 0 means lists become single 'semantic-list tokens.
(defvar semantic-flex-depth 0
  "Default flexing depth.
This specifies how many levels of nested lists to create tokens in.")
(make-variable-buffer-local 'semantic-flex-depth)
;; NOTE(review): `semantic-flex' is the obsolete Semantic 1.x lexer,
;; kept only for backward compatibility.  The text of this region is
;; damaged (interior lines are missing and stray line numbers from
;; another rendering are fused into the code), so the code below is
;; preserved byte-for-byte rather than reflowed or repaired; restore
;; this defun from upstream semantic/lex.el before evaluating it.
1892 (defun semantic-flex (start end
&optional depth length
)
1893 "Using the syntax table, do something roughly equivalent to flex.
1894 Semantically check between START and END. Optional argument DEPTH
1895 indicates at what level to scan over entire lists.
1896 The return value is a token stream. Each element is a list, such of
1897 the form (symbol start-expression . end-expression) where SYMBOL
1898 denotes the token type.
1899 See `semantic-flex-tokens' variable for details on token types.
1900 END does not mark the end of the text scanned, only the end of the
1901 beginning of text scanned. Thus, if a string extends past END, the
1902 end of the return token will be larger than END. To truly restrict
1903 scanning, use `narrow-to-region'.
1904 The last argument, LENGTH specifies that `semantic-flex' should only
1905 return LENGTH tokens."
;; NOTE(review): the binding form that introduces `ts', `ep' and
;; `curdepth' (used below) is missing from the damaged text, as are
;; several `cond' clause heads — TODO recover from upstream.
1906 (message "`semantic-flex' is an obsolete function. Use `define-lex' to create lexers.")
1907 (if (not semantic-flex-keywords-obarray
)
1908 (setq semantic-flex-keywords-obarray
[ nil
]))
1913 (cs (if comment-start-skip
1914 (concat "\\(\\s<\\|" comment-start-skip
"\\)")
1915 (concat "\\(\\s<\\)")))
1916 (newsyntax (copy-syntax-table (syntax-table)))
1917 (mods semantic-flex-syntax-modifications
)
1918 ;; Use the default depth if it is not specified.
1919 (depth (or depth semantic-flex-depth
)))
1920 ;; Update the syntax table
1922 (modify-syntax-entry (car (car mods
)) (car (cdr (car mods
))) newsyntax
)
1923 (setq mods
(cdr mods
)))
1924 (with-syntax-table newsyntax
1926 (while (and (< (point) end
) (or (not length
) (<= (length ts
) length
)))
1928 ;; catch beginning of lines when needed.
1929 ;; Must be done before catching any other tokens!
1930 ((and semantic-flex-enable-bol
1932 ;; Just insert a (bol N . N) token in the token stream,
1933 ;; without moving the point. N is the point at the
1934 ;; beginning of line.
1935 (setq ts
(cons (cons 'bol
(cons (point) (point))) ts
))
1937 ;; special extensions, includes whitespace, nl, etc.
1938 ((and semantic-flex-extensions
1939 (let ((fe semantic-flex-extensions
)
1942 (if (looking-at (car (car fe
)))
1943 (setq ts
(cons (funcall (cdr (car fe
))) ts
)
1948 (if (and r
(not (car ts
))) (setq ts
(cdr ts
)))
1950 ;; catch newlines when needed
1951 ((looking-at "\\s-*\\(\n\\|\\s>\\)")
1952 (if semantic-flex-enable-newlines
1953 (setq ep
(match-end 1)
1954 ts
(cons (cons 'newline
1955 (cons (match-beginning 1) ep
))
1957 ;; catch whitespace when needed
1958 ((looking-at "\\s-+")
1959 (if semantic-flex-enable-whitespace
1960 ;; Language wants whitespaces, link them together.
1961 (if (eq (car (car ts
)) 'whitespace
)
1962 (setcdr (cdr (car ts
)) (match-end 0))
1963 (setq ts
(cons (cons 'whitespace
1964 (cons (match-beginning 0)
1968 ((and semantic-number-expression
1969 (looking-at semantic-number-expression
))
1970 (setq ts
(cons (cons 'number
1971 (cons (match-beginning 0)
1975 ((looking-at "\\(\\sw\\|\\s_\\)+")
1976 (setq ts
(cons (cons
1977 ;; Get info on if this is a keyword or not
1978 (or (semantic-flex-keyword-p (match-string 0))
1980 (cons (match-beginning 0) (match-end 0)))
1982 ;; Character quoting characters (ie, \n as newline)
1983 ((looking-at "\\s\\+")
1984 (setq ts
(cons (cons 'charquote
1985 (cons (match-beginning 0) (match-end 0)))
1987 ;; Open parens, or semantic-lists.
1988 ((looking-at "\\s(")
1989 (if (or (not depth
) (< curdepth depth
))
1991 (setq curdepth
(1+ curdepth
))
1992 (setq ts
(cons (cons 'open-paren
1993 (cons (match-beginning 0) (match-end 0)))
1996 (cons 'semantic-list
1997 (cons (match-beginning 0)
2001 ;; This case makes flex robust
2006 semantic-flex-unterminated-syntax-end-function
2009 (setq ep
(point)))))
2012 ((looking-at "\\s)")
2013 (setq ts
(cons (cons 'close-paren
2014 (cons (match-beginning 0) (match-end 0)))
2016 (setq curdepth
(1- curdepth
)))
2017 ;; String initiators
2018 ((looking-at "\\s\"")
2019 ;; Zing to the end of this string.
2020 (setq ts
(cons (cons 'string
2021 (cons (match-beginning 0)
2025 ;; This case makes flex
2026 ;; robust to broken strings.
2030 semantic-flex-unterminated-syntax-end-function
2033 (setq ep
(point)))))
2037 (if (and semantic-ignore-comments
2038 (not semantic-flex-enable-whitespace
))
2039 ;; If the language doesn't deal with comments nor
2040 ;; whitespaces, ignore them here.
2041 (let ((comment-start-point (point)))
2043 (if (eq (point) comment-start-point
)
2044 ;; In this case our start-skip string failed
2045 ;; to work properly. Lets try and move over
2046 ;; whatever white space we matched to begin
2048 (skip-syntax-forward "-.'"
2052 ;;(forward-comment 1)
2053 ;; Generate newline token if enabled
2054 (if (and semantic-flex-enable-newlines
2057 (if (eq (point) comment-start-point
)
2058 (error "Strange comment syntax prevents lexical analysis"))
2060 (let ((tk (if semantic-ignore-comments
'whitespace
'comment
)))
2063 ;; Generate newline token if enabled
2064 (if (and semantic-flex-enable-newlines
2068 ;; Language wants comments or want them as whitespaces,
2069 ;; link them together.
2070 (if (eq (car (car ts
)) tk
)
2071 (setcdr (cdr (car ts
)) ep
)
2072 (setq ts
(cons (cons tk
(cons (match-beginning 0) ep
))
2075 ((looking-at "\\(\\s.\\|\\s$\\|\\s'\\)")
2076 (setq ts
(cons (cons 'punctuation
2077 (cons (match-beginning 0) (match-end 0)))
2081 (error "What is that?")))
2082 (goto-char (or ep
(match-end 0)))
2084 ;; maybe catch the last beginning of line when needed
2085 (and semantic-flex-enable-bol
2088 (setq ts
(cons (cons 'bol
(cons (point) (point))) ts
)))
2090 ;;(message "Flexing muscles...done")
;; NOTE(review): reconstructed from the damaged "'semantic" + "/lex"
;; fragments — the feature this library provides is `semantic/lex'.
(provide 'semantic/lex)
2095 ;;; semantic-lex.el ends here