HCoop Git - bpt/emacs.git/blame_incremental - lisp/cedet/semantic/lex.el

... / ...

Commit	Line	Data
	1	;;; lex.el --- Lexical Analyzer builder
	2
	3	;;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
	4	;;; 2007, 2008, 2009 Free Software Foundation, Inc.
	5
	6	;; Author: Eric M. Ludlam <zappo@gnu.org>
	7
	8	;; This file is part of GNU Emacs.
	9
	10	;; GNU Emacs is free software: you can redistribute it and/or modify
	11	;; it under the terms of the GNU General Public License as published by
	12	;; the Free Software Foundation, either version 3 of the License, or
	13	;; (at your option) any later version.
	14
	15	;; GNU Emacs is distributed in the hope that it will be useful,
	16	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	;; GNU General Public License for more details.
	19
	20	;; You should have received a copy of the GNU General Public License
	21	;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
	22
	23	;;; Commentary:
	24	;;
	25	;; This file handles the creation of lexical analyzers for different
	26	;; languages in Emacs Lisp. The purpose of a lexical analyzer is to
	27	;; convert a buffer into a list of lexical tokens. Each token
	28	;; contains the token class (such as 'number, 'symbol, 'IF, etc) and
	29	;; the location in the buffer it was found. Optionally, a token also
	30	;; contains a string representing what is at the designated buffer
	31	;; location.
	32	;;
	33	;; Tokens are pushed onto a token stream, which is basically a list of
	34	;; all the lexical tokens from the analyzed region. The token stream
	35	;; is then handed to the grammar which parsers the file.
	36	;;
	37	;;; How it works
	38	;;
	39	;; Each analyzer specifies a condition and forms. These conditions
	40	;; and forms are assembled into a function by `define-lex' that does
	41	;; the lexical analysis.
	42	;;
	43	;; In the lexical analyzer created with `define-lex', each condition
	44	;; is tested for a given point. When the conditin is true, the forms
	45	;; run.
	46	;;
	47	;; The forms can push a lexical token onto the token stream. The
	48	;; analyzer forms also must move the current analyzer point. If the
	49	;; analyzer point is moved without pushing a token, then tne matched
	50	;; syntax is effectively ignored, or skipped.
	51	;;
	52	;; Thus, starting at the beginning of a region to be analyzed, each
	53	;; condition is tested. One will match, and a lexical token might be
	54	;; pushed, and the point is moved to the end of the lexical token
	55	;; identified. At the new position, the process occurs again until
	56	;; the end of the specified region is reached.
	57	;;
	58	;;; How to use semantic-lex
	59	;;
	60	;; To create a lexer for a language, use the `define-lex' macro.
	61	;;
	62	;; The `define-lex' macro accepts a list of lexical analyzers. Each
	63	;; analyzer is created with `define-lex-analyzer', or one of the
	64	;; derivitive macros. A single analyzer defines a regular expression
	65	;; to match text in a buffer, and a short segment of code to create
	66	;; one lexical token.
	67	;;
	68	;; Each analyzer has a NAME, DOC, a CONDITION, and possibly some
	69	;; FORMS. The NAME is the name used in `define-lex'. The DOC
	70	;; describes what the analyzer should do.
	71	;;
	72	;; The CONDITION evaluates the text at the current point in the
	73	;; current buffer. If CONDITION is true, then the FORMS will be
	74	;; executed.
	75	;;
	76	;; The purpose of the FORMS is to push new lexical tokens onto the
	77	;; list of tokens for the current buffer, and to move point after the
	78	;; matched text.
	79	;;
	80	;; Some macros for creating one analyzer are:
	81	;;
	82	;; define-lex-analyzer - A generic analyzer associating any style of
	83	;; condition to forms.
	84	;; define-lex-regex-analyzer - Matches a regular expression.
	85	;; define-lex-simple-regex-analyzer - Matches a regular expressions,
	86	;; and pushes the match.
	87	;; define-lex-block-analyzer - Matches list syntax, and defines
	88	;; handles open/close delimiters.
	89	;;
	90	;; These macros are used by the grammar compiler when lexical
	91	;; information is specified in a grammar:
	92	;; define-lex- * -type-analyzer - Matches syntax specified in
	93	;; a grammar, and pushes one token for it. The * would
	94	;; be `sexp' for things like lists or strings, and
	95	;; `string' for things that need to match some special
	96	;; string, such as "\\." where a literal match is needed.
	97	;;
	98	;;; Lexical Tables
	99	;;
	100	;; There are tables of different symbols managed in semantic-lex.el.
	101	;; They are:
	102	;;
	103	;; Lexical keyword table - A Table of symbols declared in a grammar
	104	;; file with the %keyword declaration.
	105	;; Keywords are used by `semantic-lex-symbol-or-keyword'
	106	;; to create lexical tokens based on the keyword.
	107	;;
	108	;; Lexical type table - A table of symbols declared in a grammer
	109	;; file with the %type declaration.
	110	;; The grammar compiler uses the type table to create new
	111	;; lexical analyzers. These analyzers are then used to when
	112	;; a new lexical analyzer is made for a language.
	113	;;
	114	;;; Lexical Types
	115	;;
	116	;; A lexical type defines a kind of lexical analyzer that will be
	117	;; automatically generated from a grammar file based on some
	118	;; predetermined attributes. For now these two attributes are
	119	;; recognized :
	120	;;
	121	;; * matchdatatype : define the kind of lexical analyzer. That is :
	122	;;
	123	;; - regexp : define a regexp analyzer (see
	124	;; `define-lex-regex-type-analyzer')
	125	;;
	126	;; - string : define a string analyzer (see
	127	;; `define-lex-string-type-analyzer')
	128	;;
	129	;; - block : define a block type analyzer (see
	130	;; `define-lex-block-type-analyzer')
	131	;;
	132	;; - sexp : define a sexp analyzer (see
	133	;; `define-lex-sexp-type-analyzer')
	134	;;
	135	;; - keyword : define a keyword analyzer (see
	136	;; `define-lex-keyword-type-analyzer')
	137	;;
	138	;; * syntax : define the syntax that matches a syntactic
	139	;; expression. When syntax is matched the corresponding type
	140	;; analyzer is entered and the resulting match data will be
	141	;; interpreted based on the kind of analyzer (see matchdatatype
	142	;; above).
	143	;;
	144	;; The following lexical types are predefined :
	145	;;
	146	;; +-------------+---------------+--------------------------------+
	147	;; \| type \| matchdatatype \| syntax \|
	148	;; +-------------+---------------+--------------------------------+
	149	;; \| punctuation \| string \| "\$\\s.\\\|\\s$\\\|\\s'\$+" \|
	150	;; \| keyword \| keyword \| "\$\\sw\\\|\\s_\$+" \|
	151	;; \| symbol \| regexp \| "\$\\sw\\\|\\s_\$+" \|
	152	;; \| string \| sexp \| "\\s\"" \|
	153	;; \| number \| regexp \| semantic-lex-number-expression \|
	154	;; \| block \| block \| "\\s(\\\|\\s)" \|
	155	;; +-------------+---------------+--------------------------------+
	156	;;
	157	;; In a grammar you must use a %type expression to automatically generate
	158	;; the corresponding analyzers of that type.
	159	;;
	160	;; Here is an example to auto-generate punctuation analyzers
	161	;; with 'matchdatatype and 'syntax predefined (see table above)
	162	;;
	163	;; %type <punctuation> ;; will auto-generate this kind of analyzers
	164	;;
	165	;; It is equivalent to write :
	166	;;
	167	;; %type <punctuation> syntax "\$\\s.\\\|\\s$\\\|\\s'\$+" matchdatatype string
	168	;;
	169	;; ;; Some punctuations based on the type defines above
	170	;;
	171	;; %token <punctuation> NOT "!"
	172	;; %token <punctuation> NOTEQ "!="
	173	;; %token <punctuation> MOD "%"
	174	;; %token <punctuation> MODEQ "%="
	175	;;
	176
	177	;;; On the Semantic 1.x lexer
	178	;;
	179	;; In semantic 1.x, the lexical analyzer was an all purpose routine.
	180	;; To boost efficiency, the analyzer is now a series of routines that
	181	;; are constructed at build time into a single routine. This will
	182	;; eliminate unneeded if statements to speed the lexer.
	183
	184	(require 'semantic/fw)
	185	;;; Code:
	186
	187	;;; Compatibility
	188	;;
	189	(eval-and-compile
	190	(if (not (fboundp 'with-syntax-table))
	191
	192	;; Copied from Emacs 21 for compatibility with released Emacses.
	193	(defmacro with-syntax-table (table &rest body)
	194	"With syntax table of current buffer set to a copy of TABLE, evaluate BODY.
	195	The syntax table of the current buffer is saved, BODY is evaluated, and the
	196	saved table is restored, even in case of an abnormal exit.
	197	Value is what BODY returns."
	198	(let ((old-table (make-symbol "table"))
	199	(old-buffer (make-symbol "buffer")))
	200	`(let ((,old-table (syntax-table))
	201	(,old-buffer (current-buffer)))
	202	(unwind-protect
	203	(progn
	204	(set-syntax-table (copy-syntax-table ,table))
	205	,@body)
	206	(save-current-buffer
	207	(set-buffer ,old-buffer)
	208	(set-syntax-table ,old-table))))))
	209
	210	))
	211	\f
	212	;;; Semantic 2.x lexical analysis
	213	;;
	214	(defun semantic-lex-map-symbols (fun table &optional property)
	215	"Call function FUN on every symbol in TABLE.
	216	If optional PROPERTY is non-nil, call FUN only on every symbol which
	217	as a PROPERTY value. FUN receives a symbol as argument."
	218	(if (arrayp table)
	219	(mapatoms
	220	#'(lambda (symbol)
	221	(if (or (null property) (get symbol property))
	222	(funcall fun symbol)))
	223	table)))
	224
	225	;;; Lexical keyword table handling.
	226	;;
	227	;; These keywords are keywords defined for using in a grammar with the
	228	;; %keyword declaration, and are not keywords used in Emacs Lisp.
	229
	230	(defvar semantic-flex-keywords-obarray nil
	231	"Buffer local keyword obarray for the lexical analyzer.
	232	These keywords are matched explicitly, and converted into special symbols.")
	233	(make-variable-buffer-local 'semantic-flex-keywords-obarray)
	234
	235	(defmacro semantic-lex-keyword-invalid (name)
	236	"Signal that NAME is an invalid keyword name."
	237	`(signal 'wrong-type-argument '(semantic-lex-keyword-p ,name)))
	238
	239	(defsubst semantic-lex-keyword-symbol (name)
	240	"Return keyword symbol with NAME or nil if not found."
	241	(and (arrayp semantic-flex-keywords-obarray)
	242	(stringp name)
	243	(intern-soft name semantic-flex-keywords-obarray)))
	244
	245	(defsubst semantic-lex-keyword-p (name)
	246	"Return non-nil if a keyword with NAME exists in the keyword table.
	247	Return nil otherwise."
	248	(and (setq name (semantic-lex-keyword-symbol name))
	249	(symbol-value name)))
	250
	251	(defsubst semantic-lex-keyword-set (name value)
	252	"Set value of keyword with NAME to VALUE and return VALUE."
	253	(set (intern name semantic-flex-keywords-obarray) value))
	254
	255	(defsubst semantic-lex-keyword-value (name)
	256	"Return value of keyword with NAME.
	257	Signal an error if a keyword with NAME does not exist."
	258	(let ((keyword (semantic-lex-keyword-symbol name)))
	259	(if keyword
	260	(symbol-value keyword)
	261	(semantic-lex-keyword-invalid name))))
	262
	263	(defsubst semantic-lex-keyword-put (name property value)
	264	"For keyword with NAME, set its PROPERTY to VALUE."
	265	(let ((keyword (semantic-lex-keyword-symbol name)))
	266	(if keyword
	267	(put keyword property value)
	268	(semantic-lex-keyword-invalid name))))
	269
	270	(defsubst semantic-lex-keyword-get (name property)
	271	"For keyword with NAME, return its PROPERTY value."
	272	(let ((keyword (semantic-lex-keyword-symbol name)))
	273	(if keyword
	274	(get keyword property)
	275	(semantic-lex-keyword-invalid name))))
	276
	277	(defun semantic-lex-make-keyword-table (specs &optional propspecs)
	278	"Convert keyword SPECS into an obarray and return it.
	279	SPECS must be a list of (NAME . TOKSYM) elements, where:
	280
	281	NAME is the name of the keyword symbol to define.
	282	TOKSYM is the lexical token symbol of that keyword.
	283
	284	If optional argument PROPSPECS is non nil, then interpret it, and
	285	apply those properties.
	286	PROPSPECS must be a list of (NAME PROPERTY VALUE) elements."
	287	;; Create the symbol hash table
	288	(let ((semantic-flex-keywords-obarray (make-vector 13 0))
	289	spec)
	290	;; fill it with stuff
	291	(while specs
	292	(setq spec (car specs)
	293	specs (cdr specs))
	294	(semantic-lex-keyword-set (car spec) (cdr spec)))
	295	;; Apply all properties
	296	(while propspecs
	297	(setq spec (car propspecs)
	298	propspecs (cdr propspecs))
	299	(semantic-lex-keyword-put (car spec) (nth 1 spec) (nth 2 spec)))
	300	semantic-flex-keywords-obarray))
	301
	302	(defsubst semantic-lex-map-keywords (fun &optional property)
	303	"Call function FUN on every lexical keyword.
	304	If optional PROPERTY is non-nil, call FUN only on every keyword which
	305	as a PROPERTY value. FUN receives a lexical keyword as argument."
	306	(semantic-lex-map-symbols
	307	fun semantic-flex-keywords-obarray property))
	308
	309	(defun semantic-lex-keywords (&optional property)
	310	"Return a list of lexical keywords.
	311	If optional PROPERTY is non-nil, return only keywords which have a
	312	PROPERTY set."
	313	(let (keywords)
	314	(semantic-lex-map-keywords
	315	#'(lambda (symbol) (setq keywords (cons symbol keywords)))
	316	property)
	317	keywords))
	318
	319	;;; Inline functions:
	320
	321	(defvar semantic-lex-unterminated-syntax-end-function)
	322	(defvar semantic-lex-analysis-bounds)
	323	(defvar semantic-lex-end-point)
	324
	325	(defsubst semantic-lex-token-bounds (token)
	326	"Fetch the start and end locations of the lexical token TOKEN.
	327	Return a pair (START . END)."
	328	(if (not (numberp (car (cdr token))))
	329	(cdr (cdr token))
	330	(cdr token)))
	331
	332	(defsubst semantic-lex-token-start (token)
	333	"Fetch the start position of the lexical token TOKEN.
	334	See also the function `semantic-lex-token'."
	335	(car (semantic-lex-token-bounds token)))
	336
	337	(defsubst semantic-lex-token-end (token)
	338	"Fetch the end position of the lexical token TOKEN.
	339	See also the function `semantic-lex-token'."
	340	(cdr (semantic-lex-token-bounds token)))
	341
	342	(defsubst semantic-lex-unterminated-syntax-detected (syntax)
	343	"Inside a lexical analyzer, use this when unterminated syntax was found.
	344	Argument SYNTAX indicates the type of syntax that is unterminated.
	345	The job of this function is to move (point) to a new logical location
	346	so that analysis can continue, if possible."
	347	(goto-char
	348	(funcall semantic-lex-unterminated-syntax-end-function
	349	syntax
	350	(car semantic-lex-analysis-bounds)
	351	(cdr semantic-lex-analysis-bounds)
	352	))
	353	(setq semantic-lex-end-point (point)))
	354	\f
	355	;;; Type table handling.
	356	;;
	357	;; The lexical type table manages types that occur in a grammar file
	358	;; with the %type declaration. Types represent different syntaxes.
	359	;; See code for `semantic-lex-preset-default-types' for the classic
	360	;; types of syntax.
	361	(defvar semantic-lex-types-obarray nil
	362	"Buffer local types obarray for the lexical analyzer.")
	363	(make-variable-buffer-local 'semantic-lex-types-obarray)
	364
	365	(defmacro semantic-lex-type-invalid (type)
	366	"Signal that TYPE is an invalid lexical type name."
	367	`(signal 'wrong-type-argument '(semantic-lex-type-p ,type)))
	368
	369	(defsubst semantic-lex-type-symbol (type)
	370	"Return symbol with TYPE or nil if not found."
	371	(and (arrayp semantic-lex-types-obarray)
	372	(stringp type)
	373	(intern-soft type semantic-lex-types-obarray)))
	374
	375	(defsubst semantic-lex-type-p (type)
	376	"Return non-nil if a symbol with TYPE name exists."
	377	(and (setq type (semantic-lex-type-symbol type))
	378	(symbol-value type)))
	379
	380	(defsubst semantic-lex-type-set (type value)
	381	"Set value of symbol with TYPE name to VALUE and return VALUE."
	382	(set (intern type semantic-lex-types-obarray) value))
	383
	384	(defsubst semantic-lex-type-value (type &optional noerror)
	385	"Return value of symbol with TYPE name.
	386	If optional argument NOERROR is non-nil return nil if a symbol with
	387	TYPE name does not exist. Otherwise signal an error."
	388	(let ((sym (semantic-lex-type-symbol type)))
	389	(if sym
	390	(symbol-value sym)
	391	(unless noerror
	392	(semantic-lex-type-invalid type)))))
	393
	394	(defsubst semantic-lex-type-put (type property value &optional add)
	395	"For symbol with TYPE name, set its PROPERTY to VALUE.
	396	If optional argument ADD is non-nil, create a new symbol with TYPE
	397	name if it does not already exist. Otherwise signal an error."
	398	(let ((sym (semantic-lex-type-symbol type)))
	399	(unless sym
	400	(or add (semantic-lex-type-invalid type))
	401	(semantic-lex-type-set type nil)
	402	(setq sym (semantic-lex-type-symbol type)))
	403	(put sym property value)))
	404
	405	(defsubst semantic-lex-type-get (type property &optional noerror)
	406	"For symbol with TYPE name, return its PROPERTY value.
	407	If optional argument NOERROR is non-nil return nil if a symbol with
	408	TYPE name does not exist. Otherwise signal an error."
	409	(let ((sym (semantic-lex-type-symbol type)))
	410	(if sym
	411	(get sym property)
	412	(unless noerror
	413	(semantic-lex-type-invalid type)))))
	414
	415	(defun semantic-lex-preset-default-types ()
	416	"Install useful default properties for well known types."
	417	(semantic-lex-type-put "punctuation" 'matchdatatype 'string t)
	418	(semantic-lex-type-put "punctuation" 'syntax "\$\\s.\\\|\\s$\\\|\\s'\$+")
	419	(semantic-lex-type-put "keyword" 'matchdatatype 'keyword t)
	420	(semantic-lex-type-put "keyword" 'syntax "\$\\sw\\\|\\s_\$+")
	421	(semantic-lex-type-put "symbol" 'matchdatatype 'regexp t)
	422	(semantic-lex-type-put "symbol" 'syntax "\$\\sw\\\|\\s_\$+")
	423	(semantic-lex-type-put "string" 'matchdatatype 'sexp t)
	424	(semantic-lex-type-put "string" 'syntax "\\s\"")
	425	(semantic-lex-type-put "number" 'matchdatatype 'regexp t)
	426	(semantic-lex-type-put "number" 'syntax 'semantic-lex-number-expression)
	427	(semantic-lex-type-put "block" 'matchdatatype 'block t)
	428	(semantic-lex-type-put "block" 'syntax "\\s(\\\|\\s)")
	429	)
	430
	431	(defun semantic-lex-make-type-table (specs &optional propspecs)
	432	"Convert type SPECS into an obarray and return it.
	433	SPECS must be a list of (TYPE . TOKENS) elements, where:
	434
	435	TYPE is the name of the type symbol to define.
	436	TOKENS is an list of (TOKSYM . MATCHER) elements, where:
	437
	438	TOKSYM is any lexical token symbol.
	439	MATCHER is a string or regexp a text must match to be a such
	440	lexical token.
	441
	442	If optional argument PROPSPECS is non nil, then interpret it, and
	443	apply those properties.
	444	PROPSPECS must be a list of (TYPE PROPERTY VALUE)."
	445	;; Create the symbol hash table
	446	(let* ((semantic-lex-types-obarray (make-vector 13 0))
	447	spec type tokens token alist default)
	448	;; fill it with stuff
	449	(while specs
	450	(setq spec (car specs)
	451	specs (cdr specs)
	452	type (car spec)
	453	tokens (cdr spec)
	454	default nil
	455	alist nil)
	456	(while tokens
	457	(setq token (car tokens)
	458	tokens (cdr tokens))
	459	(if (cdr token)
	460	(setq alist (cons token alist))
	461	(setq token (car token))
	462	(if default
	463	(message
	464	"Warning default value of <%s> tokens changed to %S, was %S"
	465	type default token))
	466	(setq default token)))
	467	;; Ensure the default matching spec is the first one.
	468	(semantic-lex-type-set type (cons default (nreverse alist))))
	469	;; Install useful default types & properties
	470	(semantic-lex-preset-default-types)
	471	;; Apply all properties
	472	(while propspecs
	473	(setq spec (car propspecs)
	474	propspecs (cdr propspecs))
	475	;; Create the type if necessary.
	476	(semantic-lex-type-put (car spec) (nth 1 spec) (nth 2 spec) t))
	477	semantic-lex-types-obarray))
	478
	479	(defsubst semantic-lex-map-types (fun &optional property)
	480	"Call function FUN on every lexical type.
	481	If optional PROPERTY is non-nil, call FUN only on every type symbol
	482	which as a PROPERTY value. FUN receives a type symbol as argument."
	483	(semantic-lex-map-symbols
	484	fun semantic-lex-types-obarray property))
	485
	486	(defun semantic-lex-types (&optional property)
	487	"Return a list of lexical type symbols.
	488	If optional PROPERTY is non-nil, return only type symbols which have
	489	PROPERTY set."
	490	(let (types)
	491	(semantic-lex-map-types
	492	#'(lambda (symbol) (setq types (cons symbol types)))
	493	property)
	494	types))
	495	\f
	496	;;; Lexical Analyzer framework settings
	497	;;
	498
	499	(defvar semantic-lex-analyzer 'semantic-flex
	500	"The lexical analyzer used for a given buffer.
	501	See `semantic-lex' for documentation.
	502	For compatibility with Semantic 1.x it defaults to `semantic-flex'.")
	503	(make-variable-buffer-local 'semantic-lex-analyzer)
	504
	505	(defvar semantic-lex-tokens
	506	'(
	507	(bol)
	508	(charquote)
	509	(close-paren)
	510	(comment)
	511	(newline)
	512	(open-paren)
	513	(punctuation)
	514	(semantic-list)
	515	(string)
	516	(symbol)
	517	(whitespace)
	518	)
	519	"An alist of of semantic token types.
	520	As of December 2001 (semantic 1.4beta13), this variable is not used in
	521	any code. The only use is to refer to the doc-string from elsewhere.
	522
	523	The key to this alist is the symbol representing token type that
	524	\\[semantic-flex] returns. These are
	525
	526	- bol: Empty string matching a beginning of line.
	527	This token is produced with
	528	`semantic-lex-beginning-of-line'.
	529
	530	- charquote: String sequences that match `\\s\\+' regexp.
	531	This token is produced with `semantic-lex-charquote'.
	532
	533	- close-paren: Characters that match `\\s)' regexp.
	534	These are typically `)', `}', `]', etc.
	535	This token is produced with
	536	`semantic-lex-close-paren'.
	537
	538	- comment: A comment chunk. These token types are not
	539	produced by default.
	540	This token is produced with `semantic-lex-comments'.
	541	Comments are ignored with `semantic-lex-ignore-comments'.
	542	Comments are treated as whitespace with
	543	`semantic-lex-comments-as-whitespace'.
	544
	545	- newline Characters matching `\\s-*\$\n\\\|\\s>\$' regexp.
	546	This token is produced with `semantic-lex-newline'.
	547
	548	- open-paren: Characters that match `\\s(' regexp.
	549	These are typically `(', `{', `[', etc.
	550	If `semantic-lex-paren-or-list' is used,
	551	then `open-paren' is not usually generated unless
	552	the `depth' argument to \\[semantic-lex] is
	553	greater than 0.
	554	This token is always produced if the analyzer
	555	`semantic-lex-open-paren' is used.
	556
	557	- punctuation: Characters matching `{\$\\s.\\\|\\s$\\\|\\s'\$'
	558	regexp.
	559	This token is produced with `semantic-lex-punctuation'.
	560	Always specify this analyzer after the comment
	561	analyzer.
	562
	563	- semantic-list: String delimited by matching parenthesis, braces,
	564	etc. that the lexer skipped over, because the
	565	`depth' parameter to \\[semantic-flex] was not high
	566	enough.
	567	This token is produced with `semantic-lex-paren-or-list'.
	568
	569	- string: Quoted strings, i.e., string sequences that start
	570	and end with characters matching `\\s\"'
	571	regexp. The lexer relies on @code{forward-sexp} to
	572	find the matching end.
	573	This token is produced with `semantic-lex-string'.
	574
	575	- symbol: String sequences that match `\$\\sw\\\|\\s_\$+'
	576	regexp.
	577	This token is produced with
	578	`semantic-lex-symbol-or-keyword'. Always add this analyzer
	579	after `semantic-lex-number', or other analyzers that
	580	match its regular expression.
	581
	582	- whitespace: Characters that match `\\s-+' regexp.
	583	This token is produced with `semantic-lex-whitespace'.")
	584
	585	(defvar semantic-lex-syntax-modifications nil
	586	"Changes to the syntax table for this buffer.
	587	These changes are active only while the buffer is being flexed.
	588	This is a list where each element has the form:
	589	(CHAR CLASS)
	590	CHAR is the char passed to `modify-syntax-entry',
	591	and CLASS is the string also passed to `modify-syntax-entry' to define
	592	what syntax class CHAR has.")
	593	(make-variable-buffer-local 'semantic-lex-syntax-modifications)
	594
	595	(defvar semantic-lex-syntax-table nil
	596	"Syntax table used by lexical analysis.
	597	See also `semantic-lex-syntax-modifications'.")
	598	(make-variable-buffer-local 'semantic-lex-syntax-table)
	599
	600	(defvar semantic-lex-comment-regex nil
	601	"Regular expression for identifying comment start during lexical analysis.
	602	This may be automatically set when semantic initializes in a mode, but
	603	may need to be overriden for some special languages.")
	604	(make-variable-buffer-local 'semantic-lex-comment-regex)
	605
	606	(defvar semantic-lex-number-expression
	607	;; This expression was written by David Ponce for Java, and copied
	608	;; here for C and any other similar language.
	609	(eval-when-compile
	610	(concat "\\("
	611	"\\<[0-9]+[.][0-9]+\$[eE][-+]?[0-9]+\$?[fFdD]?\\>"
	612	"\\\|"
	613	"\\<[0-9]+[.][eE][-+]?[0-9]+[fFdD]?\\>"
	614	"\\\|"
	615	"\\<[0-9]+[.][fFdD]\\>"
	616	"\\\|"
	617	"\\<[0-9]+[.]"
	618	"\\\|"
	619	"[.][0-9]+\$[eE][-+]?[0-9]+\$?[fFdD]?\\>"
	620	"\\\|"
	621	"\\<[0-9]+[eE][-+]?[0-9]+[fFdD]?\\>"
	622	"\\\|"
	623	"\\<0[xX][0-9a-fA-F]+[lL]?\\>"
	624	"\\\|"
	625	"\\<[0-9]+[lLfFdD]?\\>"
	626	"\\)"
	627	))
	628	"Regular expression for matching a number.
	629	If this value is nil, no number extraction is done during lex.
	630	This expression tries to match C and Java like numbers.
	631
	632	DECIMAL_LITERAL:
	633	[1-9][0-9]*
	634	;
	635	HEX_LITERAL:
	636	0[xX][0-9a-fA-F]+
	637	;
	638	OCTAL_LITERAL:
	639	0[0-7]*
	640	;
	641	INTEGER_LITERAL:
	642	<DECIMAL_LITERAL>[lL]?
	643	\| <HEX_LITERAL>[lL]?
	644	\| <OCTAL_LITERAL>[lL]?
	645	;
	646	EXPONENT:
	647	[eE][+-]?[09]+
	648	;
	649	FLOATING_POINT_LITERAL:
	650	[0-9]+[.][0-9]*<EXPONENT>?[fFdD]?
	651	\| [.][0-9]+<EXPONENT>?[fFdD]?
	652	\| [0-9]+<EXPONENT>[fFdD]?
	653	\| [0-9]+<EXPONENT>?[fFdD]
	654	;")
	655	(make-variable-buffer-local 'semantic-lex-number-expression)
	656
	657	(defvar semantic-lex-depth 0
	658	"Default lexing depth.
	659	This specifies how many lists to create tokens in.")
	660	(make-variable-buffer-local 'semantic-lex-depth)
	661
	662	(defvar semantic-lex-unterminated-syntax-end-function
	663	(lambda (syntax syntax-start lex-end) lex-end)
	664	"Function called when unterminated syntax is encountered.
	665	This should be set to one function. That function should take three
	666	parameters. The SYNTAX, or type of syntax which is unterminated.
	667	SYNTAX-START where the broken syntax begins.
	668	LEX-END is where the lexical analysis was asked to end.
	669	This function can be used for languages that can intelligently fix up
	670	broken syntax, or the exit lexical analysis via `throw' or `signal'
	671	when finding unterminated syntax.")
	672
	673	;;; Interactive testing commands
	674
	675	(declare-function semantic-elapsed-time "semantic")
	676
	677	(defun semantic-lex-test (arg)
	678	"Test the semantic lexer in the current buffer.
	679	If universal argument ARG, then try the whole buffer."
	680	(interactive "P")
	681	(require 'semantic)
	682	(let* ((start (current-time))
	683	(result (semantic-lex
	684	(if arg (point-min) (point))
	685	(point-max)))
	686	(end (current-time)))
	687	(message "Elapsed Time: %.2f seconds."
	688	(semantic-elapsed-time start end))
	689	(pop-to-buffer "Lexer Output")
	690	(require 'pp)
	691	(erase-buffer)
	692	(insert (pp-to-string result))
	693	(goto-char (point-min))
	694	))
	695
	696	(defun semantic-lex-test-full-depth (arg)
	697	"Test the semantic lexer in the current buffer parsing through lists.
	698	Usually the lexer parses
	699	If universal argument ARG, then try the whole buffer."
	700	(interactive "P")
	701	(let* ((start (current-time))
	702	(result (semantic-lex
	703	(if arg (point-min) (point))
	704	(point-max)
	705	100))
	706	(end (current-time)))
	707	(message "Elapsed Time: %.2f seconds."
	708	(semantic-elapsed-time start end))
	709	(pop-to-buffer "Lexer Output")
	710	(require 'pp)
	711	(erase-buffer)
	712	(insert (pp-to-string result))
	713	(goto-char (point-min))
	714	))
	715
	716	(defun semantic-lex-test-region (beg end)
	717	"Test the semantic lexer in the current buffer.
	718	Analyze the area between BEG and END."
	719	(interactive "r")
	720	(let ((result (semantic-lex beg end)))
	721	(pop-to-buffer "Lexer Output")
	722	(require 'pp)
	723	(erase-buffer)
	724	(insert (pp-to-string result))
	725	(goto-char (point-min))
	726	))
	727
	728	(defvar semantic-lex-debug nil
	729	"When non-nil, debug the local lexical analyzer.")
	730
	731	(defun semantic-lex-debug (arg)
	732	"Debug the semantic lexer in the current buffer.
	733	Argument ARG specifies of the analyze the whole buffer, or start at point.
	734	While engaged, each token identified by the lexer will be highlighted
	735	in the target buffer A description of the current token will be
	736	displayed in the minibuffer. Press SPC to move to the next lexical token."
	737	(interactive "P")
	738	(require 'semantic/debug)
	739	(let ((semantic-lex-debug t))
	740	(semantic-lex-test arg)))
	741
	742	(defun semantic-lex-highlight-token (token)
	743	"Highlight the lexical TOKEN.
	744	TOKEN is a lexical token with a START And END position.
	745	Return the overlay."
	746	(let ((o (semantic-make-overlay (semantic-lex-token-start token)
	747	(semantic-lex-token-end token))))
	748	(semantic-overlay-put o 'face 'highlight)
	749	o))
	750
	751	(defsubst semantic-lex-debug-break (token)
	752	"Break during lexical analysis at TOKEN."
	753	(when semantic-lex-debug
	754	(let ((o nil))
	755	(unwind-protect
	756	(progn
	757	(when token
	758	(setq o (semantic-lex-highlight-token token)))
	759	(semantic-read-event
	760	(format "%S :: SPC - continue" token))
	761	)
	762	(when o
	763	(semantic-overlay-delete o))))))
	764
	765	;;; Lexical analyzer creation
	766	;;
	767	;; Code for creating a lex function from lists of analyzers.
	768	;;
	769	;; A lexical analyzer is created from a list of individual analyzers.
	770	;; Each individual analyzer specifies a single match, and code that
	771	;; goes with it.
	772	;;
	773	;; Creation of an analyzer assembles these analyzers into a new function
	774	;; with the behaviors of all the individual analyzers.
	775	;;
	776	(defmacro semantic-lex-one-token (analyzers)
	777	"Calculate one token from the current buffer at point.
	778	Uses locally bound variables from `define-lex'.
	779	Argument ANALYZERS is the list of analyzers being used."
	780	(cons 'cond (mapcar #'symbol-value analyzers)))
	781
	782	(defvar semantic-lex-end-point nil
	783	"The end point as tracked through lexical functions.")
	784
	785	(defvar semantic-lex-current-depth nil
	786	"The current depth as tracked through lexical functions.")
	787
	788	(defvar semantic-lex-maximum-depth nil
	789	"The maximum depth of parenthisis as tracked through lexical functions.")
	790
	791	(defvar semantic-lex-token-stream nil
	792	"The current token stream we are collecting.")
	793
	794	(defvar semantic-lex-analysis-bounds nil
	795	"The bounds of the current analysis.")
	796
	797	(defvar semantic-lex-block-streams nil
	798	"Streams of tokens inside collapsed blocks.
	799	This is an alist of (ANCHOR . STREAM) elements where ANCHOR is the
	800	start position of the block, and STREAM is the list of tokens in that
	801	block.")
	802
	803	(defvar semantic-lex-reset-hooks nil
	804	"List of hooks major-modes use to reset lexical analyzers.
	805	Hooks are called with START and END values for the current lexical pass.
	806	Should be set with `add-hook'specifying a LOCAL option.")
	807
	808	;; Stack of nested blocks.
	809	(defvar semantic-lex-block-stack nil)
	810	;;(defvar semantic-lex-timeout 5
	811	;; "*Number of sections of lexing before giving up.")
	812
	813	(defmacro define-lex (name doc &rest analyzers)
	814	"Create a new lexical analyzer with NAME.
	815	DOC is a documentation string describing this analyzer.
	816	ANALYZERS are small code snippets of analyzers to use when
	817	building the new NAMED analyzer. Only use analyzers which
	818	are written to be used in `define-lex'.
	819	Each analyzer should be an analyzer created with `define-lex-analyzer'.
	820	Note: The order in which analyzers are listed is important.
	821	If two analyzers can match the same text, it is important to order the
	822	analyzers so that the one you want to match first occurs first. For
	823	example, it is good to put a numbe analyzer in front of a symbol
	824	analyzer which might mistake a number for as a symbol."
	825	`(defun ,name (start end &optional depth length)
	826	,(concat doc "\nSee `semantic-lex' for more information.")
	827	;; Make sure the state of block parsing starts over.
	828	(setq semantic-lex-block-streams nil)
	829	;; Allow specialty reset items.
	830	(run-hook-with-args 'semantic-lex-reset-hooks start end)
	831	;; Lexing state.
	832	(let* (;(starttime (current-time))
	833	(starting-position (point))
	834	(semantic-lex-token-stream nil)
	835	(semantic-lex-block-stack nil)
	836	(tmp-start start)
	837	(semantic-lex-end-point start)
	838	(semantic-lex-current-depth 0)
	839	;; Use the default depth when not specified.
	840	(semantic-lex-maximum-depth
	841	(or depth semantic-lex-depth))
	842	;; Bounds needed for unterminated syntax
	843	(semantic-lex-analysis-bounds (cons start end))
	844	;; This entry prevents text properties from
	845	;; confusing our lexical analysis. See Emacs 22 (CVS)
	846	;; version of C++ mode with template hack text properties.
	847	(parse-sexp-lookup-properties nil)
	848	)
	849	;; Maybe REMOVE THIS LATER.
	850	;; Trying to find incremental parser bug.
	851	(when (> end (point-max))
	852	(error ,(format "%s: end (%%d) > point-max (%%d)" name)
	853	end (point-max)))
	854	(with-syntax-table semantic-lex-syntax-table
	855	(goto-char start)
	856	(while (and (< (point) end)
	857	(or (not length)
	858	(<= (length semantic-lex-token-stream) length)))
	859	(semantic-lex-one-token ,analyzers)
	860	(when (eq semantic-lex-end-point tmp-start)
	861	(error ,(format "%s: endless loop at %%d, after %%S" name)
	862	tmp-start (car semantic-lex-token-stream)))
	863	(setq tmp-start semantic-lex-end-point)
	864	(goto-char semantic-lex-end-point)
	865	;;(when (> (semantic-elapsed-time starttime (current-time))
	866	;; semantic-lex-timeout)
	867	;; (error "Timeout during lex at char %d" (point)))
	868	(semantic-throw-on-input 'lex)
	869	(semantic-lex-debug-break (car semantic-lex-token-stream))
	870	))
	871	;; Check that there is no unterminated block.
	872	(when semantic-lex-block-stack
	873	(let* ((last (pop semantic-lex-block-stack))
	874	(blk last))
	875	(while blk
	876	(message
	877	,(format "%s: `%%s' block from %%S is unterminated" name)
	878	(car blk) (cadr blk))
	879	(setq blk (pop semantic-lex-block-stack)))
	880	(semantic-lex-unterminated-syntax-detected (car last))))
	881	;; Return to where we started.
	882	;; Do not wrap in protective stuff so that if there is an error
	883	;; thrown, the user knows where.
	884	(goto-char starting-position)
	885	;; Return the token stream
	886	(nreverse semantic-lex-token-stream))))
	887	\f
	888	;;; Collapsed block tokens delimited by any tokens.
	889	;;
	890	(defun semantic-lex-start-block (syntax)
	891	"Mark the last read token as the beginning of a SYNTAX block."
	892	(if (or (not semantic-lex-maximum-depth)
	893	(< semantic-lex-current-depth semantic-lex-maximum-depth))
	894	(setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
	895	(push (list syntax (car semantic-lex-token-stream))
	896	semantic-lex-block-stack)))
	897
	898	(defun semantic-lex-end-block (syntax)
	899	"Process the end of a previously marked SYNTAX block.
	900	That is, collapse the tokens inside that block, including the
	901	beginning and end of block tokens, into a high level block token of
	902	class SYNTAX.
	903	The token at beginning of block is the one marked by a previous call
	904	to `semantic-lex-start-block'. The current token is the end of block.
	905	The collapsed tokens are saved in `semantic-lex-block-streams'."
	906	(if (null semantic-lex-block-stack)
	907	(setq semantic-lex-current-depth (1- semantic-lex-current-depth))
	908	(let* ((stream semantic-lex-token-stream)
	909	(blk (pop semantic-lex-block-stack))
	910	(bstream (cdr blk))
	911	(first (car bstream))
	912	(last (pop stream)) ;; The current token mark the EOBLK
	913	tok)
	914	(if (not (eq (car blk) syntax))
	915	;; SYNTAX doesn't match the syntax of the current block in
	916	;; the stack. So we encountered the end of the SYNTAX block
	917	;; before the end of the current one in the stack which is
	918	;; signaled unterminated.
	919	(semantic-lex-unterminated-syntax-detected (car blk))
	920	;; Move tokens found inside the block from the main stream
	921	;; into a separate block stream.
	922	(while (and stream (not (eq (setq tok (pop stream)) first)))
	923	(push tok bstream))
	924	;; The token marked as beginning of block was not encountered.
	925	;; This should not happen!
	926	(or (eq tok first)
	927	(error "Token %S not found at beginning of block `%s'"
	928	first syntax))
	929	;; Save the block stream for future reuse, to avoid to redo
	930	;; the lexical analysis of the block content!
	931	;; Anchor the block stream with its start position, so we can
	932	;; use: (cdr (assq start semantic-lex-block-streams)) to
	933	;; quickly retrieve the lexical stream associated to a block.
	934	(setcar blk (semantic-lex-token-start first))
	935	(setcdr blk (nreverse bstream))
	936	(push blk semantic-lex-block-streams)
	937	;; In the main stream, replace the tokens inside the block by
	938	;; a high level block token of class SYNTAX.
	939	(setq semantic-lex-token-stream stream)
	940	(semantic-lex-push-token
	941	(semantic-lex-token
	942	syntax (car blk) (semantic-lex-token-end last)))
	943	))))
	944	\f
	945	;;; Lexical token API
	946	;;
	947	;; Functions for accessing parts of a token. Use these functions
	948	;; instead of accessing the list structure directly because the
	949	;; contents of the lexical may change.
	950	;;
	951	(defmacro semantic-lex-token (symbol start end &optional str)
	952	"Create a lexical token.
	953	SYMBOL is a symbol representing the class of syntax found.
	954	START and END define the bounds of the token in the current buffer.
	955	Optional STR is the string for the token iff the the bounds
	956	in the buffer do not cover the string they represent. (As from
	957	macro expansion.)"
	958	;; This if statement checks the existance of a STR argument at
	959	;; compile time, where STR is some symbol or constant. If the
	960	;; variable STr (runtime) is nil, this will make an incorrect decision.
	961	;;
	962	;; It is like this to maintain the original speed of the compiled
	963	;; code.
	964	(if str
	965	`(cons ,symbol (cons ,str (cons ,start ,end)))
	966	`(cons ,symbol (cons ,start ,end))))
	967
	968	(defun semantic-lex-token-p (thing)
	969	"Return non-nil if THING is a semantic lex token.
	970	This is an exhaustively robust check."
	971	(and (consp thing)
	972	(symbolp (car thing))
	973	(or (and (numberp (nth 1 thing))
	974	(numberp (nthcdr 2 thing)))
	975	(and (stringp (nth 1 thing))
	976	(numberp (nth 2 thing))
	977	(numberp (nthcdr 3 thing)))
	978	))
	979	)
	980
	981	(defun semantic-lex-token-with-text-p (thing)
	982	"Return non-nil if THING is a semantic lex token.
	983	This is an exhaustively robust check."
	984	(and (consp thing)
	985	(symbolp (car thing))
	986	(= (length thing) 4)
	987	(stringp (nth 1 thing))
	988	(numberp (nth 2 thing))
	989	(numberp (nth 3 thing)))
	990	)
	991
	992	(defun semantic-lex-token-without-text-p (thing)
	993	"Return non-nil if THING is a semantic lex token.
	994	This is an exhaustively robust check."
	995	(and (consp thing)
	996	(symbolp (car thing))
	997	(= (length thing) 3)
	998	(numberp (nth 1 thing))
	999	(numberp (nth 2 thing)))
	1000	)
	1001
	1002	(eval-and-compile
	1003
	1004	(defun semantic-lex-expand-block-specs (specs)
	1005	"Expand block specifications SPECS into a Lisp form.
	1006	SPECS is a list of (BLOCK BEGIN END) elements where BLOCK, BEGIN, and
	1007	END are token class symbols that indicate to produce one collapsed
	1008	BLOCK token from tokens found between BEGIN and END ones.
	1009	BLOCK must be a non-nil symbol, and at least one of the BEGIN or END
	1010	symbols must be non-nil too.
	1011	When BEGIN is non-nil, generate a call to `semantic-lex-start-block'
	1012	when a BEGIN token class is encountered.
	1013	When END is non-nil, generate a call to `semantic-lex-end-block' when
	1014	an END token class is encountered."
	1015	(let ((class (make-symbol "class"))
	1016	(form nil))
	1017	(dolist (spec specs)
	1018	(when (car spec)
	1019	(when (nth 1 spec)
	1020	(push `((eq ',(nth 1 spec) ,class)
	1021	(semantic-lex-start-block ',(car spec)))
	1022	form))
	1023	(when (nth 2 spec)
	1024	(push `((eq ',(nth 2 spec) ,class)
	1025	(semantic-lex-end-block ',(car spec)))
	1026	form))))
	1027	(when form
	1028	`((let ((,class (semantic-lex-token-class
	1029	(car semantic-lex-token-stream))))
	1030	(cond ,@(nreverse form))))
	1031	)))
	1032	)
	1033
	1034	(defmacro semantic-lex-push-token (token &rest blockspecs)
	1035	"Push TOKEN in the lexical analyzer token stream.
	1036	Return the lexical analysis current end point.
	1037	If optional arguments BLOCKSPECS is non-nil, it specifies to process
	1038	collapsed block tokens. See `semantic-lex-expand-block-specs' for
	1039	more details.
	1040	This macro should only be called within the bounds of
	1041	`define-lex-analyzer'. It changes the values of the lexical analyzer
	1042	variables `token-stream' and `semantic-lex-end-point'. If you need to
	1043	move `semantic-lex-end-point' somewhere else, just modify this
	1044	variable after calling `semantic-lex-push-token'."
	1045	`(progn
	1046	(push ,token semantic-lex-token-stream)
	1047	,@(semantic-lex-expand-block-specs blockspecs)
	1048	(setq semantic-lex-end-point
	1049	(semantic-lex-token-end (car semantic-lex-token-stream)))
	1050	))
	1051
	1052	(defsubst semantic-lex-token-class (token)
	1053	"Fetch the class of the lexical token TOKEN.
	1054	See also the function `semantic-lex-token'."
	1055	(car token))
	1056
	1057	(defsubst semantic-lex-token-text (token)
	1058	"Fetch the text associated with the lexical token TOKEN.
	1059	See also the function `semantic-lex-token'."
	1060	(if (stringp (car (cdr token)))
	1061	(car (cdr token))
	1062	(buffer-substring-no-properties
	1063	(semantic-lex-token-start token)
	1064	(semantic-lex-token-end token))))
	1065
	1066	(defun semantic-lex-init ()
	1067	"Initialize any lexical state for this buffer."
	1068	(unless semantic-lex-comment-regex
	1069	(setq semantic-lex-comment-regex
	1070	(if comment-start-skip
	1071	(concat "\$\\s<\\\|" comment-start-skip "\$")
	1072	"\$\\s<\$")))
	1073	;; Setup the lexer syntax-table
	1074	(setq semantic-lex-syntax-table (copy-syntax-table (syntax-table)))
	1075	(dolist (mod semantic-lex-syntax-modifications)
	1076	(modify-syntax-entry
	1077	(car mod) (nth 1 mod) semantic-lex-syntax-table)))
	1078
	1079	;;;###autoload
	1080	(define-overloadable-function semantic-lex (start end &optional depth length)
	1081	"Lexically analyze text in the current buffer between START and END.
	1082	Optional argument DEPTH indicates at what level to scan over entire
	1083	lists. The last argument, LENGTH specifies that `semantic-lex'
	1084	should only return LENGTH tokens. The return value is a token stream.
	1085	Each element is a list, such of the form
	1086	(symbol start-expression . end-expression)
	1087	where SYMBOL denotes the token type.
	1088	See `semantic-lex-tokens' variable for details on token types. END
	1089	does not mark the end of the text scanned, only the end of the
	1090	beginning of text scanned. Thus, if a string extends past END, the
	1091	end of the return token will be larger than END. To truly restrict
	1092	scanning, use `narrow-to-region'."
	1093	(funcall semantic-lex-analyzer start end depth length))
	1094
	1095	(defsubst semantic-lex-buffer (&optional depth)
	1096	"Lex the current buffer.
	1097	Optional argument DEPTH is the depth to scan into lists."
	1098	(semantic-lex (point-min) (point-max) depth))
	1099
	1100	(defsubst semantic-lex-list (semlist depth)
	1101	"Lex the body of SEMLIST to DEPTH."
	1102	(semantic-lex (semantic-lex-token-start semlist)
	1103	(semantic-lex-token-end semlist)
	1104	depth))
	1105	\f
	1106	;;; Analyzer creation macros
	1107	;;
	1108	;; An individual analyzer is a condition and code that goes with it.
	1109	;;
	1110	;; Created analyzers become variables with the code associated with them
	1111	;; as the symbol value. These analyzers are assembled into a lexer
	1112	;; to create new lexical analyzers.
	1113
	1114	(defcustom semantic-lex-debug-analyzers nil
	1115	"Non nil means to debug analyzers with syntax protection.
	1116	Only in effect if `debug-on-error' is also non-nil."
	1117	:group 'semantic
	1118	:type 'boolean)
	1119
	1120	(defmacro semantic-lex-unterminated-syntax-protection (syntax &rest forms)
	1121	"For SYNTAX, execute FORMS with protection for unterminated syntax.
	1122	If FORMS throws an error, treat this as a syntax problem, and
	1123	execute the unterminated syntax code. FORMS should return a position.
	1124	Irreguardless of an error, the cursor should be moved to the end of
	1125	the desired syntax, and a position returned.
	1126	If `debug-on-error' is set, errors are not caught, so that you can
	1127	debug them.
	1128	Avoid using a large FORMS since it is duplicated."
	1129	`(if (and debug-on-error semantic-lex-debug-analyzers)
	1130	(progn ,@forms)
	1131	(condition-case nil
	1132	(progn ,@forms)
	1133	(error
	1134	(semantic-lex-unterminated-syntax-detected ,syntax)))))
	1135	(put 'semantic-lex-unterminated-syntax-protection
	1136	'lisp-indent-function 1)
	1137
	1138	(defmacro define-lex-analyzer (name doc condition &rest forms)
	1139	"Create a single lexical analyzer NAME with DOC.
	1140	When an analyzer is called, the current buffer and point are
	1141	positioned in a buffer at the location to be analyzed.
	1142	CONDITION is an expression which returns t if FORMS should be run.
	1143	Within the bounds of CONDITION and FORMS, the use of backquote
	1144	can be used to evaluate expressions at compile time.
	1145	While forms are running, the following variables will be locally bound:
	1146	`semantic-lex-analysis-bounds' - The bounds of the current analysis.
	1147	of the form (START . END)
	1148	`semantic-lex-maximum-depth' - The maximum depth of semantic-list
	1149	for the current analysis.
	1150	`semantic-lex-current-depth' - The current depth of `semantic-list' that has
	1151	been decended.
	1152	`semantic-lex-end-point' - End Point after match.
	1153	Analyzers should set this to a buffer location if their
	1154	match string does not represent the end of the matched text.
	1155	`semantic-lex-token-stream' - The token list being collected.
	1156	Add new lexical tokens to this list.
	1157	Proper action in FORMS is to move the value of `semantic-lex-end-point' to
	1158	after the location of the analyzed entry, and to add any discovered tokens
	1159	at the beginning of `semantic-lex-token-stream'.
	1160	This can be done by using `semantic-lex-push-token'."
	1161	`(eval-and-compile
	1162	(defvar ,name nil ,doc)
	1163	(defun ,name nil)
	1164	;; Do this part separately so that re-evaluation rebuilds this code.
	1165	(setq ,name '(,condition ,@forms))
	1166	;; Build a single lexical analyzer function, so the doc for
	1167	;; function help is automatically provided, and perhaps the
	1168	;; function could be useful for testing and debugging one
	1169	;; analyzer.
	1170	(fset ',name (lambda () ,doc
	1171	(let ((semantic-lex-token-stream nil)
	1172	(semantic-lex-end-point (point))
	1173	(semantic-lex-analysis-bounds
	1174	(cons (point) (point-max)))
	1175	(semantic-lex-current-depth 0)
	1176	(semantic-lex-maximum-depth
	1177	semantic-lex-depth)
	1178	)
	1179	(when ,condition ,@forms)
	1180	semantic-lex-token-stream)))
	1181	))
	1182
	1183	(defmacro define-lex-regex-analyzer (name doc regexp &rest forms)
	1184	"Create a lexical analyzer with NAME and DOC that will match REGEXP.
	1185	FORMS are evaluated upon a successful match.
	1186	See `define-lex-analyzer' for more about analyzers."
	1187	`(define-lex-analyzer ,name
	1188	,doc
	1189	(looking-at ,regexp)
	1190	,@forms
	1191	))
	1192
	1193	(defmacro define-lex-simple-regex-analyzer (name doc regexp toksym
	1194	&optional index
	1195	&rest forms)
	1196	"Create a lexical analyzer with NAME and DOC that match REGEXP.
	1197	TOKSYM is the symbol to use when creating a semantic lexical token.
	1198	INDEX is the index into the match that defines the bounds of the token.
	1199	Index should be a plain integer, and not specified in the macro as an
	1200	expression.
	1201	FORMS are evaluated upon a successful match BEFORE the new token is
	1202	created. It is valid to ignore FORMS.
	1203	See `define-lex-analyzer' for more about analyzers."
	1204	`(define-lex-analyzer ,name
	1205	,doc
	1206	(looking-at ,regexp)
	1207	,@forms
	1208	(semantic-lex-push-token
	1209	(semantic-lex-token ,toksym
	1210	(match-beginning ,(or index 0))
	1211	(match-end ,(or index 0))))
	1212	))
	1213
	1214	(defmacro define-lex-block-analyzer (name doc spec1 &rest specs)
	1215	"Create a lexical analyzer NAME for paired delimiters blocks.
	1216	It detects a paired delimiters block or the corresponding open or
	1217	close delimiter depending on the value of the variable
	1218	`semantic-lex-current-depth'. DOC is the documentation string of the lexical
	1219	analyzer. SPEC1 and SPECS specify the token symbols and open, close
	1220	delimiters used. Each SPEC has the form:
	1221
	1222	\(BLOCK-SYM (OPEN-DELIM OPEN-SYM) (CLOSE-DELIM CLOSE-SYM))
	1223
	1224	where BLOCK-SYM is the symbol returned in a block token. OPEN-DELIM
	1225	and CLOSE-DELIM are respectively the open and close delimiters
	1226	identifying a block. OPEN-SYM and CLOSE-SYM are respectively the
	1227	symbols returned in open and close tokens."
	1228	(let ((specs (cons spec1 specs))
	1229	spec open olist clist)
	1230	(while specs
	1231	(setq spec (car specs)
	1232	specs (cdr specs)
	1233	open (nth 1 spec)
	1234	;; build alist ((OPEN-DELIM OPEN-SYM BLOCK-SYM) ...)
	1235	olist (cons (list (car open) (cadr open) (car spec)) olist)
	1236	;; build alist ((CLOSE-DELIM CLOSE-SYM) ...)
	1237	clist (cons (nth 2 spec) clist)))
	1238	`(define-lex-analyzer ,name
	1239	,doc
	1240	(and
	1241	(looking-at "\$\\s(\\\|\\s)\$")
	1242	(let ((text (match-string 0)) match)
	1243	(cond
	1244	((setq match (assoc text ',olist))
	1245	(if (or (not semantic-lex-maximum-depth)
	1246	(< semantic-lex-current-depth semantic-lex-maximum-depth))
	1247	(progn
	1248	(setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
	1249	(semantic-lex-push-token
	1250	(semantic-lex-token
	1251	(nth 1 match)
	1252	(match-beginning 0) (match-end 0))))
	1253	(semantic-lex-push-token
	1254	(semantic-lex-token
	1255	(nth 2 match)
	1256	(match-beginning 0)
	1257	(save-excursion
	1258	(semantic-lex-unterminated-syntax-protection (nth 2 match)
	1259	(forward-list 1)
	1260	(point)))
	1261	))
	1262	))
	1263	((setq match (assoc text ',clist))
	1264	(setq semantic-lex-current-depth (1- semantic-lex-current-depth))
	1265	(semantic-lex-push-token
	1266	(semantic-lex-token
	1267	(nth 1 match)
	1268	(match-beginning 0) (match-end 0)))))))
	1269	)))
	1270	\f
	1271	;;; Analyzers
	1272	;;
	1273	;; Pre-defined common analyzers.
	1274	;;
	1275	(define-lex-analyzer semantic-lex-default-action
	1276	"The default action when no other lexical actions match text.
	1277	This action will just throw an error."
	1278	t
	1279	(error "Unmatched Text during Lexical Analysis"))
	1280
	1281	(define-lex-analyzer semantic-lex-beginning-of-line
	1282	"Detect and create a beginning of line token (BOL)."
	1283	(and (bolp)
	1284	;; Just insert a (bol N . N) token in the token stream,
	1285	;; without moving the point. N is the point at the
	1286	;; beginning of line.
	1287	(semantic-lex-push-token (semantic-lex-token 'bol (point) (point)))
	1288	nil) ;; CONTINUE
	1289	;; We identify and add the BOL token onto the stream, but since
	1290	;; semantic-lex-end-point doesn't move, we always fail CONDITION, and have no
	1291	;; FORMS body.
	1292	nil)
	1293
	1294	(define-lex-simple-regex-analyzer semantic-lex-newline
	1295	"Detect and create newline tokens."
	1296	"\\s-*\$\n\\\|\\s>\$" 'newline 1)
	1297
	1298	(define-lex-regex-analyzer semantic-lex-newline-as-whitespace
	1299	"Detect and create newline tokens.
	1300	Use this ONLY if newlines are not whitespace characters (such as when
	1301	they are comment end characters) AND when you want whitespace tokens."
	1302	"\\s-*\$\n\\\|\\s>\$"
	1303	;; Language wants whitespaces. Create a token for it.
	1304	(if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
	1305	'whitespace)
	1306	;; Merge whitespace tokens together if they are adjacent. Two
	1307	;; whitespace tokens may be sperated by a comment which is not in
	1308	;; the token stream.
	1309	(setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
	1310	(match-end 0))
	1311	(semantic-lex-push-token
	1312	(semantic-lex-token
	1313	'whitespace (match-beginning 0) (match-end 0)))))
	1314
	1315	(define-lex-regex-analyzer semantic-lex-ignore-newline
	1316	"Detect and ignore newline tokens.
	1317	Use this ONLY if newlines are not whitespace characters (such as when
	1318	they are comment end characters)."
	1319	"\\s-*\$\n\\\|\\s>\$"
	1320	(setq semantic-lex-end-point (match-end 0)))
	1321
	1322	(define-lex-regex-analyzer semantic-lex-whitespace
	1323	"Detect and create whitespace tokens."
	1324	;; catch whitespace when needed
	1325	"\\s-+"
	1326	;; Language wants whitespaces. Create a token for it.
	1327	(if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
	1328	'whitespace)
	1329	;; Merge whitespace tokens together if they are adjacent. Two
	1330	;; whitespace tokens may be sperated by a comment which is not in
	1331	;; the token stream.
	1332	(progn
	1333	(setq semantic-lex-end-point (match-end 0))
	1334	(setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
	1335	semantic-lex-end-point))
	1336	(semantic-lex-push-token
	1337	(semantic-lex-token
	1338	'whitespace (match-beginning 0) (match-end 0)))))
	1339
	1340	(define-lex-regex-analyzer semantic-lex-ignore-whitespace
	1341	"Detect and skip over whitespace tokens."
	1342	;; catch whitespace when needed
	1343	"\\s-+"
	1344	;; Skip over the detected whitespace, do not create a token for it.
	1345	(setq semantic-lex-end-point (match-end 0)))
	1346
	1347	(define-lex-simple-regex-analyzer semantic-lex-number
	1348	"Detect and create number tokens.
	1349	See `semantic-lex-number-expression' for details on matching numbers,
	1350	and number formats."
	1351	semantic-lex-number-expression 'number)
	1352
	1353	(define-lex-regex-analyzer semantic-lex-symbol-or-keyword
	1354	"Detect and create symbol and keyword tokens."
	1355	"\$\\sw\\\|\\s_\$+"
	1356	(semantic-lex-push-token
	1357	(semantic-lex-token
	1358	(or (semantic-lex-keyword-p (match-string 0)) 'symbol)
	1359	(match-beginning 0) (match-end 0))))
	1360
	1361	(define-lex-simple-regex-analyzer semantic-lex-charquote
	1362	"Detect and create charquote tokens."
	1363	;; Character quoting characters (ie, \n as newline)
	1364	"\\s\\+" 'charquote)
	1365
	1366	(define-lex-simple-regex-analyzer semantic-lex-punctuation
	1367	"Detect and create punctuation tokens."
	1368	"\$\\s.\\\|\\s$\\\|\\s'\$" 'punctuation)
	1369
	1370	(define-lex-analyzer semantic-lex-punctuation-type
	1371	"Detect and create a punctuation type token.
	1372	Recognized punctuations are defined in the current table of lexical
	1373	types, as the value of the `punctuation' token type."
	1374	(and (looking-at "\$\\s.\\\|\\s$\\\|\\s'\$+")
	1375	(let* ((key (match-string 0))
	1376	(pos (match-beginning 0))
	1377	(end (match-end 0))
	1378	(len (- end pos))
	1379	(lst (semantic-lex-type-value "punctuation" t))
	1380	(def (car lst)) ;; default lexical symbol or nil
	1381	(lst (cdr lst)) ;; alist of (LEX-SYM . PUNCT-STRING)
	1382	(elt nil))
	1383	(if lst
	1384	;; Starting with the longest one, search if the
	1385	;; punctuation string is defined for this language.
	1386	(while (and (> len 0) (not (setq elt (rassoc key lst))))
	1387	(setq len (1- len)
	1388	key (substring key 0 len))))
	1389	(if elt ;; Return the punctuation token found
	1390	(semantic-lex-push-token
	1391	(semantic-lex-token (car elt) pos (+ pos len)))
	1392	(if def ;; Return a default generic token
	1393	(semantic-lex-push-token
	1394	(semantic-lex-token def pos end))
	1395	;; Nothing match
	1396	)))))
	1397
	1398	(define-lex-regex-analyzer semantic-lex-paren-or-list
	1399	"Detect open parenthesis.
	1400	Return either a paren token or a semantic list token depending on
	1401	`semantic-lex-current-depth'."
	1402	"\\s("
	1403	(if (or (not semantic-lex-maximum-depth)
	1404	(< semantic-lex-current-depth semantic-lex-maximum-depth))
	1405	(progn
	1406	(setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
	1407	(semantic-lex-push-token
	1408	(semantic-lex-token
	1409	'open-paren (match-beginning 0) (match-end 0))))
	1410	(semantic-lex-push-token
	1411	(semantic-lex-token
	1412	'semantic-list (match-beginning 0)
	1413	(save-excursion
	1414	(semantic-lex-unterminated-syntax-protection 'semantic-list
	1415	(forward-list 1)
	1416	(point))
	1417	)))
	1418	))
	1419
	1420	(define-lex-simple-regex-analyzer semantic-lex-open-paren
	1421	"Detect and create an open parenthisis token."
	1422	"\\s(" 'open-paren 0 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth)))
	1423
	1424	(define-lex-simple-regex-analyzer semantic-lex-close-paren
	1425	"Detect and create a close paren token."
	1426	"\\s)" 'close-paren 0 (setq semantic-lex-current-depth (1- semantic-lex-current-depth)))
	1427
	1428	(define-lex-regex-analyzer semantic-lex-string
	1429	"Detect and create a string token."
	1430	"\\s\""
	1431	;; Zing to the end of this string.
	1432	(semantic-lex-push-token
	1433	(semantic-lex-token
	1434	'string (point)
	1435	(save-excursion
	1436	(semantic-lex-unterminated-syntax-protection 'string
	1437	(forward-sexp 1)
	1438	(point))
	1439	))))
	1440
	1441	(define-lex-regex-analyzer semantic-lex-comments
	1442	"Detect and create a comment token."
	1443	semantic-lex-comment-regex
	1444	(save-excursion
	1445	(forward-comment 1)
	1446	;; Generate newline token if enabled
	1447	(if (bolp) (backward-char 1))
	1448	(setq semantic-lex-end-point (point))
	1449	;; Language wants comments or want them as whitespaces,
	1450	;; link them together.
	1451	(if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'comment)
	1452	(setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
	1453	semantic-lex-end-point)
	1454	(semantic-lex-push-token
	1455	(semantic-lex-token
	1456	'comment (match-beginning 0) semantic-lex-end-point)))))
	1457
	1458	(define-lex-regex-analyzer semantic-lex-comments-as-whitespace
	1459	"Detect comments and create a whitespace token."
	1460	semantic-lex-comment-regex
	1461	(save-excursion
	1462	(forward-comment 1)
	1463	;; Generate newline token if enabled
	1464	(if (bolp) (backward-char 1))
	1465	(setq semantic-lex-end-point (point))
	1466	;; Language wants comments or want them as whitespaces,
	1467	;; link them together.
	1468	(if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'whitespace)
	1469	(setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
	1470	semantic-lex-end-point)
	1471	(semantic-lex-push-token
	1472	(semantic-lex-token
	1473	'whitespace (match-beginning 0) semantic-lex-end-point)))))
	1474
	1475	(define-lex-regex-analyzer semantic-lex-ignore-comments
	1476	"Detect and create a comment token."
	1477	semantic-lex-comment-regex
	1478	(let ((comment-start-point (point)))
	1479	(forward-comment 1)
	1480	(if (eq (point) comment-start-point)
	1481	;; In this case our start-skip string failed
	1482	;; to work properly. Lets try and move over
	1483	;; whatever white space we matched to begin
	1484	;; with.
	1485	(skip-syntax-forward "-.'"
	1486	(save-excursion
	1487	(end-of-line)
	1488	(point)))
	1489	;; We may need to back up so newlines or whitespace is generated.
	1490	(if (bolp)
	1491	(backward-char 1)))
	1492	(if (eq (point) comment-start-point)
	1493	(error "Strange comment syntax prevents lexical analysis"))
	1494	(setq semantic-lex-end-point (point))))
	1495	\f
	1496	;;; Comment lexer
	1497	;;
	1498	;; Predefined lexers that could be used instead of creating new
	1499	;; analyers.
	1500
	1501	(define-lex semantic-comment-lexer
	1502	"A simple lexical analyzer that handles comments.
	1503	This lexer will only return comment tokens. It is the default lexer
	1504	used by `semantic-find-doc-snarf-comment' to snarf up the comment at
	1505	point."
	1506	semantic-lex-ignore-whitespace
	1507	semantic-lex-ignore-newline
	1508	semantic-lex-comments
	1509	semantic-lex-default-action)
	1510
	1511	;;; Test Lexer
	1512	;;
	1513	(define-lex semantic-simple-lexer
	1514	"A simple lexical analyzer that handles simple buffers.
	1515	This lexer ignores comments and whitespace, and will return
	1516	syntax as specified by the syntax table."
	1517	semantic-lex-ignore-whitespace
	1518	semantic-lex-ignore-newline
	1519	semantic-lex-number
	1520	semantic-lex-symbol-or-keyword
	1521	semantic-lex-charquote
	1522	semantic-lex-paren-or-list
	1523	semantic-lex-close-paren
	1524	semantic-lex-string
	1525	semantic-lex-ignore-comments
	1526	semantic-lex-punctuation
	1527	semantic-lex-default-action)
	1528	\f
	1529	;;; Analyzers generated from grammar.
	1530	;;
	1531	;; Some analyzers are hand written. Analyzers created with these
	1532	;; functions are generated from the grammar files.
	1533
	1534	(defmacro define-lex-keyword-type-analyzer (name doc syntax)
	1535	"Define a keyword type analyzer NAME with DOC string.
	1536	SYNTAX is the regexp that matches a keyword syntactic expression."
	1537	(let ((key (make-symbol "key")))
	1538	`(define-lex-analyzer ,name
	1539	,doc
	1540	(and (looking-at ,syntax)
	1541	(let ((,key (semantic-lex-keyword-p (match-string 0))))
	1542	(when ,key
	1543	(semantic-lex-push-token
	1544	(semantic-lex-token
	1545	,key (match-beginning 0) (match-end 0)))))))
	1546	))
	1547
	1548	(defmacro define-lex-sexp-type-analyzer (name doc syntax token)
	1549	"Define a sexp type analyzer NAME with DOC string.
	1550	SYNTAX is the regexp that matches the beginning of the s-expression.
	1551	TOKEN is the lexical token returned when SYNTAX matches."
	1552	`(define-lex-regex-analyzer ,name
	1553	,doc
	1554	,syntax
	1555	(semantic-lex-push-token
	1556	(semantic-lex-token
	1557	,token (point)
	1558	(save-excursion
	1559	(semantic-lex-unterminated-syntax-protection ,token
	1560	(forward-sexp 1)
	1561	(point))))))
	1562	)
	1563
	1564	(defmacro define-lex-regex-type-analyzer (name doc syntax matches default)
	1565	"Define a regexp type analyzer NAME with DOC string.
	1566	SYNTAX is the regexp that matches a syntactic expression.
	1567	MATCHES is an alist of lexical elements used to refine the syntactic
	1568	expression.
	1569	DEFAULT is the default lexical token returned when no MATCHES."
	1570	(if matches
	1571	(let* ((val (make-symbol "val"))
	1572	(lst (make-symbol "lst"))
	1573	(elt (make-symbol "elt"))
	1574	(pos (make-symbol "pos"))
	1575	(end (make-symbol "end")))
	1576	`(define-lex-analyzer ,name
	1577	,doc
	1578	(and (looking-at ,syntax)
	1579	(let* ((,val (match-string 0))
	1580	(,pos (match-beginning 0))
	1581	(,end (match-end 0))
	1582	(,lst ,matches)
	1583	,elt)
	1584	(while (and ,lst (not ,elt))
	1585	(if (string-match (cdar ,lst) ,val)
	1586	(setq ,elt (caar ,lst))
	1587	(setq ,lst (cdr ,lst))))
	1588	(semantic-lex-push-token
	1589	(semantic-lex-token (or ,elt ,default) ,pos ,end))))
	1590	))
	1591	`(define-lex-simple-regex-analyzer ,name
	1592	,doc
	1593	,syntax ,default)
	1594	))
	1595
	1596	(defmacro define-lex-string-type-analyzer (name doc syntax matches default)
	1597	"Define a string type analyzer NAME with DOC string.
	1598	SYNTAX is the regexp that matches a syntactic expression.
	1599	MATCHES is an alist of lexical elements used to refine the syntactic
	1600	expression.
	1601	DEFAULT is the default lexical token returned when no MATCHES."
	1602	(if matches
	1603	(let* ((val (make-symbol "val"))
	1604	(lst (make-symbol "lst"))
	1605	(elt (make-symbol "elt"))
	1606	(pos (make-symbol "pos"))
	1607	(end (make-symbol "end"))
	1608	(len (make-symbol "len")))
	1609	`(define-lex-analyzer ,name
	1610	,doc
	1611	(and (looking-at ,syntax)
	1612	(let* ((,val (match-string 0))
	1613	(,pos (match-beginning 0))
	1614	(,end (match-end 0))
	1615	(,len (- ,end ,pos))
	1616	(,lst ,matches)
	1617	,elt)
	1618	;; Starting with the longest one, search if a lexical
	1619	;; value match a token defined for this language.
	1620	(while (and (> ,len 0) (not (setq ,elt (rassoc ,val ,lst))))
	1621	(setq ,len (1- ,len)
	1622	,val (substring ,val 0 ,len)))
	1623	(when ,elt ;; Adjust token end position.
	1624	(setq ,elt (car ,elt)
	1625	,end (+ ,pos ,len)))
	1626	(semantic-lex-push-token
	1627	(semantic-lex-token (or ,elt ,default) ,pos ,end))))
	1628	))
	1629	`(define-lex-simple-regex-analyzer ,name
	1630	,doc
	1631	,syntax ,default)
	1632	))
	1633
	1634	(defmacro define-lex-block-type-analyzer (name doc syntax matches)
	1635	"Define a block type analyzer NAME with DOC string.
	1636
	1637	SYNTAX is the regexp that matches block delimiters, typically the
	1638	open (`\\\\s(') and close (`\\\\s)') parenthesis syntax classes.
	1639
	1640	MATCHES is a pair (OPEN-SPECS . CLOSE-SPECS) that defines blocks.
	1641
	1642	OPEN-SPECS is a list of (OPEN-DELIM OPEN-TOKEN BLOCK-TOKEN) elements
	1643	where:
	1644
	1645	OPEN-DELIM is a string: the block open delimiter character.
	1646
	1647	OPEN-TOKEN is the lexical token class associated to the OPEN-DELIM
	1648	delimiter.
	1649
	1650	BLOCK-TOKEN is the lexical token class associated to the block
	1651	that starts at the OPEN-DELIM delimiter.
	1652
	1653	CLOSE-SPECS is a list of (CLOSE-DELIM CLOSE-TOKEN) elements where:
	1654
	1655	CLOSE-DELIM is a string: the block end delimiter character.
	1656
	1657	CLOSE-TOKEN is the lexical token class associated to the
	1658	CLOSE-DELIM delimiter.
	1659
	1660	Each element in OPEN-SPECS must have a corresponding element in
	1661	CLOSE-SPECS.
	1662
	1663	The lexer will return a BLOCK-TOKEN token when the value of
	1664	`semantic-lex-current-depth' is greater than or equal to the maximum
	1665	depth of parenthesis tracking (see also the function `semantic-lex').
	1666	Otherwise it will return OPEN-TOKEN and CLOSE-TOKEN tokens.
	1667
	1668	TO DO: Put the following in the developer's guide and just put a
	1669	reference here.
	1670
	1671	In the grammar:
	1672
	1673	The value of a block token must be a string that contains a readable
	1674	sexp of the form:
	1675
	1676	\"(OPEN-TOKEN CLOSE-TOKEN)\"
	1677
	1678	OPEN-TOKEN and CLOSE-TOKEN represent the block delimiters, and must be
	1679	lexical tokens of respectively `open-paren' and `close-paren' types.
	1680	Their value is the corresponding delimiter character as a string.
	1681
	1682	Here is a small example to analyze a parenthesis block:
	1683
	1684	%token <block> PAREN_BLOCK \"(LPAREN RPAREN)\"
	1685	%token <open-paren> LPAREN \"(\"
	1686	%token <close-paren> RPAREN \")\"
	1687
	1688	When the lexer encounters the open-paren delimiter \"(\":
	1689
	1690	- If the maximum depth of parenthesis tracking is not reached (that
	1691	is, current depth < max depth), it returns a (LPAREN start . end)
	1692	token, then continue analysis inside the block. Later, when the
	1693	corresponding close-paren delimiter \")\" will be encountered, it
	1694	will return a (RPAREN start . end) token.
	1695
	1696	- If the maximum depth of parenthesis tracking is reached (current
	1697	depth >= max depth), it returns the whole parenthesis block as
	1698	a (PAREN_BLOCK start . end) token."
	1699	(let* ((val (make-symbol "val"))
	1700	(lst (make-symbol "lst"))
	1701	(elt (make-symbol "elt")))
	1702	`(define-lex-analyzer ,name
	1703	,doc
	1704	(and
	1705	(looking-at ,syntax) ;; "\$\\s(\\\|\\s)\$"
	1706	(let ((,val (match-string 0))
	1707	(,lst ,matches)
	1708	,elt)
	1709	(cond
	1710	((setq ,elt (assoc ,val (car ,lst)))
	1711	(if (or (not semantic-lex-maximum-depth)
	1712	(< semantic-lex-current-depth semantic-lex-maximum-depth))
	1713	(progn
	1714	(setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
	1715	(semantic-lex-push-token
	1716	(semantic-lex-token
	1717	(nth 1 ,elt)
	1718	(match-beginning 0) (match-end 0))))
	1719	(semantic-lex-push-token
	1720	(semantic-lex-token
	1721	(nth 2 ,elt)
	1722	(match-beginning 0)
	1723	(save-excursion
	1724	(semantic-lex-unterminated-syntax-protection (nth 2 ,elt)
	1725	(forward-list 1)
	1726	(point)))))))
	1727	((setq ,elt (assoc ,val (cdr ,lst)))
	1728	(setq semantic-lex-current-depth (1- semantic-lex-current-depth))
	1729	(semantic-lex-push-token
	1730	(semantic-lex-token
	1731	(nth 1 ,elt)
	1732	(match-beginning 0) (match-end 0))))
	1733	))))
	1734	))
	1735	\f
	1736	;;; Lexical Safety
	1737	;;
	1738	;; The semantic lexers, unlike other lexers, can throw errors on
	1739	;; unbalanced syntax. Since editing is all about changeging test
	1740	;; we need to provide a convenient way to protect against syntactic
	1741	;; inequalities.
	1742
	1743	(defmacro semantic-lex-catch-errors (symbol &rest forms)
	1744	"Using SYMBOL, execute FORMS catching lexical errors.
	1745	If FORMS results in a call to the parser that throws a lexical error,
	1746	the error will be caught here without the buffer's cache being thrown
	1747	out of date.
	1748	If there is an error, the syntax that failed is returned.
	1749	If there is no error, then the last value of FORMS is returned."
	1750	(let ((ret (make-symbol "ret"))
	1751	(syntax (make-symbol "syntax"))
	1752	(start (make-symbol "start"))
	1753	(end (make-symbol "end")))
	1754	`(let* ((semantic-lex-unterminated-syntax-end-function
	1755	(lambda (,syntax ,start ,end)
	1756	(throw ',symbol ,syntax)))
	1757	;; Delete the below when semantic-flex is fully retired.
	1758	(semantic-flex-unterminated-syntax-end-function
	1759	semantic-lex-unterminated-syntax-end-function)
	1760	(,ret (catch ',symbol
	1761	(save-excursion
	1762	,@forms
	1763	nil))))
	1764	;; Great Sadness. Assume that FORMS execute within the
	1765	;; confines of the current buffer only! Mark this thing
	1766	;; unparseable iff the special symbol was thrown. This
	1767	;; will prevent future calls from parsing, but will allow
	1768	;; then to still return the cache.
	1769	(when ,ret
	1770	;; Leave this message off. If an APP using this fcn wants
	1771	;; a message, they can do it themselves. This cleans up
	1772	;; problems with the idle scheduler obscuring useful data.
	1773	;;(message "Buffer not currently parsable (%S)." ,ret)
	1774	(semantic-parse-tree-unparseable))
	1775	,ret)))
	1776	(put 'semantic-lex-catch-errors 'lisp-indent-function 1)
	1777
	1778	\f
	1779	;;; Interfacing with edebug
	1780	;;
	1781	(add-hook
	1782	'edebug-setup-hook
	1783	#'(lambda ()
	1784
	1785	(def-edebug-spec define-lex
	1786	(&define name stringp (&rest symbolp))
	1787	)
	1788	(def-edebug-spec define-lex-analyzer
	1789	(&define name stringp form def-body)
	1790	)
	1791	(def-edebug-spec define-lex-regex-analyzer
	1792	(&define name stringp form def-body)
	1793	)
	1794	(def-edebug-spec define-lex-simple-regex-analyzer
	1795	(&define name stringp form symbolp [ &optional form ] def-body)
	1796	)
	1797	(def-edebug-spec define-lex-block-analyzer
	1798	(&define name stringp form (&rest form))
	1799	)
	1800	(def-edebug-spec semantic-lex-catch-errors
	1801	(symbolp def-body)
	1802	)
	1803
	1804	))
	1805	\f
	1806	;;; Compatibility with Semantic 1.x lexical analysis
	1807	;;
	1808	;; NOTE: DELETE THIS SOMEDAY SOON
	1809
	1810	(semantic-alias-obsolete 'semantic-flex-start 'semantic-lex-token-start)
	1811	(semantic-alias-obsolete 'semantic-flex-end 'semantic-lex-token-end)
	1812	(semantic-alias-obsolete 'semantic-flex-text 'semantic-lex-token-text)
	1813	(semantic-alias-obsolete 'semantic-flex-make-keyword-table 'semantic-lex-make-keyword-table)
	1814	(semantic-alias-obsolete 'semantic-flex-keyword-p 'semantic-lex-keyword-p)
	1815	(semantic-alias-obsolete 'semantic-flex-keyword-put 'semantic-lex-keyword-put)
	1816	(semantic-alias-obsolete 'semantic-flex-keyword-get 'semantic-lex-keyword-get)
	1817	(semantic-alias-obsolete 'semantic-flex-map-keywords 'semantic-lex-map-keywords)
	1818	(semantic-alias-obsolete 'semantic-flex-keywords 'semantic-lex-keywords)
	1819	(semantic-alias-obsolete 'semantic-flex-buffer 'semantic-lex-buffer)
	1820	(semantic-alias-obsolete 'semantic-flex-list 'semantic-lex-list)
	1821
	1822	;; This simple scanner uses the syntax table to generate a stream of
	1823	;; simple tokens of the form:
	1824	;;
	1825	;; (SYMBOL START . END)
	1826	;;
	1827	;; Where symbol is the type of thing it is. START and END mark that
	1828	;; objects boundary.
	1829
	1830	(defvar semantic-flex-tokens semantic-lex-tokens
	1831	"An alist of of semantic token types.
	1832	See variable `semantic-lex-tokens'.")
	1833
	1834	(defvar semantic-flex-unterminated-syntax-end-function
	1835	(lambda (syntax syntax-start flex-end) flex-end)
	1836	"Function called when unterminated syntax is encountered.
	1837	This should be set to one function. That function should take three
	1838	parameters. The SYNTAX, or type of syntax which is unterminated.
	1839	SYNTAX-START where the broken syntax begins.
	1840	FLEX-END is where the lexical analysis was asked to end.
	1841	This function can be used for languages that can intelligently fix up
	1842	broken syntax, or the exit lexical analysis via `throw' or `signal'
	1843	when finding unterminated syntax.")
	1844
	1845	(defvar semantic-flex-extensions nil
	1846	"Buffer local extensions to the lexical analyzer.
	1847	This should contain an alist with a key of a regex and a data element of
	1848	a function. The function should both move point, and return a lexical
	1849	token of the form:
	1850	( TYPE START . END)
	1851	nil is also a valid return value.
	1852	TYPE can be any type of symbol, as long as it doesn't occur as a
	1853	nonterminal in the language definition.")
	1854	(make-variable-buffer-local 'semantic-flex-extensions)
	1855
	1856	(defvar semantic-flex-syntax-modifications nil
	1857	"Changes to the syntax table for this buffer.
	1858	These changes are active only while the buffer is being flexed.
	1859	This is a list where each element has the form:
	1860	(CHAR CLASS)
	1861	CHAR is the char passed to `modify-syntax-entry',
	1862	and CLASS is the string also passed to `modify-syntax-entry' to define
	1863	what syntax class CHAR has.")
	1864	(make-variable-buffer-local 'semantic-flex-syntax-modifications)
	1865
	1866	(defvar semantic-ignore-comments t
	1867	"Default comment handling.
	1868	t means to strip comments when flexing. Nil means to keep comments
	1869	as part of the token stream.")
	1870	(make-variable-buffer-local 'semantic-ignore-comments)
	1871
	1872	(defvar semantic-flex-enable-newlines nil
	1873	"When flexing, report 'newlines as syntactic elements.
	1874	Useful for languages where the newline is a special case terminator.
	1875	Only set this on a per mode basis, not globally.")
	1876	(make-variable-buffer-local 'semantic-flex-enable-newlines)
	1877
	1878	(defvar semantic-flex-enable-whitespace nil
	1879	"When flexing, report 'whitespace as syntactic elements.
	1880	Useful for languages where the syntax is whitespace dependent.
	1881	Only set this on a per mode basis, not globally.")
	1882	(make-variable-buffer-local 'semantic-flex-enable-whitespace)
	1883
	1884	(defvar semantic-flex-enable-bol nil
	1885	"When flexing, report beginning of lines as syntactic elements.
	1886	Useful for languages like python which are indentation sensitive.
	1887	Only set this on a per mode basis, not globally.")
	1888	(make-variable-buffer-local 'semantic-flex-enable-bol)
	1889
	1890	(defvar semantic-number-expression semantic-lex-number-expression
	1891	"See variable `semantic-lex-number-expression'.")
	1892	(make-variable-buffer-local 'semantic-number-expression)
	1893
	1894	(defvar semantic-flex-depth 0
	1895	"Default flexing depth.
	1896	This specifies how many lists to create tokens in.")
	1897	(make-variable-buffer-local 'semantic-flex-depth)
	1898
	1899	(defun semantic-flex (start end &optional depth length)
	1900	"Using the syntax table, do something roughly equivalent to flex.
	1901	Semantically check between START and END. Optional argument DEPTH
	1902	indicates at what level to scan over entire lists.
	1903	The return value is a token stream. Each element is a list, such of
	1904	the form (symbol start-expression . end-expression) where SYMBOL
	1905	denotes the token type.
	1906	See `semantic-flex-tokens' variable for details on token types.
	1907	END does not mark the end of the text scanned, only the end of the
	1908	beginning of text scanned. Thus, if a string extends past END, the
	1909	end of the return token will be larger than END. To truly restrict
	1910	scanning, use `narrow-to-region'.
	1911	The last argument, LENGTH specifies that `semantic-flex' should only
	1912	return LENGTH tokens."
	1913	(message "`semantic-flex' is an obsolete function. Use `define-lex' to create lexers.")
	1914	(if (not semantic-flex-keywords-obarray)
	1915	(setq semantic-flex-keywords-obarray [ nil ]))
	1916	(let ((ts nil)
	1917	(pos (point))
	1918	(ep nil)
	1919	(curdepth 0)
	1920	(cs (if comment-start-skip
	1921	(concat "\$\\s<\\\|" comment-start-skip "\$")
	1922	(concat "\$\\s<\$")))
	1923	(newsyntax (copy-syntax-table (syntax-table)))
	1924	(mods semantic-flex-syntax-modifications)
	1925	;; Use the default depth if it is not specified.
	1926	(depth (or depth semantic-flex-depth)))
	1927	;; Update the syntax table
	1928	(while mods
	1929	(modify-syntax-entry (car (car mods)) (car (cdr (car mods))) newsyntax)
	1930	(setq mods (cdr mods)))
	1931	(with-syntax-table newsyntax
	1932	(goto-char start)
	1933	(while (and (< (point) end) (or (not length) (<= (length ts) length)))
	1934	(cond
	1935	;; catch beginning of lines when needed.
	1936	;; Must be done before catching any other tokens!
	1937	((and semantic-flex-enable-bol
	1938	(bolp)
	1939	;; Just insert a (bol N . N) token in the token stream,
	1940	;; without moving the point. N is the point at the
	1941	;; beginning of line.
	1942	(setq ts (cons (cons 'bol (cons (point) (point))) ts))
	1943	nil)) ;; CONTINUE
	1944	;; special extensions, includes whitespace, nl, etc.
	1945	((and semantic-flex-extensions
	1946	(let ((fe semantic-flex-extensions)
	1947	(r nil))
	1948	(while fe
	1949	(if (looking-at (car (car fe)))
	1950	(setq ts (cons (funcall (cdr (car fe))) ts)
	1951	r t
	1952	fe nil
	1953	ep (point)))
	1954	(setq fe (cdr fe)))
	1955	(if (and r (not (car ts))) (setq ts (cdr ts)))
	1956	r)))
	1957	;; catch newlines when needed
	1958	((looking-at "\\s-*\$\n\\\|\\s>\$")
	1959	(if semantic-flex-enable-newlines
	1960	(setq ep (match-end 1)
	1961	ts (cons (cons 'newline
	1962	(cons (match-beginning 1) ep))
	1963	ts))))
	1964	;; catch whitespace when needed
	1965	((looking-at "\\s-+")
	1966	(if semantic-flex-enable-whitespace
	1967	;; Language wants whitespaces, link them together.
	1968	(if (eq (car (car ts)) 'whitespace)
	1969	(setcdr (cdr (car ts)) (match-end 0))
	1970	(setq ts (cons (cons 'whitespace
	1971	(cons (match-beginning 0)
	1972	(match-end 0)))
	1973	ts)))))
	1974	;; numbers
	1975	((and semantic-number-expression
	1976	(looking-at semantic-number-expression))
	1977	(setq ts (cons (cons 'number
	1978	(cons (match-beginning 0)
	1979	(match-end 0)))
	1980	ts)))
	1981	;; symbols
	1982	((looking-at "\$\\sw\\\|\\s_\$+")
	1983	(setq ts (cons (cons
	1984	;; Get info on if this is a keyword or not
	1985	(or (semantic-lex-keyword-p (match-string 0))
	1986	'symbol)
	1987	(cons (match-beginning 0) (match-end 0)))
	1988	ts)))
	1989	;; Character quoting characters (ie, \n as newline)
	1990	((looking-at "\\s\\+")
	1991	(setq ts (cons (cons 'charquote
	1992	(cons (match-beginning 0) (match-end 0)))
	1993	ts)))
	1994	;; Open parens, or semantic-lists.
	1995	((looking-at "\\s(")
	1996	(if (or (not depth) (< curdepth depth))
	1997	(progn
	1998	(setq curdepth (1+ curdepth))
	1999	(setq ts (cons (cons 'open-paren
	2000	(cons (match-beginning 0) (match-end 0)))
	2001	ts)))
	2002	(setq ts (cons
	2003	(cons 'semantic-list
	2004	(cons (match-beginning 0)
	2005	(save-excursion
	2006	(condition-case nil
	2007	(forward-list 1)
	2008	;; This case makes flex robust
	2009	;; to broken lists.
	2010	(error
	2011	(goto-char
	2012	(funcall
	2013	semantic-flex-unterminated-syntax-end-function
	2014	'semantic-list
	2015	start end))))
	2016	(setq ep (point)))))
	2017	ts))))
	2018	;; Close parens
	2019	((looking-at "\\s)")
	2020	(setq ts (cons (cons 'close-paren
	2021	(cons (match-beginning 0) (match-end 0)))
	2022	ts))
	2023	(setq curdepth (1- curdepth)))
	2024	;; String initiators
	2025	((looking-at "\\s\"")
	2026	;; Zing to the end of this string.
	2027	(setq ts (cons (cons 'string
	2028	(cons (match-beginning 0)
	2029	(save-excursion
	2030	(condition-case nil
	2031	(forward-sexp 1)
	2032	;; This case makes flex
	2033	;; robust to broken strings.
	2034	(error
	2035	(goto-char
	2036	(funcall
	2037	semantic-flex-unterminated-syntax-end-function
	2038	'string
	2039	start end))))
	2040	(setq ep (point)))))
	2041	ts)))
	2042	;; comments
	2043	((looking-at cs)
	2044	(if (and semantic-ignore-comments
	2045	(not semantic-flex-enable-whitespace))
	2046	;; If the language doesn't deal with comments nor
	2047	;; whitespaces, ignore them here.
	2048	(let ((comment-start-point (point)))
	2049	(forward-comment 1)
	2050	(if (eq (point) comment-start-point)
	2051	;; In this case our start-skip string failed
	2052	;; to work properly. Lets try and move over
	2053	;; whatever white space we matched to begin
	2054	;; with.
	2055	(skip-syntax-forward "-.'"
	2056	(save-excursion
	2057	(end-of-line)
	2058	(point)))
	2059	;;(forward-comment 1)
	2060	;; Generate newline token if enabled
	2061	(if (and semantic-flex-enable-newlines
	2062	(bolp))
	2063	(backward-char 1)))
	2064	(if (eq (point) comment-start-point)
	2065	(error "Strange comment syntax prevents lexical analysis"))
	2066	(setq ep (point)))
	2067	(let ((tk (if semantic-ignore-comments 'whitespace 'comment)))
	2068	(save-excursion
	2069	(forward-comment 1)
	2070	;; Generate newline token if enabled
	2071	(if (and semantic-flex-enable-newlines
	2072	(bolp))
	2073	(backward-char 1))
	2074	(setq ep (point)))
	2075	;; Language wants comments or want them as whitespaces,
	2076	;; link them together.
	2077	(if (eq (car (car ts)) tk)
	2078	(setcdr (cdr (car ts)) ep)
	2079	(setq ts (cons (cons tk (cons (match-beginning 0) ep))
	2080	ts))))))
	2081	;; punctuation
	2082	((looking-at "\$\\s.\\\|\\s$\\\|\\s'\$")
	2083	(setq ts (cons (cons 'punctuation
	2084	(cons (match-beginning 0) (match-end 0)))
	2085	ts)))
	2086	;; unknown token
	2087	(t
	2088	(error "What is that?")))
	2089	(goto-char (or ep (match-end 0)))
	2090	(setq ep nil)))
	2091	;; maybe catch the last beginning of line when needed
	2092	(and semantic-flex-enable-bol
	2093	(= (point) end)
	2094	(bolp)
	2095	(setq ts (cons (cons 'bol (cons (point) (point))) ts)))
	2096	(goto-char pos)
	2097	;;(message "Flexing muscles...done")
	2098	(nreverse ts)))
	2099
	2100	(provide 'semantic/lex)
	2101
	2102	;; Local variables:
	2103	;; generated-autoload-file: "loaddefs.el"
	2104	;; generated-autoload-feature: semantic/loaddefs
	2105	;; generated-autoload-load-name: "semantic/lex"
	2106	;; End:
	2107
	2108	;;; semantic-lex.el ends here