(define-charset): New args :min-code and :max-code.
[bpt/emacs.git] / src / syntax.h
CommitLineData
9889c728 1/* Declarations having to do with GNU Emacs syntax tables.
3d7db6f1 2 Copyright (C) 1985, 93, 94, 97, 1998, 2002 Free Software Foundation, Inc.
9889c728
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
9889c728
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
9889c728
JB
20
21
/* NOTE(review): presumably the Lisp symbol `syntax-table-p'; nothing in
   this file defines or uses it -- confirm.  */
22extern Lisp_Object Qsyntax_table_p;
/* Called by the SETUP_SYNTAX_TABLE... and UPDATE_SYNTAX_TABLE... macros
   in this file with, in order: a position; a count (or 1 / -1) whose
   sign gives the direction; a flag that is 1 when called from the
   SETUP macros and 0 when called from the UPDATE macros; and the
   object being scanned.  */
4c571d09 23extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
9889c728
JB
24
25/* The standard syntax table is stored where it will automatically
26 be used in all new buffers. */
27#define Vstandard_syntax_table buffer_defaults.syntax_table
28
e46c910e
RS
/* A syntax table is a char-table whose elements are cons cells of the
   form (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the
   character is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS hold one of the codes below.  The
   numeric values are written out because they are what the SYNTAX
   macro reads back from a table entry.  */

enum syntaxcode
  {
    Swhitespace = 0,     /* a whitespace character */
    Spunct = 1,          /* random punctuation characters */
    Sword = 2,           /* a word constituent */
    Ssymbol = 3,         /* symbol constituent but not word constituent */
    Sopen = 4,           /* a beginning delimiter */
    Sclose = 5,          /* an ending delimiter */
    Squote = 6,          /* a prefix character like Lisp ' */
    Sstring = 7,         /* a string-grouping character like Lisp " */
    Smath = 8,           /* delimiters like $ in TeX */
    Sescape = 9,         /* a character that begins a C-style escape */
    Scharquote = 10,     /* a character that quotes the following character */
    Scomment = 11,       /* a comment-starting character */
    Sendcomment = 12,    /* a comment-ending character */
    Sinherit = 13,       /* use the standard syntax table for this character */
    Scomment_fence = 14, /* starts/ends a comment which is delimited on the
                            other side by any char with the same syntaxcode */
    Sstring_fence = 15,  /* starts/ends a string which is delimited on the
                            other side by any char with the same syntaxcode */
    Smax = 16            /* upper bound on codes that are meaningful */
  };
57
/* Store VAL as the raw syntax entry of character C in syntax table
   TABLE.  This is a direct CHAR_TABLE_SET; VAL is stored as given.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))
e0b8ff93 62
f4926ee8
KH
/* Store VAL as the raw syntax entry for a whole range of characters in
   syntax table TABLE.  RANGE is a cons (FROM . TO) naming the range;
   the work is done by Fset_char_table_range, whose value is the value
   of this macro.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  (Fset_char_table_range ((table), (range), (val)))
9889c728 68
5eea1c5a
RS
/* SYNTAX_ENTRY (C) yields the syntax-table entry for character C.
   When SYNTAX_ENTRY_VIA_PROPERTY is defined and gl_state.use_global is
   nonzero, the entry is the globally kept gl_state.global_code; in
   every other case it is looked up with SYNTAX_ENTRY_INT.

   CURRENT_SYNTAX_TABLE is the syntax table valid for the current
   position: gl_state.current_syntax_table (which may come from text
   properties) when SYNTAX_ENTRY_VIA_PROPERTY is defined, otherwise the
   current buffer's syntax table.  */

#ifndef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#else
# define SYNTAX_ENTRY(c) \
   (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#endif

/* Look up character C in CURRENT_SYNTAX_TABLE.  The earlier comment
   here says the lookup "does inheritance"; in this form that can only
   come from CHAR_TABLE_REF itself.
   NOTE(review): the index is passed as make_number (C) here, while
   SET_RAW_SYNTAX_ENTRY hands C to CHAR_TABLE_SET unconverted --
   confirm which form CHAR_TABLE_REF expects.  */

#define SYNTAX_ENTRY_INT(c) \
  CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, make_number (c))
e0b8ff93 87
/* SYNTAX (C), SYNTAX_WITH_FLAGS (C) and SYNTAX_MATCH (C) extract
   information from SYNTAX_ENTRY (C), the entry for character C in the
   current syntax table:

     SYNTAX             the syntax code, i.e. the low 8 bits of the
                        entry's car, as an enum syntaxcode;
     SYNTAX_WITH_FLAGS  the entry's whole car (code and flags) as an int;
     SYNTAX_MATCH       the entry's cdr (the matching character).

   An entry that is not a cons yields Swhitespace, (int) Swhitespace
   and Qnil respectively.  */

#ifdef __GNUC__

/* GNU C statement expressions: the entry is fetched once into a
   block-local variable.  The local's name ends in an underscore so
   that an argument expression mentioning a caller's variable named
   `temp' or `syntax_temp' still refers to the caller's variable
   instead of to this (not yet initialized) local.  */

#define SYNTAX(c) \
  ({ Lisp_Object syntax_temp_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_temp_) \
      ? (enum syntaxcode) (XINT (XCAR (syntax_temp_)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object syntax_temp_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_temp_) \
      ? XINT (XCAR (syntax_temp_)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object syntax_temp_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_temp_) \
      ? XCDR (syntax_temp_) \
      : Qnil); })

#else /* not __GNUC__ */

/* Without statement expressions the entry is stored in the global
   variable syntax_temp, which every use of these macros overwrites.  */
extern Lisp_Object syntax_temp;

#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))

#endif /* not __GNUC__ */
9889c728 132
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end or Sendcomment
  ends style a or b.  Comment start sequences can start style a or b.
  Style a is always the default.

  Flag N is stored in bit 15 + N of CODE+FLAGS (so flag 1 is bit 16
  and flag 7 is bit 22).
  */
9889c728 150
98bcfee8
RS
/* These macros extract specific flags from an integer FLAGS that holds
   the syntax code and the flags.  Each evaluates to 0 or 1.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)

/* These macros extract a particular flag for a given character C, by
   applying the corresponding SYNTAX_FLAGS_... macro to
   SYNTAX_WITH_FLAGS (C).  */

#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
183
9889c728
JB
/* Indexed by a character, this array contains the syntax code which
   that character signifies (stored as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.  */

extern unsigned char syntax_spec_code[256];

/* Indexed by syntax code, gives the letter that describes it.  */

extern char syntax_code_spec[16];
193
/* Convert the byte offset BYTEPOS into a character position, for the
   object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT:

     a string      -> string_byte_to_char on that string;
     a buffer      -> BYTEPOS taken relative to that buffer's BEGV_BYTE,
                      result relative to its BEGV;
     nil           -> the same, using the current buffer's BEGV_BYTE
                      and BEGV;
     anything else -> BYTEPOS unchanged.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   Those macros do nothing when parse_sexp_lookup_properties is 0, so
   in that case the value is simply 0, for speed.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (parse_sexp_lookup_properties \
   ? (STRINGP (gl_state.object) \
      ? string_byte_to_char (gl_state.object, (bytepos)) \
      : (BUFFERP (gl_state.object) \
         ? (buf_bytepos_to_charpos \
            (XBUFFER (gl_state.object), \
             ((bytepos) \
              + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)) \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
         : (NILP (gl_state.object) \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
            : (bytepos)))) \
   : 0)
212
f79b4b7e
KH
/* Make the syntax table state (gl_state) good for CHARPOS, assuming it
   is currently good for a position before CHARPOS.

   update_syntax_table is called, in the forward direction and at
   CHARPOS + gl_state.offset, only when parse_sexp_lookup_properties
   is nonzero and CHARPOS has reached gl_state.e_property.  The value
   is 1 if that call was made and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   && (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                            gl_state.object), \
       1))
5eea1c5a 223
f79b4b7e
KH
/* Make the syntax table state (gl_state) good for CHARPOS, assuming it
   is currently good for a position after CHARPOS.

   update_syntax_table is called, in the backward direction and at
   CHARPOS + gl_state.offset, only when parse_sexp_lookup_properties
   is nonzero and CHARPOS lies before gl_state.b_property.  The value
   is 1 if that call was made and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   && (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                            gl_state.object), \
       1))
e2d8d746 234
/* Make the syntax table state (gl_state) good for CHARPOS, whichever
   side of the currently valid range it falls on: try the backward
   update first and, if that did nothing, the forward one.  The value
   is 1 if update_syntax_table was called and 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (UPDATE_SYNTAX_TABLE_BACKWARD (charpos) \
   || UPDATE_SYNTAX_TABLE_FORWARD (charpos))
5eea1c5a
RS
249
/* Reset gl_state for a scan of the current buffer.

   This macro should be called with FROM at the start of a forward
   search, or after the last position of a backward search.  It makes
   sure that the first char is picked up with the correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.  The sign of COUNT gives the direction of the search.

   The valid range is set to [BEGV, ZV + 1), the object to nil, the
   offset to 0 and the table to the current buffer's syntax table.
   Then, if parse_sexp_lookup_properties is nonzero, update_syntax_table
   is called at FROM (forward) or FROM - 1 (backward); the backward
   call is skipped when FROM is not after BEGV.

   Use it as an ordinary statement followed by a semicolon.  The body
   is wrapped in do ... while (0) so that leaving the semicolon out is
   a compile error instead of silently swallowing the next statement.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
  do \
    { \
      gl_state.b_property = BEGV; \
      gl_state.e_property = ZV + 1; \
      gl_state.object = Qnil; \
      gl_state.use_global = 0; \
      gl_state.offset = 0; \
      gl_state.current_syntax_table = current_buffer->syntax_table; \
      if (parse_sexp_lookup_properties \
          && ((COUNT) > 0 || (FROM) > BEGV)) \
        update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, \
                             (COUNT), 1, Qnil); \
    } \
  while (0)
5eea1c5a
RS
273
/* Same as SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use
   the current buffer.  If it is t, ignore properties altogether.
   Any OBJECT that is not a buffer, nil or t is accessed as a string.

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV (1 means BEGV).  So if it is a buffer, the offset field is set
   to BEGV - 1 and the valid range starts out as [1, ZV - BEGV + 1).
   For t and for strings the offset is 0; for t the upper limit is
   the large constant 1500000000.

   If parse_sexp_lookup_properties is nonzero, update_syntax_table is
   then called at FROM + offset (forward, COUNT > 0) or one position
   earlier (otherwise).

   FROM and COUNT are parenthesized wherever they are used, so
   operator expressions such as `flags & 2' may be passed.  Use the
   macro as an ordinary statement followed by a semicolon; the
   do ... while (0) wrapper makes a missing semicolon a compile error.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
  do \
    { \
      gl_state.object = (OBJECT); \
      if (BUFFERP (gl_state.object)) \
        { \
          struct buffer *buf = XBUFFER (gl_state.object); \
          gl_state.b_property = 1; \
          gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
          gl_state.offset = BUF_BEGV (buf) - 1; \
        } \
      else if (NILP (gl_state.object)) \
        { \
          gl_state.b_property = 1; \
          gl_state.e_property = ZV - BEGV + 1; \
          gl_state.offset = BEGV - 1; \
        } \
      else if (EQ (gl_state.object, Qt)) \
        { \
          gl_state.b_property = 0; \
          gl_state.e_property = 1500000000; \
          gl_state.offset = 0; \
        } \
      else \
        { \
          gl_state.b_property = 0; \
          gl_state.e_property = 1 + XSTRING (gl_state.object)->size; \
          gl_state.offset = 0; \
        } \
      gl_state.use_global = 0; \
      gl_state.current_syntax_table = current_buffer->syntax_table; \
      if (parse_sexp_lookup_properties) \
        update_syntax_table (((FROM) + gl_state.offset \
                              + ((COUNT) > 0 ? 0 : -1)), \
                             (COUNT), 1, gl_state.object); \
    } \
  while (0)
5eea1c5a
RS
318
319struct gl_state_s
320{
c292db29 321 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
322 int start; /* Where to stop. */
323 int stop; /* Where to stop. */
324 int use_global; /* Whether to use global_code
325 or c_s_t. */
326 Lisp_Object global_code; /* Syntax code of current char. */
327 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
328 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 329 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
330 int e_property; /* First index where c_s_t is
331 not valid. */
332 INTERVAL forward_i; /* Where to start lookup on forward */
333 INTERVAL backward_i; /* or backward movement. The
334 data in c_s_t is valid
335 between these intervals,
336 and possibly at the
337 intervals too, depending
338 on: */
e2d8d746
RS
339 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
340 int offset;
5eea1c5a
RS
341};
342
/* The global scanning state; the macros in this file read and write
   its fields directly.  */
343extern struct gl_state_s gl_state;
/* When zero, the UPDATE_SYNTAX_TABLE... macros evaluate to 0 without
   calling update_syntax_table, and the SETUP_SYNTAX_TABLE... macros
   skip their call to it as well.  */
344extern int parse_sexp_lookup_properties;
4c571d09
AS
/* Prototypes only; neither function is used by the macros in this file.  */
345extern INTERVAL interval_of P_ ((int, Lisp_Object));
346
347extern int scan_words P_ ((int, int));