(mouse-drag-region-1): When remapping mouse-1 to mouse-2, go back to
[bpt/emacs.git] / src / syntax.h
CommitLineData
9889c728 1/* Declarations having to do with GNU Emacs syntax tables.
0b5538bd
TTN
2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2002, 2003, 2004,
3 2005 Free Software Foundation, Inc.
9889c728
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
9889c728
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
9889c728
JB
21
22
23extern Lisp_Object Qsyntax_table_p;
4c571d09 24extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
9889c728
JB
25
/* The standard syntax table is stored in buffer_defaults, where it
   will automatically be used in all new buffers.  */
#define Vstandard_syntax_table buffer_defaults.syntax_table
29
e46c910e
RS
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS is a code, as follows.  The numeric
   values matter: they are what is stored in the table, so new codes
   must be added just before Smax.  */

enum syntaxcode
  {
    Swhitespace,     /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful */
  };
58
/* Set the syntax entry VAL for char C in table TABLE.

   When C fits in the low 8 bits it is stored directly into the
   table's `contents' slot; any other C goes through Faset.
   C is evaluated more than once, so it must have no side effects.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  ((((c) & 0xFF) == (c)) \
   ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
   : Faset ((table), make_number (c), (val)))
e0b8ff93
KH
65
/* Fetch the syntax entry for char C in syntax table TABLE.
   This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS,
   because C indexes the `contents' array directly.
   Do inheritance: while the entry is nil, look in the parent table.  */

#ifdef __GNUC__
/* Statement-expression form.  TABLE is evaluated once and the lookup
   state lives in block-local variables.  The value is nil when no
   table in the parent chain has a non-nil entry for C.  */
#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  ({ Lisp_Object _syntax_tbl = (table); \
     Lisp_Object _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
     while (NILP (_syntax_temp)) \
       { \
         _syntax_tbl = XCHAR_TABLE (_syntax_tbl)->parent; \
         if (NILP (_syntax_tbl)) \
           break; \
         _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
       } \
     _syntax_temp; })
#else
/* Portable form.  The entry is staged in the global `syntax_temp' and
   the walk up the parent chain is delegated to syntax_parent_lookup.
   TABLE and C are each evaluated more than once here.  */
extern Lisp_Object syntax_temp;
extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));

#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
   (NILP (syntax_temp) \
    ? syntax_parent_lookup (table, (c)) \
    : syntax_temp))
#endif
9889c728 92
/* SYNTAX_ENTRY fetches the information from the entry for character C
   in syntax table TABLE, or from globally kept data (gl_state).
   Does inheritance.

   CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.

   With SYNTAX_ENTRY_VIA_PROPERTY defined, both go through gl_state:
   gl_state.global_code is used directly when gl_state.use_global is
   set.  Otherwise only the current buffer's table is consulted.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY(c) \
  (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif
e0b8ff93 108
/* Fetch the raw entry for C from CURRENT_SYNTAX_TABLE.  A C that fits
   in the low 8 bits takes the direct-slot path with parent
   inheritance; any other C is looked up with Faref.
   C is evaluated more than once.  */
#define SYNTAX_ENTRY_INT(c) \
  ((((c) & 0xFF) == (c)) \
   ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
                                 (unsigned char) (c)) \
   : Faref (CURRENT_SYNTAX_TABLE, \
            make_number (c)))
e0b8ff93 115
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (c)            the syntax code (low 8 bits of CODE+FLAGS).
   SYNTAX_WITH_FLAGS (c) the whole CODE+FLAGS integer.
   SYNTAX_MATCH (c)      the MATCHING-CHAR part of the entry.

   An entry that is not a cons yields Swhitespace (for the first two)
   or Qnil (for SYNTAX_MATCH).  */

#ifdef __GNUC__
/* Statement-expression forms: the entry is held in a block-local.  */
#define SYNTAX(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XINT (XCAR (_syntax_temp)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object _syntax_temp; \
     _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XCDR (_syntax_temp) \
      : Qnil); })
#else
/* Portable forms: the entry is staged in the global `syntax_temp'.  */
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif
9889c728 159
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. This character belongs to comment style b rather than a; see below.
  7. This character is part of a nestable comment sequence.
  Flag N is stored in bit 15 + N of CODE+FLAGS (flag 1 is bit 16,
  flag 7 is bit 22).
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end or Sendcomment
  ends style a or b.  Comment start sequences can start style a or b.
  Style a is always the default.
  */
9889c728 177
98bcfee8
RS
/* These macros extract a particular flag for a given character.
   Each looks up C in the current syntax table with SYNTAX_WITH_FLAGS
   and yields 0 or 1.  The flags occupy bits 16 through 22 of
   CODE+FLAGS.  */

#define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)

#define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)

#define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)

#define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)

#define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)

#define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)

#define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
193
98bcfee8
RS
/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.
   Each yields 0 or 1; no table lookup is involved, so they are the
   cheap choice once the CODE+FLAGS value is already in hand.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
210
9889c728
JB
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The bound 0400 is octal, i.e. 256 entries: one per 8-bit character.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.
   The 16 slots correspond to the 16 codes below Smax.  */

extern char syntax_code_spec[16];
220
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
     string  - string_byte_to_char on that string;
     buffer  - BYTEPOS is taken relative to that buffer's BEGV_BYTE and
               the result is made relative to its BEGV;
     nil     - the same, using the current buffer;
     other   - BYTEPOS is returned unchanged.
   BYTEPOS appears once in each arm, so it is evaluated at most once.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : (STRINGP (gl_state.object) \
      ? string_byte_to_char (gl_state.object, (bytepos)) \
      : (BUFFERP (gl_state.object) \
         ? (buf_bytepos_to_charpos \
              (XBUFFER (gl_state.object), \
               (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
         : (NILP (gl_state.object) \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
            : (bytepos)))))
239
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is set and CHARPOS
   has reached gl_state.e_property.  The value is 1 if gl_state was
   updated, 0 otherwise.  CHARPOS may be evaluated twice.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                           gl_state.object), \
      1) \
   : 0)
5eea1c5a 250
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is set and CHARPOS
   is below gl_state.b_property.  The value is 1 if gl_state was
   updated, 0 otherwise.  CHARPOS may be evaluated twice.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : 0)
e2d8d746 261
/* Make syntax table good for CHARPOS, whichever side of the currently
   valid range [gl_state.b_property, gl_state.e_property) it falls on.

   The backward case is tried first, then the forward case.  The value
   is 1 if gl_state was updated, 0 otherwise (including whenever
   parse_sexp_lookup_properties is 0).  CHARPOS is evaluated more than
   once.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : (parse_sexp_lookup_properties \
      && (charpos) >= gl_state.e_property \
      ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                              gl_state.object), \
         1) \
      : 0))
5eea1c5a
RS
276
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   gl_state is reset to describe the current buffer (object nil,
   offset 0, valid range BEGV .. ZV).  When scanning backward from
   BEGV there is no preceding character, so update_syntax_table is
   not called in that case.

   The `if (1) ... else' wrapper makes the expansion a single
   statement that absorbs the semicolon written after the macro call.
   FROM and COUNT are evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
if (1) \
  { \
    gl_state.b_property = BEGV; \
    gl_state.e_property = ZV + 1; \
    gl_state.object = Qnil; \
    gl_state.use_global = 0; \
    gl_state.offset = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      { \
        if ((COUNT) > 0 || (FROM) > BEGV) \
          update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
                               1, Qnil); \
      } \
  } \
else
5eea1c5a
RS
300
/* Like SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use
   current buffer.  If it is t, ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer (or nil), we set the offset field to BEGV - 1,
   which maps relative position 1 onto BEGV.

   Any OBJECT that is not a buffer, nil or t is measured with SCHARS,
   i.e. it is treated as a string.

   The `if (1) ... else' wrapper makes the expansion a single
   statement that absorbs the semicolon written after the macro call.
   FROM and COUNT are fully parenthesized at every use; COUNT is
   evaluated more than once.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
if (1) \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        /* Properties are ignored: use a range so wide that the \
           UPDATE_SYNTAX_TABLE... tests never fire.  */ \
        gl_state.b_property = 0; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + SCHARS (gl_state.object); \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
else
5eea1c5a
RS
345
346struct gl_state_s
347{
c292db29 348 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
349 int start; /* Where to stop. */
350 int stop; /* Where to stop. */
351 int use_global; /* Whether to use global_code
352 or c_s_t. */
353 Lisp_Object global_code; /* Syntax code of current char. */
354 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
355 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 356 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
357 int e_property; /* First index where c_s_t is
358 not valid. */
359 INTERVAL forward_i; /* Where to start lookup on forward */
360 INTERVAL backward_i; /* or backward movement. The
361 data in c_s_t is valid
362 between these intervals,
363 and possibly at the
364 intervals too, depending
365 on: */
e2d8d746
RS
366 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
367 int offset;
5eea1c5a
RS
368};
369
370extern struct gl_state_s gl_state;
371extern int parse_sexp_lookup_properties;
4c571d09
AS
372extern INTERVAL interval_of P_ ((int, Lisp_Object));
373
374extern int scan_words P_ ((int, int));
ab5796a9
MB
375
376/* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
377 (do not change this comment) */