Trailing whitespace deleted.
[bpt/emacs.git] / src / syntax.h
CommitLineData
9889c728 1/* Declarations having to do with GNU Emacs syntax tables.
4a2f9c6a 2 Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.
9889c728
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
9889c728
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
9889c728
JB
20
21
/* Declarations of objects defined outside this header.  The macros
   UPDATE_SYNTAX_TABLE..., SETUP_SYNTAX_TABLE and
   SETUP_SYNTAX_TABLE_FOR_OBJECT in this file call update_syntax_table
   with a position, a direction or count, a flag (0 from the UPDATE
   macros, 1 from the SETUP macros) and the object being scanned.  */
22extern Lisp_Object Qsyntax_table_p;
4c571d09 23extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
9889c728
JB
24
/* Vstandard_syntax_table is not a separate variable; it expands to the
   syntax_table member of buffer_defaults.  */
25/* The standard syntax table is stored where it will automatically
26 be used in all new buffers. */
27#define Vstandard_syntax_table buffer_defaults.syntax_table
28
/* The enumerators carry no explicit values, so they number upward from
   Swhitespace == 0 and Smax equals the count of real codes (16).
   The SYNTAX macro in this file falls back to Swhitespace when a
   character has no cons-cell entry.  */
e46c910e
RS
29/* A syntax table is a chartable whose elements are cons cells
30 (CODE+FLAGS . MATCHING-CHAR). MATCHING-CHAR can be nil if the char
31 is not a kind of parenthesis.
9889c728 32
e46c910e 33 The low 8 bits of CODE+FLAGS is a code, as follows: */
9889c728
JB
34
35enum syntaxcode
36 {
37 Swhitespace, /* for a whitespace character */
38 Spunct, /* for random punctuation characters */
39 Sword, /* for a word constituent */
40 Ssymbol, /* symbol constituent but not word constituent */
41 Sopen, /* for a beginning delimiter */
42 Sclose, /* for an ending delimiter */
43 Squote, /* for a prefix character like Lisp ' */
44 Sstring, /* for a string-grouping character like Lisp " */
5eea1c5a 45 Smath, /* for delimiters like $ in TeX. */
9889c728
JB
46 Sescape, /* for a character that begins a C-style escape */
47 Scharquote, /* for a character that quotes the following character */
48 Scomment, /* for a comment-starting character */
49 Sendcomment, /* for a comment-ending character */
c8cdcb16 50 Sinherit, /* use the standard syntax table for this character */
5eea1c5a 51 Scomment_fence, /* Starts/ends comment which is delimited on the
47ab3db5 52 other side by any char with the same syntaxcode. */
5eea1c5a 53 Sstring_fence, /* Starts/ends string which is delimited on the
47ab3db5 54 other side by any char with the same syntaxcode. */
9889c728
JB
55 Smax /* Upper bound on codes that are meaningful */
56 };
57
e0b8ff93 58/* Set the syntax entry VAL for char C in table TABLE.
   When C is below CHAR_TABLE_SINGLE_BYTE_SLOTS, VAL is stored directly
   in the contents array of the char table, indexed by C cast to
   unsigned char; otherwise it is stored with Faset under
   make_number (C).  The value of the macro is the value of whichever
   of those two expressions ran.  C is expanded more than once, so
   pass an expression without side effects.  */
e46c910e 59
0e35bfd8 60#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
9ca6ab7d 61 ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
0e35bfd8
KH
62 ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
63 : Faset ((table), make_number (c), (val)))
e0b8ff93
KH
64
/* Two definitions of SYNTAX_ENTRY_FOLLOW_PARENT follow.  Under
   __GNUC__ it is a statement expression that reads contents[C] of
   TABLE and, while that value is nil, moves to the parent table and
   reads again, stopping when the parent itself is nil; the result is
   the last value read, which may still be nil.  Otherwise it reads
   contents[C] into the global syntax_temp and, if that is nil, calls
   syntax_parent_lookup (TABLE, C); this form expands TABLE twice.
   The one use visible in this file, SYNTAX_ENTRY_INT, invokes it only
   for C below CHAR_TABLE_SINGLE_BYTE_SLOTS.  */
65/* Fetch the syntax entry for char C in syntax table TABLE.
66 This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS.
67 Do inheritance. */
68
69#ifdef __GNUC__
70#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
71 ({ Lisp_Object tbl = table; \
72 Lisp_Object temp = XCHAR_TABLE (tbl)->contents[(c)]; \
73 while (NILP (temp)) \
74 { \
75 tbl = XCHAR_TABLE (tbl)->parent; \
76 if (NILP (tbl)) \
77 break; \
78 temp = XCHAR_TABLE (tbl)->contents[(c)]; \
79 } \
80 temp; })
c8cdcb16 81#else
e0b8ff93 82extern Lisp_Object syntax_temp;
4c571d09 83extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
e46c910e 84
e0b8ff93
KH
85#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
86 (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
87 (NILP (syntax_temp) \
88 ? syntax_parent_lookup (table, (c)) \
89 : syntax_temp))
c8cdcb16 90#endif
9889c728 91
/* When SYNTAX_ENTRY_VIA_PROPERTY is defined, SYNTAX_ENTRY (c) yields
   gl_state.global_code if gl_state.use_global is nonzero and
   SYNTAX_ENTRY_INT (c) otherwise, and CURRENT_SYNTAX_TABLE expands to
   gl_state.current_syntax_table.  When it is not defined,
   SYNTAX_ENTRY is just another name for SYNTAX_ENTRY_INT and
   CURRENT_SYNTAX_TABLE expands to current_buffer->syntax_table.  */
5eea1c5a 92/* SYNTAX_ENTRY fetches the information from the entry for character C
177c0ea7 93 in the current syntax table, or from globally kept data (gl_state).
5eea1c5a
RS
94 Does inheritance. */
95/* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
96 position, it is either the buffer's syntax table, or syntax table
97 found in text properties. */
98
99#ifdef SYNTAX_ENTRY_VIA_PROPERTY
100# define SYNTAX_ENTRY(c) \
101 (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
102# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
103#else
104# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
105# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
106#endif
e0b8ff93 107
/* SYNTAX_ENTRY_INT (c) looks C up in CURRENT_SYNTAX_TABLE.  For C
   below CHAR_TABLE_SINGLE_BYTE_SLOTS it uses
   SYNTAX_ENTRY_FOLLOW_PARENT with C cast to unsigned char; for any
   other C it calls Faref with make_number (C).  C is expanded more
   than once, so pass an expression without side effects.  */
aa9b6dd6
KH
108#define SYNTAX_ENTRY_INT(c) \
109 ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
110 ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
111 (unsigned char) (c)) \
112 : Faref (CURRENT_SYNTAX_TABLE, \
113 make_number (c)))
e0b8ff93 114
/* Three accessors built on SYNTAX_ENTRY (c):
     SYNTAX (c)             the low 8 bits of the car of the entry,
                            cast to enum syntaxcode;
     SYNTAX_WITH_FLAGS (c)  the whole car of the entry as an integer;
     SYNTAX_MATCH (c)       the cdr of the entry.
   When the entry is not a cons they yield Swhitespace,
   (int) Swhitespace and Qnil respectively.  The __GNUC__ versions
   keep the entry in a local inside a statement expression; the
   other versions store it in the global syntax_temp.  */
e46c910e 115/* Extract the information from the entry for character C
e0b8ff93 116 in the current syntax table. */
c8cdcb16
RS
117
118#ifdef __GNUC__
e46c910e
RS
119#define SYNTAX(c) \
120 ({ Lisp_Object temp; \
121 temp = SYNTAX_ENTRY (c); \
122 (CONSP (temp) \
3331fb06 123 ? (enum syntaxcode) (XINT (XCAR (temp)) & 0xff) \
e0b8ff93 124 : Swhitespace); })
e46c910e
RS
125
126#define SYNTAX_WITH_FLAGS(c) \
127 ({ Lisp_Object temp; \
128 temp = SYNTAX_ENTRY (c); \
129 (CONSP (temp) \
3331fb06 130 ? XINT (XCAR (temp)) \
e0b8ff93 131 : (int) Swhitespace); })
e46c910e
RS
132
133#define SYNTAX_MATCH(c) \
134 ({ Lisp_Object temp; \
135 temp = SYNTAX_ENTRY (c); \
136 (CONSP (temp) \
3331fb06 137 ? XCDR (temp) \
e0b8ff93 138 : Qnil); })
c8cdcb16 139#else
e46c910e 140#define SYNTAX(c) \
9d40ebd2 141 (syntax_temp = SYNTAX_ENTRY ((c)), \
e46c910e 142 (CONSP (syntax_temp) \
3331fb06 143 ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
e0b8ff93 144 : Swhitespace))
e46c910e
RS
145
146#define SYNTAX_WITH_FLAGS(c) \
9d40ebd2 147 (syntax_temp = SYNTAX_ENTRY ((c)), \
e46c910e 148 (CONSP (syntax_temp) \
3331fb06 149 ? XINT (XCAR (syntax_temp)) \
e0b8ff93 150 : (int) Swhitespace))
e46c910e
RS
151
152#define SYNTAX_MATCH(c) \
9d40ebd2 153 (syntax_temp = SYNTAX_ENTRY ((c)), \
e46c910e 154 (CONSP (syntax_temp) \
3331fb06 155 ? XCDR (syntax_temp) \
e0b8ff93 156 : Qnil))
c8cdcb16 157#endif
9889c728 158
c0364919 159/* Then there are seven single-bit flags that have the following meanings:
9889c728
JB
160 1. This character is the first of a two-character comment-start sequence.
161 2. This character is the second of a two-character comment-start sequence.
162 3. This character is the first of a two-character comment-end sequence.
163 4. This character is the second of a two-character comment-end sequence.
164 5. This character is a prefix, for backward-prefix-chars.
c0364919
RS
165 6. see below
166 7. This character is part of a nestable comment sequence.
a306d6f1
RS
167 Note that any two-character sequence whose first character has flag 1
168 and whose second character has flag 2 will be interpreted as a comment start.
169
170 Flag 6 is used to discriminate between two different comment styles.
171 Languages such as C++ allow two orthogonal syntax start/end pairs
172 and flag 6 is used to determine whether a comment-end or Sendcomment
5eea1c5a 173 ends style a or b. Comment start sequences can start style a or b.
a306d6f1
RS
174 Style a is always the default.
175 */
9889c728 176
/* Each macro below looks character C up with SYNTAX_WITH_FLAGS and
   yields 0 or 1 according to one bit of the result:
     bit 16  SYNTAX_COMSTART_FIRST      bit 17  SYNTAX_COMSTART_SECOND
     bit 18  SYNTAX_COMEND_FIRST        bit 19  SYNTAX_COMEND_SECOND
     bit 20  SYNTAX_PREFIX              bit 21  SYNTAX_COMMENT_STYLE
     bit 22  SYNTAX_COMMENT_NESTED  */
98bcfee8
RS
177/* These macros extract a particular flag for a given character. */
178
e46c910e 179#define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)
c8cdcb16 180
e46c910e 181#define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)
c8cdcb16 182
e46c910e 183#define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)
c8cdcb16 184
e46c910e 185#define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)
c8cdcb16 186
e46c910e 187#define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)
9889c728 188
e46c910e 189#define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)
a306d6f1 190
c0364919
RS
191#define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
192
/* Same bit tests as the SYNTAX_... per-character macros (bits 16
   through 22), but applied to an integer FLAGS that the caller has
   already fetched, so no table lookup is done here.  Each macro
   yields 0 or 1.  */
98bcfee8
RS
193/* These macros extract specific flags from an integer
194 that holds the syntax code and the flags. */
195
196#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
197
198#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
199
200#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
201
202#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
203
204#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
205
206#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)
207
c0364919
RS
208#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
209
/* Two lookup tables declared here and defined elsewhere.  The size
   0400 of syntax_spec_code is octal, that is 256 entries.  The size
   16 of syntax_code_spec equals the value of Smax in enum syntaxcode
   as declared in this file.  */
9889c728
JB
210/* This array, indexed by a character, contains the syntax code which that
211 character signifies (as a char). For example,
5eea1c5a 212 (enum syntaxcode) syntax_spec_code['w'] is Sword. */
9889c728
JB
213
214extern unsigned char syntax_spec_code[0400];
215
5eea1c5a
RS
216/* Indexed by syntax code, give the letter that describes it. */
217
218extern char syntax_code_spec[16];
219
/* Summary of the cases tested in order by SYNTAX_TABLE_BYTE_TO_CHAR:
     parse_sexp_lookup_properties is zero: the value is 0;
     gl_state.object is a string: string_byte_to_char is used;
     gl_state.object is a buffer: BUF_BEGV_BYTE - 1 of that buffer is
       added to BYTEPOS, the sum is converted with
       buf_bytepos_to_charpos, and BUF_BEGV - 1 is subtracted again;
     gl_state.object is nil: the same computation with BEGV_BYTE,
       BYTE_TO_CHAR and BEGV;
     anything else: BYTEPOS is returned unchanged.
   BYTEPOS is expanded once in each branch.  */
c292db29 220/* Convert the byte offset BYTEPOS into a character position,
2f16e7fd
RS
221 for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.
222
223 The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
224 These macros do nothing when parse_sexp_lookup_properties is 0,
225 so we return 0 in that case, for speed. */
c292db29
RS
226
227#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
2f16e7fd
RS
228 (! parse_sexp_lookup_properties \
229 ? 0 \
230 : STRINGP (gl_state.object) \
c292db29
RS
231 ? string_byte_to_char (gl_state.object, (bytepos)) \
232 : BUFFERP (gl_state.object) \
f79b4b7e
KH
233 ? buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
234 (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) - BUF_BEGV (XBUFFER (gl_state.object)) + 1 \
c292db29 235 : NILP (gl_state.object) \
f79b4b7e 236 ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
c292db29
RS
237 : (bytepos))
238
/* UPDATE_SYNTAX_TABLE_FORWARD (charpos) is an expression.  If
   parse_sexp_lookup_properties is nonzero and CHARPOS is at or past
   gl_state.e_property, it calls update_syntax_table with
   CHARPOS + gl_state.offset and direction 1, and its value is 1.
   Otherwise nothing is called and its value is 0.  CHARPOS is
   expanded twice.  */
f79b4b7e
KH
239/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
240 currently good for a position before CHARPOS. */
5eea1c5a 241
f79b4b7e 242#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
2f16e7fd 243 (parse_sexp_lookup_properties \
f79b4b7e
KH
244 && (charpos) >= gl_state.e_property \
245 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
c292db29
RS
246 gl_state.object), \
247 1) \
248 : 0)
5eea1c5a 249
/* UPDATE_SYNTAX_TABLE_BACKWARD (charpos) is an expression.  If
   parse_sexp_lookup_properties is nonzero and CHARPOS is below
   gl_state.b_property, it calls update_syntax_table with
   CHARPOS + gl_state.offset and direction -1, and its value is 1.
   Otherwise nothing is called and its value is 0.  CHARPOS is
   expanded twice.  */
f79b4b7e
KH
250/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
251 currently good for a position after CHARPOS. */
5eea1c5a 252
f79b4b7e 253#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
2f16e7fd 254 (parse_sexp_lookup_properties \
4948e1f2 255 && (charpos) < gl_state.b_property \
f79b4b7e 256 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
c292db29
RS
257 gl_state.object), \
258 1) \
259 : 0)
e2d8d746 260
/* UPDATE_SYNTAX_TABLE (charpos) combines the backward and forward
   checks in one expression.  It first tests CHARPOS against
   gl_state.b_property and, if CHARPOS is below it, updates with
   direction -1; otherwise it tests CHARPOS against
   gl_state.e_property and, if CHARPOS is at or past it, updates with
   direction 1.  Both tests also require parse_sexp_lookup_properties
   to be nonzero.  The value is 1 if update_syntax_table was called
   and 0 if not.  CHARPOS is expanded several times.  */
f79b4b7e 261/* Make syntax table good for CHARPOS. */
e2d8d746 262
f79b4b7e 263#define UPDATE_SYNTAX_TABLE(charpos) \
2f16e7fd 264 (parse_sexp_lookup_properties \
4948e1f2 265 && (charpos) < gl_state.b_property \
f79b4b7e 266 ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
c292db29
RS
267 gl_state.object), \
268 1) \
2f16e7fd 269 : (parse_sexp_lookup_properties \
f79b4b7e
KH
270 && (charpos) >= gl_state.e_property \
271 ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,\
c292db29
RS
272 gl_state.object), \
273 1) \
274 : 0))
5eea1c5a
RS
275
/* What SETUP_SYNTAX_TABLE (FROM, COUNT) does, step by step:
   it sets gl_state.b_property to BEGV, e_property to ZV + 1, object
   to Qnil, use_global and offset to 0, and current_syntax_table to
   current_buffer->syntax_table.  Then, only if
   parse_sexp_lookup_properties is nonzero and either COUNT > 0 or
   FROM > BEGV, it calls update_syntax_table, passing FROM when
   COUNT > 0 and FROM - 1 otherwise.

   The expansion has the shape  if (1) { ... } else  with nothing
   after the else, so whatever statement follows the invocation
   (normally the empty statement formed by the trailing semicolon)
   becomes the else branch.  FROM and COUNT are each expanded more
   than once.  */
276/* This macro should be called with FROM at the start of forward
277 search, or after the last position of the backward search. It
278 makes sure that the first char is picked up with correct table, so
279 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
177c0ea7 280 call.
5eea1c5a
RS
281 Sign of COUNT gives the direction of the search.
282 */
283
c292db29 284#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
2f16e7fd
RS
285if (1) \
286 { \
4948e1f2 287 gl_state.b_property = BEGV; \
2f16e7fd
RS
288 gl_state.e_property = ZV + 1; \
289 gl_state.object = Qnil; \
290 gl_state.use_global = 0; \
291 gl_state.offset = 0; \
292 gl_state.current_syntax_table = current_buffer->syntax_table; \
293 if (parse_sexp_lookup_properties) \
9b9794f0
RS
294 if ((COUNT) > 0 || (FROM) > BEGV) \
295 update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),\
296 1, Qnil); \
2f16e7fd
RS
297 } \
298else
5eea1c5a
RS
299
300/* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
e2d8d746
RS
301 If it is t, ignore properties altogether.
302
303 This is meant for regex.c to use. For buffers, regex.c passes arguments
304 to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
4bbd5bc3 305 So if it is a buffer, we set the offset field to BEGV - 1.

   Values stored in gl_state for each kind of OBJECT:
     buffer:  b_property 1, e_property BUF_ZV - BUF_BEGV + 1,
              offset BUF_BEGV - 1 (all taken from that buffer);
     nil:     b_property 1, e_property ZV - BEGV + 1, offset BEGV - 1;
     t:       b_property 0, e_property 1500000000, offset 0;
     other:   b_property 0, e_property 1 + SCHARS (OBJECT), offset 0.
   In every case use_global is cleared and current_syntax_table is
   set to current_buffer->syntax_table.  If
   parse_sexp_lookup_properties is nonzero, update_syntax_table is
   then called at FROM + offset, or one position earlier when COUNT
   is not positive.

   FROM and COUNT are parenthesized at every use so that an argument
   containing a low-precedence operator keeps its meaning.  OBJECT
   and FROM are expanded once, COUNT twice.  Like SETUP_SYNTAX_TABLE,
   the expansion ends in a bare else, so the statement that follows
   the invocation becomes the else branch.  */
5eea1c5a 306
c292db29 307#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
4bbd5bc3
RS
308if (1) \
309 { \
c292db29
RS
310 gl_state.object = (OBJECT); \
311 if (BUFFERP (gl_state.object)) \
312 { \
313 struct buffer *buf = XBUFFER (gl_state.object); \
4948e1f2 314 gl_state.b_property = 1; \
1d1293dd 315 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
c292db29
RS
316 gl_state.offset = BUF_BEGV (buf) - 1; \
317 } \
318 else if (NILP (gl_state.object)) \
4bbd5bc3 319 { \
4948e1f2 320 gl_state.b_property = 1; \
1d1293dd 321 gl_state.e_property = ZV - BEGV + 1; \
4bbd5bc3
RS
322 gl_state.offset = BEGV - 1; \
323 } \
c292db29 324 else if (EQ (gl_state.object, Qt)) \
4bbd5bc3 325 { \
4948e1f2 326 gl_state.b_property = 0; \
4bbd5bc3
RS
327 gl_state.e_property = 1500000000; \
328 gl_state.offset = 0; \
329 } \
330 else \
331 { \
4948e1f2 332 gl_state.b_property = 0; \
d5db4077 333 gl_state.e_property = 1 + SCHARS (gl_state.object); \
4bbd5bc3
RS
334 gl_state.offset = 0; \
335 } \
336 gl_state.use_global = 0; \
337 gl_state.current_syntax_table = current_buffer->syntax_table; \
338 if (parse_sexp_lookup_properties) \
f79b4b7e 339 update_syntax_table (((FROM) + gl_state.offset \
c292db29
RS
340 + ((COUNT) > 0 ? 0 : -1)), \
341 (COUNT), 1, gl_state.object); \
4bbd5bc3
RS
342 } \
343else
5eea1c5a
RS
344
/* State shared by the syntax macros in this file.  The SETUP_...
   macros initialize object, use_global, current_syntax_table,
   b_property, e_property and offset.  The UPDATE_SYNTAX_TABLE...
   macros call update_syntax_table when a position is below
   b_property or at or past e_property, after adding offset to it.
   In the field comments, c_s_t presumably abbreviates
   current_syntax_table -- confirm.  */
345struct gl_state_s
346{
c292db29 347 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
348 int start; /* Presumably where to start; the comment here
   used to read the same as the one on stop
   -- TODO confirm. */
349 int stop; /* Where to stop. */
350 int use_global; /* Whether to use global_code
351 or c_s_t. */
352 Lisp_Object global_code; /* Syntax code of current char. */
353 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
354 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 355 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
356 int e_property; /* First index where c_s_t is
357 not valid. */
358 INTERVAL forward_i; /* Where to start lookup on forward */
359 INTERVAL backward_i; /* or backward movement. The
360 data in c_s_t is valid
361 between these intervals,
362 and possibly at the
363 intervals too, depending
364 on: */
e2d8d746
RS
365 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
366 int offset;
5eea1c5a
RS
367};
368
/* gl_state is the one instance of struct gl_state_s that all the
   macros in this file read and write.  parse_sexp_lookup_properties
   gates them: when it is zero the UPDATE_SYNTAX_TABLE... macros call
   nothing and SYNTAX_TABLE_BYTE_TO_CHAR yields 0.  interval_of and
   scan_words are only declared here; nothing in this file calls
   them.  */
369extern struct gl_state_s gl_state;
370extern int parse_sexp_lookup_properties;
4c571d09
AS
371extern INTERVAL interval_of P_ ((int, Lisp_Object));
372
373extern int scan_words P_ ((int, int));