Use XCAR and XCDR.
[bpt/emacs.git] / src / syntax.h
1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
/* Lisp object defined in another file.  NOTE(review): judging by the
   name this is presumably the symbol `syntax-table-p' -- confirm
   against its definition.  */
extern Lisp_Object Qsyntax_table_p;

/* Called by the UPDATE_SYNTAX_TABLE... and SETUP_SYNTAX_TABLE... macros
   in this file.  As those macros call it, the arguments are: a
   position, a count whose sign gives the scan direction, a flag that
   is 1 when called from the SETUP_... macros and 0 when called from
   the UPDATE_... macros, and the object being scanned.  */
extern void update_syntax_table P_ ((int, int, int, Lisp_Object));

/* The standard syntax table is stored where it will automatically
   be used in all new buffers.  */
#define Vstandard_syntax_table buffer_defaults.syntax_table
28
/* Every element of a syntax table (which is a chartable) is a cons
   cell of the form (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR may be
   nil when the character is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS hold one of the codes below.  */

enum syntaxcode
  {
    Swhitespace = 0,	 /* whitespace character */
    Spunct = 1,		 /* random punctuation character */
    Sword = 2,		 /* word constituent */
    Ssymbol = 3,	 /* symbol constituent that is not a word constituent */
    Sopen = 4,		 /* beginning delimiter */
    Sclose = 5,		 /* ending delimiter */
    Squote = 6,		 /* prefix character, like Lisp ' */
    Sstring = 7,	 /* string-grouping character, like Lisp " */
    Smath = 8,		 /* delimiter like $ in TeX */
    Sescape = 9,	 /* character that begins a C-style escape */
    Scharquote = 10,	 /* character that quotes the following character */
    Scomment = 11,	 /* comment-starting character */
    Sendcomment = 12,	 /* comment-ending character */
    Sinherit = 13,	 /* use the standard syntax table for this character */
    Scomment_fence = 14, /* starts/ends a comment that is delimited on the
			    other side by any char with the same syntaxcode */
    Sstring_fence = 15,	 /* starts/ends a string that is delimited on the
			    other side by any char with the same syntaxcode */
    Smax = 16		 /* upper bound on the codes that are meaningful */
  };
57
/* Store VAL as the raw syntax entry of character C in syntax table
   TABLE.  A character below CHAR_TABLE_SINGLE_BYTE_SLOTS is written
   directly into the table's contents array; any other character is
   stored through Faset.  The value of the expression is whatever the
   chosen store yields.  C is evaluated more than once.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  ((c) >= CHAR_TABLE_SINGLE_BYTE_SLOTS \
   ? Faset ((table), make_number (c), (val)) \
   : (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)))
64
/* Yield the syntax entry for character C in syntax table TABLE,
   following the chain of parent tables for as long as the entry found
   is nil.  If every table in the chain has nil for C, the result is
   nil.

   C is used directly as an index into each table's contents array, so
   it must be a valid index there; SYNTAX_ENTRY_INT only passes values
   below CHAR_TABLE_SINGLE_BYTE_SLOTS.  */

#ifdef __GNUC__
#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  ({ Lisp_Object syn_tbl_ = (table); \
     Lisp_Object syn_ent_; \
     for (syn_ent_ = XCHAR_TABLE (syn_tbl_)->contents[(c)]; \
          NILP (syn_ent_); \
          syn_ent_ = XCHAR_TABLE (syn_tbl_)->contents[(c)]) \
       { \
         syn_tbl_ = XCHAR_TABLE (syn_tbl_)->parent; \
         if (NILP (syn_tbl_)) \
           break; \
       } \
     syn_ent_; })
#else
/* Without GNU statement expressions the first lookup goes through the
   global scratch variable syntax_temp, and the walk up the parent
   chain is delegated to syntax_parent_lookup.  */
extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
extern Lisp_Object syntax_temp;

#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
   (!NILP (syntax_temp) \
    ? syntax_temp \
    : syntax_parent_lookup (table, (c))))
#endif
91
/* SYNTAX_ENTRY (C) yields the syntax entry for character C, with
   inheritance.  CURRENT_SYNTAX_TABLE is the syntax table that the
   lookup uses.

   When SYNTAX_ENTRY_VIA_PROPERTY is not defined, the table is always
   the current buffer's syntax table and SYNTAX_ENTRY is simply
   SYNTAX_ENTRY_INT.

   When it is defined, the table is gl_state.current_syntax_table
   (either the buffer's syntax table or one found in text properties),
   and SYNTAX_ENTRY yields gl_state.global_code instead of doing a
   table lookup whenever gl_state.use_global is nonzero.  */

#ifndef SYNTAX_ENTRY_VIA_PROPERTY
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#else
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
# define SYNTAX_ENTRY(c) \
  (!gl_state.use_global ? SYNTAX_ENTRY_INT (c) : gl_state.global_code)
#endif
107
/* Look up the syntax entry for character C in CURRENT_SYNTAX_TABLE.

   A character below CHAR_TABLE_SINGLE_BYTE_SLOTS is looked up with
   SYNTAX_ENTRY_FOLLOW_PARENT.  Any other character is looked up with
   Faref; if it satisfies COMPOSITE_CHAR_P, the lookup key is the
   result of cmpchar_component (C, 0, 1) rather than C itself
   (NOTE(review): presumably the first component of the composite
   character -- confirm against cmpchar_component).

   C is evaluated more than once.  */

#define SYNTAX_ENTRY_INT(c) \
  ((c) >= CHAR_TABLE_SINGLE_BYTE_SLOTS \
   ? Faref (CURRENT_SYNTAX_TABLE, \
            make_number (!COMPOSITE_CHAR_P (c) \
                         ? (c) \
                         : cmpchar_component ((c), 0, 1))) \
   : SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
                                 (unsigned char) (c)))
116
/* Accessors for the syntax entry of character C, fetched with
   SYNTAX_ENTRY:

   SYNTAX (C) is the syntax code, i.e. the low 8 bits of the entry's
     car, as an enum syntaxcode.
   SYNTAX_WITH_FLAGS (C) is the whole integer in the entry's car (the
     code together with the flag bits).
   SYNTAX_MATCH (C) is the entry's cdr, the matching character.

   When the entry is not a cons cell, the first two yield Swhitespace
   and SYNTAX_MATCH yields Qnil.  */

#ifdef __GNUC__
#define SYNTAX(c) \
  ({ Lisp_Object syn_cell_ = SYNTAX_ENTRY (c); \
     (!CONSP (syn_cell_) \
      ? Swhitespace \
      : (enum syntaxcode) (XINT (XCAR (syn_cell_)) & 0xff)); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object syn_cell_ = SYNTAX_ENTRY (c); \
     (!CONSP (syn_cell_) \
      ? (int) Swhitespace \
      : XINT (XCAR (syn_cell_))); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object syn_cell_ = SYNTAX_ENTRY (c); \
     (!CONSP (syn_cell_) \
      ? Qnil \
      : XCDR (syn_cell_)); })
#else
/* Without GNU statement expressions the entry is held in the global
   scratch variable syntax_temp while it is examined.  */
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (!CONSP (syntax_temp) \
    ? Swhitespace \
    : (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (!CONSP (syntax_temp) \
    ? (int) Swhitespace \
    : XINT (XCAR (syntax_temp))))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (!CONSP (syntax_temp) \
    ? Qnil \
    : XCDR (syntax_temp)))
#endif
160
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Flag N is stored in bit 15 + N of CODE+FLAGS, i.e. the flags occupy
  bits 16 through 22 (see the extraction macros below).
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs,
  and flag 6 is used to determine whether a comment-end sequence or an
  Sendcomment character ends style a or b.  Comment start sequences can
  start style a or b.  Style a is always the default.
  */
178
/* Each of these macros yields 1 if the corresponding flag is set in
   the syntax entry of character C, and 0 otherwise.  The integer
   holding the code and flags is fetched with SYNTAX_WITH_FLAGS and the
   bit is tested by the matching SYNTAX_FLAGS_... macro.  */

#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
194
/* Each of these macros tests one flag bit in FLAGS, an integer that
   holds a syntax code together with its flags, and yields 1 if the bit
   is set and 0 otherwise.  The flags occupy bits 16 through 22.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) & (1 << 16)) != 0)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) & (1 << 17)) != 0)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) & (1 << 18)) != 0)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) & (1 << 19)) != 0)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) & (1 << 20)) != 0)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) & (1 << 21)) != 0)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) & (1 << 22)) != 0)
211
/* Table mapping a designator character to the syntax code it stands
   for, stored as an unsigned char.  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.  The table has 256
   entries.  */

extern unsigned char syntax_spec_code[256];

/* The reverse mapping: indexed by a syntax code, gives the letter
   that describes it.  */

extern char syntax_code_spec[16];
221
/* Translate the byte offset BYTEPOS into a character position for the
   object recorded in gl_state.object (see
   SETUP_SYNTAX_TABLE_FOR_OBJECT).

   The result is intended as an argument for the UPDATE_SYNTAX_TABLE...
   macros.  Those macros do nothing while parse_sexp_lookup_properties
   is 0, so in that case the conversion is skipped and the value is
   simply 0.

   Otherwise the conversion depends on the object:
     string  -- string_byte_to_char on BYTEPOS;
     buffer  -- BYTEPOS is taken relative to that buffer's BEGV_BYTE and
		the result is made relative to its BEGV;
     nil     -- the same, using the current buffer's BEGV_BYTE and BEGV;
     other   -- BYTEPOS is returned unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (parse_sexp_lookup_properties \
   ? (STRINGP (gl_state.object) \
      ? string_byte_to_char (gl_state.object, (bytepos)) \
      : (BUFFERP (gl_state.object) \
         ? (buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
                                    ((bytepos) \
                                     + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) \
                                     - 1)) \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
         : (NILP (gl_state.object) \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
            : (bytepos)))) \
   : 0)
240
/* Bring the syntax table state (gl_state) up to date for CHARPOS,
   assuming it is currently valid for some position before CHARPOS.

   Nothing happens unless parse_sexp_lookup_properties is nonzero and
   CHARPOS has reached gl_state.e_property.  In that case
   update_syntax_table is called for CHARPOS + gl_state.offset with a
   count of 1, and the value of the macro is 1; otherwise the value
   is 0.  CHARPOS is evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   && (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                            gl_state.object), \
       1))
251
/* Bring the syntax table state (gl_state) up to date for CHARPOS,
   assuming it is currently valid for some position after CHARPOS.

   Nothing happens unless parse_sexp_lookup_properties is nonzero and
   CHARPOS has come down to gl_state.b_property.  In that case
   update_syntax_table is called for CHARPOS + gl_state.offset with a
   count of -1, and the value of the macro is 1; otherwise the value
   is 0.  CHARPOS is evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) <= gl_state.b_property \
   && (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                            gl_state.object), \
       1))
262
/* Bring the syntax table state (gl_state) up to date for CHARPOS,
   whichever side of the currently valid range it lies on.

   Nothing happens while parse_sexp_lookup_properties is 0.  Otherwise,
   if CHARPOS is at or below gl_state.b_property, update_syntax_table
   is called with a count of -1; failing that, if CHARPOS is at or
   above gl_state.e_property, it is called with a count of 1.  The
   position passed is CHARPOS + gl_state.offset.  The value of the
   macro is 1 if an update was done and 0 otherwise.  CHARPOS is
   evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (parse_sexp_lookup_properties \
   && ((charpos) <= gl_state.b_property \
       ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                               gl_state.object), \
          1) \
       : ((charpos) >= gl_state.e_property \
          && (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                                   gl_state.object), \
              1))))
277
/* Initialize gl_state for scanning the current buffer.

   Call this with FROM at the start of a forward search, or just after
   the last position of a backward search; the sign of COUNT gives the
   direction of the search.  The state is reset to describe the whole
   accessible portion of the buffer with the buffer's own syntax table.
   Then, if parse_sexp_lookup_properties is nonzero, update_syntax_table
   is called so that the first character scanned is already looked up
   in the correct table and no UPDATE_SYNTAX_TABLE call is needed
   immediately afterwards.  That call is skipped for a backward scan
   starting at BEGV.

   The expansion has the form `if (1) { ... } else', so the macro must
   be used as a statement followed by a semicolon.  FROM and COUNT are
   evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
if (1) \
  { \
    gl_state.object = Qnil; \
    gl_state.offset = 0; \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    gl_state.b_property = BEGV - 1; \
    gl_state.e_property = ZV + 1; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      update_syntax_table ((FROM) - ((COUNT) > 0 ? 0 : 1), (COUNT), \
                           1, Qnil); \
  } \
else
301
/* Initialize gl_state for scanning OBJECT, starting at FROM; the sign
   of COUNT gives the direction of the scan.

   OBJECT selects what is scanned and how positions are interpreted:
     a buffer -- positions are relative to that buffer's BEGV, and
		 gl_state.offset is set to its BEGV - 1;
     nil      -- the same, for the current buffer;
     t        -- text properties are ignored altogether: the valid
		 range is made so large (e_property = 1500000000) that
		 the UPDATE_SYNTAX_TABLE... macros never trigger;
     anything else is treated as a string, with the valid range
		 covering the whole string and an offset of 0.

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV, which is why the offset is set as described above.

   After the state is set up, and if parse_sexp_lookup_properties is
   nonzero, update_syntax_table is called for the first position to be
   examined: FROM + offset for a forward scan (COUNT > 0), one less for
   a backward scan.

   The expansion has the form `if (1) { ... } else', so the macro must
   be used as a statement followed by a semicolon.  OBJECT is evaluated
   once; FROM once; COUNT more than once.  Every use of COUNT is
   parenthesized so that an argument containing a low-precedence
   operator (for instance `forward ? 1 : -1') is handled correctly.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
if (1) \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 0; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = - 1; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = -1; \
        gl_state.e_property = 1 + XSTRING (gl_state.object)->size; \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
else
346
/* Scanning state shared by the SYNTAX_ENTRY, UPDATE_SYNTAX_TABLE... and
   SETUP_SYNTAX_TABLE... macros in this file.  In the comments below,
   `c_s_t' abbreviates the current_syntax_table member.  */
struct gl_state_s
{
  Lisp_Object object;			/* The object we are scanning. */
  int start;				/* NOTE(review): the original comment
					   here read "Where to stop", the
					   same as for `stop'; presumably
					   this is the limit for backward
					   scanning -- confirm against the
					   code that sets it. */
  int stop;				/* Where to stop. */
  int use_global;			/* Whether SYNTAX_ENTRY should use
					   global_code (nonzero) or look the
					   character up in c_s_t (zero). */
  Lisp_Object global_code;		/* Syntax code of current char. */
  Lisp_Object current_syntax_table;	/* Syntax table for current pos. */
  Lisp_Object old_prop;			/* Syntax-table prop at prev pos. */
  int b_property;			/* Last index where c_s_t is
					   not valid; the backward update
					   macros act when the position is
					   <= this. */
  int e_property;			/* First index where c_s_t is
					   not valid; the forward update
					   macros act when the position is
					   >= this. */
  INTERVAL forward_i;			/* Where to start lookup on forward */
  INTERVAL backward_i;			/* or backward movement.  The
					   data in c_s_t is valid
					   between these intervals,
					   and possibly at the
					   intervals too, depending
					   on left_ok and right_ok
					   (NOTE(review): presumably
					   left_ok goes with backward_i
					   and right_ok with forward_i --
					   confirm). */
  /* Offset for positions specified to UPDATE_SYNTAX_TABLE: the macros
     add it to their CHARPOS argument before calling
     update_syntax_table.  */
  int offset;
  char left_ok;
  char right_ok;
};
373
/* The global scanning state read and written by the SYNTAX_ENTRY,
   UPDATE_SYNTAX_TABLE... and SETUP_SYNTAX_TABLE... macros.  */
extern struct gl_state_s gl_state;

/* When this is 0, the UPDATE_SYNTAX_TABLE... macros do nothing, the
   SETUP_SYNTAX_TABLE... macros skip their call to update_syntax_table,
   and SYNTAX_TABLE_BYTE_TO_CHAR yields 0.  */
extern int parse_sexp_lookup_properties;

/* Defined elsewhere; not used by the macros in this header.  */
extern INTERVAL interval_of P_ ((int, Lisp_Object));

/* Defined elsewhere; not used by the macros in this header.  */
extern int scan_words P_ ((int, int));