(translate_char): Accept list of translation tables.
[bpt/emacs.git] / src / syntax.h
CommitLineData
/* Declarations having to do with GNU Emacs syntax tables.
   Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
9889c728
JB
20
21
22extern Lisp_Object Qsyntax_table_p;
4c571d09 23extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
9889c728
JB
24
/* The standard syntax table is stored where it will automatically
   be used in all new buffers.  */
#define Vstandard_syntax_table buffer_defaults.syntax_table
28
e46c910e
RS
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS is a code, as follows:  */

enum syntaxcode
  {
    Swhitespace,     /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful */
  };
57
/* Set the syntax entry VAL for char C in table TABLE.
   Every argument is parenthesized before being handed to
   CHAR_TABLE_SET.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))
e0b8ff93 62
f4926ee8
KH
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  Fset_char_table_range ((table), (range), (val))
9889c728 68
/* SYNTAX_ENTRY fetches the information from the entry for character C
   in the current syntax table (CURRENT_SYNTAX_TABLE), or from globally
   kept data (gl_state.global_code, when gl_state.use_global is set and
   SYNTAX_ENTRY_VIA_PROPERTY is defined).
   Does inheritance.  */
/* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
#  define SYNTAX_ENTRY(c) \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#  define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
#  define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#  define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif

/* Look C up directly in CURRENT_SYNTAX_TABLE.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
e0b8ff93 86
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (c) yields the syntax code (the low 8 bits of the entry's car),
   SYNTAX_WITH_FLAGS (c) the whole integer holding code and flags, and
   SYNTAX_MATCH (c) the entry's cdr.  When the entry is not a cons they
   yield Swhitespace, (int) Swhitespace and Qnil respectively.  */

#ifdef __GNUC__
/* GNU C statement expressions: the entry is fetched once into a
   block-local variable.  The local is deliberately not called `temp',
   so that an argument expression mentioning a caller's `temp' is not
   captured by the macro's own variable.  */
#define SYNTAX(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? (enum syntaxcode) (XINT (XCAR (syntax_entry_)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? XINT (XCAR (syntax_entry_)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? XCDR (syntax_entry_) \
      : Qnil); })
#else
/* Portable versions: the entry is stored in the shared global
   syntax_temp, which every expansion overwrites.  */
extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif
9889c728 131
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end or Sendcomment
  ends style a or b.  Comment start sequences can start style a or b.
  Style a is always the default.
  */
9889c728 149
98bcfee8
RS
/* These macros extract a particular flag for a given character.
   Each looks up C with SYNTAX_WITH_FLAGS and yields 0 or 1; the flags
   occupy bits 16 through 22 of that value.  */

#define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)

#define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)

#define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)

#define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)

#define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)

#define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)

#define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
165
98bcfee8
RS
/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.
   Each yields 0 or 1; the flags occupy bits 16 through 22.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
182
9889c728
JB
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The bound 0400 is octal, i.e. 256 entries.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.  */

extern char syntax_code_spec[16];
192
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object: a string, a
   buffer object, nil (converted with BYTE_TO_CHAR relative to BEGV),
   or anything else, for which BYTEPOS is returned unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : STRINGP (gl_state.object) \
   ? string_byte_to_char (gl_state.object, (bytepos)) \
   : BUFFERP (gl_state.object) \
   ? (buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
                              ((bytepos) \
                               + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) \
                               - 1)) \
      - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
   : NILP (gl_state.object) \
   ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
   : (bytepos))
211
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.
   The value is 1 if update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                           gl_state.object), \
      1) \
   : 0)
5eea1c5a 222
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.
   The value is 1 if update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : 0)
e2d8d746 233
/* Make syntax table good for CHARPOS.
   Checks first whether CHARPOS lies before gl_state.b_property, then
   whether it lies at or after gl_state.e_property, and updates in the
   matching direction.  The value is 1 if update_syntax_table was
   called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : (parse_sexp_lookup_properties \
      && (charpos) >= gl_state.e_property \
      ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                              gl_state.object), \
         1) \
      : 0))
5eea1c5a
RS
248
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   The expansion is a single `do ... while (0)' statement and must be
   followed by a semicolon.  Omitting the semicolon is a compile
   error, rather than silently turning the next statement into a dead
   `else' branch.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
do \
  { \
    gl_state.b_property = BEGV; \
    gl_state.e_property = ZV + 1; \
    gl_state.object = Qnil; \
    gl_state.use_global = 0; \
    gl_state.offset = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      { \
        update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
                             1, Qnil); \
      } \
  } \
while (0)
5eea1c5a
RS
272
/* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
   If it is t, ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer (or nil), we set the offset field to BEGV - 1.

   For t, e_property is set to a very large constant; for any other
   object it is set to 1 + SCHARS (OBJECT).

   The expansion is a single `do ... while (0)' statement and must be
   followed by a semicolon.  COUNT is parenthesized at every use so
   that an operator expression passed for it keeps its meaning.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
do \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + SCHARS (gl_state.object); \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      { \
        update_syntax_table (((FROM) + gl_state.offset \
                              + ((COUNT) > 0 ? 0 : -1)), \
                             (COUNT), 1, gl_state.object); \
      } \
  } \
while (0)
5eea1c5a
RS
317
318struct gl_state_s
319{
c292db29 320 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
321 int start; /* Where to stop. */
322 int stop; /* Where to stop. */
323 int use_global; /* Whether to use global_code
324 or c_s_t. */
325 Lisp_Object global_code; /* Syntax code of current char. */
326 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
327 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 328 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
329 int e_property; /* First index where c_s_t is
330 not valid. */
331 INTERVAL forward_i; /* Where to start lookup on forward */
332 INTERVAL backward_i; /* or backward movement. The
333 data in c_s_t is valid
334 between these intervals,
335 and possibly at the
336 intervals too, depending
337 on: */
e2d8d746
RS
338 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
339 int offset;
5eea1c5a
RS
340};
341
342extern struct gl_state_s gl_state;
343extern int parse_sexp_lookup_properties;
4c571d09
AS
344extern INTERVAL interval_of P_ ((int, Lisp_Object));
345
346extern int scan_words P_ ((int, int));