Fix previous conflict.
[bpt/emacs.git] / src / syntax.h
CommitLineData
9889c728 1/* Declarations having to do with GNU Emacs syntax tables.
429ab54e 2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
114f9c96 3 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
9889c728
JB
4
5This file is part of GNU Emacs.
6
b9b1cc14 7GNU Emacs is free software: you can redistribute it and/or modify
9889c728 8it under the terms of the GNU General Public License as published by
b9b1cc14
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
9889c728
JB
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
b9b1cc14 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
9889c728
JB
19
20
21extern Lisp_Object Qsyntax_table_p;
383e0970 22extern void update_syntax_table (int, int, int, Lisp_Object);
9889c728
JB
23
24/* The standard syntax table is stored where it will automatically
25 be used in all new buffers. */
26#define Vstandard_syntax_table buffer_defaults.syntax_table
27
e46c910e
RS
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS are a code, as follows:  */

enum syntaxcode
  {
    Swhitespace,     /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful */
  };
56
/* Set the syntax entry VAL for char C in table TABLE.
   Expands to a CHAR_TABLE_SET of TABLE at C.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val)				\
  CHAR_TABLE_SET ((table), (c), (val))
e0b8ff93 61
f4926ee8
KH
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.
   Expands to a call to Fset_char_table_range.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val)			\
  Fset_char_table_range ((table), (range), (val))
9889c728 67
/* SYNTAX_ENTRY fetches the information from the entry for character C
   in the current syntax table (CURRENT_SYNTAX_TABLE), or from globally
   kept data (gl_state).  Does inheritance.

   CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.

   When SYNTAX_ENTRY_VIA_PROPERTY is defined, SYNTAX_ENTRY yields
   gl_state.global_code whenever gl_state.use_global is nonzero, and
   the table consulted is gl_state.current_syntax_table.  Otherwise
   SYNTAX_ENTRY is simply SYNTAX_ENTRY_INT on the current buffer's
   syntax table.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY(c)						\
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif

/* Look C up directly in CURRENT_SYNTAX_TABLE.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
e0b8ff93 85
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (C) is the syntax code: the low 8 bits of the entry's car,
   as an enum syntaxcode.
   SYNTAX_WITH_FLAGS (C) is the whole car of the entry, as an integer.
   SYNTAX_MATCH (C) is the cdr of the entry.

   When the entry is not a cons, these yield Swhitespace,
   (int) Swhitespace and Qnil respectively.  */

#ifdef __GNUC__

/* With GCC, a statement expression gives every expansion its own
   local temporary for the fetched entry.  */

#define SYNTAX(c)							\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff)		\
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c)						\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? XINT (XCAR (_syntax_temp))					\
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c)							\
  ({ Lisp_Object _syntax_temp;						\
     _syntax_temp = SYNTAX_ENTRY (c);					\
     (CONSP (_syntax_temp)						\
      ? XCDR (_syntax_temp)						\
      : Qnil); })

#else

/* Without statement expressions, the fetched entry is parked in the
   single shared variable syntax_temp, which every expansion
   overwrites.  */

extern Lisp_Object syntax_temp;

#define SYNTAX(c)							\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)		\
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c)						\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? XINT (XCAR (syntax_temp))						\
    : (int) Swhitespace))

#define SYNTAX_MATCH(c)							\
  (syntax_temp = SYNTAX_ENTRY ((c)),					\
   (CONSP (syntax_temp)							\
    ? XCDR (syntax_temp)						\
    : Qnil))

#endif
9889c728 130
/* Then there are seven single-bit flags, kept in bits 16 through 22
   of CODE+FLAGS (flag N lives in bit 15 + N), that have the following
   meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end or Sendcomment
  ends style a or b.  Comment start sequences can start style a or b.
  Style a is always the default.
  */

/* These macros extract a particular flag for a given character.
   Each one fetches SYNTAX_WITH_FLAGS (C) and yields 0 or 1; the bit
   positions are those of the corresponding SYNTAX_FLAGS_* macro.  */

#define SYNTAX_COMSTART_FIRST(c)					\
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c)					\
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c)						\
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c)						\
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c)						\
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c)						\
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c)					\
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
164
98bcfee8
RS
/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.  Each one tests a single
   bit (16 through 22) and yields 0 or 1.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
181
9889c728
JB
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The size 0400 is octal, i.e. 256 entries.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.  */

extern char syntax_code_spec[16];
191
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
   - a string: string_byte_to_char on that string;
   - a buffer: BYTEPOS is rebased onto that buffer's BUF_BEGV_BYTE,
     converted, and the result rebased back against its BUF_BEGV;
   - nil: the same, using the current buffer's BEGV_BYTE and BEGV;
   - anything else: BYTEPOS is returned unchanged.

   BYTEPOS may be evaluated more than once.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)				\
  (! parse_sexp_lookup_properties					\
   ? 0									\
   : STRINGP (gl_state.object)						\
   ? string_byte_to_char (gl_state.object, (bytepos))			\
   : BUFFERP (gl_state.object)						\
   ? (buf_bytepos_to_charpos						\
      (XBUFFER (gl_state.object),					\
       (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)	\
      - BUF_BEGV (XBUFFER (gl_state.object)) + 1)			\
   : NILP (gl_state.object)						\
   ? (BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1)		\
   : (bytepos))
210
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS has reached gl_state.e_property.  The value is 1 if
   update_syntax_table was called, 0 otherwise.  CHARPOS may be
   evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)				\
  (parse_sexp_lookup_properties						\
   && (charpos) >= gl_state.e_property					\
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,		\
			   gl_state.object),				\
      1)								\
   : 0)
5eea1c5a 221
f79b4b7e
KH
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS lies before gl_state.b_property.  The value is 1 if
   update_syntax_table was called, 0 otherwise.  CHARPOS may be
   evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)				\
  (parse_sexp_lookup_properties						\
   && (charpos) < gl_state.b_property					\
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0,		\
			   gl_state.object),				\
      1)								\
   : 0)
e2d8d746 232
/* Make syntax table good for CHARPOS.

   First tries the backward update (CHARPOS before
   gl_state.b_property); only if that did nothing, tries the forward
   update (CHARPOS at or past gl_state.e_property).  The value is 1 if
   update_syntax_table was called, 0 otherwise.  CHARPOS may be
   evaluated more than once.  */

#define UPDATE_SYNTAX_TABLE(charpos)					\
  (UPDATE_SYNTAX_TABLE_BACKWARD (charpos)				\
   || UPDATE_SYNTAX_TABLE_FORWARD (charpos))
5eea1c5a 247
d48cd3f4
SM
/* This macro sets up the buffer-global syntax table: it clears
   gl_state.use_global and points gl_state.current_syntax_table at the
   current buffer's syntax table.  */
#define SETUP_BUFFER_SYNTAX_TABLE()					\
do									\
  {									\
    gl_state.use_global = 0;						\
    gl_state.current_syntax_table = current_buffer->syntax_table;	\
  } while (0)
255
5eea1c5a
RS
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   gl_state is reset to cover the current buffer (object nil, offset
   0, valid range BEGV up to but excluding ZV + 1).  When
   parse_sexp_lookup_properties is nonzero, update_syntax_table is
   then called for FROM (forward) or FROM - 1 (backward); the backward
   call is skipped when FROM is not beyond BEGV.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT)					\
do									\
  {									\
    SETUP_BUFFER_SYNTAX_TABLE ();					\
    gl_state.b_property = BEGV;						\
    gl_state.e_property = ZV + 1;					\
    gl_state.object = Qnil;						\
    gl_state.offset = 0;						\
    if (parse_sexp_lookup_properties					\
	&& ((COUNT) > 0 || (FROM) > BEGV))				\
      update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT),	\
			   1, Qnil);					\
  }									\
while (0)
5eea1c5a
RS
278
/* Same as SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use
   current buffer.  If it is t, ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer (or nil), we set the offset field to BEGV - 1,
   and the valid range starts at 1.  For t and for any other object
   (whose length is taken with SCHARS) the offset is 0 and the valid
   range starts at 0.

   When parse_sexp_lookup_properties is nonzero, update_syntax_table
   is then called for FROM (COUNT > 0) or FROM - 1 (otherwise),
   shifted by the offset.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)		\
do									\
  {									\
    SETUP_BUFFER_SYNTAX_TABLE ();					\
    gl_state.object = (OBJECT);						\
    if (BUFFERP (gl_state.object))					\
      {									\
	struct buffer *buf = XBUFFER (gl_state.object);			\
	gl_state.b_property = 1;					\
	gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;	\
	gl_state.offset = BUF_BEGV (buf) - 1;				\
      }									\
    else if (NILP (gl_state.object))					\
      {									\
	gl_state.b_property = 1;					\
	gl_state.e_property = ZV - BEGV + 1;				\
	gl_state.offset = BEGV - 1;					\
      }									\
    else if (EQ (gl_state.object, Qt))					\
      {									\
	gl_state.b_property = 0;					\
	/* NOTE(review): presumably just "larger than any position",	\
	   so the UPDATE macros never fire -- confirm.  */		\
	gl_state.e_property = 1500000000;				\
	gl_state.offset = 0;						\
      }									\
    else								\
      {									\
	gl_state.b_property = 0;					\
	gl_state.e_property = 1 + SCHARS (gl_state.object);		\
	gl_state.offset = 0;						\
      }									\
    if (parse_sexp_lookup_properties)					\
      update_syntax_table (((FROM) + gl_state.offset			\
			    + ((COUNT) > 0 ? 0 : -1)),			\
			   (COUNT), 1, gl_state.object);		\
  }									\
while (0)
5eea1c5a
RS
322
323struct gl_state_s
324{
c292db29 325 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
326 int start; /* Where to stop. */
327 int stop; /* Where to stop. */
328 int use_global; /* Whether to use global_code
329 or c_s_t. */
330 Lisp_Object global_code; /* Syntax code of current char. */
331 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
332 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 333 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
334 int e_property; /* First index where c_s_t is
335 not valid. */
336 INTERVAL forward_i; /* Where to start lookup on forward */
337 INTERVAL backward_i; /* or backward movement. The
338 data in c_s_t is valid
339 between these intervals,
340 and possibly at the
341 intervals too, depending
342 on: */
e2d8d746
RS
343 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
344 int offset;
5eea1c5a
RS
345};
346
347extern struct gl_state_s gl_state;
348extern int parse_sexp_lookup_properties;
383e0970 349extern INTERVAL interval_of (int, Lisp_Object);
4c571d09 350
383e0970 351extern int scan_words (int, int);
839966f3
KH
352
353/* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
354 (do not change this comment) */