Add 2011 to FSF/AIST copyright years.
[bpt/emacs.git] / src / syntax.h
CommitLineData
/* Declarations having to do with GNU Emacs syntax tables.
   Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
                 2005, 2006, 2007, 2008, 2009, 2010, 2011  Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */


21extern Lisp_Object Qsyntax_table_p;
4c571d09 22extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
9889c728
JB
23
24/* The standard syntax table is stored where it will automatically
25 be used in all new buffers. */
26#define Vstandard_syntax_table buffer_defaults.syntax_table
27
e46c910e
RS
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS is a code, as follows:  */

enum syntaxcode
  {
    Swhitespace,     /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful;
                        not itself a syntax code.  */
  };

/* Set the syntax entry VAL for char C in table TABLE.
   Every argument is parenthesized before being forwarded, so an
   expression argument cannot re-associate inside CHAR_TABLE_SET.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  CHAR_TABLE_SET ((table), (c), (val))

/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
  Fset_char_table_range ((table), (range), (val))

/* SYNTAX_ENTRY (C) fetches the raw entry for character C, either from
   the current syntax table (via SYNTAX_ENTRY_INT) or, when
   SYNTAX_ENTRY_VIA_PROPERTY is defined and gl_state.use_global is set,
   from the globally kept gl_state.global_code.
   The original comment states that the lookup does inheritance.  */
/* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
   position, it is either the buffer's syntax table, or syntax table
   found in text properties.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY(c) \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif

/* Look C up in CURRENT_SYNTAX_TABLE, ignoring gl_state.use_global.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))

/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (C)            -> the syntax code (low 8 bits of the car),
                            as an enum syntaxcode.
   SYNTAX_WITH_FLAGS (C) -> the whole integer in the car (code + flags).
   SYNTAX_MATCH (C)      -> the cdr of the entry (the matching char).

   If the entry is not a cons, these yield Swhitespace, (int) Swhitespace
   and Qnil respectively.

   With GCC the entry is held in a block-local temporary inside a
   statement expression.  Otherwise a single shared variable,
   syntax_temp, is used, so the fallback versions are not reentrant.  */

#ifdef __GNUC__
#define SYNTAX(c) \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XINT (XCAR (_syntax_temp)) \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c) \
  ({ Lisp_Object _syntax_temp = SYNTAX_ENTRY (c); \
     (CONSP (_syntax_temp) \
      ? XCDR (_syntax_temp) \
      : Qnil); })
#else
extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY (c), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif

/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Flag N is stored in bit 15 + N of CODE+FLAGS (flag 1 is bit 16,
  flag 7 is bit 22).
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end or Sendcomment
  ends style a or b.  Comment start sequences can start style a or b.
  Style a is always the default.
  */

/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.  Each yields 0 or 1.
   The flags occupy bits 16 through 22 of that integer.  */

#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)

/* These macros extract a particular flag for a given character.
   Each looks the character up with SYNTAX_WITH_FLAGS and hands the
   result to the matching SYNTAX_FLAGS_... macro, so the bit positions
   are written down only once.  */

#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))

#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))

/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The array has 0400 (octal, i.e. 256) entries.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.
   The array has 16 entries.  */

extern char syntax_code_spec[16];

/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the result depends on gl_state.object:
     string - string_byte_to_char on that string;
     buffer - BYTEPOS is taken relative to that buffer's BEGV_BYTE, and
              the result is made relative to its BEGV (both 1-based);
     nil    - likewise, using BEGV_BYTE and BEGV directly;
     other  - BYTEPOS is returned unchanged.

   BYTEPOS appears in several branches, but only one is evaluated.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : STRINGP (gl_state.object) \
   ? string_byte_to_char (gl_state.object, (bytepos)) \
   : BUFFERP (gl_state.object) \
   ? (buf_bytepos_to_charpos \
        (XBUFFER (gl_state.object), \
         (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) \
      - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
   : NILP (gl_state.object) \
   ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
   : (bytepos))

/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS is at or past gl_state.e_property.  Value is 1 if
   update_syntax_table was called, else 0.  CHARPOS may be evaluated
   twice.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) >= gl_state.e_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                           gl_state.object), \
      1) \
   : 0)

/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS is before gl_state.b_property.  Value is 1 if
   update_syntax_table was called, else 0.  CHARPOS may be evaluated
   twice.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  (parse_sexp_lookup_properties \
   && (charpos) < gl_state.b_property \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1) \
   : 0)

/* Make syntax table good for CHARPOS.

   Tries the backward update first; the forward update is attempted
   only when the backward one did nothing.  Value is 1 if
   update_syntax_table was called, else 0.  */

#define UPDATE_SYNTAX_TABLE(charpos) \
  (UPDATE_SYNTAX_TABLE_BACKWARD (charpos) \
   || UPDATE_SYNTAX_TABLE_FORWARD (charpos))

/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   It resets gl_state for the current buffer: valid range
   [BEGV, ZV + 1), object nil, offset 0, table taken from
   current_buffer.  If parse_sexp_lookup_properties is nonzero, and
   either COUNT is positive or FROM is past BEGV, update_syntax_table
   is called for FROM (forward) or FROM - 1 (backward).
   FROM and COUNT may each be evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
do \
  { \
    gl_state.b_property = BEGV; \
    gl_state.e_property = ZV + 1; \
    gl_state.object = Qnil; \
    gl_state.use_global = 0; \
    gl_state.offset = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      { \
        update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
                             1, Qnil); \
      } \
  } \
while (0)

/* Same as SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use
   current buffer.  If it is t, the valid range is set to
   [0, 1500000000), so the range tests in the UPDATE_SYNTAX_TABLE
   macros do not fire for positions below that bound.  Any other
   non-buffer OBJECT is measured with SCHARS.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer, we set the offset field to BEGV - 1, and
   b_property / e_property are expressed in those relative positions.

   If parse_sexp_lookup_properties is nonzero, update_syntax_table is
   called for FROM + offset (forward, COUNT > 0) or one position
   earlier (backward).  COUNT is evaluated twice in that case; both
   uses are parenthesized so an expression argument groups correctly.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
do \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 1; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 1; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = 1 + SCHARS (gl_state.object); \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
while (0)

317struct gl_state_s
318{
c292db29 319 Lisp_Object object; /* The object we are scanning. */
5eea1c5a
RS
320 int start; /* Where to stop. */
321 int stop; /* Where to stop. */
322 int use_global; /* Whether to use global_code
323 or c_s_t. */
324 Lisp_Object global_code; /* Syntax code of current char. */
325 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
326 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
4948e1f2 327 int b_property; /* First index where c_s_t is valid. */
5eea1c5a
RS
328 int e_property; /* First index where c_s_t is
329 not valid. */
330 INTERVAL forward_i; /* Where to start lookup on forward */
331 INTERVAL backward_i; /* or backward movement. The
332 data in c_s_t is valid
333 between these intervals,
334 and possibly at the
335 intervals too, depending
336 on: */
e2d8d746
RS
337 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
338 int offset;
5eea1c5a
RS
339};
340
341extern struct gl_state_s gl_state;
342extern int parse_sexp_lookup_properties;
4c571d09
AS
343extern INTERVAL interval_of P_ ((int, Lisp_Object));
344
345extern int scan_words P_ ((int, int));
839966f3
KH
346
347/* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
348 (do not change this comment) */