1 /* Declarations having to do with GNU Emacs syntax tables.
2 Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 extern Lisp_Object Qsyntax_table_p
;
/* Bring gl_state up to date for a position.  Judging from the
   UPDATE_SYNTAX_TABLE* and SETUP_SYNTAX_TABLE* macros in this file,
   the arguments are: a position; a count whose sign presumably gives
   the scan direction; a flag that is 0 in the UPDATE_* macros and 1 in
   SETUP_SYNTAX_TABLE_FOR_OBJECT (presumably "initialize"); and the
   object being scanned -- TODO confirm against the definition.  */
23 extern void update_syntax_table
P_ ((int, int, int, Lisp_Object
));
25 /* The standard syntax table is stored where it will automatically
26 be used in all new buffers. */
27 #define Vstandard_syntax_table buffer_defaults.syntax_table
29 /* A syntax table is a chartable whose elements are cons cells
30 (CODE+FLAGS . MATCHING-CHAR). MATCHING-CHAR can be nil if the char
31 is not a kind of parenthesis.
33 The low 8 bits of CODE+FLAGS is a code, as follows: */
37 Swhitespace
, /* for a whitespace character */
38 Spunct
, /* for random punctuation characters */
39 Sword
, /* for a word constituent */
40 Ssymbol
, /* symbol constituent but not word constituent */
41 Sopen
, /* for a beginning delimiter */
42 Sclose
, /* for an ending delimiter */
43 Squote
, /* for a prefix character like Lisp ' */
44 Sstring
, /* for a string-grouping character like Lisp " */
45 Smath
, /* for delimiters like $ in Tex. */
46 Sescape
, /* for a character that begins a C-style escape */
47 Scharquote
, /* for a character that quotes the following character */
48 Scomment
, /* for a comment-starting character */
49 Sendcomment
, /* for a comment-ending character */
50 Sinherit
, /* use the standard syntax table for this character */
51 Scomment_fence
, /* Starts/ends comment which is delimited on the
52 other side by any char with the same syntaxcode. */
53 Sstring_fence
, /* Starts/ends string which is delimited on the
54 other side by any char with the same syntaxcode. */
55 Smax
/* Upper bound on codes that are meaningful */
58 /* Set the syntax entry VAL for char C in table TABLE.
   Characters below CHAR_TABLE_SINGLE_BYTE_SLOTS are stored directly
   in the char-table's contents array; all others go through Faset.
   C is evaluated more than once, so it must not have side effects.  */
60 #define SET_RAW_SYNTAX_ENTRY(table, c, val) \
61 ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
62 ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
63 : Faset ((table), make_number (c), (val)))
65 /* Fetch the syntax entry for char C in syntax table TABLE.
66 This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS.
70 #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
71 ({ Lisp_Object tbl = table; \
72 Lisp_Object temp = XCHAR_TABLE (tbl)->contents[(c)]; \
75 tbl = XCHAR_TABLE (tbl)->parent; \
78 temp = XCHAR_TABLE (tbl)->contents[(c)]; \
/* Scratch variable used by the comma-expression forms of
   SYNTAX_ENTRY_FOLLOW_PARENT, SYNTAX_WITH_FLAGS and SYNTAX_MATCH to
   hold the entry they have just fetched.  It is a single global, so
   every use of those macros overwrites it.  */
82 extern Lisp_Object syntax_temp
;
/* Called by SYNTAX_ENTRY_FOLLOW_PARENT with the table and the
   character; presumably resolves the entry through the table's
   parent chain -- TODO confirm against the definition.  */
83 extern Lisp_Object syntax_parent_lookup
P_ ((Lisp_Object
, int));
85 #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
86 (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
88 ? syntax_parent_lookup (table, (c)) \
92 /* SYNTAX_ENTRY fetches the information from the entry for character C
93 in syntax table TABLE, or from globally kept data (gl_state).
95 /* CURRENT_SYNTAX_TABLE gives the syntax table valid for current
96 position, it is either the buffer's syntax table, or syntax table
97 found in text properties. */
99 #ifdef SYNTAX_ENTRY_VIA_PROPERTY
100 # define SYNTAX_ENTRY(c) \
101 (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
102 # define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
104 # define SYNTAX_ENTRY SYNTAX_ENTRY_INT
105 # define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
/* Fetch the raw entry for character C from CURRENT_SYNTAX_TABLE.
   Characters below CHAR_TABLE_SINGLE_BYTE_SLOTS are looked up with
   SYNTAX_ENTRY_FOLLOW_PARENT; all others go through Faref.
   C is evaluated more than once, so it must not have side effects.  */
108 #define SYNTAX_ENTRY_INT(c) \
109 ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
110 ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
111 (unsigned char) (c)) \
112 : Faref (CURRENT_SYNTAX_TABLE, make_number ((c))))
114 /* Extract the information from the entry for character C
115 in the current syntax table. */
119 ({ Lisp_Object temp; \
120 temp = SYNTAX_ENTRY (c); \
122 ? (enum syntaxcode) (XINT (XCONS (temp)->car) & 0xff) \
125 #define SYNTAX_WITH_FLAGS(c) \
126 ({ Lisp_Object temp; \
127 temp = SYNTAX_ENTRY (c); \
129 ? XINT (XCONS (temp)->car) \
130 : (int) Swhitespace); })
132 #define SYNTAX_MATCH(c) \
133 ({ Lisp_Object temp; \
134 temp = SYNTAX_ENTRY (c); \
136 ? XCONS (temp)->cdr \
140 (syntax_temp = SYNTAX_ENTRY ((c)), \
141 (CONSP (syntax_temp) \
142 ? (enum syntaxcode) (XINT (XCONS (syntax_temp)->car) & 0xff) \
/* Return the whole CODE+FLAGS integer (the car of the entry) for
   character C, or Swhitespace if the entry is not a cons.
   This form stores the fetched entry in the global syntax_temp.  */
145 #define SYNTAX_WITH_FLAGS(c) \
146 (syntax_temp = SYNTAX_ENTRY ((c)), \
147 (CONSP (syntax_temp) \
148 ? XINT (XCONS (syntax_temp)->car) \
149 : (int) Swhitespace))
151 #define SYNTAX_MATCH(c) \
152 (syntax_temp = SYNTAX_ENTRY ((c)), \
153 (CONSP (syntax_temp) \
154 ? XCONS (syntax_temp)->cdr \
158 /* Then there are six single-bit flags that have the following meanings:
159 1. This character is the first of a two-character comment-start sequence.
160 2. This character is the second of a two-character comment-start sequence.
161 3. This character is the first of a two-character comment-end sequence.
162 4. This character is the second of a two-character comment-end sequence.
163 5. This character is a prefix, for backward-prefix-chars.
164 Note that any two-character sequence whose first character has flag 1
165 and whose second character has flag 2 will be interpreted as a comment start.
167 bit 6 is used to discriminate between two different comment styles.
168 Languages such as C++ allow two orthogonal syntax start/end pairs
169 and bit 6 is used to determine whether a comment-end or Sendcomment
170 ends style a or b. Comment start sequences can start style a or b.
171 Style a is always the default.
/* Each accessor below yields 0 or 1: one bit of the CODE+FLAGS value
   that SYNTAX_WITH_FLAGS returns for character C.  */
/* Flag 1 (bit 16): C is the first of a two-character comment-start sequence.  */
174 #define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)
/* Flag 2 (bit 17): C is the second of a two-character comment-start sequence.  */
176 #define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)
/* Flag 3 (bit 18): C is the first of a two-character comment-end sequence.  */
178 #define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)
/* Flag 4 (bit 19): C is the second of a two-character comment-end sequence.  */
180 #define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)
/* Flag 5 (bit 20): C is a prefix character, for backward-prefix-chars.  */
182 #define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)
184 /* Extract the comment style bit (flag 6, bit 21) from the syntax table entry.  */
185 #define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)
187 /* This array, indexed by a character, contains the syntax code which that
188 character signifies (as a char). For example,
189 (enum syntaxcode) syntax_spec_code['w'] is Sword. */
191 extern unsigned char syntax_spec_code
[0400];  /* 0400 is octal, i.e. 256 entries.  */
193 /* Indexed by syntax code, give the letter that describes it.
   The table has 16 entries; presumably one for each syntax code
   below Smax -- TODO confirm against the definition.  */
195 extern char syntax_code_spec
[16];
197 /* Convert the byte offset BYTEPOS into a character position,
198 for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT. */
200 #define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
201 (STRINGP (gl_state.object) \
202 ? string_byte_to_char (gl_state.object, (bytepos)) \
203 : BUFFERP (gl_state.object) \
204 ? buf_bytepos_to_charpos (XBUFFER (gl_state.object), (bytepos)) \
205 : NILP (gl_state.object) \
206 ? BYTE_TO_CHAR ((bytepos)) \
209 /* Make syntax table state (gl_state) good for POS, assuming it is
210 currently good for a position before POS. */
212 #define UPDATE_SYNTAX_TABLE_FORWARD(pos) \
213 ((pos) >= gl_state.e_property \
214 ? (update_syntax_table ((pos) + gl_state.offset, 1, 0, \
219 /* Make syntax table state (gl_state) good for POS, assuming it is
220 currently good for a position after POS. */
222 #define UPDATE_SYNTAX_TABLE_BACKWARD(pos) \
223 ((pos) <= gl_state.b_property \
224 ? (update_syntax_table ((pos) + gl_state.offset, -1, 0, \
229 /* Make syntax table good for POS. */
231 #define UPDATE_SYNTAX_TABLE(pos) \
232 ((pos) <= gl_state.b_property \
233 ? (update_syntax_table ((pos) + gl_state.offset, -1, 0, \
236 : ((pos) >= gl_state.e_property \
237 ? (update_syntax_table ((pos) + gl_state.offset, 1, 0, \
242 /* This macro should be called with FROM at the start of forward
243 search, or after the last position of the backward search. It
244 makes sure that the first char is picked up with correct table, so
245 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
247 Sign of COUNT gives the direction of the search.
250 #define SETUP_SYNTAX_TABLE(FROM, COUNT) \
251 gl_state.b_property = BEGV - 1; \
252 gl_state.e_property = ZV + 1; \
253 gl_state.object = Qnil; \
254 gl_state.use_global = 0; \
255 gl_state.offset = 0; \
256 gl_state.current_syntax_table = current_buffer->syntax_table; \
257 if (parse_sexp_lookup_properties) \
258 update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1, (COUNT), \
261 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
262 If it is t, ignore properties altogether.
264 This is meant for regex.c to use. For buffers, regex.c passes arguments
265 to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
266 So if it is a buffer, we set the offset field to BEGV - 1. */
268 #define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
271 gl_state.object = (OBJECT); \
272 if (BUFFERP (gl_state.object)) \
274 struct buffer *buf = XBUFFER (gl_state.object); \
275 gl_state.b_property = 0; \
276 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
277 gl_state.offset = BUF_BEGV (buf) - 1; \
279 else if (NILP (gl_state.object)) \
281 gl_state.b_property = 0; \
282 gl_state.e_property = ZV - BEGV + 1; \
283 gl_state.offset = BEGV - 1; \
285 else if (EQ (gl_state.object, Qt)) \
287 gl_state.b_property = - 1; \
288 gl_state.e_property = 1500000000; \
289 gl_state.offset = 0; \
293 gl_state.b_property = -1; \
294 gl_state.e_property = 1 + XSTRING (gl_state.object)->size; \
295 gl_state.offset = 0; \
297 gl_state.use_global = 0; \
298 gl_state.current_syntax_table = current_buffer->syntax_table; \
299 if (parse_sexp_lookup_properties) \
300 update_syntax_table ((BYTE_TO_CHAR ((FROM) + gl_state.offset) \
301 + (COUNT > 0 ? 0 : -1)), \
302 COUNT, 1, gl_state.object); \
308 Lisp_Object object
; /* The object we are scanning. */
309 int start
; /* Where to stop. */
310 int stop
; /* Where to stop. */
311 int use_global
; /* Whether to use global_code
313 Lisp_Object global_code
; /* Syntax code of current char. */
314 Lisp_Object current_syntax_table
; /* Syntax table for current pos. */
315 Lisp_Object old_prop
; /* Syntax-table prop at prev pos. */
316 int b_property
; /* Last index where c_s_t is
318 int e_property
; /* First index where c_s_t is
320 INTERVAL forward_i
; /* Where to start lookup on forward */
321 INTERVAL backward_i
; /* or backward movement. The
322 data in c_s_t is valid
323 between these intervals,
325 intervals too, depending
327 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
333 extern struct gl_state_s gl_state
;
334 extern int parse_sexp_lookup_properties
;
335 extern INTERVAL interval_of
P_ ((int, Lisp_Object
));
337 extern int scan_words
P_ ((int, int));