* buffer.h (FETCH_MULTIBYTE_CHAR): Define as inline.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
acaf905b 2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2012
429ab54e 3 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
8489eb67
RS
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
19
20
18160b98 21#include <config.h>
1b2de274 22
8489eb67 23#include <ctype.h>
1b2de274 24#include <sys/types.h>
d7306fe6 25#include <setjmp.h>
8489eb67
RS
26#include "lisp.h"
27#include "commands.h"
5c7b02ab 28#include "character.h"
e5560ff7 29#include "buffer.h"
e35f6ff7 30#include "keymap.h"
a1bc88d4 31#include "regex.h"
195d1361
RS
32
33/* Make syntax table lookup grant data in gl_state. */
34#define SYNTAX_ENTRY_VIA_PROPERTY
35
8489eb67 36#include "syntax.h"
195d1361 37#include "intervals.h"
c5683ceb
SM
38#include "category.h"
39
40/* Then there are seven single-bit flags that have the following meanings:
41 1. This character is the first of a two-character comment-start sequence.
42 2. This character is the second of a two-character comment-start sequence.
43 3. This character is the first of a two-character comment-end sequence.
44 4. This character is the second of a two-character comment-end sequence.
45 5. This character is a prefix, for backward-prefix-chars.
46 6. The char is part of a delimiter for comments of style "b".
47 7. This character is part of a nestable comment sequence.
48 8. The char is part of a delimiter for comments of style "c".
49 Note that any two-character sequence whose first character has flag 1
50 and whose second character has flag 2 will be interpreted as a comment start.
51
52 bit 6 and 8 are used to discriminate between different comment styles.
53 Languages such as C++ allow two orthogonal syntax start/end pairs
54 and bit 6 is used to determine whether a comment-end or Scommentend
55 ends style a or b. Comment markers can start style a, b, c, or bc.
56 Style a is always the default.
57 For 2-char comment markers, the style b flag is only looked up on the second
58 char of the comment marker and on the first char of the comment ender.
59 For style c (like to for the nested flag), the flag can be placed on any
60 one of the chars.
61 */
62
63/* These macros extract specific flags from an integer
64 that holds the syntax code and the flags. */
65
66#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
67
68#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
69
70#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
71
72#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
73
74#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
75
76#define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
77#define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
78/* FLAGS should be the flags of the main char of the comment marker, e.g.
79 the second for comstart and the first for comend. */
80#define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
81 (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
82 | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
83 | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
84
85#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
86
87/* These macros extract a particular flag for a given character. */
88
89#define SYNTAX_COMEND_FIRST(c) \
90 (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
91#define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
195d1361
RS
92
93/* We use these constants in place for comment-style and
94 string-ender-char to distinguish comments/strings started by
95 comment_fence and string_fence codes. */
96
eacd378d
PE
97#define ST_COMMENT_STYLE (256 + 1)
98#define ST_STRING_STYLE (256 + 2)
8489eb67 99
955cbe7b
PE
100static Lisp_Object Qsyntax_table_p;
101static Lisp_Object Qsyntax_table, Qscan_error;
8489eb67 102
d4b43b22 103#ifndef __GNUC__
8ea151b2
RS
104/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
105 if not compiled with GCC. No need to mark it, since it is used
106 only very temporarily. */
107Lisp_Object syntax_temp;
d4b43b22 108#endif
8ea151b2 109
e5d4f4dc
RS
110/* This is the internal form of the parse state used in parse-partial-sexp. */
111
112struct lisp_parse_state
113 {
d311d28c 114 EMACS_INT depth; /* Depth at end of parsing. */
6449674e 115 int instring; /* -1 if not within string, else desired terminator. */
d311d28c 116 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
6449674e
SM
117 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
118 int quoted; /* Nonzero if just after an escape char at end of parsing */
d311d28c 119 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
6449674e 120 /* Char number of most recent start-of-expression at current level */
d311d28c 121 ptrdiff_t thislevelstart;
6449674e 122 /* Char number of start of containing expression */
d311d28c
PE
123 ptrdiff_t prevlevelstart;
124 ptrdiff_t location; /* Char number at which parsing stopped. */
125 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
6449674e
SM
126 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
127 of levels (starting from outermost). */
e5d4f4dc
RS
128 };
129\f
37bef230
RS
130/* These variables are a cache for finding the start of a defun.
131 find_start_pos is the place for which the defun start was found.
132 find_start_value is the defun start position found for it.
6a140a74 133 find_start_value_byte is the corresponding byte position.
37bef230
RS
134 find_start_buffer is the buffer it was found in.
135 find_start_begv is the BEGV value when it was found.
136 find_start_modiff is the value of MODIFF when it was found. */
137
d311d28c
PE
138static ptrdiff_t find_start_pos;
139static ptrdiff_t find_start_value;
140static ptrdiff_t find_start_value_byte;
37bef230 141static struct buffer *find_start_buffer;
d311d28c
PE
142static ptrdiff_t find_start_begv;
143static EMACS_INT find_start_modiff;
6a140a74
RS
144
145
cd64ea1d 146static Lisp_Object Fsyntax_table_p (Lisp_Object);
f57e2426
J
147static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
148static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
149static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
150static void scan_sexps_forward (struct lisp_parse_state *,
d311d28c 151 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
f57e2426
J
152 int, Lisp_Object, int);
153static int in_classes (int, Lisp_Object);
195d1361 154\f
c5683ceb
SM
155/* Whether the syntax of the character C has the prefix flag set. */
156int syntax_prefix_flag_p (int c)
157{
158 return SYNTAX_PREFIX (c);
159}
195d1361
RS
160
161struct gl_state_s gl_state; /* Global state of syntax parser. */
162
195d1361
RS
163#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
164 to scan to property-change. */
165
6a140a74
RS
166/* Update gl_state to an appropriate interval which contains CHARPOS. The
167 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 168 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 169 valid at start, the rest is filled basing on OBJECT.
195d1361 170
6a140a74 171 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
172 direction than the intervals - or in an interval. We update the
173 current syntax-table basing on the property of this interval, and
6a140a74 174 update the interval to start further than CHARPOS - or be
195d1361 175 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 176 charpos to call this subroutine again - or be before/after the
195d1361
RS
177 start/end of OBJECT. */
178
179void
d311d28c 180update_syntax_table (ptrdiff_t charpos, EMACS_INT count, int init,
4f3a2f8d 181 Lisp_Object object)
195d1361
RS
182{
183 Lisp_Object tmp_table;
12cbf13f
PE
184 unsigned cnt = 0;
185 int invalidate = 1;
5a774522 186 INTERVAL i;
195d1361
RS
187
188 if (init)
189 {
bb0de084 190 gl_state.old_prop = Qnil;
195d1361
RS
191 gl_state.start = gl_state.b_property;
192 gl_state.stop = gl_state.e_property;
bb0de084
SM
193 i = interval_of (charpos, object);
194 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
195 invalidate = 0;
196 if (NULL_INTERVAL_P (i))
197 return;
f902a008 198 /* interval_of updates only ->position of the return value, so
d80f4cc7 199 update the parents manually to speed up update_interval. */
bb0de084 200 while (!NULL_PARENT (i))
d80f4cc7
RS
201 {
202 if (AM_RIGHT_CHILD (i))
439d5cb4 203 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 204 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
205 - TOTAL_LENGTH (INTERVAL_PARENT (i))
206 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 207 else
439d5cb4 208 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 209 + TOTAL_LENGTH (i);
439d5cb4 210 i = INTERVAL_PARENT (i);
d80f4cc7
RS
211 }
212 i = gl_state.forward_i;
bb0de084 213 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 214 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
215 goto update;
216 }
5a774522 217 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 218
423e705d 219 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 220 or further off. */
195d1361
RS
221 if (NULL_INTERVAL_P (i))
222 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 223 else if (charpos < i->position) /* Move left. */
195d1361
RS
224 {
225 if (count > 0)
f902a008 226 error ("Error in syntax_table logic for intervals <-");
195d1361 227 /* Update the interval. */
6a140a74 228 i = update_interval (i, charpos);
bb0de084 229 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
230 {
231 invalidate = 0;
195d1361 232 gl_state.forward_i = i;
ee0cdb48 233 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 234 }
423e705d 235 }
6a140a74 236 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
237 {
238 if (count < 0)
f902a008 239 error ("Error in syntax_table logic for intervals ->");
195d1361 240 /* Update the interval. */
6a140a74 241 i = update_interval (i, charpos);
bb0de084 242 if (i->position != gl_state.e_property)
195d1361
RS
243 {
244 invalidate = 0;
195d1361 245 gl_state.backward_i = i;
bb0de084 246 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
247 }
248 }
195d1361
RS
249
250 update:
251 tmp_table = textget (i->plist, Qsyntax_table);
252
253 if (invalidate)
254 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 255
423e705d
SM
256 if (invalidate) /* Did not get to adjacent interval. */
257 { /* with the same table => */
258 /* invalidate the old range. */
195d1361
RS
259 if (count > 0)
260 {
261 gl_state.backward_i = i;
bb0de084
SM
262 gl_state.b_property = i->position - gl_state.offset;
263 }
264 else
195d1361 265 {
bb0de084 266 gl_state.forward_i = i;
ee0cdb48 267 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
268 }
269 }
37bef230 270
bb0de084 271 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 272 {
bb0de084
SM
273 gl_state.current_syntax_table = tmp_table;
274 gl_state.old_prop = tmp_table;
275 if (EQ (Fsyntax_table_p (tmp_table), Qt))
276 {
277 gl_state.use_global = 0;
7d0393cf 278 }
bb0de084
SM
279 else if (CONSP (tmp_table))
280 {
281 gl_state.use_global = 1;
282 gl_state.global_code = tmp_table;
283 }
7d0393cf 284 else
bb0de084
SM
285 {
286 gl_state.use_global = 0;
4b4deea2 287 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
bb0de084 288 }
195d1361
RS
289 }
290
291 while (!NULL_INTERVAL_P (i))
292 {
293 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
294 {
295 if (count > 0)
bb0de084
SM
296 {
297 gl_state.e_property = i->position - gl_state.offset;
298 gl_state.forward_i = i;
299 }
300 else
301 {
5a774522
SM
302 gl_state.b_property
303 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
304 gl_state.backward_i = i;
305 }
306 return;
195d1361 307 }
7d0393cf 308 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
309 {
310 if (count > 0)
bb0de084 311 {
5a774522
SM
312 gl_state.e_property
313 = i->position + LENGTH (i) - gl_state.offset
314 /* e_property at EOB is not set to ZV but to ZV+1, so that
315 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
316 having to check eob between the two. */
317 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
318 gl_state.forward_i = i;
319 }
320 else
321 {
322 gl_state.b_property = i->position - gl_state.offset;
323 gl_state.backward_i = i;
324 }
325 return;
195d1361
RS
326 }
327 cnt++;
328 i = count > 0 ? next_interval (i) : previous_interval (i);
329 }
bb0de084
SM
330 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
331 if (count > 0)
332 gl_state.e_property = gl_state.stop;
333 else
334 gl_state.b_property = gl_state.start;
195d1361
RS
335}
336\f
6a140a74
RS
337/* Returns TRUE if char at CHARPOS is quoted.
338 Global syntax-table data should be set up already to be good at CHARPOS
339 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
340
341static int
d311d28c 342char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
195d1361
RS
343{
344 register enum syntaxcode code;
d311d28c 345 register ptrdiff_t beg = BEGV;
195d1361 346 register int quoted = 0;
d311d28c 347 ptrdiff_t orig = charpos;
6a140a74 348
02d8b017 349 while (charpos > beg)
195d1361 350 {
ab229fdd 351 int c;
02d8b017 352 DEC_BOTH (charpos, bytepos);
ab229fdd 353
6a140a74 354 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 355 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 356 code = SYNTAX (c);
6a140a74
RS
357 if (! (code == Scharquote || code == Sescape))
358 break;
359
6a140a74 360 quoted = !quoted;
195d1361 361 }
6a140a74
RS
362
363 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
364 return quoted;
365}
6a140a74 366
6a140a74
RS
367/* Return the bytepos one character before BYTEPOS.
368 We assume that BYTEPOS is not at the start of the buffer. */
369
d311d28c
PE
370static inline ptrdiff_t
371dec_bytepos (ptrdiff_t bytepos)
6a140a74 372{
4b4deea2 373 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
374 return bytepos - 1;
375
6a140a74
RS
376 DEC_POS (bytepos);
377 return bytepos;
378}
195d1361 379\f
2b34df4e 380/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
381 It should be the last one before POS, or nearly the last.
382
383 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 384 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
385
386 We record the information about where the scan started
387 and what its result was, so that another call in the same area
388 can return the same value very quickly.
195d1361
RS
389
390 There is no promise at which position the global syntax data is
391 valid on return from the subroutine, so the caller should explicitly
392 update the global data. */
37bef230 393
d311d28c
PE
394static ptrdiff_t
395find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
37bef230 396{
d311d28c 397 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
37bef230 398
1fd1cc2f
RS
399 if (!open_paren_in_column_0_is_defun_start)
400 {
6e20a0d4 401 find_start_value = BEGV;
1fd1cc2f 402 find_start_value_byte = BEGV_BYTE;
6e20a0d4
CY
403 find_start_buffer = current_buffer;
404 find_start_modiff = MODIFF;
405 find_start_begv = BEGV;
406 find_start_pos = pos;
1fd1cc2f
RS
407 return BEGV;
408 }
409
37bef230
RS
410 /* Use previous finding, if it's valid and applies to this inquiry. */
411 if (current_buffer == find_start_buffer
412 /* Reuse the defun-start even if POS is a little farther on.
413 POS might be in the next defun, but that's ok.
414 Our value may not be the best possible, but will still be usable. */
415 && pos <= find_start_pos + 1000
416 && pos >= find_start_value
417 && BEGV == find_start_begv
418 && MODIFF == find_start_modiff)
419 return find_start_value;
420
421 /* Back up to start of line. */
6a140a74 422 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 423
195d1361
RS
424 /* We optimize syntax-table lookup for rare updates. Thus we accept
425 only those `^\s(' which are good in global _and_ text-property
426 syntax-tables. */
d48cd3f4 427 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 428 while (PT > BEGV)
37bef230 429 {
ab229fdd
AS
430 int c;
431
1fd1cc2f
RS
432 /* Open-paren at start of line means we may have found our
433 defun-start. */
327719ee 434 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 435 if (SYNTAX (c) == Sopen)
195d1361 436 {
1fd1cc2f 437 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 438 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 439 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
440 break;
441 /* Now fallback to the default value. */
d48cd3f4 442 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 443 }
1fd1cc2f
RS
444 /* Move to beg of previous line. */
445 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
446 }
447
448 /* Record what we found, for the next try. */
6a140a74
RS
449 find_start_value = PT;
450 find_start_value_byte = PT_BYTE;
37bef230
RS
451 find_start_buffer = current_buffer;
452 find_start_modiff = MODIFF;
453 find_start_begv = BEGV;
454 find_start_pos = pos;
455
6a140a74
RS
456 TEMP_SET_PT_BOTH (opoint, opoint_byte);
457
37bef230
RS
458 return find_start_value;
459}
460\f
f902a008
RS
461/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
462
463static int
d311d28c 464prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
f902a008
RS
465{
466 int c, val;
467
468 DEC_BOTH (pos, pos_byte);
469 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
470 c = FETCH_CHAR (pos_byte);
471 val = SYNTAX_COMEND_FIRST (c);
472 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
473 return val;
474}
475
476/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
477
3f679f55
SM
478/* static int
479 * prev_char_comstart_first (pos, pos_byte)
480 * int pos, pos_byte;
481 * {
482 * int c, val;
7d0393cf 483 *
3f679f55
SM
484 * DEC_BOTH (pos, pos_byte);
485 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
486 * c = FETCH_CHAR (pos_byte);
487 * val = SYNTAX_COMSTART_FIRST (c);
488 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
489 * return val;
490 * } */
f902a008 491
6a140a74
RS
492/* Checks whether charpos FROM is at the end of a comment.
493 FROM_BYTE is the bytepos corresponding to FROM.
494 Do not move back before STOP.
495
496 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
497 return -1 otherwise.
498
499 If successful, store the charpos of the comment's beginning
500 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
501
502 Global syntax data remains valid for backward search starting at
503 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
504
505static int
d311d28c 506back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop, int comnested, int comstyle, ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr)
195d1361
RS
507{
508 /* Look back, counting the parity of string-quotes,
509 and recording the comment-starters seen.
510 When we reach a safe place, assume that's not in a string;
511 then step the main scan to the earliest comment-starter seen
512 an even number of string quotes away from the safe place.
513
514 OFROM[I] is position of the earliest comment-starter seen
515 which is I+2X quotes from the comment-end.
516 PARITY is current parity of quotes from the comment end. */
3ee5041c 517 int string_style = -1; /* Presumed outside of any string. */
195d1361 518 int string_lossage = 0;
f7c436c1 519 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 520 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
521 comment-starter we might see later could be a false positive (hidden
522 inside another comment).
523 Test case: { a (* b } c (* d *) */
524 int comment_lossage = 0;
d311d28c
PE
525 ptrdiff_t comment_end = from;
526 ptrdiff_t comment_end_byte = from_byte;
527 ptrdiff_t comstart_pos = 0;
528 ptrdiff_t comstart_byte IF_LINT (= 0);
eb35b628
RS
529 /* Place where the containing defun starts,
530 or 0 if we didn't come across it yet. */
d311d28c
PE
531 ptrdiff_t defun_start = 0;
532 ptrdiff_t defun_start_byte = 0;
195d1361 533 register enum syntaxcode code;
95ff8dfc 534 int nesting = 1; /* current comment nesting */
ea315ed6 535 int c;
3f679f55
SM
536 int syntax = 0;
537
538 /* FIXME: A }} comment-ender style leads to incorrect behavior
539 in the case of {{ c }}} because we ignore the last two chars which are
540 assumed to be comment-enders although they aren't. */
195d1361
RS
541
542 /* At beginning of range to scan, we're outside of strings;
543 that determines quote parity to the comment-end. */
544 while (from != stop)
545 {
d311d28c 546 ptrdiff_t temp_byte;
4f3a2f8d 547 int prev_syntax, com2start, com2end;
fbb3da77 548 int comstart;
6a140a74 549
195d1361 550 /* Move back and examine a character. */
6a140a74 551 DEC_BOTH (from, from_byte);
195d1361
RS
552 UPDATE_SYNTAX_TABLE_BACKWARD (from);
553
3f679f55 554 prev_syntax = syntax;
b7dbcc19 555 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 556 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
557 code = SYNTAX (c);
558
3f679f55
SM
559 /* Check for 2-char comment markers. */
560 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
561 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
c5683ceb
SM
562 && (comstyle
563 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
3f679f55
SM
564 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
565 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
566 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
567 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
fbb3da77 568 comstart = (com2start || code == Scomment);
01f44d5a 569
3f679f55
SM
570 /* Nasty cases with overlapping 2-char comment markers:
571 - snmp-mode: -- c -- foo -- c --
572 --- c --
573 ------ c --
574 - c-mode: *||*
575 |* *|* *|
576 |*| |* |*|
577 /// */
578
579 /* If a 2-char comment sequence partly overlaps with another,
fbb3da77
SM
580 we don't try to be clever. E.g. |*| in C, or }% in modes that
581 have %..\n and %{..}%. */
582 if (from > stop && (com2end || comstart))
195d1361 583 {
d311d28c 584 ptrdiff_t next = from, next_byte = from_byte;
4f3a2f8d 585 int next_c, next_syntax;
3f679f55
SM
586 DEC_BOTH (next, next_byte);
587 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 588 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55 589 next_syntax = SYNTAX_WITH_FLAGS (next_c);
fbb3da77 590 if (((comstart || comnested)
3f679f55
SM
591 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
592 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
593 || ((com2end || comnested)
594 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
c5683ceb
SM
595 && (comstyle
596 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
3f679f55
SM
597 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
598 goto lossage;
599 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 600 }
3f679f55
SM
601
602 if (com2start && comstart_pos == 0)
603 /* We're looking at a comment starter. But it might be a comment
604 ender as well (see snmp-mode). The first time we see one, we
605 need to consider it as a comment starter,
606 and the subsequent times as a comment ender. */
607 com2end = 0;
608
609 /* Turn a 2-char comment sequences into the appropriate syntax. */
610 if (com2end)
611 code = Sendcomment;
612 else if (com2start)
613 code = Scomment;
614 /* Ignore comment starters of a different style. */
615 else if (code == Scomment
c5683ceb 616 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
3f679f55 617 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 618 continue;
195d1361 619
9828a477
KH
620 /* Ignore escaped characters, except comment-enders. */
621 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
622 continue;
623
3ee5041c 624 switch (code)
195d1361 625 {
3ee5041c
SM
626 case Sstring_fence:
627 case Scomment_fence:
628 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
629 case Sstring:
630 /* Track parity of quotes. */
631 if (string_style == -1)
632 /* Entering a string. */
633 string_style = c;
634 else if (string_style == c)
635 /* Leaving the string. */
636 string_style = -1;
637 else
638 /* If we have two kinds of string delimiters.
639 There's no way to grok this scanning backwards. */
195d1361 640 string_lossage = 1;
3ee5041c 641 break;
7d0393cf 642
3ee5041c
SM
643 case Scomment:
644 /* We've already checked that it is the relevant comstyle. */
f7c436c1 645 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
646 /* There are odd string quotes involved, so let's be careful.
647 Test case in Pascal: " { " a { " } */
648 goto lossage;
649
f7c436c1
SM
650 if (!comnested)
651 {
652 /* Record best comment-starter so far. */
653 comstart_pos = from;
654 comstart_byte = from_byte;
655 }
656 else if (--nesting <= 0)
95ff8dfc
RS
657 /* nested comments have to be balanced, so we don't need to
658 keep looking for earlier ones. We use here the same (slightly
659 incorrect) reasoning as below: since it is followed by uniform
660 paired string quotes, this comment-start has to be outside of
661 strings, else the comment-end itself would be inside a string. */
662 goto done;
3ee5041c
SM
663 break;
664
02010917 665 case Sendcomment:
c5683ceb 666 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
d070eb22 667 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 668 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
669 /* This is the same style of comment ender as ours. */
670 {
671 if (comnested)
672 nesting++;
673 else
674 /* Anything before that can't count because it would match
675 this comment-ender rather than ours. */
676 from = stop; /* Break out of the loop. */
677 }
f7c436c1
SM
678 else if (comstart_pos != 0 || c != '\n')
679 /* We're mixing comment styles here, so we'd better be careful.
680 The (comstart_pos != 0 || c != '\n') check is not quite correct
681 (we should just always set comment_lossage), but removing it
682 would imply that any multiline comment in C would go through
683 lossage, which seems overkill.
684 The failure should only happen in the rare cases such as
685 { (* } *) */
686 comment_lossage = 1;
02010917 687 break;
195d1361 688
02010917
SM
689 case Sopen:
690 /* Assume a defun-start point is outside of strings. */
691 if (open_paren_in_column_0_is_defun_start
692 && (from == stop
693 || (temp_byte = dec_bytepos (from_byte),
694 FETCH_CHAR (temp_byte) == '\n')))
695 {
696 defun_start = from;
697 defun_start_byte = from_byte;
698 from = stop; /* Break out of the loop. */
699 }
eb35b628 700 break;
02010917
SM
701
702 default:
f7c436c1 703 break;
eb35b628 704 }
195d1361
RS
705 }
706
707 if (comstart_pos == 0)
708 {
709 from = comment_end;
6a140a74 710 from_byte = comment_end_byte;
195d1361
RS
711 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
712 }
f7c436c1
SM
713 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
714 or `done'), then we've found the beginning of the non-nested comment. */
715 else if (1) /* !comnested */
195d1361
RS
716 {
717 from = comstart_pos;
6a140a74 718 from_byte = comstart_byte;
789f3320 719 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
720 }
721 else
722 {
3ee5041c
SM
723 struct lisp_parse_state state;
724 lossage:
195d1361
RS
725 /* We had two kinds of string delimiters mixed up
726 together. Decode this going forwards.
02010917 727 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
728 to the one in question; this records where we
729 last passed a comment starter. */
eb35b628
RS
730 /* If we did not already find the defun start, find it now. */
731 if (defun_start == 0)
732 {
733 defun_start = find_defun_start (comment_end, comment_end_byte);
734 defun_start_byte = find_start_value_byte;
735 }
02010917 736 do
195d1361 737 {
02010917
SM
738 scan_sexps_forward (&state,
739 defun_start, defun_start_byte,
d311d28c
PE
740 comment_end, TYPE_MINIMUM (EMACS_INT),
741 0, Qnil, 0);
02010917
SM
742 defun_start = comment_end;
743 if (state.incomment == (comnested ? 1 : -1)
744 && state.comstyle == comstyle)
745 from = state.comstr_start;
746 else
747 {
748 from = comment_end;
749 if (state.incomment)
750 /* If comment_end is inside some other comment, maybe ours
751 is nested, so we need to try again from within the
752 surrounding comment. Example: { a (* " *) */
753 {
754 /* FIXME: We should advance by one or two chars. */
755 defun_start = state.comstr_start + 2;
756 defun_start_byte = CHAR_TO_BYTE (defun_start);
757 }
758 }
759 } while (defun_start < comment_end);
760
6a140a74 761 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
762 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
763 }
7d0393cf 764
95ff8dfc 765 done:
6a140a74
RS
766 *charpos_ptr = from;
767 *bytepos_ptr = from_byte;
768
1aa963c8 769 return (from == comment_end) ? -1 : from;
195d1361
RS
770}
771\f
8489eb67 772DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
773 doc: /* Return t if OBJECT is a syntax table.
774Currently, any char-table counts as a syntax table. */)
5842a27b 775 (Lisp_Object object)
8489eb67 776{
2203e1e8 777 if (CHAR_TABLE_P (object)
e704cb4b 778 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
779 return Qt;
780 return Qnil;
781}
782
8ea151b2 783static void
971de7fb 784check_syntax_table (Lisp_Object obj)
8489eb67 785{
47f5f6ae
KS
786 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
787 Qsyntax_table_p, obj);
7d0393cf 788}
8489eb67 789
8489eb67 790DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
791 doc: /* Return the current syntax table.
792This is the one specified by the current buffer. */)
5842a27b 793 (void)
8489eb67 794{
4b4deea2 795 return BVAR (current_buffer, syntax_table);
8489eb67
RS
796}
797
798DEFUN ("standard-syntax-table", Fstandard_syntax_table,
799 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
800 doc: /* Return the standard syntax table.
801This is the one used for new buffers. */)
5842a27b 802 (void)
8489eb67
RS
803{
804 return Vstandard_syntax_table;
805}
806
807DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
808 doc: /* Construct a new syntax table and return it.
809It is a copy of the TABLE, which defaults to the standard syntax table. */)
5842a27b 810 (Lisp_Object table)
8489eb67 811{
8ea151b2
RS
812 Lisp_Object copy;
813
265a9e55 814 if (!NILP (table))
8ea151b2
RS
815 check_syntax_table (table);
816 else
817 table = Vstandard_syntax_table;
818
819 copy = Fcopy_sequence (table);
0f867324
RS
820
821 /* Only the standard syntax table should have a default element.
822 Other syntax tables should inherit from parents instead. */
823 XCHAR_TABLE (copy)->defalt = Qnil;
824
825 /* Copied syntax tables should all have parents.
826 If we copied one with no parent, such as the standard syntax table,
827 use the standard syntax table as the copy's parent. */
828 if (NILP (XCHAR_TABLE (copy)->parent))
829 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 830 return copy;
8489eb67
RS
831}
832
833DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
834 doc: /* Select a new syntax table for the current buffer.
835One argument, a syntax table. */)
5842a27b 836 (Lisp_Object table)
8489eb67 837{
4e4dfa78 838 int idx;
8ea151b2 839 check_syntax_table (table);
4b4deea2 840 BVAR (current_buffer, syntax_table) = table;
8489eb67 841 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
842 idx = PER_BUFFER_VAR_IDX (syntax_table);
843 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
844 return table;
845}
846\f
847/* Convert a letter which signifies a syntax code
848 into the code it signifies.
195d1361 849 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
850
851unsigned char syntax_spec_code[0400] =
852 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
853 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
854 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
855 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 856 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
857 (char) Smath, 0377, 0377, (char) Squote,
858 (char) Sopen, (char) Sclose, 0377, 0377,
859 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
860 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
861 0377, 0377, 0377, 0377,
862 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 863 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
864 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
865 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
866 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
867 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
868 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
869 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 870 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
871 };
872
195d1361 873/* Indexed by syntax code, give the letter that describes it. */
8489eb67 874
195d1361 875char syntax_code_spec[16] =
8489eb67 876 {
195d1361
RS
877 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
878 '!', '|'
8489eb67 879 };
93da5fff
KH
880
881/* Indexed by syntax code, give the object (cons of syntax code and
882 nil) to be stored in syntax table. Since these objects can be
883 shared among syntax tables, we generate them in advance. By
884 sharing objects, the function `describe-syntax' can give a more
885 compact listing. */
886static Lisp_Object Vsyntax_code_object;
887
8489eb67
RS
888\f
889DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 890 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
891For example, if CHARACTER is a word constituent, the
892character `w' (119) is returned.
fdb82f93
PJ
893The characters that correspond to various syntax codes
894are listed in the documentation of `modify-syntax-entry'. */)
5842a27b 895 (Lisp_Object character)
8489eb67 896{
8ea151b2 897 int char_int;
774b9a60 898 CHECK_CHARACTER (character);
2203e1e8 899 char_int = XINT (character);
d48cd3f4 900 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 901 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
902}
903
904DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93 905 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
5842a27b 906 (Lisp_Object character)
beefa22e 907{
8ea151b2 908 int char_int, code;
b7826503 909 CHECK_NUMBER (character);
2203e1e8 910 char_int = XINT (character);
d48cd3f4 911 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 912 code = SYNTAX (char_int);
a8bd7cd8 913 if (code == Sopen || code == Sclose)
2e34157c 914 return SYNTAX_MATCH (char_int);
beefa22e 915 return Qnil;
8489eb67
RS
916}
917
c65adb44 918DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
919 doc: /* Convert a syntax specification STRING into syntax cell form.
920STRING should be a string as it is allowed as argument of
921`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 922\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93 923text property. */)
5842a27b 924 (Lisp_Object string)
8489eb67 925{
2e567bd3 926 register const unsigned char *p;
8489eb67 927 register enum syntaxcode code;
247e20a8 928 int val;
8ea151b2 929 Lisp_Object match;
8489eb67 930
b7826503 931 CHECK_STRING (string);
8489eb67 932
d5db4077 933 p = SDATA (string);
8489eb67
RS
934 code = (enum syntaxcode) syntax_spec_code[*p++];
935 if (((int) code & 0377) == 0377)
b764a653 936 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 937
8ea151b2 938 if (code == Sinherit)
c65adb44 939 return Qnil;
8ea151b2
RS
940
941 if (*p)
d1be9f0f 942 {
93da5fff 943 int len;
62a6e103 944 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 945 XSETINT (match, character);
d1be9f0f
RS
946 if (XFASTINT (match) == ' ')
947 match = Qnil;
93da5fff 948 p += len;
d1be9f0f
RS
949 }
950 else
8ea151b2 951 match = Qnil;
8489eb67 952
8ea151b2 953 val = (int) code;
8489eb67
RS
954 while (*p)
955 switch (*p++)
956 {
957 case '1':
247e20a8 958 val |= 1 << 16;
8489eb67
RS
959 break;
960
961 case '2':
247e20a8 962 val |= 1 << 17;
8489eb67
RS
963 break;
964
965 case '3':
247e20a8 966 val |= 1 << 18;
8489eb67
RS
967 break;
968
969 case '4':
247e20a8 970 val |= 1 << 19;
8489eb67
RS
971 break;
972
973 case 'p':
247e20a8 974 val |= 1 << 20;
8489eb67 975 break;
e5d4f4dc
RS
976
977 case 'b':
247e20a8 978 val |= 1 << 21;
e5d4f4dc 979 break;
95ff8dfc
RS
980
981 case 'n':
982 val |= 1 << 22;
983 break;
c5683ceb
SM
984
985 case 'c':
986 val |= 1 << 23;
987 break;
8489eb67 988 }
7d0393cf 989
77b37c05 990 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
c65adb44 991 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
992 else
993 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
994 return Fcons (make_number (val), match);
995}
996
fdb82f93 997/* I really don't know why this is interactive
2c9e1900 998 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 999DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 1000 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 1001 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 1002The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 1003 the current buffer's syntax table.
5c7b02ab 1004CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 1005in the range MIN to MAX are changed.
fdb82f93
PJ
1006The first character of NEWENTRY should be one of the following:
1007 Space or - whitespace syntax. w word constituent.
1008 _ symbol constituent. . punctuation.
1009 ( open-parenthesis. ) close-parenthesis.
1010 " string quote. \\ escape.
1011 $ paired delimiter. ' expression quote or prefix operator.
1012 < comment starter. > comment ender.
1013 / character-quote. @ inherit from `standard-syntax-table'.
1014 | generic string fence. ! generic comment fence.
1015
1016Only single-character comment start and end sequences are represented thus.
1017Two-character sequences are represented as described below.
1018The second character of NEWENTRY is the matching parenthesis,
1019 used only if the first character is `(' or `)'.
1020Any additional characters are flags.
1021Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1022 1 means CHAR is the start of a two-char comment start sequence.
1023 2 means CHAR is the second character of such a sequence.
1024 3 means CHAR is the start of a two-char comment end sequence.
1025 4 means CHAR is the second character of such a sequence.
fdb82f93 1026
c5683ceb 1027There can be several orthogonal comment sequences. This is to support
fdb82f93
PJ
1028language modes such as C++. By default, all comment sequences are of style
1029a, but you can set the comment sequence style to b (on the second character
c5683ceb
SM
1030of a comment-start, and the first character of a comment-end sequence) and/or
1031c (on any of its chars) using this flag:
a50a10a0 1032 b means CHAR is part of comment sequence b.
c5683ceb 1033 c means CHAR is part of comment sequence c.
a50a10a0 1034 n means CHAR is part of a nestable comment sequence.
fdb82f93 1035
a50a10a0 1036 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1037 such characters are treated as whitespace when they occur
a50a10a0 1038 between expressions.
fedc6ab5 1039usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
5842a27b 1040 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
c65adb44 1041{
5c7b02ab
KH
1042 if (CONSP (c))
1043 {
8f924df7
KH
1044 CHECK_CHARACTER_CAR (c);
1045 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1046 }
1047 else
1048 CHECK_CHARACTER (c);
c65adb44
SM
1049
1050 if (NILP (syntax_table))
4b4deea2 1051 syntax_table = BVAR (current_buffer, syntax_table);
c65adb44
SM
1052 else
1053 check_syntax_table (syntax_table);
8489eb67 1054
5c7b02ab
KH
1055 newentry = Fstring_to_syntax (newentry);
1056 if (CONSP (c))
1057 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1058 else
1059 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1060
1061 /* We clear the regexp cache, since character classes can now have
1062 different values from those in the compiled regexps.*/
1063 clear_regexp_cache ();
1064
8489eb67
RS
1065 return Qnil;
1066}
1067\f
1068/* Dump syntax table to buffer in human-readable format */
1069
62abe9cb
SM
1070DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1071 Sinternal_describe_syntax_value, 1, 1, 0,
1072 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
5842a27b 1073 (Lisp_Object syntax)
8489eb67
RS
1074{
1075 register enum syntaxcode code;
c5683ceb
SM
1076 int syntax_code;
1077 char desc, start1, start2, end1, end2, prefix,
1078 comstyleb, comstylec, comnested;
8489eb67 1079 char str[2];
62abe9cb 1080 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1081
8ea151b2
RS
1082 if (NILP (value))
1083 {
62abe9cb
SM
1084 insert_string ("default");
1085 return syntax;
8ea151b2
RS
1086 }
1087
908b7fea
KH
1088 if (CHAR_TABLE_P (value))
1089 {
62abe9cb
SM
1090 insert_string ("deeper char-table ...");
1091 return syntax;
908b7fea
KH
1092 }
1093
8ea151b2
RS
1094 if (!CONSP (value))
1095 {
62abe9cb
SM
1096 insert_string ("invalid");
1097 return syntax;
8ea151b2
RS
1098 }
1099
c1d497be
KR
1100 first = XCAR (value);
1101 match_lisp = XCDR (value);
8ea151b2 1102
d311d28c 1103 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
8489eb67 1104 {
62abe9cb
SM
1105 insert_string ("invalid");
1106 return syntax;
8489eb67
RS
1107 }
1108
d311d28c 1109 syntax_code = XINT (first) & INT_MAX;
c5683ceb
SM
1110 code = (enum syntaxcode) (syntax_code & 0377);
1111 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1112 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1113 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1114 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1115 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1116 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1117 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1118 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
8489eb67
RS
1119
1120 if ((int) code < 0 || (int) code >= (int) Smax)
1121 {
1122 insert_string ("invalid");
62abe9cb 1123 return syntax;
8489eb67
RS
1124 }
1125 desc = syntax_code_spec[(int) code];
1126
1127 str[0] = desc, str[1] = 0;
1128 insert (str, 1);
1129
93da5fff
KH
1130 if (NILP (match_lisp))
1131 insert (" ", 1);
1132 else
1133 insert_char (XINT (match_lisp));
8489eb67 1134
8489eb67
RS
1135 if (start1)
1136 insert ("1", 1);
1137 if (start2)
1138 insert ("2", 1);
1139
1140 if (end1)
1141 insert ("3", 1);
1142 if (end2)
1143 insert ("4", 1);
1144
1145 if (prefix)
1146 insert ("p", 1);
c5683ceb 1147 if (comstyleb)
e5d4f4dc 1148 insert ("b", 1);
c5683ceb
SM
1149 if (comstylec)
1150 insert ("c", 1);
e6365855
SM
1151 if (comnested)
1152 insert ("n", 1);
8489eb67
RS
1153
1154 insert_string ("\twhich means: ");
1155
0220c518 1156 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1157 {
1158 case Swhitespace:
1159 insert_string ("whitespace"); break;
1160 case Spunct:
1161 insert_string ("punctuation"); break;
1162 case Sword:
1163 insert_string ("word"); break;
1164 case Ssymbol:
1165 insert_string ("symbol"); break;
1166 case Sopen:
1167 insert_string ("open"); break;
1168 case Sclose:
1169 insert_string ("close"); break;
1170 case Squote:
d671382e 1171 insert_string ("prefix"); break;
8489eb67
RS
1172 case Sstring:
1173 insert_string ("string"); break;
1174 case Smath:
1175 insert_string ("math"); break;
1176 case Sescape:
1177 insert_string ("escape"); break;
1178 case Scharquote:
1179 insert_string ("charquote"); break;
1180 case Scomment:
1181 insert_string ("comment"); break;
1182 case Sendcomment:
1183 insert_string ("endcomment"); break;
d671382e
SM
1184 case Sinherit:
1185 insert_string ("inherit"); break;
1186 case Scomment_fence:
1187 insert_string ("comment fence"); break;
1188 case Sstring_fence:
1189 insert_string ("string fence"); break;
8489eb67
RS
1190 default:
1191 insert_string ("invalid");
62abe9cb 1192 return syntax;
8489eb67
RS
1193 }
1194
8ea151b2 1195 if (!NILP (match_lisp))
8489eb67
RS
1196 {
1197 insert_string (", matches ");
8ea151b2 1198 insert_char (XINT (match_lisp));
8489eb67
RS
1199 }
1200
1201 if (start1)
1202 insert_string (",\n\t is the first character of a comment-start sequence");
1203 if (start2)
1204 insert_string (",\n\t is the second character of a comment-start sequence");
1205
1206 if (end1)
1207 insert_string (",\n\t is the first character of a comment-end sequence");
1208 if (end2)
1209 insert_string (",\n\t is the second character of a comment-end sequence");
c5683ceb 1210 if (comstyleb)
e5d4f4dc 1211 insert_string (" (comment style b)");
c5683ceb
SM
1212 if (comstylec)
1213 insert_string (" (comment style c)");
e6365855
SM
1214 if (comnested)
1215 insert_string (" (nestable)");
e5d4f4dc 1216
8489eb67
RS
1217 if (prefix)
1218 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1219
62abe9cb
SM
1220 return syntax;
1221}
8489eb67
RS
1222\f
1223/* Return the position across COUNT words from FROM.
1224 If that many words cannot be found before the end of the buffer, return 0.
1225 COUNT negative means scan backward and stop at word beginning. */
1226
d311d28c
PE
1227ptrdiff_t
1228scan_words (register ptrdiff_t from, register EMACS_INT count)
8489eb67 1229{
d311d28c
PE
1230 register ptrdiff_t beg = BEGV;
1231 register ptrdiff_t end = ZV;
1232 register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1233 register enum syntaxcode code;
1234 int ch0, ch1;
fca8fe46 1235 Lisp_Object func, pos;
8489eb67
RS
1236
1237 immediate_quit = 1;
1238 QUIT;
1239
195d1361
RS
1240 SETUP_SYNTAX_TABLE (from, count);
1241
8489eb67
RS
1242 while (count > 0)
1243 {
1244 while (1)
1245 {
1246 if (from == end)
1247 {
1248 immediate_quit = 0;
1249 return 0;
1250 }
195d1361 1251 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1252 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1253 code = SYNTAX (ch0);
6a140a74 1254 INC_BOTH (from, from_byte);
8489eb67
RS
1255 if (words_include_escapes
1256 && (code == Sescape || code == Scharquote))
1257 break;
1258 if (code == Sword)
1259 break;
8489eb67 1260 }
93da5fff
KH
1261 /* Now CH0 is a character which begins a word and FROM is the
1262 position of the next character. */
8f924df7 1263 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1264 if (! NILP (Ffboundp (func)))
8489eb67 1265 {
869bb237 1266 pos = call2 (func, make_number (from - 1), make_number (end));
17f85215 1267 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
8f924df7
KH
1268 {
1269 from = XINT (pos);
1270 from_byte = CHAR_TO_BYTE (from);
1271 }
8489eb67 1272 }
869bb237 1273 else
8cb8232a 1274 {
8cb8232a
KH
1275 while (1)
1276 {
1277 if (from == end) break;
1278 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1279 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1280 code = SYNTAX (ch1);
1281 if ((code != Sword
1282 && (! words_include_escapes
1283 || (code != Sescape && code != Scharquote)))
2b70a11c 1284 || word_boundary_p (ch0, ch1))
869bb237 1285 break;
8cb8232a
KH
1286 INC_BOTH (from, from_byte);
1287 ch0 = ch1;
1288 }
8489eb67
RS
1289 }
1290 count--;
1291 }
1292 while (count < 0)
1293 {
1294 while (1)
1295 {
1296 if (from == beg)
1297 {
1298 immediate_quit = 0;
1299 return 0;
1300 }
6a140a74 1301 DEC_BOTH (from, from_byte);
195d1361 1302 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1303 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1304 code = SYNTAX (ch1);
8489eb67
RS
1305 if (words_include_escapes
1306 && (code == Sescape || code == Scharquote))
1307 break;
1308 if (code == Sword)
1309 break;
8489eb67 1310 }
93da5fff
KH
1311 /* Now CH1 is a character which ends a word and FROM is the
1312 position of it. */
8f924df7 1313 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1314 if (! NILP (Ffboundp (func)))
8f924df7 1315 {
869bb237 1316 pos = call2 (func, make_number (from), make_number (beg));
17f85215 1317 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
8f924df7
KH
1318 {
1319 from = XINT (pos);
1320 from_byte = CHAR_TO_BYTE (from);
1321 }
8489eb67 1322 }
869bb237 1323 else
8489eb67 1324 {
8cb8232a
KH
1325 while (1)
1326 {
8cb8232a
KH
1327 if (from == beg)
1328 break;
262be72a 1329 DEC_BOTH (from, from_byte);
8cb8232a 1330 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1331 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1332 code = SYNTAX (ch0);
1333 if ((code != Sword
1334 && (! words_include_escapes
1335 || (code != Sescape && code != Scharquote)))
2b70a11c 1336 || word_boundary_p (ch0, ch1))
262be72a
MB
1337 {
1338 INC_BOTH (from, from_byte);
1339 break;
1340 }
8cb8232a
KH
1341 ch1 = ch0;
1342 }
8489eb67
RS
1343 }
1344 count++;
1345 }
1346
1347 immediate_quit = 0;
1348
1349 return from;
1350}
1351
a7ca3326 1352DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1353 doc: /* Move point forward ARG words (backward if ARG is negative).
1354Normally returns t.
1355If an edge of the buffer or a field boundary is reached, point is left there
1356and the function returns nil. Field boundaries are not noticed if
1357`inhibit-field-text-motion' is non-nil. */)
5842a27b 1358 (Lisp_Object arg)
8489eb67 1359{
c096ae4d 1360 Lisp_Object tmp;
d311d28c 1361 ptrdiff_t orig_val, val;
8489eb67 1362
839966f3
KH
1363 if (NILP (arg))
1364 XSETFASTINT (arg, 1);
1365 else
1366 CHECK_NUMBER (arg);
1367
1368 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1369 if (! orig_val)
839966f3 1370 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1371
b8855607 1372 /* Avoid jumping out of an input field. */
c096ae4d
SM
1373 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1374 Qt, Qnil, Qnil);
1375 val = XFASTINT (tmp);
7d0393cf 1376
8489eb67 1377 SET_PT (val);
e6d8341f 1378 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1379}
1380\f
a7ca3326 1381DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1382 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1383STRING is like the inside of a `[...]' in a regular expression
1384except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1385 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1386Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1387With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1388Char classes, e.g. `[:alpha:]', are supported.
1389
1390Returns the distance traveled, either zero or positive. */)
5842a27b 1391 (Lisp_Object string, Lisp_Object lim)
195d1361 1392{
327719ee 1393 return skip_chars (1, string, lim, 1);
195d1361
RS
1394}
1395
a7ca3326 1396DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1397 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1398See `skip-chars-forward' for details.
1399Returns the distance traveled, either zero or negative. */)
5842a27b 1400 (Lisp_Object string, Lisp_Object lim)
195d1361 1401{
327719ee 1402 return skip_chars (0, string, lim, 1);
195d1361
RS
1403}
1404
1405DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1406 doc: /* Move point forward across chars in specified syntax classes.
1407SYNTAX is a string of syntax code characters.
1408Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1409If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1410This function returns the distance traveled, either zero or positive. */)
5842a27b 1411 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1412{
b7dbcc19 1413 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1414}
1415
1416DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1417 doc: /* Move point backward across chars in specified syntax classes.
1418SYNTAX is a string of syntax code characters.
1419Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1420If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1421This function returns the distance traveled, either zero or negative. */)
5842a27b 1422 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1423{
b7dbcc19 1424 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1425}
1426
6a140a74 1427static Lisp_Object
971de7fb 1428skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
195d1361 1429{
195d1361
RS
1430 register unsigned int c;
1431 unsigned char fastmap[0400];
b7dbcc19 1432 /* Store the ranges of non-ASCII characters. */
4f63c6bb 1433 int *char_ranges IF_LINT (= NULL);
7e68b0ea 1434 int n_char_ranges = 0;
195d1361 1435 int negate = 0;
d311d28c 1436 register ptrdiff_t i, i_byte;
b7dbcc19
KH
1437 /* Set to 1 if the current buffer is multibyte and the region
1438 contains non-ASCII chars. */
1439 int multibyte;
1440 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1441 chars. */
1674d9a2 1442 int string_multibyte;
d311d28c 1443 ptrdiff_t size_byte;
2e567bd3 1444 const unsigned char *str;
82d497fc 1445 int len;
a1bc88d4 1446 Lisp_Object iso_classes;
195d1361 1447
b7826503 1448 CHECK_STRING (string);
a1bc88d4 1449 iso_classes = Qnil;
82d497fc 1450
195d1361
RS
1451 if (NILP (lim))
1452 XSETINT (lim, forwardp ? ZV : BEGV);
1453 else
b7826503 1454 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1455
1456 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1457 if (XINT (lim) > ZV)
1458 XSETFASTINT (lim, ZV);
1459 if (XINT (lim) < BEGV)
1460 XSETFASTINT (lim, BEGV);
1461
4b4deea2 1462 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1463 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1464 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1465
72af86bd 1466 memset (fastmap, 0, sizeof fastmap);
195d1361 1467
8f924df7
KH
1468 str = SDATA (string);
1469 size_byte = SBYTES (string);
4101e6fe 1470
82d497fc 1471 i_byte = 0;
13090112 1472 if (i_byte < size_byte
d5db4077 1473 && SREF (string, 0) == '^')
195d1361 1474 {
82d497fc 1475 negate = 1; i_byte++;
195d1361
RS
1476 }
1477
1478 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1479 Handle backslashes and ranges specially.
195d1361 1480
b7dbcc19
KH
1481 If STRING contains non-ASCII characters, setup char_ranges for
1482 them and use fastmap only for their leading codes. */
4101e6fe 1483
b7dbcc19 1484 if (! string_multibyte)
195d1361 1485 {
b7dbcc19 1486 int string_has_eight_bit = 0;
4101e6fe 1487
b7dbcc19
KH
1488 /* At first setup fastmap. */
1489 while (i_byte < size_byte)
1490 {
1491 c = str[i_byte++];
1492
a1bc88d4
RS
1493 if (handle_iso_classes && c == '['
1494 && i_byte < size_byte
327719ee 1495 && str[i_byte] == ':')
a1bc88d4
RS
1496 {
1497 const unsigned char *class_beg = str + i_byte + 1;
1498 const unsigned char *class_end = class_beg;
b3bda4fd 1499 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1500 /* Leave room for the null. */
a1bc88d4
RS
1501 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1502 re_wctype_t cc;
1503
1504 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1505 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1506
b3bda4fd
KS
1507 while (class_end < class_limit
1508 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1509 class_end++;
1510
b3bda4fd
KS
1511 if (class_end == class_beg
1512 || *class_end != ':' || class_end[1] != ']')
1513 goto not_a_class_name;
a1bc88d4 1514
72af86bd 1515 memcpy (class_name, class_beg, class_end - class_beg);
a1bc88d4
RS
1516 class_name[class_end - class_beg] = 0;
1517
1518 cc = re_wctype (class_name);
1519 if (cc == 0)
1520 error ("Invalid ISO C character class");
1521
1522 iso_classes = Fcons (make_number (cc), iso_classes);
1523
1524 i_byte = class_end + 2 - str;
1525 continue;
1526 }
1527
b3bda4fd 1528 not_a_class_name:
b7dbcc19
KH
1529 if (c == '\\')
1530 {
13090112 1531 if (i_byte == size_byte)
4101e6fe
RS
1532 break;
1533
5c7b02ab 1534 c = str[i_byte++];
195d1361 1535 }
839966f3
KH
1536 /* Treat `-' as range character only if another character
1537 follows. */
1538 if (i_byte + 1 < size_byte
82d497fc 1539 && str[i_byte] == '-')
195d1361 1540 {
9690d026 1541 unsigned int c2;
4101e6fe
RS
1542
1543 /* Skip over the dash. */
82d497fc 1544 i_byte++;
4101e6fe 1545
4101e6fe 1546 /* Get the end of the range. */
5c7b02ab 1547 c2 = str[i_byte++];
b7dbcc19
KH
1548 if (c2 == '\\'
1549 && i_byte < size_byte)
1550 c2 = str[i_byte++];
1551
8f924df7
KH
1552 if (c <= c2)
1553 {
1e69125e
PE
1554 unsigned lim2 = c2 + 1;
1555 while (c < lim2)
8f924df7
KH
1556 fastmap[c++] = 1;
1557 if (! ASCII_CHAR_P (c2))
1558 string_has_eight_bit = 1;
1559 }
195d1361
RS
1560 }
1561 else
b7dbcc19
KH
1562 {
1563 fastmap[c] = 1;
1564 if (! ASCII_CHAR_P (c))
1565 string_has_eight_bit = 1;
1566 }
1567 }
1568
1569 /* If the current range is multibyte and STRING contains
1570 eight-bit chars, arrange fastmap and setup char_ranges for
1571 the corresponding multibyte chars. */
1572 if (multibyte && string_has_eight_bit)
1573 {
1574 unsigned char fastmap2[0400];
1575 int range_start_byte, range_start_char;
1576
72af86bd
AS
1577 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1578 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1579 /* We are sure that this loop stops. */
1580 for (i = 0200; ! fastmap2[i]; i++);
4c0354d7 1581 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1582 fastmap[CHAR_LEADING_CODE (c)] = 1;
1583 range_start_byte = i;
1584 range_start_char = c;
f003ca54 1585 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1586 for (i = 129; i < 0400; i++)
1587 {
4c0354d7 1588 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1589 fastmap[CHAR_LEADING_CODE (c)] = 1;
1590 if (i - range_start_byte != c - range_start_char)
1591 {
1592 char_ranges[n_char_ranges++] = range_start_char;
1593 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1594 + range_start_char);
1595 range_start_byte = i;
1596 range_start_char = c;
8f924df7 1597 }
b7dbcc19
KH
1598 }
1599 char_ranges[n_char_ranges++] = range_start_char;
1600 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1601 + range_start_char);
195d1361
RS
1602 }
1603 }
f003ca54 1604 else /* STRING is multibyte */
b7dbcc19 1605 {
f003ca54
KH
1606 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1607
b7dbcc19 1608 while (i_byte < size_byte)
195d1361 1609 {
b7dbcc19 1610 unsigned char leading_code;
5c7b02ab 1611
b7dbcc19 1612 leading_code = str[i_byte];
62a6e103 1613 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1614 i_byte += len;
5c7b02ab 1615
327719ee
MB
1616 if (handle_iso_classes && c == '['
1617 && i_byte < size_byte
62a6e103 1618 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1619 {
1620 const unsigned char *class_beg = str + i_byte + 1;
1621 const unsigned char *class_end = class_beg;
1622 const unsigned char *class_limit = str + size_byte - 2;
1623 /* Leave room for the null. */
1624 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1625 re_wctype_t cc;
1626
1627 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1628 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1629
1630 while (class_end < class_limit
1631 && *class_end >= 'a' && *class_end <= 'z')
1632 class_end++;
1633
1634 if (class_end == class_beg
1635 || *class_end != ':' || class_end[1] != ']')
1636 goto not_a_class_name_multibyte;
1637
72af86bd 1638 memcpy (class_name, class_beg, class_end - class_beg);
327719ee
MB
1639 class_name[class_end - class_beg] = 0;
1640
1641 cc = re_wctype (class_name);
1642 if (cc == 0)
1643 error ("Invalid ISO C character class");
1644
1645 iso_classes = Fcons (make_number (cc), iso_classes);
1646
1647 i_byte = class_end + 2 - str;
1648 continue;
1649 }
1650
1651 not_a_class_name_multibyte:
195d1361
RS
1652 if (c == '\\')
1653 {
13090112 1654 if (i_byte == size_byte)
4101e6fe
RS
1655 break;
1656
b7dbcc19 1657 leading_code = str[i_byte];
62a6e103 1658 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1659 i_byte += len;
195d1361 1660 }
839966f3
KH
1661 /* Treat `-' as range character only if another character
1662 follows. */
1663 if (i_byte + 1 < size_byte
82d497fc 1664 && str[i_byte] == '-')
195d1361 1665 {
9690d026 1666 unsigned int c2;
b7dbcc19 1667 unsigned char leading_code2;
4101e6fe
RS
1668
1669 /* Skip over the dash. */
82d497fc 1670 i_byte++;
4101e6fe 1671
4101e6fe 1672 /* Get the end of the range. */
b7dbcc19 1673 leading_code2 = str[i_byte];
62a6e103 1674 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1675 i_byte += len;
7e68b0ea 1676
b7dbcc19
KH
1677 if (c2 == '\\'
1678 && i_byte < size_byte)
1679 {
1680 leading_code2 = str[i_byte];
62a6e103 1681 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1682 i_byte += len;
1683 }
1684
f003ca54
KH
1685 if (c > c2)
1686 continue;
b7dbcc19 1687 if (ASCII_CHAR_P (c))
06274af5 1688 {
5c7b02ab
KH
1689 while (c <= c2 && c < 0x80)
1690 fastmap[c++] = 1;
b7dbcc19
KH
1691 leading_code = CHAR_LEADING_CODE (c);
1692 }
1693 if (! ASCII_CHAR_P (c))
1694 {
1e69125e
PE
1695 unsigned lim2 = leading_code2 + 1;
1696 while (leading_code < lim2)
b7dbcc19
KH
1697 fastmap[leading_code++] = 1;
1698 if (c <= c2)
e39091b6 1699 {
b7dbcc19 1700 char_ranges[n_char_ranges++] = c;
e39091b6 1701 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1702 }
1703 }
195d1361
RS
1704 }
1705 else
7e68b0ea 1706 {
b7dbcc19 1707 if (ASCII_CHAR_P (c))
e39091b6
KH
1708 fastmap[c] = 1;
1709 else
7e68b0ea 1710 {
b7dbcc19 1711 fastmap[leading_code] = 1;
4101e6fe
RS
1712 char_ranges[n_char_ranges++] = c;
1713 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1714 }
1715 }
195d1361 1716 }
b7dbcc19
KH
1717
1718 /* If the current range is unibyte and STRING contains non-ASCII
1719 chars, arrange fastmap for the corresponding unibyte
1720 chars. */
1721
1722 if (! multibyte && n_char_ranges > 0)
1723 {
72af86bd 1724 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1725 for (i = 0; i < n_char_ranges; i += 2)
1726 {
1727 int c1 = char_ranges[i];
1e69125e 1728 unsigned lim2 = char_ranges[i + 1] + 1;
b7dbcc19 1729
1e69125e 1730 for (; c1 < lim2; c1++)
2afc21f5
SM
1731 {
1732 int b = CHAR_TO_BYTE_SAFE (c1);
1733 if (b >= 0)
1734 fastmap[b] = 1;
1735 }
b7dbcc19
KH
1736 }
1737 }
195d1361
RS
1738 }
1739
9690d026 1740 /* If ^ was the first character, complement the fastmap. */
195d1361 1741 if (negate)
b7dbcc19
KH
1742 {
1743 if (! multibyte)
1744 for (i = 0; i < sizeof fastmap; i++)
1745 fastmap[i] ^= 1;
1746 else
1747 {
1748 for (i = 0; i < 0200; i++)
1749 fastmap[i] ^= 1;
1750 /* All non-ASCII chars possibly match. */
1751 for (; i < sizeof fastmap; i++)
1752 fastmap[i] = 1;
1753 }
1754 }
195d1361
RS
1755
1756 {
d311d28c
PE
1757 ptrdiff_t start_point = PT;
1758 ptrdiff_t pos = PT;
1759 ptrdiff_t pos_byte = PT_BYTE;
9af7511a
KH
1760 unsigned char *p = PT_ADDR, *endp, *stop;
1761
1762 if (forwardp)
1763 {
1764 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1765 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1766 }
1767 else
1768 {
1769 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1770 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1771 }
195d1361
RS
1772
1773 immediate_quit = 1;
618db430
SM
1774 /* This code may look up syntax tables using macros that rely on the
1775 gl_state object. To make sure this object is not out of date,
1776 let's initialize it manually.
1777 We ignore syntax-table text-properties for now, since that's
1778 what we've done in the past. */
d48cd3f4 1779 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1780 if (forwardp)
195d1361 1781 {
b7dbcc19 1782 if (multibyte)
8f924df7 1783 while (1)
b7dbcc19 1784 {
8f924df7 1785 int nbytes;
9af7511a 1786
8f924df7 1787 if (p >= stop)
9af7511a 1788 {
8f924df7 1789 if (p >= endp)
9af7511a 1790 break;
8f924df7
KH
1791 p = GAP_END_ADDR;
1792 stop = endp;
9af7511a 1793 }
62a6e103 1794 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1795 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1796 {
327719ee 1797 if (negate)
9af7511a 1798 break;
327719ee
MB
1799 else
1800 goto fwd_ok;
9af7511a 1801 }
9af7511a 1802
8f924df7 1803 if (! fastmap[*p])
b7dbcc19
KH
1804 break;
1805 if (! ASCII_CHAR_P (c))
9af7511a 1806 {
b7dbcc19
KH
1807 /* As we are looking at a multibyte character, we
1808 must look up the character in the table
1809 CHAR_RANGES. If there's no data in the table,
1810 that character is not what we want to skip. */
1811
1812 /* The following code do the right thing even if
1813 n_char_ranges is zero (i.e. no data in
1814 CHAR_RANGES). */
1815 for (i = 0; i < n_char_ranges; i += 2)
1816 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1817 break;
b7dbcc19
KH
1818 if (!(negate ^ (i < n_char_ranges)))
1819 break;
9af7511a 1820 }
327719ee 1821 fwd_ok:
8f924df7 1822 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1823 }
1824 else
8f924df7
KH
1825 while (1)
1826 {
1827 if (p >= stop)
9af7511a 1828 {
8f924df7 1829 if (p >= endp)
9af7511a 1830 break;
8f924df7
KH
1831 p = GAP_END_ADDR;
1832 stop = endp;
9af7511a 1833 }
327719ee
MB
1834
1835 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1836 {
1837 if (negate)
1838 break;
1839 else
1840 goto fwd_unibyte_ok;
1841 }
1842
8f924df7
KH
1843 if (!fastmap[*p])
1844 break;
327719ee 1845 fwd_unibyte_ok:
8f924df7
KH
1846 p++, pos++, pos_byte++;
1847 }
195d1361
RS
1848 }
1849 else
1850 {
b7dbcc19 1851 if (multibyte)
8f924df7 1852 while (1)
b7dbcc19 1853 {
8f924df7 1854 unsigned char *prev_p;
9af7511a 1855
8f924df7 1856 if (p <= stop)
9af7511a 1857 {
8f924df7 1858 if (p <= endp)
9af7511a 1859 break;
8f924df7
KH
1860 p = GPT_ADDR;
1861 stop = endp;
9af7511a 1862 }
8f924df7
KH
1863 prev_p = p;
1864 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1865 c = STRING_CHAR (p);
a1bc88d4 1866
327719ee
MB
1867 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1868 {
1869 if (negate)
1870 break;
9690d026 1871 else
327719ee 1872 goto back_ok;
7e68b0ea 1873 }
a1bc88d4 1874
8f924df7 1875 if (! fastmap[*p])
b7dbcc19
KH
1876 break;
1877 if (! ASCII_CHAR_P (c))
7e68b0ea 1878 {
b7dbcc19
KH
1879 /* See the comment in the previous similar code. */
1880 for (i = 0; i < n_char_ranges; i += 2)
1881 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1882 break;
1883 if (!(negate ^ (i < n_char_ranges)))
1884 break;
7e68b0ea 1885 }
327719ee 1886 back_ok:
8f924df7 1887 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1888 }
1889 else
8f924df7
KH
1890 while (1)
1891 {
1892 if (p <= stop)
9af7511a 1893 {
8f924df7 1894 if (p <= endp)
9af7511a 1895 break;
8f924df7
KH
1896 p = GPT_ADDR;
1897 stop = endp;
9af7511a 1898 }
e39091b6 1899
327719ee
MB
1900 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1901 {
1902 if (negate)
1903 break;
9af7511a 1904 else
327719ee
MB
1905 goto back_unibyte_ok;
1906 }
a1bc88d4 1907
8f924df7
KH
1908 if (!fastmap[p[-1]])
1909 break;
327719ee 1910 back_unibyte_ok:
8f924df7
KH
1911 p--, pos--, pos_byte--;
1912 }
195d1361 1913 }
7e68b0ea 1914
b7dbcc19
KH
1915 SET_PT_BOTH (pos, pos_byte);
1916 immediate_quit = 0;
1917
1918 return make_number (PT - start_point);
1919 }
1920}
7e68b0ea 1921
b7dbcc19
KH
1922
1923static Lisp_Object
971de7fb 1924skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
b7dbcc19
KH
1925{
1926 register unsigned int c;
1927 unsigned char fastmap[0400];
1928 int negate = 0;
d311d28c 1929 register ptrdiff_t i, i_byte;
b7dbcc19 1930 int multibyte;
d311d28c 1931 ptrdiff_t size_byte;
b7dbcc19
KH
1932 unsigned char *str;
1933
1934 CHECK_STRING (string);
1935
1936 if (NILP (lim))
1937 XSETINT (lim, forwardp ? ZV : BEGV);
1938 else
1939 CHECK_NUMBER_COERCE_MARKER (lim);
1940
1941 /* In any case, don't allow scan outside bounds of buffer. */
1942 if (XINT (lim) > ZV)
1943 XSETFASTINT (lim, ZV);
1944 if (XINT (lim) < BEGV)
1945 XSETFASTINT (lim, BEGV);
1946
ade8ee9e 1947 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1948 return make_number (0);
8c6e735b 1949
4b4deea2 1950 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1951 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19 1952
72af86bd 1953 memset (fastmap, 0, sizeof fastmap);
b7dbcc19 1954
8f924df7
KH
1955 if (SBYTES (string) > SCHARS (string))
1956 /* As this is very rare case (syntax spec is ASCII only), don't
1957 consider efficiency. */
bc796a59
KH
1958 string = string_make_unibyte (string);
1959
8f924df7
KH
1960 str = SDATA (string);
1961 size_byte = SBYTES (string);
bc796a59 1962
b7dbcc19
KH
1963 i_byte = 0;
1964 if (i_byte < size_byte
8f924df7 1965 && SREF (string, 0) == '^')
b7dbcc19
KH
1966 {
1967 negate = 1; i_byte++;
1968 }
1969
b7dbcc19
KH
1970 /* Find the syntaxes specified and set their elements of fastmap. */
1971
1972 while (i_byte < size_byte)
1973 {
1974 c = str[i_byte++];
8f924df7 1975 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1976 }
195d1361 1977
9690d026 1978 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1979 if (negate)
1980 for (i = 0; i < sizeof fastmap; i++)
9690d026 1981 fastmap[i] ^= 1;
195d1361
RS
1982
1983 {
d311d28c
PE
1984 ptrdiff_t start_point = PT;
1985 ptrdiff_t pos = PT;
1986 ptrdiff_t pos_byte = PT_BYTE;
8f924df7
KH
1987 unsigned char *p = PT_ADDR, *endp, *stop;
1988
1989 if (forwardp)
1990 {
1991 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1992 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1993 }
1994 else
1995 {
1996 endp = CHAR_POS_ADDR (XINT (lim));
1997 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1998 }
195d1361
RS
1999
2000 immediate_quit = 1;
b7dbcc19
KH
2001 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2002 if (forwardp)
195d1361 2003 {
b7dbcc19 2004 if (multibyte)
195d1361 2005 {
8f924df7 2006 while (1)
8c6e735b 2007 {
8f924df7
KH
2008 int nbytes;
2009
2010 if (p >= stop)
2011 {
2012 if (p >= endp)
2013 break;
2014 p = GAP_END_ADDR;
2015 stop = endp;
2016 }
62a6e103 2017 c = STRING_CHAR_AND_LENGTH (p, nbytes);
8f924df7 2018 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 2019 break;
8f924df7 2020 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
2021 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2022 }
195d1361
RS
2023 }
2024 else
2025 {
8c6e735b 2026 while (1)
7e68b0ea 2027 {
8f924df7
KH
2028 if (p >= stop)
2029 {
2030 if (p >= endp)
2031 break;
2032 p = GAP_END_ADDR;
2033 stop = endp;
2034 }
2035 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 2036 break;
8f924df7 2037 p++, pos++, pos_byte++;
b7dbcc19 2038 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2039 }
2040 }
2041 }
2042 else
2043 {
b7dbcc19 2044 if (multibyte)
195d1361 2045 {
8c6e735b 2046 while (1)
b7dbcc19 2047 {
8f924df7
KH
2048 unsigned char *prev_p;
2049
2050 if (p <= stop)
b7dbcc19 2051 {
8f924df7
KH
2052 if (p <= endp)
2053 break;
2054 p = GPT_ADDR;
2055 stop = endp;
b7dbcc19 2056 }
262be72a 2057 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2058 prev_p = p;
2059 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2060 c = STRING_CHAR (p);
8f924df7
KH
2061 if (! fastmap[(int) SYNTAX (c)])
2062 break;
2063 pos--, pos_byte -= prev_p - p;
b7dbcc19 2064 }
195d1361
RS
2065 }
2066 else
2067 {
8c6e735b
KH
2068 while (1)
2069 {
8f924df7
KH
2070 if (p <= stop)
2071 {
2072 if (p <= endp)
2073 break;
2074 p = GPT_ADDR;
2075 stop = endp;
2076 }
102f6132 2077 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2078 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2079 break;
8f924df7 2080 p--, pos--, pos_byte--;
8c6e735b 2081 }
195d1361
RS
2082 }
2083 }
6a140a74
RS
2084
2085 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2086 immediate_quit = 0;
2087
2088 return make_number (PT - start_point);
2089 }
2090}
a1bc88d4
RS
2091
2092/* Return 1 if character C belongs to one of the ISO classes
2093 in the list ISO_CLASSES. Each class is represented by an
2094 integer which is its type according to re_wctype. */
2095
2096static int
971de7fb 2097in_classes (int c, Lisp_Object iso_classes)
a1bc88d4
RS
2098{
2099 int fits_class = 0;
2100
618db430 2101 while (CONSP (iso_classes))
a1bc88d4
RS
2102 {
2103 Lisp_Object elt;
2104 elt = XCAR (iso_classes);
2105 iso_classes = XCDR (iso_classes);
2106
2107 if (re_iswctype (c, XFASTINT (elt)))
2108 fits_class = 1;
2109 }
2110
2111 return fits_class;
2112}
195d1361 2113\f
95ff8dfc
RS
2114/* Jump over a comment, assuming we are at the beginning of one.
2115 FROM is the current position.
2116 FROM_BYTE is the bytepos corresponding to FROM.
2117 Do not move past STOP (a charpos).
2118 The comment over which we have to jump is of style STYLE
c5683ceb 2119 (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
95ff8dfc
RS
2120 NESTING should be positive to indicate the nesting at the beginning
2121 for nested comments and should be zero or negative else.
2122 ST_COMMENT_STYLE cannot be nested.
2123 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2124 (or 0 If the search cannot start in the middle of a two-character).
2125
2126 If successful, return 1 and store the charpos of the comment's end
2127 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2128 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2129 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2130 (as defined for state.incomment) in *INCOMMENT_PTR.
2131
2132 The comment end is the last character of the comment rather than the
2133 character just after the comment.
2134
2135 Global syntax data is assumed to initially be valid for FROM and
2136 remains valid for forward search starting at the returned position. */
2137
2138static int
d311d28c
PE
2139forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2140 EMACS_INT nesting, int style, int prev_syntax,
2141 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2142 EMACS_INT *incomment_ptr)
95ff8dfc
RS
2143{
2144 register int c, c1;
2145 register enum syntaxcode code;
c5683ceb 2146 register int syntax, other_syntax;
95ff8dfc
RS
2147
2148 if (nesting <= 0) nesting = -1;
2149
2150 /* Enter the loop in the middle so that we find
2151 a 2-char comment ender if we start in the middle of it. */
2152 syntax = prev_syntax;
2153 if (syntax != 0) goto forw_incomment;
2154
2155 while (1)
2156 {
2157 if (from == stop)
2158 {
2159 *incomment_ptr = nesting;
2160 *charpos_ptr = from;
2161 *bytepos_ptr = from_byte;
2162 return 0;
2163 }
8f924df7 2164 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2165 syntax = SYNTAX_WITH_FLAGS (c);
2166 code = syntax & 0xff;
2167 if (code == Sendcomment
c5683ceb 2168 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
abf8a9ff
SM
2169 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2170 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2171 /* we have encountered a comment end of the same style
2172 as the comment sequence which began this comment
2173 section */
2174 break;
2175 if (code == Scomment_fence
2176 && style == ST_COMMENT_STYLE)
2177 /* we have encountered a comment end of the same style
2178 as the comment sequence which began this comment
2179 section. */
2180 break;
2181 if (nesting > 0
2182 && code == Scomment
abf8a9ff 2183 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
c5683ceb 2184 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
95ff8dfc
RS
2185 /* we have encountered a nested comment of the same style
2186 as the comment sequence which began this comment section */
2187 nesting++;
2188 INC_BOTH (from, from_byte);
2189 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2190
95ff8dfc
RS
2191 forw_incomment:
2192 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
8f924df7 2193 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2194 other_syntax = SYNTAX_WITH_FLAGS (c1),
2195 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2196 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
abf8a9ff 2197 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb
SM
2198 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2199 ? nesting > 0 : nesting < 0))
4ffe723b
GM
2200 {
2201 if (--nesting <= 0)
2202 /* we have encountered a comment end of the same style
2203 as the comment sequence which began this comment
2204 section */
2205 break;
2206 else
2207 {
2208 INC_BOTH (from, from_byte);
2209 UPDATE_SYNTAX_TABLE_FORWARD (from);
2210 }
2211 }
95ff8dfc
RS
2212 if (nesting > 0
2213 && from < stop
2214 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2215 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2216 other_syntax = SYNTAX_WITH_FLAGS (c1),
2217 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2218 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
abf8a9ff 2219 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb 2220 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
95ff8dfc
RS
2221 /* we have encountered a nested comment of the same style
2222 as the comment sequence which began this comment
2223 section */
2224 {
2225 INC_BOTH (from, from_byte);
2226 UPDATE_SYNTAX_TABLE_FORWARD (from);
2227 nesting++;
2228 }
2229 }
2230 *charpos_ptr = from;
2231 *bytepos_ptr = from_byte;
2232 return 1;
2233}
2234
b3cfe0c8 2235DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2236 doc: /*
ee648c11 2237Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2238Stop scanning if we find something other than a comment or whitespace.
2239Set point to where scanning stops.
ee648c11 2240If COUNT comments are found as expected, with nothing except whitespace
fdb82f93 2241between them, return t; otherwise return nil. */)
5842a27b 2242 (Lisp_Object count)
b3cfe0c8 2243{
d311d28c
PE
2244 register ptrdiff_t from;
2245 ptrdiff_t from_byte;
2246 register ptrdiff_t stop;
8ea151b2 2247 register int c, c1;
b3cfe0c8
RS
2248 register enum syntaxcode code;
2249 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2250 int comnested = 0; /* whether the comment is nestable or not */
be720845 2251 int found;
2312c580 2252 EMACS_INT count1;
d311d28c 2253 ptrdiff_t out_charpos, out_bytepos;
7c85f529 2254 EMACS_INT dummy;
840f481c 2255
b7826503 2256 CHECK_NUMBER (count);
840f481c 2257 count1 = XINT (count);
195d1361 2258 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2259
2260 immediate_quit = 1;
2261 QUIT;
2262
2263 from = PT;
6a140a74 2264 from_byte = PT_BYTE;
b3cfe0c8 2265
195d1361 2266 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2267 while (count1 > 0)
b3cfe0c8 2268 {
04882296 2269 do
b3cfe0c8 2270 {
c5683ceb 2271 int comstart_first, syntax, other_syntax;
f902a008 2272
04882296
KH
2273 if (from == stop)
2274 {
ef316cf0 2275 SET_PT_BOTH (from, from_byte);
b7e6e612 2276 immediate_quit = 0;
04882296
KH
2277 return Qnil;
2278 }
b7dbcc19 2279 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2280 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8 2281 code = SYNTAX (c);
c5683ceb
SM
2282 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2283 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2284 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
6a140a74 2285 INC_BOTH (from, from_byte);
f902a008 2286 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2287 if (from < stop && comstart_first
b7dbcc19 2288 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2289 other_syntax = SYNTAX_WITH_FLAGS (c1),
2290 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
b3cfe0c8 2291 {
7d0393cf 2292 /* We have encountered a comment start sequence and we
7fc8191e 2293 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2294 the comment style this sequence begins so that later,
2295 only a comment end of the same style actually ends
7fc8191e 2296 the comment section. */
b3cfe0c8 2297 code = Scomment;
c5683ceb
SM
2298 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2299 comnested
2300 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2301 INC_BOTH (from, from_byte);
95ff8dfc 2302 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2303 }
04882296 2304 }
abf8a9ff 2305 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2306
da71ab91
KH
2307 if (code == Scomment_fence)
2308 comstyle = ST_COMMENT_STYLE;
2309 else if (code != Scomment)
04882296
KH
2310 {
2311 immediate_quit = 0;
6a140a74 2312 DEC_BOTH (from, from_byte);
ef316cf0 2313 SET_PT_BOTH (from, from_byte);
04882296
KH
2314 return Qnil;
2315 }
2316 /* We're at the start of a comment. */
95ff8dfc
RS
2317 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2318 &out_charpos, &out_bytepos, &dummy);
2319 from = out_charpos; from_byte = out_bytepos;
2320 if (!found)
04882296 2321 {
95ff8dfc
RS
2322 immediate_quit = 0;
2323 SET_PT_BOTH (from, from_byte);
2324 return Qnil;
b3cfe0c8 2325 }
95ff8dfc
RS
2326 INC_BOTH (from, from_byte);
2327 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2328 /* We have skipped one comment. */
840f481c 2329 count1--;
b3cfe0c8
RS
2330 }
2331
840f481c 2332 while (count1 < 0)
b3cfe0c8 2333 {
b9145dbb 2334 while (1)
b3cfe0c8 2335 {
c5683ceb 2336 int quoted, syntax;
f902a008 2337
b9145dbb
RS
2338 if (from <= stop)
2339 {
6a140a74 2340 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2341 immediate_quit = 0;
2342 return Qnil;
2343 }
b3cfe0c8 2344
6a140a74 2345 DEC_BOTH (from, from_byte);
f902a008 2346 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2347 quoted = char_quoted (from, from_byte);
b7dbcc19 2348 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2349 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8
RS
2350 code = SYNTAX (c);
2351 comstyle = 0;
c5683ceb 2352 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2353 if (code == Sendcomment)
c5683ceb
SM
2354 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2355 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
f902a008 2356 && prev_char_comend_first (from, from_byte)
89c6809a 2357 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2358 {
c5683ceb 2359 int other_syntax;
7fc8191e 2360 /* We must record the comment style encountered so that
b3cfe0c8 2361 later, we can match only the proper comment begin
7fc8191e 2362 sequence of the same style. */
6a140a74 2363 DEC_BOTH (from, from_byte);
f902a008
RS
2364 code = Sendcomment;
2365 /* Calling char_quoted, above, set up global syntax position
2366 at the new value of FROM. */
b7dbcc19 2367 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb
SM
2368 other_syntax = SYNTAX_WITH_FLAGS (c1);
2369 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2370 comnested
2371 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
b3cfe0c8
RS
2372 }
2373
195d1361
RS
2374 if (code == Scomment_fence)
2375 {
2376 /* Skip until first preceding unquoted comment_fence. */
01f44d5a 2377 int fence_found = 0;
d311d28c 2378 ptrdiff_t ini = from, ini_byte = from_byte;
7d0393cf 2379
6a140a74 2380 while (1)
195d1361 2381 {
6a140a74 2382 DEC_BOTH (from, from_byte);
195d1361 2383 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2384 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2385 if (SYNTAX (c) == Scomment_fence
7d0393cf 2386 && !char_quoted (from, from_byte))
195d1361 2387 {
01f44d5a 2388 fence_found = 1;
195d1361
RS
2389 break;
2390 }
fcdd4585
SM
2391 else if (from == stop)
2392 break;
195d1361 2393 }
01f44d5a 2394 if (fence_found == 0)
195d1361
RS
2395 {
2396 from = ini; /* Set point to ini + 1. */
6a140a74 2397 from_byte = ini_byte;
195d1361
RS
2398 goto leave;
2399 }
39c41ad4
SM
2400 else
2401 /* We have skipped one comment. */
2402 break;
195d1361
RS
2403 }
2404 else if (code == Sendcomment)
b3cfe0c8 2405 {
95ff8dfc 2406 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2407 &out_charpos, &out_bytepos);
1aa963c8
SM
2408 if (found == -1)
2409 {
abf8a9ff
SM
2410 if (c == '\n')
2411 /* This end-of-line is not an end-of-comment.
2412 Treat it like a whitespace.
2413 CC-mode (and maybe others) relies on this behavior. */
2414 ;
2415 else
2416 {
2417 /* Failure: we should go back to the end of this
2418 not-quite-endcomment. */
c5683ceb 2419 if (SYNTAX (c) != code)
abf8a9ff
SM
2420 /* It was a two-char Sendcomment. */
2421 INC_BOTH (from, from_byte);
2422 goto leave;
2423 }
1aa963c8 2424 }
2b927d02 2425 else
abf8a9ff
SM
2426 {
2427 /* We have skipped one comment. */
2428 from = out_charpos, from_byte = out_bytepos;
2429 break;
2430 }
b3cfe0c8 2431 }
bb0de084 2432 else if (code != Swhitespace || quoted)
b3cfe0c8 2433 {
195d1361 2434 leave:
b3cfe0c8 2435 immediate_quit = 0;
6a140a74 2436 INC_BOTH (from, from_byte);
ef316cf0 2437 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2438 return Qnil;
2439 }
2440 }
2441
840f481c 2442 count1++;
b3cfe0c8
RS
2443 }
2444
ef316cf0 2445 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2446 immediate_quit = 0;
2447 return Qt;
2448}
2449\f
b7dbcc19 2450/* Return syntax code of character C if C is an ASCII character
db9f48c3 2451 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2452
b7dbcc19
KH
2453#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2454 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2455 ? SYNTAX (c) : Ssymbol)
2456
6a140a74 2457static Lisp_Object
971de7fb 2458scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
8489eb67
RS
2459{
2460 Lisp_Object val;
d311d28c 2461 register ptrdiff_t stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2462 register int c, c1;
2463 int stringterm;
8489eb67
RS
2464 int quoted;
2465 int mathexit = 0;
93da5fff 2466 register enum syntaxcode code, temp_code;
d311d28c 2467 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2468 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2469 int comnested = 0; /* whether the comment is nestable or not */
d311d28c 2470 ptrdiff_t temp_pos;
932e6895 2471 EMACS_INT last_good = from;
195d1361 2472 int found;
d311d28c
PE
2473 ptrdiff_t from_byte;
2474 ptrdiff_t out_bytepos, out_charpos;
2475 int temp;
7c85f529 2476 EMACS_INT dummy;
bd25db08 2477 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2478
2479 if (depth > 0) min_depth = 0;
2480
c3907a7e
RS
2481 if (from > ZV) from = ZV;
2482 if (from < BEGV) from = BEGV;
2483
2484 from_byte = CHAR_TO_BYTE (from);
2485
8489eb67
RS
2486 immediate_quit = 1;
2487 QUIT;
2488
195d1361 2489 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2490 while (count > 0)
2491 {
8489eb67
RS
2492 while (from < stop)
2493 {
c5683ceb 2494 int comstart_first, prefix, syntax, other_syntax;
195d1361 2495 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2496 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2497 syntax = SYNTAX_WITH_FLAGS (c);
bd25db08 2498 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
c5683ceb
SM
2499 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2500 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2501 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2502 prefix = SYNTAX_FLAGS_PREFIX (syntax);
7bf5e9e4
RS
2503 if (depth == min_depth)
2504 last_good = from;
6a140a74 2505 INC_BOTH (from, from_byte);
195d1361 2506 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2507 if (from < stop && comstart_first
327719ee 2508 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2509 other_syntax = SYNTAX_WITH_FLAGS (c),
2510 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
8489eb67 2511 && parse_sexp_ignore_comments)
e5d4f4dc 2512 {
7d0393cf 2513 /* we have encountered a comment start sequence and we
195d1361 2514 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2515 the comment style this sequence begins so that later,
2516 only a comment end of the same style actually ends
2517 the comment section */
2518 code = Scomment;
c5683ceb
SM
2519 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2520 comnested
2521 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2522 INC_BOTH (from, from_byte);
f902a008 2523 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2524 }
7d0393cf 2525
f902a008 2526 if (prefix)
8489eb67
RS
2527 continue;
2528
0220c518 2529 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2530 {
2531 case Sescape:
2532 case Scharquote:
8b8bf8e6
SM
2533 if (from == stop)
2534 goto lose;
6a140a74 2535 INC_BOTH (from, from_byte);
8489eb67
RS
2536 /* treat following character as a word constituent */
2537 case Sword:
2538 case Ssymbol:
2539 if (depth || !sexpflag) break;
195d1361 2540 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2541 while (from < stop)
2542 {
195d1361 2543 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2544
2545 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2546 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2547 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2548 switch (temp)
8489eb67
RS
2549 {
2550 case Scharquote:
2551 case Sescape:
6a140a74 2552 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2553 if (from == stop)
2554 goto lose;
8489eb67
RS
2555 break;
2556 case Sword:
2557 case Ssymbol:
2558 case Squote:
2559 break;
2560 default:
2561 goto done;
2562 }
6a140a74 2563 INC_BOTH (from, from_byte);
8489eb67
RS
2564 }
2565 goto done;
2566
195d1361 2567 case Scomment_fence:
95ff8dfc
RS
2568 comstyle = ST_COMMENT_STYLE;
2569 /* FALLTHROUGH */
2570 case Scomment:
8489eb67 2571 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2572 UPDATE_SYNTAX_TABLE_FORWARD (from);
2573 found = forw_comment (from, from_byte, stop,
2574 comnested, comstyle, 0,
2575 &out_charpos, &out_bytepos, &dummy);
2576 from = out_charpos, from_byte = out_bytepos;
2577 if (!found)
8489eb67 2578 {
95ff8dfc
RS
2579 if (depth == 0)
2580 goto done;
2581 goto lose;
8489eb67 2582 }
95ff8dfc
RS
2583 INC_BOTH (from, from_byte);
2584 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2585 break;
2586
2587 case Smath:
2588 if (!sexpflag)
2589 break;
b7dbcc19 2590 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2591 {
2592 INC_BOTH (from, from_byte);
2593 }
8489eb67
RS
2594 if (mathexit)
2595 {
2596 mathexit = 0;
2597 goto close1;
2598 }
2599 mathexit = 1;
2600
2601 case Sopen:
2602 if (!++depth) goto done;
2603 break;
2604
2605 case Sclose:
2606 close1:
2607 if (!--depth) goto done;
2608 if (depth < min_depth)
9471945b
KS
2609 xsignal3 (Qscan_error,
2610 build_string ("Containing expression ends prematurely"),
2611 make_number (last_good), make_number (from));
8489eb67
RS
2612 break;
2613
2614 case Sstring:
195d1361 2615 case Sstring_fence:
ef316cf0 2616 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2617 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2618 while (1)
2619 {
8b8bf8e6
SM
2620 if (from >= stop)
2621 goto lose;
195d1361 2622 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2623 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2624 if (code == Sstring
2625 ? (c == stringterm
2626 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2627 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2628 break;
7c7ca3d2
RS
2629
2630 /* Some compilers can't handle this inside the switch. */
bd25db08 2631 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2632 switch (temp)
8489eb67
RS
2633 {
2634 case Scharquote:
2635 case Sescape:
6a140a74 2636 INC_BOTH (from, from_byte);
8489eb67 2637 }
6a140a74 2638 INC_BOTH (from, from_byte);
8489eb67 2639 }
6a140a74 2640 INC_BOTH (from, from_byte);
8489eb67
RS
2641 if (!depth && sexpflag) goto done;
2642 break;
240c43e8
SM
2643 default:
2644 /* Ignore whitespace, punctuation, quote, endcomment. */
2645 break;
8489eb67
RS
2646 }
2647 }
2648
2649 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2650 if (depth)
2651 goto lose;
8489eb67
RS
2652
2653 immediate_quit = 0;
2654 return Qnil;
2655
2656 /* End of object reached */
2657 done:
2658 count--;
2659 }
2660
2661 while (count < 0)
2662 {
8489eb67
RS
2663 while (from > stop)
2664 {
c5683ceb 2665 int syntax;
6a140a74 2666 DEC_BOTH (from, from_byte);
195d1361 2667 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2668 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2669 syntax= SYNTAX_WITH_FLAGS (c);
bd25db08 2670 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2671 if (depth == min_depth)
2672 last_good = from;
7fc8191e 2673 comstyle = 0;
c5683ceb 2674 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2675 if (code == Sendcomment)
c5683ceb
SM
2676 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2677 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
1674d9a2 2678 && prev_char_comend_first (from, from_byte)
8489eb67 2679 && parse_sexp_ignore_comments)
e5d4f4dc 2680 {
f902a008 2681 /* We must record the comment style encountered so that
e5d4f4dc 2682 later, we can match only the proper comment begin
f902a008 2683 sequence of the same style. */
01f44d5a 2684 int c2, other_syntax;
6a140a74 2685 DEC_BOTH (from, from_byte);
f902a008
RS
2686 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2687 code = Sendcomment;
01f44d5a
PE
2688 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2689 other_syntax = SYNTAX_WITH_FLAGS (c2);
c5683ceb
SM
2690 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2691 comnested
2692 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
e5d4f4dc 2693 }
7d0393cf 2694
9828a477 2695 /* Quoting turns anything except a comment-ender
e6365855 2696 into a word character. Note that this cannot be true
f902a008 2697 if we decremented FROM in the if-statement above. */
9828a477 2698 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2699 {
2700 DEC_BOTH (from, from_byte);
2701 code = Sword;
2702 }
c5683ceb 2703 else if (SYNTAX_FLAGS_PREFIX (syntax))
8489eb67
RS
2704 continue;
2705
9828a477 2706 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2707 {
2708 case Sword:
2709 case Ssymbol:
9828a477
KH
2710 case Sescape:
2711 case Scharquote:
8489eb67 2712 if (depth || !sexpflag) break;
195d1361
RS
2713 /* This word counts as a sexp; count object finished
2714 after passing it. */
8489eb67
RS
2715 while (from > stop)
2716 {
6a140a74 2717 temp_pos = from_byte;
4b4deea2 2718 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
2719 DEC_POS (temp_pos);
2720 else
2721 temp_pos--;
6a140a74 2722 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2723 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2724 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2725 /* Don't allow comment-end to be quoted. */
2726 if (temp_code == Sendcomment)
2727 goto done2;
6a140a74 2728 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2729 if (quoted)
93da5fff 2730 {
6a140a74 2731 DEC_BOTH (from, from_byte);
ef316cf0 2732 temp_pos = dec_bytepos (temp_pos);
6a140a74 2733 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2734 }
b7dbcc19 2735 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2736 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2737 if (! (quoted || temp_code == Sword
2738 || temp_code == Ssymbol
2739 || temp_code == Squote))
8489eb67 2740 goto done2;
6a140a74 2741 DEC_BOTH (from, from_byte);
8489eb67
RS
2742 }
2743 goto done2;
2744
2745 case Smath:
2746 if (!sexpflag)
2747 break;
ef316cf0 2748 temp_pos = dec_bytepos (from_byte);
6a140a74 2749 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2750 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2751 DEC_BOTH (from, from_byte);
8489eb67
RS
2752 if (mathexit)
2753 {
2754 mathexit = 0;
2755 goto open2;
2756 }
2757 mathexit = 1;
2758
2759 case Sclose:
2760 if (!++depth) goto done2;
2761 break;
2762
2763 case Sopen:
2764 open2:
2765 if (!--depth) goto done2;
2766 if (depth < min_depth)
9471945b
KS
2767 xsignal3 (Qscan_error,
2768 build_string ("Containing expression ends prematurely"),
2769 make_number (last_good), make_number (from));
8489eb67
RS
2770 break;
2771
2772 case Sendcomment:
2773 if (!parse_sexp_ignore_comments)
2774 break;
95ff8dfc 2775 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2776 &out_charpos, &out_bytepos);
1aa963c8
SM
2777 /* FIXME: if found == -1, then it really wasn't a comment-end.
2778 For single-char Sendcomment, we can't do much about it apart
2779 from skipping the char.
2780 For 2-char endcomments, we could try again, taking both
2781 chars as separate entities, but it's a lot of trouble
2782 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2783 if (found != -1)
2784 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2785 break;
2786
195d1361
RS
2787 case Scomment_fence:
2788 case Sstring_fence:
2789 while (1)
2790 {
8b8bf8e6
SM
2791 if (from == stop)
2792 goto lose;
6b61353c 2793 DEC_BOTH (from, from_byte);
195d1361 2794 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2795 if (!char_quoted (from, from_byte)
b7dbcc19 2796 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2797 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2798 break;
2799 }
2800 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2801 break;
7d0393cf 2802
8489eb67 2803 case Sstring:
b7dbcc19 2804 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2805 while (1)
2806 {
8b8bf8e6
SM
2807 if (from == stop)
2808 goto lose;
6b61353c
KH
2809 DEC_BOTH (from, from_byte);
2810 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2811 if (!char_quoted (from, from_byte)
2812 && (stringterm
2813 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2814 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2815 break;
8489eb67 2816 }
8489eb67
RS
2817 if (!depth && sexpflag) goto done2;
2818 break;
240c43e8
SM
2819 default:
2820 /* Ignore whitespace, punctuation, quote, endcomment. */
2821 break;
8489eb67
RS
2822 }
2823 }
2824
2825 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2826 if (depth)
2827 goto lose;
8489eb67
RS
2828
2829 immediate_quit = 0;
2830 return Qnil;
2831
2832 done2:
2833 count++;
2834 }
2835
2836
2837 immediate_quit = 0;
1e142fb7 2838 XSETFASTINT (val, from);
8489eb67
RS
2839 return val;
2840
2841 lose:
9471945b
KS
2842 xsignal3 (Qscan_error,
2843 build_string ("Unbalanced parentheses"),
2844 make_number (last_good), make_number (from));
8489eb67
RS
2845}
2846
8489eb67 2847DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93 2848 doc: /* Scan from character number FROM by COUNT lists.
7b926f3f
CY
2849Scan forward if COUNT is positive, backward if COUNT is negative.
2850Return the character number of the position thus found.
2851
2852A \"list", in this context, refers to a balanced parenthetical
2853grouping, as determined by the syntax table.
fdb82f93 2854
7b926f3f
CY
2855If DEPTH is nonzero, treat that as the nesting depth of the starting
2856point (i.e. the starting point is DEPTH parentheses deep). This
2857function scans over parentheses until the depth goes to zero COUNT
2858times. Hence, positive DEPTH moves out that number of levels of
221cd413 2859parentheses, while negative DEPTH moves to a deeper level.
fdb82f93
PJ
2860
2861Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2862
7b926f3f
CY
2863If we reach the beginning or end of the accessible part of the buffer
2864before we have scanned over COUNT lists, return nil if the depth at
2865that point is zero, and signal a error if the depth is nonzero. */)
5842a27b 2866 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
8489eb67 2867{
b7826503
PJ
2868 CHECK_NUMBER (from);
2869 CHECK_NUMBER (count);
2870 CHECK_NUMBER (depth);
8489eb67
RS
2871
2872 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2873}
2874
2875DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2876 doc: /* Scan from character number FROM by COUNT balanced expressions.
2877If COUNT is negative, scan backwards.
2878Returns the character number of the position thus found.
2879
2880Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2881
2882If the beginning or end of (the accessible part of) the buffer is reached
2883in the middle of a parenthetical grouping, an error is signaled.
2884If the beginning or end is reached between groupings
2885but before count is used up, nil is returned. */)
5842a27b 2886 (Lisp_Object from, Lisp_Object count)
8489eb67 2887{
b7826503
PJ
2888 CHECK_NUMBER (from);
2889 CHECK_NUMBER (count);
8489eb67
RS
2890
2891 return scan_lists (XINT (from), XINT (count), 0, 1);
2892}
2893
2894DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2895 0, 0, 0,
2896 doc: /* Move point backward over any number of chars with prefix syntax.
2897This includes chars with "quote" or "prefix" syntax (' or p). */)
5842a27b 2898 (void)
8489eb67 2899{
d311d28c
PE
2900 ptrdiff_t beg = BEGV;
2901 ptrdiff_t opoint = PT;
2902 ptrdiff_t opoint_byte = PT_BYTE;
2903 ptrdiff_t pos = PT;
2904 ptrdiff_t pos_byte = PT_BYTE;
93da5fff 2905 int c;
93da5fff 2906
7d0393cf 2907 if (pos <= beg)
195d1361 2908 {
f902a008
RS
2909 SET_PT_BOTH (opoint, opoint_byte);
2910
2911 return Qnil;
195d1361 2912 }
8489eb67 2913
f902a008
RS
2914 SETUP_SYNTAX_TABLE (pos, -1);
2915
6a140a74
RS
2916 DEC_BOTH (pos, pos_byte);
2917
1fd3172d 2918 while (!char_quoted (pos, pos_byte)
195d1361 2919 /* Previous statement updates syntax table. */
b7dbcc19 2920 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2921 || SYNTAX_PREFIX (c)))
2922 {
1fd3172d
RS
2923 opoint = pos;
2924 opoint_byte = pos_byte;
2925
2926 if (pos + 1 > beg)
2927 DEC_BOTH (pos, pos_byte);
93da5fff 2928 }
8489eb67 2929
6a140a74 2930 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2931
2932 return Qnil;
2933}
2934\f
6a140a74 2935/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2936 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2937 and return a description of the state of the parse at END.
c81a3712 2938 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2939 If COMMENTSTOP is 1, stop at the start of a comment.
2940 If COMMENTSTOP is -1, stop at the start or end of a comment,
2941 after the beginning of a string, or after the end of a string. */
8489eb67 2942
340f92b5 2943static void
d5a3eaaf 2944scan_sexps_forward (struct lisp_parse_state *stateptr,
d311d28c
PE
2945 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
2946 EMACS_INT targetdepth, int stopbefore,
d5a3eaaf 2947 Lisp_Object oldstate, int commentstop)
8489eb67
RS
2948{
2949 struct lisp_parse_state state;
2950
2951 register enum syntaxcode code;
95ff8dfc
RS
2952 int c1;
2953 int comnested;
d311d28c 2954 struct level { ptrdiff_t last, prev; };
8489eb67
RS
2955 struct level levelstart[100];
2956 register struct level *curlevel = levelstart;
2957 struct level *endlevel = levelstart + 100;
d311d28c 2958 register EMACS_INT depth; /* Paren depth of current scanning location.
8489eb67
RS
2959 level - levelstart equals this except
2960 when the depth becomes negative. */
d311d28c 2961 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
8489eb67
RS
2962 int start_quoted = 0; /* Nonzero means starting after a char quote */
2963 Lisp_Object tem;
d311d28c
PE
2964 ptrdiff_t prev_from; /* Keep one character before FROM. */
2965 ptrdiff_t prev_from_byte;
1fd3172d 2966 int prev_from_syntax;
195d1361
RS
2967 int boundary_stop = commentstop == -1;
2968 int nofence;
95ff8dfc 2969 int found;
d311d28c 2970 ptrdiff_t out_bytepos, out_charpos;
7c7ca3d2 2971 int temp;
93da5fff
KH
2972
2973 prev_from = from;
6a140a74
RS
2974 prev_from_byte = from_byte;
2975 if (from != BEGV)
2976 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2977
2978 /* Use this macro instead of `from++'. */
6a140a74
RS
2979#define INC_FROM \
2980do { prev_from = from; \
2981 prev_from_byte = from_byte; \
327719ee 2982 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2983 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2984 INC_BOTH (from, from_byte); \
3ceb4629
SM
2985 if (from < end) \
2986 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2987 } while (0)
8489eb67
RS
2988
2989 immediate_quit = 1;
2990 QUIT;
2991
265a9e55 2992 if (NILP (oldstate))
8489eb67
RS
2993 {
2994 depth = 0;
2995 state.instring = -1;
2996 state.incomment = 0;
195d1361
RS
2997 state.comstyle = 0; /* comment style a by default. */
2998 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2999 }
3000 else
3001 {
3002 tem = Fcar (oldstate);
265a9e55 3003 if (!NILP (tem))
8489eb67
RS
3004 depth = XINT (tem);
3005 else
3006 depth = 0;
3007
3008 oldstate = Fcdr (oldstate);
3009 oldstate = Fcdr (oldstate);
3010 oldstate = Fcdr (oldstate);
3011 tem = Fcar (oldstate);
195d1361 3012 /* Check whether we are inside string_fence-style string: */
7d0393cf 3013 state.instring = (!NILP (tem)
d311d28c 3014 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
e76f1c44 3015 : -1);
8489eb67
RS
3016
3017 oldstate = Fcdr (oldstate);
3018 tem = Fcar (oldstate);
e76f1c44
GM
3019 state.incomment = (!NILP (tem)
3020 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 3021 : 0);
8489eb67
RS
3022
3023 oldstate = Fcdr (oldstate);
3024 tem = Fcar (oldstate);
265a9e55 3025 start_quoted = !NILP (tem);
e5d4f4dc 3026
95ff8dfc 3027 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
3028 style a. If it is non-nil, we are in comment style b */
3029 oldstate = Fcdr (oldstate);
e5d4f4dc 3030 oldstate = Fcdr (oldstate);
195d1361 3031 tem = Fcar (oldstate);
c5683ceb
SM
3032 state.comstyle = (NILP (tem)
3033 ? 0
d311d28c
PE
3034 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3035 ? XINT (tem)
3036 : ST_COMMENT_STYLE));
195d1361 3037
e5d4f4dc 3038 oldstate = Fcdr (oldstate);
e5d4f4dc 3039 tem = Fcar (oldstate);
d311d28c
PE
3040 state.comstr_start =
3041 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
1a102a58
RS
3042 oldstate = Fcdr (oldstate);
3043 tem = Fcar (oldstate);
3044 while (!NILP (tem)) /* >= second enclosing sexps. */
3045 {
d311d28c
PE
3046 Lisp_Object temhd = Fcar (tem);
3047 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3048 curlevel->last = XINT (temhd);
1a102a58 3049 if (++curlevel == endlevel)
02010917 3050 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3051 curlevel->prev = -1;
3052 curlevel->last = -1;
3053 tem = Fcdr (tem);
3054 }
8489eb67
RS
3055 }
3056 state.quoted = 0;
3057 mindepth = depth;
3058
3059 curlevel->prev = -1;
3060 curlevel->last = -1;
3061
326b283b 3062 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3063 temp = FETCH_CHAR (prev_from_byte);
3064 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3065 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3066
195d1361 3067 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3068
326b283b
RS
3069 if (state.incomment)
3070 goto startincomment;
8489eb67
RS
3071 if (state.instring >= 0)
3072 {
195d1361 3073 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3074 if (start_quoted)
3075 goto startquotedinstring;
8489eb67
RS
3076 goto startinstring;
3077 }
326b283b
RS
3078 else if (start_quoted)
3079 goto startquoted;
1fd3172d 3080
8489eb67
RS
3081 while (from < end)
3082 {
c5683ceb 3083 int syntax;
93da5fff 3084 INC_FROM;
1fd3172d 3085 code = prev_from_syntax & 0xff;
4c920633 3086
c5d4b96f
SM
3087 if (from < end
3088 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3089 && (c1 = FETCH_CHAR (from_byte),
c5683ceb
SM
3090 syntax = SYNTAX_WITH_FLAGS (c1),
3091 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
c5d4b96f
SM
3092 /* Duplicate code to avoid a complex if-expression
3093 which causes trouble for the SGI compiler. */
95ff8dfc 3094 {
c5d4b96f
SM
3095 /* Record the comment style we have entered so that only
3096 the comment-end sequence of the same style actually
3097 terminates the comment section. */
c5683ceb
SM
3098 state.comstyle
3099 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
c5d4b96f 3100 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
c5683ceb 3101 comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
c5d4b96f 3102 state.incomment = comnested ? 1 : -1;
95ff8dfc 3103 state.comstr_start = prev_from;
c5d4b96f
SM
3104 INC_FROM;
3105 code = Scomment;
95ff8dfc 3106 }
4c920633 3107 else if (code == Scomment_fence)
e5d4f4dc
RS
3108 {
3109 /* Record the comment style we have entered so that only
3110 the comment-end sequence of the same style actually
3111 terminates the comment section. */
95ff8dfc
RS
3112 state.comstyle = ST_COMMENT_STYLE;
3113 state.incomment = -1;
195d1361 3114 state.comstr_start = prev_from;
e5d4f4dc 3115 code = Scomment;
e5d4f4dc 3116 }
c5d4b96f
SM
3117 else if (code == Scomment)
3118 {
c5683ceb 3119 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
c5d4b96f
SM
3120 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3121 1 : -1);
3122 state.comstr_start = prev_from;
3123 }
e5d4f4dc 3124
1fd3172d 3125 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3126 continue;
0220c518 3127 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3128 {
3129 case Sescape:
3130 case Scharquote:
3131 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3132 curlevel->last = prev_from;
8489eb67
RS
3133 startquoted:
3134 if (from == end) goto endquoted;
93da5fff 3135 INC_FROM;
8489eb67
RS
3136 goto symstarted;
3137 /* treat following character as a word constituent */
3138 case Sword:
3139 case Ssymbol:
3140 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3141 curlevel->last = prev_from;
8489eb67
RS
3142 symstarted:
3143 while (from < end)
3144 {
7c7ca3d2 3145 /* Some compilers can't handle this inside the switch. */
327719ee 3146 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3147 temp = SYNTAX (temp);
7c7ca3d2 3148 switch (temp)
8489eb67
RS
3149 {
3150 case Scharquote:
3151 case Sescape:
93da5fff 3152 INC_FROM;
8489eb67
RS
3153 if (from == end) goto endquoted;
3154 break;
3155 case Sword:
3156 case Ssymbol:
3157 case Squote:
3158 break;
3159 default:
3160 goto symdone;
3161 }
93da5fff 3162 INC_FROM;
8489eb67
RS
3163 }
3164 symdone:
3165 curlevel->prev = curlevel->last;
3166 break;
3167
240c43e8 3168 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3169 case Scomment:
195d1361 3170 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3171 startincomment:
3172 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3173 that we find a 2-char comment ender even if we start in the
e6365855 3174 middle of it. We don't want to do that if we're just at the
d355bd8a 3175 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3176 found = forw_comment (from, from_byte, end,
3177 state.incomment, state.comstyle,
e6365855
SM
3178 (from == BEGV || from < state.comstr_start + 3)
3179 ? 0 : prev_from_syntax,
95ff8dfc
RS
3180 &out_charpos, &out_bytepos, &state.incomment);
3181 from = out_charpos; from_byte = out_bytepos;
3182 /* Beware! prev_from and friends are invalid now.
3183 Luckily, the `done' doesn't use them and the INC_FROM
3184 sets them to a sane value without looking at them. */
3185 if (!found) goto done;
d355bd8a 3186 INC_FROM;
8489eb67 3187 state.incomment = 0;
e5d4f4dc 3188 state.comstyle = 0; /* reset the comment style */
195d1361 3189 if (boundary_stop) goto done;
8489eb67
RS
3190 break;
3191
3192 case Sopen:
3193 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3194 depth++;
3195 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3196 curlevel->last = prev_from;
8489eb67 3197 if (++curlevel == endlevel)
02010917 3198 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3199 curlevel->prev = -1;
3200 curlevel->last = -1;
30844415 3201 if (targetdepth == depth) goto done;
8489eb67
RS
3202 break;
3203
3204 case Sclose:
3205 depth--;
3206 if (depth < mindepth)
3207 mindepth = depth;
3208 if (curlevel != levelstart)
3209 curlevel--;
3210 curlevel->prev = curlevel->last;
30844415 3211 if (targetdepth == depth) goto done;
8489eb67
RS
3212 break;
3213
3214 case Sstring:
195d1361
RS
3215 case Sstring_fence:
3216 state.comstr_start = from - 1;
8489eb67 3217 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3218 curlevel->last = prev_from;
7d0393cf 3219 state.instring = (code == Sstring
b7dbcc19 3220 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3221 : ST_STRING_STYLE);
3222 if (boundary_stop) goto done;
8489eb67 3223 startinstring:
195d1361 3224 {
644ea4df 3225 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3226
644ea4df
RS
3227 while (1)
3228 {
3229 int c;
195d1361 3230
644ea4df 3231 if (from >= end) goto done;
b7dbcc19 3232 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3233 /* Some compilers can't handle this inside the switch. */
3234 temp = SYNTAX (c);
ccf89641
KH
3235
3236 /* Check TEMP here so that if the char has
3237 a syntax-table property which says it is NOT
3238 a string character, it does not end the string. */
3239 if (nofence && c == state.instring && temp == Sstring)
3240 break;
3241
7c7ca3d2 3242 switch (temp)
644ea4df
RS
3243 {
3244 case Sstring_fence:
3245 if (!nofence) goto string_end;
3246 break;
3247 case Scharquote:
3248 case Sescape:
3249 INC_FROM;
3250 startquotedinstring:
3251 if (from >= end) goto endquoted;
195d1361 3252 }
644ea4df
RS
3253 INC_FROM;
3254 }
195d1361
RS
3255 }
3256 string_end:
8489eb67
RS
3257 state.instring = -1;
3258 curlevel->prev = curlevel->last;
93da5fff 3259 INC_FROM;
195d1361 3260 if (boundary_stop) goto done;
8489eb67
RS
3261 break;
3262
3263 case Smath:
240c43e8
SM
3264 /* FIXME: We should do something with it. */
3265 break;
3266 default:
3267 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3268 break;
3269 }
3270 }
3271 goto done;
3272
3273 stop: /* Here if stopping before start of sexp. */
93da5fff 3274 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3275 goto done; /* but return the position before it. */
3276
3277 endquoted:
3278 state.quoted = 1;
3279 done:
3280 state.depth = depth;
3281 state.mindepth = mindepth;
3282 state.thislevelstart = curlevel->prev;
3283 state.prevlevelstart
3284 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3285 state.location = from;
1a102a58 3286 state.levelstarts = Qnil;
eb4d412d
PE
3287 while (curlevel > levelstart)
3288 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3289 state.levelstarts);
8489eb67
RS
3290 immediate_quit = 0;
3291
e5d4f4dc 3292 *stateptr = state;
8489eb67
RS
3293}
3294
c81a3712 3295DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3296 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3297Parsing stops at TO or when certain criteria are met;
3298 point is set to where parsing stops.
3299If fifth arg OLDSTATE is omitted or nil,
3300 parsing assumes that FROM is the beginning of a function.
0e6228bc 3301Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3302 0. depth in parens.
3303 1. character address of start of innermost containing list; nil if none.
3304 2. character address of start of last complete sexp terminated.
3305 3. non-nil if inside a string.
3306 (it is the character that will terminate the string,
3307 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3308 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3309 else an integer (the current comment nesting).
3310 5. t if following a quote character.
3311 6. the minimum paren-depth encountered during this scan.
c5683ceb 3312 7. style of comment, if any.
fdb82f93
PJ
3313 8. character address of start of comment or string; nil if not in one.
3314 9. Intermediate data for continuation of parsing (subject to change).
3315If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3316in parentheses becomes equal to TARGETDEPTH.
3317Fourth arg STOPBEFORE non-nil means stop when come to
3318 any character that starts a sexp.
0e6228bc 3319Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3320 It is used to initialize the state of the parse. Elements number 1, 2, 6
d311d28c 3321 are ignored.
fdb82f93
PJ
3322Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3323 If it is symbol `syntax-table', stop after the start of a comment or a
3324 string, or after end of a comment or a string. */)
5842a27b 3325 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
8489eb67
RS
3326{
3327 struct lisp_parse_state state;
d311d28c 3328 EMACS_INT target;
8489eb67 3329
265a9e55 3330 if (!NILP (targetdepth))
8489eb67 3331 {
b7826503 3332 CHECK_NUMBER (targetdepth);
8489eb67
RS
3333 target = XINT (targetdepth);
3334 }
3335 else
d311d28c 3336 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth */
8489eb67
RS
3337
3338 validate_region (&from, &to);
6a140a74
RS
3339 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3340 XINT (to),
c81a3712 3341 target, !NILP (stopbefore), oldstate,
7d0393cf 3342 (NILP (commentstop)
195d1361 3343 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3344
3345 SET_PT (state.location);
7d0393cf 3346
8489eb67 3347 return Fcons (make_number (state.depth),
c5683ceb
SM
3348 Fcons (state.prevlevelstart < 0
3349 ? Qnil : make_number (state.prevlevelstart),
3350 Fcons (state.thislevelstart < 0
3351 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3352 Fcons (state.instring >= 0
3353 ? (state.instring == ST_STRING_STYLE
195d1361 3354 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3355 Fcons (state.incomment < 0 ? Qt :
3356 (state.incomment == 0 ? Qnil :
3357 make_number (state.incomment)),
8489eb67 3358 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3359 Fcons (make_number (state.mindepth),
7d0393cf 3360 Fcons ((state.comstyle
bff37d2a 3361 ? (state.comstyle == ST_COMMENT_STYLE
c5683ceb
SM
3362 ? Qsyntax_table
3363 : make_number (state.comstyle))
3364 : Qnil),
0beaf54f
DL
3365 Fcons (((state.incomment
3366 || (state.instring >= 0))
bff37d2a
RS
3367 ? make_number (state.comstr_start)
3368 : Qnil),
1a102a58 3369 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3370}
3371\f
dfcf069d 3372void
971de7fb 3373init_syntax_once (void)
8489eb67 3374{
78f9a1f7 3375 register int i, c;
8ea151b2 3376 Lisp_Object temp;
8489eb67 3377
5ebaddf5 3378 /* This has to be done here, before we call Fmake_char_table. */
cd3520a4 3379 DEFSYM (Qsyntax_table, "syntax-table");
5ebaddf5 3380
d67b4f80 3381 /* Intern_C_String this now in case it isn't already done.
5ebaddf5
RS
3382 Setting this variable twice is harmless.
3383 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 3384 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3385
93da5fff 3386 /* Create objects which can be shared among syntax tables. */
42ebfa31 3387 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
77b37c05 3388 for (i = 0; i < ASIZE (Vsyntax_code_object); i++)
93da5fff
KH
3389 XVECTOR (Vsyntax_code_object)->contents[i]
3390 = Fcons (make_number (i), Qnil);
3391
5ebaddf5
RS
3392 /* Now we are ready to set up this property, so we can
3393 create syntax tables. */
3394 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3395
93da5fff 3396 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3397
5ebaddf5 3398 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3399
aa7b08b4
RS
3400 /* Control characters should not be whitespace. */
3401 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3402 for (i = 0; i <= ' ' - 1; i++)
3403 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3404 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3405
3406 /* Except that a few really are whitespace. */
3407 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3408 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3409 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3410 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3411 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3412 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3413
93da5fff 3414 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3415 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3416 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3417 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3418 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3419 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3420 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3421
3422 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3423 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3424
3425 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3426 Fcons (make_number (Sopen), make_number (')')));
3427 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3428 Fcons (make_number (Sclose), make_number ('(')));
3429 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3430 Fcons (make_number (Sopen), make_number (']')));
3431 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3432 Fcons (make_number (Sclose), make_number ('[')));
3433 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3434 Fcons (make_number (Sopen), make_number ('}')));
3435 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3436 Fcons (make_number (Sclose), make_number ('{')));
3437 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3438 Fcons (make_number ((int) Sstring), Qnil));
3439 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3440 Fcons (make_number ((int) Sescape), Qnil));
3441
93da5fff 3442 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3443 for (i = 0; i < 10; i++)
78f9a1f7
KH
3444 {
3445 c = "_-+*/&|<>="[i];
3446 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3447 }
8489eb67 3448
93da5fff 3449 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3450 for (i = 0; i < 12; i++)
78f9a1f7
KH
3451 {
3452 c = ".,;:?!#@~^'`"[i];
3453 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3454 }
bd25db08
KH
3455
3456 /* All multibyte characters have syntax `word' by default. */
3457 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3458 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3459}
3460
dfcf069d 3461void
971de7fb 3462syms_of_syntax (void)
8489eb67 3463{
cd3520a4 3464 DEFSYM (Qsyntax_table_p, "syntax-table-p");
8489eb67 3465
93da5fff
KH
3466 staticpro (&Vsyntax_code_object);
3467
a2994c46
KS
3468 staticpro (&gl_state.object);
3469 staticpro (&gl_state.global_code);
3470 staticpro (&gl_state.current_syntax_table);
3471 staticpro (&gl_state.old_prop);
3472
3473 /* Defined in regex.c */
3474 staticpro (&re_match_object);
3475
cd3520a4 3476 DEFSYM (Qscan_error, "scan-error");
7bf5e9e4 3477 Fput (Qscan_error, Qerror_conditions,
d67b4f80 3478 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
7bf5e9e4 3479 Fput (Qscan_error, Qerror_message,
d67b4f80 3480 make_pure_c_string ("Scan error"));
7bf5e9e4 3481
29208e82 3482 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
fdb82f93 3483 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3484
29208e82 3485 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
fdb82f93
PJ
3486 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3487Otherwise, that text property is simply ignored.
3488See the info node `(elisp)Syntax Properties' for a description of the
3489`syntax-table' property. */);
195d1361 3490
8489eb67 3491 words_include_escapes = 0;
29208e82 3492 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
fdb82f93 3493 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3494
29208e82 3495 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
fdb82f93 3496 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3497 multibyte_syntax_as_symbol = 0;
3498
f4ed767f 3499 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
29208e82 3500 open_paren_in_column_0_is_defun_start,
fb7ada5f 3501 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3502 open_paren_in_column_0_is_defun_start = 1;
3503
8f924df7
KH
3504
3505 DEFVAR_LISP ("find-word-boundary-function-table",
29208e82 3506 Vfind_word_boundary_function_table,
869bb237 3507 doc: /*
8f924df7 3508Char table of functions to search for the word boundary.
869bb237
KH
3509Each function is called with two arguments; POS and LIMIT.
3510POS and LIMIT are character positions in the current buffer.
3511
3512If POS is less than LIMIT, POS is at the first character of a word,
3513and the return value of a function is a position after the last
3514character of that word.
3515
3516If POS is not less than LIMIT, POS is at the last character of a word,
3517and the return value of a function is a position at the first
3518character of that word.
3519
3520In both cases, LIMIT bounds the search. */);
8f924df7 3521 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3522
8489eb67
RS
3523 defsubr (&Ssyntax_table_p);
3524 defsubr (&Ssyntax_table);
3525 defsubr (&Sstandard_syntax_table);
3526 defsubr (&Scopy_syntax_table);
3527 defsubr (&Sset_syntax_table);
3528 defsubr (&Schar_syntax);
beefa22e 3529 defsubr (&Smatching_paren);
c65adb44 3530 defsubr (&Sstring_to_syntax);
8489eb67 3531 defsubr (&Smodify_syntax_entry);
62abe9cb 3532 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3533
3534 defsubr (&Sforward_word);
3535
195d1361
RS
3536 defsubr (&Sskip_chars_forward);
3537 defsubr (&Sskip_chars_backward);
3538 defsubr (&Sskip_syntax_forward);
3539 defsubr (&Sskip_syntax_backward);
3540
b3cfe0c8 3541 defsubr (&Sforward_comment);
8489eb67
RS
3542 defsubr (&Sscan_lists);
3543 defsubr (&Sscan_sexps);
3544 defsubr (&Sbackward_prefix_chars);
3545 defsubr (&Sparse_partial_sexp);
3546}