A simpler, centralized INLINE.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
ab422c4d
PE
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2013 Free
3 Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
8489eb67
RS
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
19
20
18160b98 21#include <config.h>
1b2de274 22
1b2de274 23#include <sys/types.h>
0328b6de 24
8489eb67
RS
25#include "lisp.h"
26#include "commands.h"
5c7b02ab 27#include "character.h"
e5560ff7 28#include "buffer.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361 36#include "intervals.h"
c5683ceb
SM
37#include "category.h"
38
1fc71008 39/* Eight single-bit flags have the following meanings:
c5683ceb
SM
40 1. This character is the first of a two-character comment-start sequence.
41 2. This character is the second of a two-character comment-start sequence.
42 3. This character is the first of a two-character comment-end sequence.
43 4. This character is the second of a two-character comment-end sequence.
44 5. This character is a prefix, for backward-prefix-chars.
45 6. The char is part of a delimiter for comments of style "b".
46 7. This character is part of a nestable comment sequence.
47 8. The char is part of a delimiter for comments of style "c".
48 Note that any two-character sequence whose first character has flag 1
49 and whose second character has flag 2 will be interpreted as a comment start.
50
1fc71008 51 Bits 6 and 8 discriminate among different comment styles.
c5683ceb 52 Languages such as C++ allow two orthogonal syntax start/end pairs
1fc71008 53 and bit 6 determines whether a comment-end or Scommentend
c5683ceb
SM
54 ends style a or b. Comment markers can start style a, b, c, or bc.
55 Style a is always the default.
1fc71008 56 For 2-char comment markers, the style b flag is looked up only on the second
c5683ceb 57 char of the comment marker and on the first char of the comment ender.
1fc71008
PE
58 For style c (like the nested flag), the flag can be placed on any of
59 the chars. */
c5683ceb 60
45b683a1 61/* These functions extract specific flags from an integer
c5683ceb
SM
62 that holds the syntax code and the flags. */
63
45b683a1
PE
64static bool
65SYNTAX_FLAGS_COMSTART_FIRST (int flags)
66{
67 return (flags >> 16) & 1;
68}
69static bool
70SYNTAX_FLAGS_COMSTART_SECOND (int flags)
71{
72 return (flags >> 17) & 1;
73}
74static bool
75SYNTAX_FLAGS_COMEND_FIRST (int flags)
76{
77 return (flags >> 18) & 1;
78}
79static bool
80SYNTAX_FLAGS_COMEND_SECOND (int flags)
81{
82 return (flags >> 19) & 1;
83}
84static bool
85SYNTAX_FLAGS_PREFIX (int flags)
86{
87 return (flags >> 20) & 1;
88}
89static bool
90SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
91{
92 return (flags >> 21) & 1;
93}
94static bool
95SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
96{
97 return (flags >> 23) & 1;
98}
99static int
100SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
101{
102 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
103}
104static bool
105SYNTAX_FLAGS_COMMENT_NESTED (int flags)
106{
107 return (flags >> 22) & 1;
108}
c5683ceb 109
c5683ceb
SM
110/* FLAGS should be the flags of the main char of the comment marker, e.g.
111 the second for comstart and the first for comend. */
45b683a1
PE
112static int
113SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
114{
115 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
116 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
117 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
118}
c5683ceb 119
45b683a1 120/* Extract a particular flag for a given character. */
c5683ceb 121
45b683a1
PE
122static bool
123SYNTAX_COMEND_FIRST (int c)
124{
125 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
126}
195d1361
RS
127
128/* We use these constants in place for comment-style and
45b683a1 129 string-ender-char to distinguish comments/strings started by
195d1361
RS
130 comment_fence and string_fence codes. */
131
45b683a1
PE
132enum
133 {
134 ST_COMMENT_STYLE = 256 + 1,
135 ST_STRING_STYLE = 256 + 2
136 };
8489eb67 137
955cbe7b
PE
138static Lisp_Object Qsyntax_table_p;
139static Lisp_Object Qsyntax_table, Qscan_error;
8489eb67 140
e5d4f4dc
RS
141/* This is the internal form of the parse state used in parse-partial-sexp. */
142
143struct lisp_parse_state
144 {
d311d28c 145 EMACS_INT depth; /* Depth at end of parsing. */
6449674e 146 int instring; /* -1 if not within string, else desired terminator. */
d311d28c 147 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
6449674e 148 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
1fc71008 149 bool quoted; /* True if just after an escape char at end of parsing. */
d311d28c 150 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
6449674e 151 /* Char number of most recent start-of-expression at current level */
d311d28c 152 ptrdiff_t thislevelstart;
6449674e 153 /* Char number of start of containing expression */
d311d28c
PE
154 ptrdiff_t prevlevelstart;
155 ptrdiff_t location; /* Char number at which parsing stopped. */
8ca30920 156 ptrdiff_t location_byte; /* Corresponding byte position. */
d311d28c 157 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
6449674e
SM
158 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
159 of levels (starting from outermost). */
e5d4f4dc
RS
160 };
161\f
37bef230
RS
162/* These variables are a cache for finding the start of a defun.
163 find_start_pos is the place for which the defun start was found.
164 find_start_value is the defun start position found for it.
6a140a74 165 find_start_value_byte is the corresponding byte position.
37bef230
RS
166 find_start_buffer is the buffer it was found in.
167 find_start_begv is the BEGV value when it was found.
168 find_start_modiff is the value of MODIFF when it was found. */
169
d311d28c
PE
170static ptrdiff_t find_start_pos;
171static ptrdiff_t find_start_value;
172static ptrdiff_t find_start_value_byte;
37bef230 173static struct buffer *find_start_buffer;
d311d28c
PE
174static ptrdiff_t find_start_begv;
175static EMACS_INT find_start_modiff;
6a140a74
RS
176
177
1fc71008
PE
178static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
179static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
180static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
f57e2426 181static void scan_sexps_forward (struct lisp_parse_state *,
d311d28c 182 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
1fc71008
PE
183 bool, Lisp_Object, int);
184static bool in_classes (int, Lisp_Object);
39eb03f1
PE
185
186/* This setter is used only in this file, so it can be private. */
b0ab8123 187static void
39eb03f1
PE
188bset_syntax_table (struct buffer *b, Lisp_Object val)
189{
190 b->INTERNAL_FIELD (syntax_table) = val;
191}
195d1361 192\f
c5683ceb 193/* Whether the syntax of the character C has the prefix flag set. */
1fc71008
PE
194bool
195syntax_prefix_flag_p (int c)
c5683ceb 196{
45b683a1 197 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
c5683ceb 198}
195d1361
RS
199
200struct gl_state_s gl_state; /* Global state of syntax parser. */
201
45b683a1 202enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
195d1361
RS
203 to scan to property-change. */
204
45b683a1
PE
205/* Set the syntax entry VAL for char C in table TABLE. */
206
207static void
208SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
209{
210 CHAR_TABLE_SET (table, c, val);
211}
212
213/* Set the syntax entry VAL for char-range RANGE in table TABLE.
214 RANGE is a cons (FROM . TO) specifying the range of characters. */
215
216static void
217SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
218 Lisp_Object val)
219{
220 Fset_char_table_range (table, range, val);
221}
222
223/* Extract the information from the entry for character C
224 in the current syntax table. */
225
226static Lisp_Object
227SYNTAX_MATCH (int c)
228{
229 Lisp_Object ent = SYNTAX_ENTRY (c);
230 return CONSP (ent) ? XCDR (ent) : Qnil;
231}
232
233/* This should be called with FROM at the start of forward
234 search, or after the last position of the backward search. It
235 makes sure that the first char is picked up with correct table, so
236 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
237 call.
238 Sign of COUNT gives the direction of the search.
239 */
240
241static void
242SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
243{
244 SETUP_BUFFER_SYNTAX_TABLE ();
245 gl_state.b_property = BEGV;
246 gl_state.e_property = ZV + 1;
247 gl_state.object = Qnil;
248 gl_state.offset = 0;
249 if (parse_sexp_lookup_properties)
250 if (count > 0 || from > BEGV)
251 update_syntax_table (count > 0 ? from : from - 1, count, 1, Qnil);
252}
253
254/* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
255 If it is t (which is only used in fast_c_string_match_ignore_case),
256 ignore properties altogether.
257
258 This is meant for regex.c to use. For buffers, regex.c passes arguments
259 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
260 So if it is a buffer, we set the offset field to BEGV. */
261
262void
263SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
264 ptrdiff_t from, ptrdiff_t count)
265{
266 SETUP_BUFFER_SYNTAX_TABLE ();
267 gl_state.object = object;
268 if (BUFFERP (gl_state.object))
269 {
270 struct buffer *buf = XBUFFER (gl_state.object);
271 gl_state.b_property = 1;
272 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
273 gl_state.offset = BUF_BEGV (buf) - 1;
274 }
275 else if (NILP (gl_state.object))
276 {
277 gl_state.b_property = 1;
278 gl_state.e_property = ZV - BEGV + 1;
279 gl_state.offset = BEGV - 1;
280 }
281 else if (EQ (gl_state.object, Qt))
282 {
283 gl_state.b_property = 0;
284 gl_state.e_property = PTRDIFF_MAX;
285 gl_state.offset = 0;
286 }
287 else
288 {
289 gl_state.b_property = 0;
290 gl_state.e_property = 1 + SCHARS (gl_state.object);
291 gl_state.offset = 0;
292 }
293 if (parse_sexp_lookup_properties)
294 update_syntax_table (from + gl_state.offset - (count <= 0),
295 count, 1, gl_state.object);
296}
297
6a140a74
RS
298/* Update gl_state to an appropriate interval which contains CHARPOS. The
299 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 300 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 301 valid at start, the rest is filled basing on OBJECT.
195d1361 302
6a140a74 303 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
304 direction than the intervals - or in an interval. We update the
305 current syntax-table basing on the property of this interval, and
6a140a74 306 update the interval to start further than CHARPOS - or be
77c7bcb1 307 NULL. We also update lim_property to be the next value of
6a140a74 308 charpos to call this subroutine again - or be before/after the
195d1361
RS
309 start/end of OBJECT. */
310
311void
1fc71008 312update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
4f3a2f8d 313 Lisp_Object object)
195d1361
RS
314{
315 Lisp_Object tmp_table;
1fc71008
PE
316 int cnt = 0;
317 bool invalidate = 1;
5a774522 318 INTERVAL i;
195d1361
RS
319
320 if (init)
321 {
bb0de084 322 gl_state.old_prop = Qnil;
195d1361
RS
323 gl_state.start = gl_state.b_property;
324 gl_state.stop = gl_state.e_property;
bb0de084
SM
325 i = interval_of (charpos, object);
326 gl_state.backward_i = gl_state.forward_i = i;
195d1361 327 invalidate = 0;
77c7bcb1 328 if (!i)
195d1361 329 return;
f902a008 330 /* interval_of updates only ->position of the return value, so
d80f4cc7 331 update the parents manually to speed up update_interval. */
bb0de084 332 while (!NULL_PARENT (i))
d80f4cc7
RS
333 {
334 if (AM_RIGHT_CHILD (i))
439d5cb4 335 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 336 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
337 - TOTAL_LENGTH (INTERVAL_PARENT (i))
338 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 339 else
439d5cb4 340 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 341 + TOTAL_LENGTH (i);
439d5cb4 342 i = INTERVAL_PARENT (i);
d80f4cc7
RS
343 }
344 i = gl_state.forward_i;
bb0de084 345 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 346 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
347 goto update;
348 }
5a774522 349 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 350
423e705d 351 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 352 or further off. */
77c7bcb1 353 if (!i)
195d1361 354 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 355 else if (charpos < i->position) /* Move left. */
195d1361
RS
356 {
357 if (count > 0)
f902a008 358 error ("Error in syntax_table logic for intervals <-");
195d1361 359 /* Update the interval. */
6a140a74 360 i = update_interval (i, charpos);
bb0de084 361 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
362 {
363 invalidate = 0;
195d1361 364 gl_state.forward_i = i;
ee0cdb48 365 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 366 }
423e705d 367 }
6a140a74 368 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
369 {
370 if (count < 0)
f902a008 371 error ("Error in syntax_table logic for intervals ->");
195d1361 372 /* Update the interval. */
6a140a74 373 i = update_interval (i, charpos);
bb0de084 374 if (i->position != gl_state.e_property)
195d1361
RS
375 {
376 invalidate = 0;
195d1361 377 gl_state.backward_i = i;
bb0de084 378 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
379 }
380 }
195d1361
RS
381
382 update:
383 tmp_table = textget (i->plist, Qsyntax_table);
384
385 if (invalidate)
386 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 387
423e705d
SM
388 if (invalidate) /* Did not get to adjacent interval. */
389 { /* with the same table => */
390 /* invalidate the old range. */
195d1361
RS
391 if (count > 0)
392 {
393 gl_state.backward_i = i;
bb0de084
SM
394 gl_state.b_property = i->position - gl_state.offset;
395 }
396 else
195d1361 397 {
bb0de084 398 gl_state.forward_i = i;
ee0cdb48 399 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
400 }
401 }
37bef230 402
bb0de084 403 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 404 {
bb0de084
SM
405 gl_state.current_syntax_table = tmp_table;
406 gl_state.old_prop = tmp_table;
407 if (EQ (Fsyntax_table_p (tmp_table), Qt))
408 {
409 gl_state.use_global = 0;
7d0393cf 410 }
bb0de084
SM
411 else if (CONSP (tmp_table))
412 {
413 gl_state.use_global = 1;
414 gl_state.global_code = tmp_table;
415 }
7d0393cf 416 else
bb0de084
SM
417 {
418 gl_state.use_global = 0;
4b4deea2 419 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
bb0de084 420 }
195d1361
RS
421 }
422
77c7bcb1 423 while (i)
195d1361
RS
424 {
425 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
426 {
427 if (count > 0)
bb0de084
SM
428 {
429 gl_state.e_property = i->position - gl_state.offset;
430 gl_state.forward_i = i;
431 }
432 else
433 {
5a774522
SM
434 gl_state.b_property
435 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
436 gl_state.backward_i = i;
437 }
438 return;
195d1361 439 }
7d0393cf 440 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
441 {
442 if (count > 0)
bb0de084 443 {
5a774522
SM
444 gl_state.e_property
445 = i->position + LENGTH (i) - gl_state.offset
446 /* e_property at EOB is not set to ZV but to ZV+1, so that
447 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
448 having to check eob between the two. */
77c7bcb1 449 + (next_interval (i) ? 0 : 1);
bb0de084
SM
450 gl_state.forward_i = i;
451 }
452 else
453 {
454 gl_state.b_property = i->position - gl_state.offset;
455 gl_state.backward_i = i;
456 }
457 return;
195d1361
RS
458 }
459 cnt++;
460 i = count > 0 ? next_interval (i) : previous_interval (i);
461 }
77c7bcb1 462 eassert (i == NULL); /* This property goes to the end. */
bb0de084
SM
463 if (count > 0)
464 gl_state.e_property = gl_state.stop;
465 else
466 gl_state.b_property = gl_state.start;
195d1361
RS
467}
468\f
1fc71008 469/* Returns true if char at CHARPOS is quoted.
6a140a74
RS
470 Global syntax-table data should be set up already to be good at CHARPOS
471 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361 472
1fc71008 473static bool
d311d28c 474char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
195d1361 475{
1fc71008
PE
476 enum syntaxcode code;
477 ptrdiff_t beg = BEGV;
478 bool quoted = 0;
d311d28c 479 ptrdiff_t orig = charpos;
6a140a74 480
02d8b017 481 while (charpos > beg)
195d1361 482 {
ab229fdd 483 int c;
02d8b017 484 DEC_BOTH (charpos, bytepos);
ab229fdd 485
6a140a74 486 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 487 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 488 code = SYNTAX (c);
6a140a74
RS
489 if (! (code == Scharquote || code == Sescape))
490 break;
491
6a140a74 492 quoted = !quoted;
195d1361 493 }
6a140a74
RS
494
495 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
496 return quoted;
497}
6a140a74 498
6a140a74
RS
499/* Return the bytepos one character before BYTEPOS.
500 We assume that BYTEPOS is not at the start of the buffer. */
501
b0ab8123 502static ptrdiff_t
d311d28c 503dec_bytepos (ptrdiff_t bytepos)
6a140a74 504{
4b4deea2 505 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
506 return bytepos - 1;
507
6a140a74
RS
508 DEC_POS (bytepos);
509 return bytepos;
510}
195d1361 511\f
2b34df4e 512/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
513 It should be the last one before POS, or nearly the last.
514
515 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 516 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
517
518 We record the information about where the scan started
519 and what its result was, so that another call in the same area
520 can return the same value very quickly.
195d1361
RS
521
522 There is no promise at which position the global syntax data is
523 valid on return from the subroutine, so the caller should explicitly
524 update the global data. */
37bef230 525
d311d28c
PE
526static ptrdiff_t
527find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
37bef230 528{
d311d28c 529 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
37bef230 530
1fd1cc2f
RS
531 if (!open_paren_in_column_0_is_defun_start)
532 {
6e20a0d4 533 find_start_value = BEGV;
1fd1cc2f 534 find_start_value_byte = BEGV_BYTE;
6e20a0d4
CY
535 find_start_buffer = current_buffer;
536 find_start_modiff = MODIFF;
537 find_start_begv = BEGV;
538 find_start_pos = pos;
1fd1cc2f
RS
539 return BEGV;
540 }
541
37bef230
RS
542 /* Use previous finding, if it's valid and applies to this inquiry. */
543 if (current_buffer == find_start_buffer
544 /* Reuse the defun-start even if POS is a little farther on.
545 POS might be in the next defun, but that's ok.
546 Our value may not be the best possible, but will still be usable. */
547 && pos <= find_start_pos + 1000
548 && pos >= find_start_value
549 && BEGV == find_start_begv
550 && MODIFF == find_start_modiff)
551 return find_start_value;
552
553 /* Back up to start of line. */
6a140a74 554 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 555
195d1361
RS
556 /* We optimize syntax-table lookup for rare updates. Thus we accept
557 only those `^\s(' which are good in global _and_ text-property
558 syntax-tables. */
d48cd3f4 559 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 560 while (PT > BEGV)
37bef230 561 {
ab229fdd
AS
562 int c;
563
1fd1cc2f
RS
564 /* Open-paren at start of line means we may have found our
565 defun-start. */
327719ee 566 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 567 if (SYNTAX (c) == Sopen)
195d1361 568 {
1fd1cc2f 569 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 570 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 571 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
572 break;
573 /* Now fallback to the default value. */
d48cd3f4 574 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 575 }
1fd1cc2f
RS
576 /* Move to beg of previous line. */
577 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
578 }
579
580 /* Record what we found, for the next try. */
6a140a74
RS
581 find_start_value = PT;
582 find_start_value_byte = PT_BYTE;
37bef230
RS
583 find_start_buffer = current_buffer;
584 find_start_modiff = MODIFF;
585 find_start_begv = BEGV;
586 find_start_pos = pos;
587
6a140a74
RS
588 TEMP_SET_PT_BOTH (opoint, opoint_byte);
589
37bef230
RS
590 return find_start_value;
591}
592\f
f902a008
RS
593/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
594
1fc71008 595static bool
d311d28c 596prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
f902a008 597{
1fc71008
PE
598 int c;
599 bool val;
f902a008
RS
600
601 DEC_BOTH (pos, pos_byte);
602 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
603 c = FETCH_CHAR (pos_byte);
604 val = SYNTAX_COMEND_FIRST (c);
605 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
606 return val;
607}
608
1fc71008 609/* Check whether charpos FROM is at the end of a comment.
6a140a74
RS
610 FROM_BYTE is the bytepos corresponding to FROM.
611 Do not move back before STOP.
612
1fc71008 613 Return true if we find a comment ending at FROM/FROM_BYTE.
6a140a74
RS
614
615 If successful, store the charpos of the comment's beginning
616 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
617
618 Global syntax data remains valid for backward search starting at
619 the returned value (or at FROM, if the search was not successful). */
195d1361 620
1fc71008
PE
621static bool
622back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
623 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
624 ptrdiff_t *bytepos_ptr)
195d1361
RS
625{
626 /* Look back, counting the parity of string-quotes,
627 and recording the comment-starters seen.
628 When we reach a safe place, assume that's not in a string;
629 then step the main scan to the earliest comment-starter seen
630 an even number of string quotes away from the safe place.
631
632 OFROM[I] is position of the earliest comment-starter seen
633 which is I+2X quotes from the comment-end.
634 PARITY is current parity of quotes from the comment end. */
3ee5041c 635 int string_style = -1; /* Presumed outside of any string. */
1fc71008 636 bool string_lossage = 0;
f7c436c1 637 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 638 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
639 comment-starter we might see later could be a false positive (hidden
640 inside another comment).
641 Test case: { a (* b } c (* d *) */
1fc71008 642 bool comment_lossage = 0;
d311d28c
PE
643 ptrdiff_t comment_end = from;
644 ptrdiff_t comment_end_byte = from_byte;
645 ptrdiff_t comstart_pos = 0;
646 ptrdiff_t comstart_byte IF_LINT (= 0);
eb35b628
RS
647 /* Place where the containing defun starts,
648 or 0 if we didn't come across it yet. */
d311d28c
PE
649 ptrdiff_t defun_start = 0;
650 ptrdiff_t defun_start_byte = 0;
1fc71008
PE
651 enum syntaxcode code;
652 ptrdiff_t nesting = 1; /* current comment nesting */
ea315ed6 653 int c;
3f679f55
SM
654 int syntax = 0;
655
656 /* FIXME: A }} comment-ender style leads to incorrect behavior
657 in the case of {{ c }}} because we ignore the last two chars which are
658 assumed to be comment-enders although they aren't. */
195d1361
RS
659
660 /* At beginning of range to scan, we're outside of strings;
661 that determines quote parity to the comment-end. */
662 while (from != stop)
663 {
d311d28c 664 ptrdiff_t temp_byte;
1fc71008
PE
665 int prev_syntax;
666 bool com2start, com2end, comstart;
6a140a74 667
195d1361 668 /* Move back and examine a character. */
6a140a74 669 DEC_BOTH (from, from_byte);
195d1361
RS
670 UPDATE_SYNTAX_TABLE_BACKWARD (from);
671
3f679f55 672 prev_syntax = syntax;
b7dbcc19 673 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 674 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
675 code = SYNTAX (c);
676
3f679f55
SM
677 /* Check for 2-char comment markers. */
678 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
679 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
c5683ceb
SM
680 && (comstyle
681 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
3f679f55
SM
682 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
683 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
684 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
685 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
fbb3da77 686 comstart = (com2start || code == Scomment);
01f44d5a 687
3f679f55
SM
688 /* Nasty cases with overlapping 2-char comment markers:
689 - snmp-mode: -- c -- foo -- c --
690 --- c --
691 ------ c --
692 - c-mode: *||*
693 |* *|* *|
694 |*| |* |*|
695 /// */
696
697 /* If a 2-char comment sequence partly overlaps with another,
fbb3da77
SM
698 we don't try to be clever. E.g. |*| in C, or }% in modes that
699 have %..\n and %{..}%. */
700 if (from > stop && (com2end || comstart))
195d1361 701 {
d311d28c 702 ptrdiff_t next = from, next_byte = from_byte;
4f3a2f8d 703 int next_c, next_syntax;
3f679f55
SM
704 DEC_BOTH (next, next_byte);
705 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 706 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55 707 next_syntax = SYNTAX_WITH_FLAGS (next_c);
fbb3da77 708 if (((comstart || comnested)
3f679f55
SM
709 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
710 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
711 || ((com2end || comnested)
712 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
c5683ceb
SM
713 && (comstyle
714 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
3f679f55
SM
715 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
716 goto lossage;
717 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 718 }
3f679f55
SM
719
720 if (com2start && comstart_pos == 0)
721 /* We're looking at a comment starter. But it might be a comment
722 ender as well (see snmp-mode). The first time we see one, we
723 need to consider it as a comment starter,
724 and the subsequent times as a comment ender. */
725 com2end = 0;
726
727 /* Turn a 2-char comment sequences into the appropriate syntax. */
728 if (com2end)
729 code = Sendcomment;
730 else if (com2start)
731 code = Scomment;
732 /* Ignore comment starters of a different style. */
733 else if (code == Scomment
c5683ceb 734 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
3f679f55 735 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 736 continue;
195d1361 737
9828a477
KH
738 /* Ignore escaped characters, except comment-enders. */
739 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
740 continue;
741
3ee5041c 742 switch (code)
195d1361 743 {
3ee5041c
SM
744 case Sstring_fence:
745 case Scomment_fence:
746 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
747 case Sstring:
748 /* Track parity of quotes. */
749 if (string_style == -1)
750 /* Entering a string. */
751 string_style = c;
752 else if (string_style == c)
753 /* Leaving the string. */
754 string_style = -1;
755 else
756 /* If we have two kinds of string delimiters.
757 There's no way to grok this scanning backwards. */
195d1361 758 string_lossage = 1;
3ee5041c 759 break;
7d0393cf 760
3ee5041c
SM
761 case Scomment:
762 /* We've already checked that it is the relevant comstyle. */
f7c436c1 763 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
764 /* There are odd string quotes involved, so let's be careful.
765 Test case in Pascal: " { " a { " } */
766 goto lossage;
767
f7c436c1
SM
768 if (!comnested)
769 {
770 /* Record best comment-starter so far. */
771 comstart_pos = from;
772 comstart_byte = from_byte;
773 }
774 else if (--nesting <= 0)
95ff8dfc
RS
775 /* nested comments have to be balanced, so we don't need to
776 keep looking for earlier ones. We use here the same (slightly
777 incorrect) reasoning as below: since it is followed by uniform
778 paired string quotes, this comment-start has to be outside of
779 strings, else the comment-end itself would be inside a string. */
780 goto done;
3ee5041c
SM
781 break;
782
02010917 783 case Sendcomment:
c5683ceb 784 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
d070eb22 785 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 786 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
787 /* This is the same style of comment ender as ours. */
788 {
789 if (comnested)
790 nesting++;
791 else
792 /* Anything before that can't count because it would match
793 this comment-ender rather than ours. */
794 from = stop; /* Break out of the loop. */
795 }
f7c436c1
SM
796 else if (comstart_pos != 0 || c != '\n')
797 /* We're mixing comment styles here, so we'd better be careful.
798 The (comstart_pos != 0 || c != '\n') check is not quite correct
799 (we should just always set comment_lossage), but removing it
800 would imply that any multiline comment in C would go through
801 lossage, which seems overkill.
802 The failure should only happen in the rare cases such as
803 { (* } *) */
804 comment_lossage = 1;
02010917 805 break;
195d1361 806
02010917
SM
807 case Sopen:
808 /* Assume a defun-start point is outside of strings. */
809 if (open_paren_in_column_0_is_defun_start
810 && (from == stop
811 || (temp_byte = dec_bytepos (from_byte),
812 FETCH_CHAR (temp_byte) == '\n')))
813 {
814 defun_start = from;
815 defun_start_byte = from_byte;
816 from = stop; /* Break out of the loop. */
817 }
eb35b628 818 break;
02010917
SM
819
820 default:
f7c436c1 821 break;
eb35b628 822 }
195d1361
RS
823 }
824
825 if (comstart_pos == 0)
826 {
827 from = comment_end;
6a140a74 828 from_byte = comment_end_byte;
195d1361
RS
829 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
830 }
f7c436c1
SM
831 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
832 or `done'), then we've found the beginning of the non-nested comment. */
833 else if (1) /* !comnested */
195d1361
RS
834 {
835 from = comstart_pos;
6a140a74 836 from_byte = comstart_byte;
789f3320 837 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
838 }
839 else
840 {
3ee5041c
SM
841 struct lisp_parse_state state;
842 lossage:
195d1361
RS
843 /* We had two kinds of string delimiters mixed up
844 together. Decode this going forwards.
02010917 845 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
846 to the one in question; this records where we
847 last passed a comment starter. */
eb35b628
RS
848 /* If we did not already find the defun start, find it now. */
849 if (defun_start == 0)
850 {
851 defun_start = find_defun_start (comment_end, comment_end_byte);
852 defun_start_byte = find_start_value_byte;
853 }
02010917 854 do
195d1361 855 {
02010917
SM
856 scan_sexps_forward (&state,
857 defun_start, defun_start_byte,
d311d28c
PE
858 comment_end, TYPE_MINIMUM (EMACS_INT),
859 0, Qnil, 0);
02010917
SM
860 defun_start = comment_end;
861 if (state.incomment == (comnested ? 1 : -1)
862 && state.comstyle == comstyle)
863 from = state.comstr_start;
864 else
865 {
866 from = comment_end;
867 if (state.incomment)
868 /* If comment_end is inside some other comment, maybe ours
869 is nested, so we need to try again from within the
870 surrounding comment. Example: { a (* " *) */
871 {
872 /* FIXME: We should advance by one or two chars. */
873 defun_start = state.comstr_start + 2;
874 defun_start_byte = CHAR_TO_BYTE (defun_start);
875 }
876 }
877 } while (defun_start < comment_end);
878
6a140a74 879 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
880 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
881 }
7d0393cf 882
95ff8dfc 883 done:
6a140a74
RS
884 *charpos_ptr = from;
885 *bytepos_ptr = from_byte;
886
1fc71008 887 return from != comment_end;
195d1361
RS
888}
889\f
8489eb67 890DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
891 doc: /* Return t if OBJECT is a syntax table.
892Currently, any char-table counts as a syntax table. */)
5842a27b 893 (Lisp_Object object)
8489eb67 894{
2203e1e8 895 if (CHAR_TABLE_P (object)
e704cb4b 896 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
897 return Qt;
898 return Qnil;
899}
900
8ea151b2 901static void
971de7fb 902check_syntax_table (Lisp_Object obj)
8489eb67 903{
47f5f6ae
KS
904 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
905 Qsyntax_table_p, obj);
7d0393cf 906}
8489eb67 907
8489eb67 908DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
909 doc: /* Return the current syntax table.
910This is the one specified by the current buffer. */)
5842a27b 911 (void)
8489eb67 912{
4b4deea2 913 return BVAR (current_buffer, syntax_table);
8489eb67
RS
914}
915
916DEFUN ("standard-syntax-table", Fstandard_syntax_table,
917 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
918 doc: /* Return the standard syntax table.
919This is the one used for new buffers. */)
5842a27b 920 (void)
8489eb67
RS
921{
922 return Vstandard_syntax_table;
923}
924
925DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
926 doc: /* Construct a new syntax table and return it.
927It is a copy of the TABLE, which defaults to the standard syntax table. */)
5842a27b 928 (Lisp_Object table)
8489eb67 929{
8ea151b2
RS
930 Lisp_Object copy;
931
265a9e55 932 if (!NILP (table))
8ea151b2
RS
933 check_syntax_table (table);
934 else
935 table = Vstandard_syntax_table;
936
937 copy = Fcopy_sequence (table);
0f867324
RS
938
939 /* Only the standard syntax table should have a default element.
940 Other syntax tables should inherit from parents instead. */
742af32f 941 set_char_table_defalt (copy, Qnil);
0f867324
RS
942
943 /* Copied syntax tables should all have parents.
944 If we copied one with no parent, such as the standard syntax table,
945 use the standard syntax table as the copy's parent. */
946 if (NILP (XCHAR_TABLE (copy)->parent))
947 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 948 return copy;
8489eb67
RS
949}
950
951DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
952 doc: /* Select a new syntax table for the current buffer.
953One argument, a syntax table. */)
5842a27b 954 (Lisp_Object table)
8489eb67 955{
4e4dfa78 956 int idx;
8ea151b2 957 check_syntax_table (table);
39eb03f1 958 bset_syntax_table (current_buffer, table);
8489eb67 959 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
960 idx = PER_BUFFER_VAR_IDX (syntax_table);
961 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
962 return table;
963}
964\f
965/* Convert a letter which signifies a syntax code
966 into the code it signifies.
195d1361 967 This is used by modify-syntax-entry, and other things. */
8489eb67 968
1fc71008 969unsigned char const syntax_spec_code[0400] =
8489eb67
RS
970 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
971 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
972 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
973 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1fc71008
PE
974 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
975 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
8489eb67 976 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1fc71008
PE
977 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
978 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67 979 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1fc71008
PE
980 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
981 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
8489eb67
RS
982 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
983 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1fc71008
PE
984 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
985 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
8489eb67
RS
986 };
987
195d1361 988/* Indexed by syntax code, give the letter that describes it. */
8489eb67 989
1fc71008 990char const syntax_code_spec[16] =
8489eb67 991 {
195d1361
RS
992 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
993 '!', '|'
8489eb67 994 };
93da5fff
KH
995
996/* Indexed by syntax code, give the object (cons of syntax code and
997 nil) to be stored in syntax table. Since these objects can be
998 shared among syntax tables, we generate them in advance. By
999 sharing objects, the function `describe-syntax' can give a more
1000 compact listing. */
1001static Lisp_Object Vsyntax_code_object;
1002
8489eb67
RS
1003\f
1004DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 1005 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
1006For example, if CHARACTER is a word constituent, the
1007character `w' (119) is returned.
fdb82f93
PJ
1008The characters that correspond to various syntax codes
1009are listed in the documentation of `modify-syntax-entry'. */)
5842a27b 1010 (Lisp_Object character)
8489eb67 1011{
8ea151b2 1012 int char_int;
774b9a60 1013 CHECK_CHARACTER (character);
2203e1e8 1014 char_int = XINT (character);
d48cd3f4 1015 SETUP_BUFFER_SYNTAX_TABLE ();
1fc71008 1016 return make_number (syntax_code_spec[SYNTAX (char_int)]);
beefa22e
RS
1017}
1018
1019DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93 1020 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
5842a27b 1021 (Lisp_Object character)
beefa22e 1022{
1fc71008
PE
1023 int char_int;
1024 enum syntaxcode code;
1025 CHECK_CHARACTER (character);
2203e1e8 1026 char_int = XINT (character);
d48cd3f4 1027 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 1028 code = SYNTAX (char_int);
a8bd7cd8 1029 if (code == Sopen || code == Sclose)
2e34157c 1030 return SYNTAX_MATCH (char_int);
beefa22e 1031 return Qnil;
8489eb67
RS
1032}
1033
c65adb44 1034DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
6dcef6ec
CY
1035 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1036STRING should be a string of the form allowed as argument of
1037`modify-syntax-entry'. The return value is a raw syntax descriptor: a
1038cons cell \(CODE . MATCHING-CHAR) which can be used, for example, as
1039the value of a `syntax-table' text property. */)
5842a27b 1040 (Lisp_Object string)
8489eb67 1041{
1fc71008 1042 const unsigned char *p;
247e20a8 1043 int val;
8ea151b2 1044 Lisp_Object match;
8489eb67 1045
b7826503 1046 CHECK_STRING (string);
8489eb67 1047
d5db4077 1048 p = SDATA (string);
1fc71008
PE
1049 val = syntax_spec_code[*p++];
1050 if (val == 0377)
b764a653 1051 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 1052
1fc71008 1053 if (val == Sinherit)
c65adb44 1054 return Qnil;
8ea151b2
RS
1055
1056 if (*p)
d1be9f0f 1057 {
93da5fff 1058 int len;
62a6e103 1059 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 1060 XSETINT (match, character);
d1be9f0f
RS
1061 if (XFASTINT (match) == ' ')
1062 match = Qnil;
93da5fff 1063 p += len;
d1be9f0f
RS
1064 }
1065 else
8ea151b2 1066 match = Qnil;
8489eb67 1067
8489eb67
RS
1068 while (*p)
1069 switch (*p++)
1070 {
1071 case '1':
247e20a8 1072 val |= 1 << 16;
8489eb67
RS
1073 break;
1074
1075 case '2':
247e20a8 1076 val |= 1 << 17;
8489eb67
RS
1077 break;
1078
1079 case '3':
247e20a8 1080 val |= 1 << 18;
8489eb67
RS
1081 break;
1082
1083 case '4':
247e20a8 1084 val |= 1 << 19;
8489eb67
RS
1085 break;
1086
1087 case 'p':
247e20a8 1088 val |= 1 << 20;
8489eb67 1089 break;
e5d4f4dc
RS
1090
1091 case 'b':
247e20a8 1092 val |= 1 << 21;
e5d4f4dc 1093 break;
95ff8dfc
RS
1094
1095 case 'n':
1096 val |= 1 << 22;
1097 break;
c5683ceb
SM
1098
1099 case 'c':
1100 val |= 1 << 23;
1101 break;
8489eb67 1102 }
7d0393cf 1103
77b37c05 1104 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
28be1ada 1105 return AREF (Vsyntax_code_object, val);
93da5fff
KH
1106 else
1107 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
1108 return Fcons (make_number (val), match);
1109}
1110
fdb82f93 1111/* I really don't know why this is interactive
2c9e1900 1112 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 1113DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 1114 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 1115 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 1116The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 1117 the current buffer's syntax table.
5c7b02ab 1118CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 1119in the range MIN to MAX are changed.
fdb82f93
PJ
1120The first character of NEWENTRY should be one of the following:
1121 Space or - whitespace syntax. w word constituent.
1122 _ symbol constituent. . punctuation.
1123 ( open-parenthesis. ) close-parenthesis.
1124 " string quote. \\ escape.
1125 $ paired delimiter. ' expression quote or prefix operator.
1126 < comment starter. > comment ender.
02676e5d 1127 / character-quote. @ inherit from parent table.
fdb82f93
PJ
1128 | generic string fence. ! generic comment fence.
1129
1130Only single-character comment start and end sequences are represented thus.
1131Two-character sequences are represented as described below.
1132The second character of NEWENTRY is the matching parenthesis,
1133 used only if the first character is `(' or `)'.
1134Any additional characters are flags.
1135Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1136 1 means CHAR is the start of a two-char comment start sequence.
1137 2 means CHAR is the second character of such a sequence.
1138 3 means CHAR is the start of a two-char comment end sequence.
1139 4 means CHAR is the second character of such a sequence.
fdb82f93 1140
c5683ceb 1141There can be several orthogonal comment sequences. This is to support
fdb82f93
PJ
1142language modes such as C++. By default, all comment sequences are of style
1143a, but you can set the comment sequence style to b (on the second character
c5683ceb
SM
1144of a comment-start, and the first character of a comment-end sequence) and/or
1145c (on any of its chars) using this flag:
a50a10a0 1146 b means CHAR is part of comment sequence b.
c5683ceb 1147 c means CHAR is part of comment sequence c.
a50a10a0 1148 n means CHAR is part of a nestable comment sequence.
fdb82f93 1149
a50a10a0 1150 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1151 such characters are treated as whitespace when they occur
a50a10a0 1152 between expressions.
fedc6ab5 1153usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
5842a27b 1154 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
c65adb44 1155{
5c7b02ab
KH
1156 if (CONSP (c))
1157 {
8f924df7
KH
1158 CHECK_CHARACTER_CAR (c);
1159 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1160 }
1161 else
1162 CHECK_CHARACTER (c);
c65adb44
SM
1163
1164 if (NILP (syntax_table))
4b4deea2 1165 syntax_table = BVAR (current_buffer, syntax_table);
c65adb44
SM
1166 else
1167 check_syntax_table (syntax_table);
8489eb67 1168
5c7b02ab
KH
1169 newentry = Fstring_to_syntax (newentry);
1170 if (CONSP (c))
1171 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1172 else
1173 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1174
1175 /* We clear the regexp cache, since character classes can now have
1176 different values from those in the compiled regexps.*/
1177 clear_regexp_cache ();
1178
8489eb67
RS
1179 return Qnil;
1180}
1181\f
1182/* Dump syntax table to buffer in human-readable format */
1183
62abe9cb
SM
1184DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1185 Sinternal_describe_syntax_value, 1, 1, 0,
1186 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
5842a27b 1187 (Lisp_Object syntax)
8489eb67 1188{
1fc71008
PE
1189 int code, syntax_code;
1190 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
8489eb67 1191 char str[2];
62abe9cb 1192 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1193
8ea151b2
RS
1194 if (NILP (value))
1195 {
62abe9cb
SM
1196 insert_string ("default");
1197 return syntax;
8ea151b2
RS
1198 }
1199
908b7fea
KH
1200 if (CHAR_TABLE_P (value))
1201 {
62abe9cb
SM
1202 insert_string ("deeper char-table ...");
1203 return syntax;
908b7fea
KH
1204 }
1205
8ea151b2
RS
1206 if (!CONSP (value))
1207 {
62abe9cb
SM
1208 insert_string ("invalid");
1209 return syntax;
8ea151b2
RS
1210 }
1211
c1d497be
KR
1212 first = XCAR (value);
1213 match_lisp = XCDR (value);
8ea151b2 1214
d311d28c 1215 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
8489eb67 1216 {
62abe9cb
SM
1217 insert_string ("invalid");
1218 return syntax;
8489eb67
RS
1219 }
1220
d311d28c 1221 syntax_code = XINT (first) & INT_MAX;
1fc71008 1222 code = syntax_code & 0377;
c5683ceb
SM
1223 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1224 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1225 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1226 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1227 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1228 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1229 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1230 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
8489eb67 1231
1fc71008 1232 if (Smax <= code)
8489eb67
RS
1233 {
1234 insert_string ("invalid");
62abe9cb 1235 return syntax;
8489eb67 1236 }
8489eb67 1237
1fc71008 1238 str[0] = syntax_code_spec[code], str[1] = 0;
8489eb67
RS
1239 insert (str, 1);
1240
93da5fff
KH
1241 if (NILP (match_lisp))
1242 insert (" ", 1);
1243 else
1244 insert_char (XINT (match_lisp));
8489eb67 1245
8489eb67
RS
1246 if (start1)
1247 insert ("1", 1);
1248 if (start2)
1249 insert ("2", 1);
1250
1251 if (end1)
1252 insert ("3", 1);
1253 if (end2)
1254 insert ("4", 1);
1255
1256 if (prefix)
1257 insert ("p", 1);
c5683ceb 1258 if (comstyleb)
e5d4f4dc 1259 insert ("b", 1);
c5683ceb
SM
1260 if (comstylec)
1261 insert ("c", 1);
e6365855
SM
1262 if (comnested)
1263 insert ("n", 1);
8489eb67
RS
1264
1265 insert_string ("\twhich means: ");
1266
7393bcbb 1267 switch (code)
8489eb67
RS
1268 {
1269 case Swhitespace:
1270 insert_string ("whitespace"); break;
1271 case Spunct:
1272 insert_string ("punctuation"); break;
1273 case Sword:
1274 insert_string ("word"); break;
1275 case Ssymbol:
1276 insert_string ("symbol"); break;
1277 case Sopen:
1278 insert_string ("open"); break;
1279 case Sclose:
1280 insert_string ("close"); break;
1281 case Squote:
d671382e 1282 insert_string ("prefix"); break;
8489eb67
RS
1283 case Sstring:
1284 insert_string ("string"); break;
1285 case Smath:
1286 insert_string ("math"); break;
1287 case Sescape:
1288 insert_string ("escape"); break;
1289 case Scharquote:
1290 insert_string ("charquote"); break;
1291 case Scomment:
1292 insert_string ("comment"); break;
1293 case Sendcomment:
1294 insert_string ("endcomment"); break;
d671382e
SM
1295 case Sinherit:
1296 insert_string ("inherit"); break;
1297 case Scomment_fence:
1298 insert_string ("comment fence"); break;
1299 case Sstring_fence:
1300 insert_string ("string fence"); break;
8489eb67
RS
1301 default:
1302 insert_string ("invalid");
62abe9cb 1303 return syntax;
8489eb67
RS
1304 }
1305
8ea151b2 1306 if (!NILP (match_lisp))
8489eb67
RS
1307 {
1308 insert_string (", matches ");
8ea151b2 1309 insert_char (XINT (match_lisp));
8489eb67
RS
1310 }
1311
1312 if (start1)
1313 insert_string (",\n\t is the first character of a comment-start sequence");
1314 if (start2)
1315 insert_string (",\n\t is the second character of a comment-start sequence");
1316
1317 if (end1)
1318 insert_string (",\n\t is the first character of a comment-end sequence");
1319 if (end2)
1320 insert_string (",\n\t is the second character of a comment-end sequence");
c5683ceb 1321 if (comstyleb)
e5d4f4dc 1322 insert_string (" (comment style b)");
c5683ceb
SM
1323 if (comstylec)
1324 insert_string (" (comment style c)");
e6365855
SM
1325 if (comnested)
1326 insert_string (" (nestable)");
e5d4f4dc 1327
8489eb67
RS
1328 if (prefix)
1329 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1330
62abe9cb
SM
1331 return syntax;
1332}
8489eb67
RS
1333\f
1334/* Return the position across COUNT words from FROM.
1335 If that many words cannot be found before the end of the buffer, return 0.
1336 COUNT negative means scan backward and stop at word beginning. */
1337
d311d28c
PE
1338ptrdiff_t
1339scan_words (register ptrdiff_t from, register EMACS_INT count)
8489eb67 1340{
d311d28c
PE
1341 register ptrdiff_t beg = BEGV;
1342 register ptrdiff_t end = ZV;
1343 register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1344 register enum syntaxcode code;
1345 int ch0, ch1;
fca8fe46 1346 Lisp_Object func, pos;
8489eb67
RS
1347
1348 immediate_quit = 1;
1349 QUIT;
1350
195d1361
RS
1351 SETUP_SYNTAX_TABLE (from, count);
1352
8489eb67
RS
1353 while (count > 0)
1354 {
1355 while (1)
1356 {
1357 if (from == end)
1358 {
1359 immediate_quit = 0;
1360 return 0;
1361 }
195d1361 1362 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1363 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1364 code = SYNTAX (ch0);
6a140a74 1365 INC_BOTH (from, from_byte);
8489eb67
RS
1366 if (words_include_escapes
1367 && (code == Sescape || code == Scharquote))
1368 break;
1369 if (code == Sword)
1370 break;
8489eb67 1371 }
93da5fff
KH
1372 /* Now CH0 is a character which begins a word and FROM is the
1373 position of the next character. */
8f924df7 1374 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1375 if (! NILP (Ffboundp (func)))
8489eb67 1376 {
869bb237 1377 pos = call2 (func, make_number (from - 1), make_number (end));
17f85215 1378 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
8f924df7
KH
1379 {
1380 from = XINT (pos);
1381 from_byte = CHAR_TO_BYTE (from);
1382 }
8489eb67 1383 }
869bb237 1384 else
8cb8232a 1385 {
8cb8232a
KH
1386 while (1)
1387 {
1388 if (from == end) break;
1389 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1390 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1391 code = SYNTAX (ch1);
1392 if ((code != Sword
1393 && (! words_include_escapes
1394 || (code != Sescape && code != Scharquote)))
2b70a11c 1395 || word_boundary_p (ch0, ch1))
869bb237 1396 break;
8cb8232a
KH
1397 INC_BOTH (from, from_byte);
1398 ch0 = ch1;
1399 }
8489eb67
RS
1400 }
1401 count--;
1402 }
1403 while (count < 0)
1404 {
1405 while (1)
1406 {
1407 if (from == beg)
1408 {
1409 immediate_quit = 0;
1410 return 0;
1411 }
6a140a74 1412 DEC_BOTH (from, from_byte);
195d1361 1413 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1414 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1415 code = SYNTAX (ch1);
8489eb67
RS
1416 if (words_include_escapes
1417 && (code == Sescape || code == Scharquote))
1418 break;
1419 if (code == Sword)
1420 break;
8489eb67 1421 }
93da5fff
KH
1422 /* Now CH1 is a character which ends a word and FROM is the
1423 position of it. */
8f924df7 1424 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1425 if (! NILP (Ffboundp (func)))
8f924df7 1426 {
869bb237 1427 pos = call2 (func, make_number (from), make_number (beg));
17f85215 1428 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
8f924df7
KH
1429 {
1430 from = XINT (pos);
1431 from_byte = CHAR_TO_BYTE (from);
1432 }
8489eb67 1433 }
869bb237 1434 else
8489eb67 1435 {
8cb8232a
KH
1436 while (1)
1437 {
8cb8232a
KH
1438 if (from == beg)
1439 break;
262be72a 1440 DEC_BOTH (from, from_byte);
8cb8232a 1441 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1442 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1443 code = SYNTAX (ch0);
1444 if ((code != Sword
1445 && (! words_include_escapes
1446 || (code != Sescape && code != Scharquote)))
2b70a11c 1447 || word_boundary_p (ch0, ch1))
262be72a
MB
1448 {
1449 INC_BOTH (from, from_byte);
1450 break;
1451 }
8cb8232a
KH
1452 ch1 = ch0;
1453 }
8489eb67
RS
1454 }
1455 count++;
1456 }
1457
1458 immediate_quit = 0;
1459
1460 return from;
1461}
1462
a7ca3326 1463DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93 1464 doc: /* Move point forward ARG words (backward if ARG is negative).
f73f4ce6 1465If ARG is omitted or nil, move point forward one word.
fdb82f93
PJ
1466Normally returns t.
1467If an edge of the buffer or a field boundary is reached, point is left there
1468and the function returns nil. Field boundaries are not noticed if
1469`inhibit-field-text-motion' is non-nil. */)
5842a27b 1470 (Lisp_Object arg)
8489eb67 1471{
c096ae4d 1472 Lisp_Object tmp;
d311d28c 1473 ptrdiff_t orig_val, val;
8489eb67 1474
839966f3
KH
1475 if (NILP (arg))
1476 XSETFASTINT (arg, 1);
1477 else
1478 CHECK_NUMBER (arg);
1479
1480 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1481 if (! orig_val)
839966f3 1482 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1483
b8855607 1484 /* Avoid jumping out of an input field. */
c096ae4d
SM
1485 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1486 Qt, Qnil, Qnil);
1487 val = XFASTINT (tmp);
7d0393cf 1488
8489eb67 1489 SET_PT (val);
e6d8341f 1490 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1491}
1492\f
a7ca3326 1493DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1494 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1495STRING is like the inside of a `[...]' in a regular expression
1496except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1497 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1498Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1499With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1500Char classes, e.g. `[:alpha:]', are supported.
1501
1502Returns the distance traveled, either zero or positive. */)
5842a27b 1503 (Lisp_Object string, Lisp_Object lim)
195d1361 1504{
327719ee 1505 return skip_chars (1, string, lim, 1);
195d1361
RS
1506}
1507
a7ca3326 1508DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1509 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1510See `skip-chars-forward' for details.
1511Returns the distance traveled, either zero or negative. */)
5842a27b 1512 (Lisp_Object string, Lisp_Object lim)
195d1361 1513{
327719ee 1514 return skip_chars (0, string, lim, 1);
195d1361
RS
1515}
1516
1517DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1518 doc: /* Move point forward across chars in specified syntax classes.
1519SYNTAX is a string of syntax code characters.
1520Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1521If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1522This function returns the distance traveled, either zero or positive. */)
5842a27b 1523 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1524{
b7dbcc19 1525 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1526}
1527
1528DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1529 doc: /* Move point backward across chars in specified syntax classes.
1530SYNTAX is a string of syntax code characters.
1531Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1532If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1533This function returns the distance traveled, either zero or negative. */)
5842a27b 1534 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1535{
b7dbcc19 1536 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1537}
1538
6a140a74 1539static Lisp_Object
1fc71008
PE
1540skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1541 bool handle_iso_classes)
195d1361 1542{
1fc71008
PE
1543 int c;
1544 char fastmap[0400];
b7dbcc19 1545 /* Store the ranges of non-ASCII characters. */
4f63c6bb 1546 int *char_ranges IF_LINT (= NULL);
7e68b0ea 1547 int n_char_ranges = 0;
1fc71008
PE
1548 bool negate = 0;
1549 ptrdiff_t i, i_byte;
1550 /* True if the current buffer is multibyte and the region contains
1551 non-ASCII chars. */
1552 bool multibyte;
1553 /* True if STRING is multibyte and it contains non-ASCII chars. */
1554 bool string_multibyte;
d311d28c 1555 ptrdiff_t size_byte;
2e567bd3 1556 const unsigned char *str;
82d497fc 1557 int len;
a1bc88d4 1558 Lisp_Object iso_classes;
195d1361 1559
b7826503 1560 CHECK_STRING (string);
a1bc88d4 1561 iso_classes = Qnil;
82d497fc 1562
195d1361
RS
1563 if (NILP (lim))
1564 XSETINT (lim, forwardp ? ZV : BEGV);
1565 else
b7826503 1566 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1567
1568 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1569 if (XINT (lim) > ZV)
1570 XSETFASTINT (lim, ZV);
1571 if (XINT (lim) < BEGV)
1572 XSETFASTINT (lim, BEGV);
1573
4b4deea2 1574 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1575 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1576 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1577
72af86bd 1578 memset (fastmap, 0, sizeof fastmap);
195d1361 1579
8f924df7
KH
1580 str = SDATA (string);
1581 size_byte = SBYTES (string);
4101e6fe 1582
82d497fc 1583 i_byte = 0;
13090112 1584 if (i_byte < size_byte
d5db4077 1585 && SREF (string, 0) == '^')
195d1361 1586 {
82d497fc 1587 negate = 1; i_byte++;
195d1361
RS
1588 }
1589
1590 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1591 Handle backslashes and ranges specially.
195d1361 1592
b7dbcc19
KH
1593 If STRING contains non-ASCII characters, setup char_ranges for
1594 them and use fastmap only for their leading codes. */
4101e6fe 1595
b7dbcc19 1596 if (! string_multibyte)
195d1361 1597 {
1fc71008 1598 bool string_has_eight_bit = 0;
4101e6fe 1599
b7dbcc19
KH
1600 /* At first setup fastmap. */
1601 while (i_byte < size_byte)
1602 {
1603 c = str[i_byte++];
1604
a1bc88d4
RS
1605 if (handle_iso_classes && c == '['
1606 && i_byte < size_byte
327719ee 1607 && str[i_byte] == ':')
a1bc88d4
RS
1608 {
1609 const unsigned char *class_beg = str + i_byte + 1;
1610 const unsigned char *class_end = class_beg;
b3bda4fd 1611 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1612 /* Leave room for the null. */
a1bc88d4
RS
1613 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1614 re_wctype_t cc;
1615
1616 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1617 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1618
b3bda4fd
KS
1619 while (class_end < class_limit
1620 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1621 class_end++;
1622
b3bda4fd
KS
1623 if (class_end == class_beg
1624 || *class_end != ':' || class_end[1] != ']')
1625 goto not_a_class_name;
a1bc88d4 1626
72af86bd 1627 memcpy (class_name, class_beg, class_end - class_beg);
a1bc88d4
RS
1628 class_name[class_end - class_beg] = 0;
1629
1630 cc = re_wctype (class_name);
1631 if (cc == 0)
1632 error ("Invalid ISO C character class");
1633
1634 iso_classes = Fcons (make_number (cc), iso_classes);
1635
1636 i_byte = class_end + 2 - str;
1637 continue;
1638 }
1639
b3bda4fd 1640 not_a_class_name:
b7dbcc19
KH
1641 if (c == '\\')
1642 {
13090112 1643 if (i_byte == size_byte)
4101e6fe
RS
1644 break;
1645
5c7b02ab 1646 c = str[i_byte++];
195d1361 1647 }
839966f3
KH
1648 /* Treat `-' as range character only if another character
1649 follows. */
1650 if (i_byte + 1 < size_byte
82d497fc 1651 && str[i_byte] == '-')
195d1361 1652 {
1fc71008 1653 int c2;
4101e6fe
RS
1654
1655 /* Skip over the dash. */
82d497fc 1656 i_byte++;
4101e6fe 1657
4101e6fe 1658 /* Get the end of the range. */
5c7b02ab 1659 c2 = str[i_byte++];
b7dbcc19
KH
1660 if (c2 == '\\'
1661 && i_byte < size_byte)
1662 c2 = str[i_byte++];
1663
8f924df7
KH
1664 if (c <= c2)
1665 {
1fc71008 1666 int lim2 = c2 + 1;
1e69125e 1667 while (c < lim2)
8f924df7
KH
1668 fastmap[c++] = 1;
1669 if (! ASCII_CHAR_P (c2))
1670 string_has_eight_bit = 1;
1671 }
195d1361
RS
1672 }
1673 else
b7dbcc19
KH
1674 {
1675 fastmap[c] = 1;
1676 if (! ASCII_CHAR_P (c))
1677 string_has_eight_bit = 1;
1678 }
1679 }
1680
1681 /* If the current range is multibyte and STRING contains
1682 eight-bit chars, arrange fastmap and setup char_ranges for
1683 the corresponding multibyte chars. */
1684 if (multibyte && string_has_eight_bit)
1685 {
178ba3e6
PE
1686 char *p1;
1687 char himap[0200 + 1];
1688 memcpy (himap, fastmap + 0200, 0200);
1689 himap[0200] = 0;
72af86bd 1690 memset (fastmap + 0200, 0, 0200);
38182d90 1691 char_ranges = alloca (sizeof *char_ranges * 128 * 2);
178ba3e6
PE
1692 i = 0;
1693
1694 while ((p1 = memchr (himap + i, 1, 0200 - i)))
b7dbcc19 1695 {
178ba3e6
PE
1696 /* Deduce the next range C..C2 from the next clump of 1s
1697 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1698 order half of the old FASTMAP. */
1699 int c2, leading_code;
1700 i = p1 - himap;
1701 c = BYTE8_TO_CHAR (i + 0200);
1702 i += strlen (p1);
1703 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1704
1705 char_ranges[n_char_ranges++] = c;
1706 char_ranges[n_char_ranges++] = c2;
1707 leading_code = CHAR_LEADING_CODE (c);
1708 memset (fastmap + leading_code, 1,
1709 CHAR_LEADING_CODE (c2) - leading_code + 1);
b7dbcc19 1710 }
195d1361
RS
1711 }
1712 }
f003ca54 1713 else /* STRING is multibyte */
b7dbcc19 1714 {
38182d90 1715 char_ranges = alloca (sizeof *char_ranges * SCHARS (string) * 2);
f003ca54 1716
b7dbcc19 1717 while (i_byte < size_byte)
195d1361 1718 {
1fc71008 1719 int leading_code = str[i_byte];
62a6e103 1720 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1721 i_byte += len;
5c7b02ab 1722
327719ee
MB
1723 if (handle_iso_classes && c == '['
1724 && i_byte < size_byte
62a6e103 1725 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1726 {
1727 const unsigned char *class_beg = str + i_byte + 1;
1728 const unsigned char *class_end = class_beg;
1729 const unsigned char *class_limit = str + size_byte - 2;
1730 /* Leave room for the null. */
1731 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1732 re_wctype_t cc;
1733
1734 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1735 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1736
1737 while (class_end < class_limit
1738 && *class_end >= 'a' && *class_end <= 'z')
1739 class_end++;
1740
1741 if (class_end == class_beg
1742 || *class_end != ':' || class_end[1] != ']')
1743 goto not_a_class_name_multibyte;
1744
72af86bd 1745 memcpy (class_name, class_beg, class_end - class_beg);
327719ee
MB
1746 class_name[class_end - class_beg] = 0;
1747
1748 cc = re_wctype (class_name);
1749 if (cc == 0)
1750 error ("Invalid ISO C character class");
1751
1752 iso_classes = Fcons (make_number (cc), iso_classes);
1753
1754 i_byte = class_end + 2 - str;
1755 continue;
1756 }
1757
1758 not_a_class_name_multibyte:
195d1361
RS
1759 if (c == '\\')
1760 {
13090112 1761 if (i_byte == size_byte)
4101e6fe
RS
1762 break;
1763
b7dbcc19 1764 leading_code = str[i_byte];
62a6e103 1765 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1766 i_byte += len;
195d1361 1767 }
839966f3
KH
1768 /* Treat `-' as range character only if another character
1769 follows. */
1770 if (i_byte + 1 < size_byte
82d497fc 1771 && str[i_byte] == '-')
195d1361 1772 {
1fc71008 1773 int c2, leading_code2;
4101e6fe
RS
1774
1775 /* Skip over the dash. */
82d497fc 1776 i_byte++;
4101e6fe 1777
4101e6fe 1778 /* Get the end of the range. */
b7dbcc19 1779 leading_code2 = str[i_byte];
62a6e103 1780 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1781 i_byte += len;
7e68b0ea 1782
b7dbcc19
KH
1783 if (c2 == '\\'
1784 && i_byte < size_byte)
1785 {
1786 leading_code2 = str[i_byte];
1fc71008 1787 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1788 i_byte += len;
1789 }
1790
f003ca54
KH
1791 if (c > c2)
1792 continue;
b7dbcc19 1793 if (ASCII_CHAR_P (c))
06274af5 1794 {
5c7b02ab
KH
1795 while (c <= c2 && c < 0x80)
1796 fastmap[c++] = 1;
b7dbcc19
KH
1797 leading_code = CHAR_LEADING_CODE (c);
1798 }
1799 if (! ASCII_CHAR_P (c))
1800 {
1fc71008 1801 int lim2 = leading_code2 + 1;
1e69125e 1802 while (leading_code < lim2)
b7dbcc19
KH
1803 fastmap[leading_code++] = 1;
1804 if (c <= c2)
e39091b6 1805 {
b7dbcc19 1806 char_ranges[n_char_ranges++] = c;
e39091b6 1807 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1808 }
1809 }
195d1361
RS
1810 }
1811 else
7e68b0ea 1812 {
b7dbcc19 1813 if (ASCII_CHAR_P (c))
e39091b6
KH
1814 fastmap[c] = 1;
1815 else
7e68b0ea 1816 {
b7dbcc19 1817 fastmap[leading_code] = 1;
4101e6fe
RS
1818 char_ranges[n_char_ranges++] = c;
1819 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1820 }
1821 }
195d1361 1822 }
b7dbcc19
KH
1823
1824 /* If the current range is unibyte and STRING contains non-ASCII
1825 chars, arrange fastmap for the corresponding unibyte
1826 chars. */
1827
1828 if (! multibyte && n_char_ranges > 0)
1829 {
72af86bd 1830 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1831 for (i = 0; i < n_char_ranges; i += 2)
1832 {
1833 int c1 = char_ranges[i];
1fc71008 1834 int lim2 = char_ranges[i + 1] + 1;
b7dbcc19 1835
1e69125e 1836 for (; c1 < lim2; c1++)
2afc21f5
SM
1837 {
1838 int b = CHAR_TO_BYTE_SAFE (c1);
1839 if (b >= 0)
1840 fastmap[b] = 1;
1841 }
b7dbcc19
KH
1842 }
1843 }
195d1361
RS
1844 }
1845
9690d026 1846 /* If ^ was the first character, complement the fastmap. */
195d1361 1847 if (negate)
b7dbcc19
KH
1848 {
1849 if (! multibyte)
1850 for (i = 0; i < sizeof fastmap; i++)
1851 fastmap[i] ^= 1;
1852 else
1853 {
1854 for (i = 0; i < 0200; i++)
1855 fastmap[i] ^= 1;
1856 /* All non-ASCII chars possibly match. */
1857 for (; i < sizeof fastmap; i++)
1858 fastmap[i] = 1;
1859 }
1860 }
195d1361
RS
1861
1862 {
d311d28c
PE
1863 ptrdiff_t start_point = PT;
1864 ptrdiff_t pos = PT;
1865 ptrdiff_t pos_byte = PT_BYTE;
9af7511a
KH
1866 unsigned char *p = PT_ADDR, *endp, *stop;
1867
1868 if (forwardp)
1869 {
1870 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1871 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1872 }
1873 else
1874 {
1875 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1876 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1877 }
195d1361
RS
1878
1879 immediate_quit = 1;
45b683a1 1880 /* This code may look up syntax tables using functions that rely on the
618db430
SM
1881 gl_state object. To make sure this object is not out of date,
1882 let's initialize it manually.
1883 We ignore syntax-table text-properties for now, since that's
1884 what we've done in the past. */
d48cd3f4 1885 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1886 if (forwardp)
195d1361 1887 {
b7dbcc19 1888 if (multibyte)
8f924df7 1889 while (1)
b7dbcc19 1890 {
8f924df7 1891 int nbytes;
9af7511a 1892
8f924df7 1893 if (p >= stop)
9af7511a 1894 {
8f924df7 1895 if (p >= endp)
9af7511a 1896 break;
8f924df7
KH
1897 p = GAP_END_ADDR;
1898 stop = endp;
9af7511a 1899 }
62a6e103 1900 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1901 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1902 {
327719ee 1903 if (negate)
9af7511a 1904 break;
327719ee
MB
1905 else
1906 goto fwd_ok;
9af7511a 1907 }
9af7511a 1908
8f924df7 1909 if (! fastmap[*p])
b7dbcc19
KH
1910 break;
1911 if (! ASCII_CHAR_P (c))
9af7511a 1912 {
b7dbcc19
KH
1913 /* As we are looking at a multibyte character, we
1914 must look up the character in the table
1915 CHAR_RANGES. If there's no data in the table,
1916 that character is not what we want to skip. */
1917
1918 /* The following code do the right thing even if
1919 n_char_ranges is zero (i.e. no data in
1920 CHAR_RANGES). */
1921 for (i = 0; i < n_char_ranges; i += 2)
1922 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1923 break;
b7dbcc19
KH
1924 if (!(negate ^ (i < n_char_ranges)))
1925 break;
9af7511a 1926 }
327719ee 1927 fwd_ok:
8f924df7 1928 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1929 }
1930 else
8f924df7
KH
1931 while (1)
1932 {
1933 if (p >= stop)
9af7511a 1934 {
8f924df7 1935 if (p >= endp)
9af7511a 1936 break;
8f924df7
KH
1937 p = GAP_END_ADDR;
1938 stop = endp;
9af7511a 1939 }
327719ee
MB
1940
1941 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1942 {
1943 if (negate)
1944 break;
1945 else
1946 goto fwd_unibyte_ok;
1947 }
1948
8f924df7
KH
1949 if (!fastmap[*p])
1950 break;
327719ee 1951 fwd_unibyte_ok:
8f924df7
KH
1952 p++, pos++, pos_byte++;
1953 }
195d1361
RS
1954 }
1955 else
1956 {
b7dbcc19 1957 if (multibyte)
8f924df7 1958 while (1)
b7dbcc19 1959 {
8f924df7 1960 unsigned char *prev_p;
9af7511a 1961
8f924df7 1962 if (p <= stop)
9af7511a 1963 {
8f924df7 1964 if (p <= endp)
9af7511a 1965 break;
8f924df7
KH
1966 p = GPT_ADDR;
1967 stop = endp;
9af7511a 1968 }
8f924df7
KH
1969 prev_p = p;
1970 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1971 c = STRING_CHAR (p);
a1bc88d4 1972
327719ee
MB
1973 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1974 {
1975 if (negate)
1976 break;
9690d026 1977 else
327719ee 1978 goto back_ok;
7e68b0ea 1979 }
a1bc88d4 1980
8f924df7 1981 if (! fastmap[*p])
b7dbcc19
KH
1982 break;
1983 if (! ASCII_CHAR_P (c))
7e68b0ea 1984 {
b7dbcc19
KH
1985 /* See the comment in the previous similar code. */
1986 for (i = 0; i < n_char_ranges; i += 2)
1987 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1988 break;
1989 if (!(negate ^ (i < n_char_ranges)))
1990 break;
7e68b0ea 1991 }
327719ee 1992 back_ok:
8f924df7 1993 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1994 }
1995 else
8f924df7
KH
1996 while (1)
1997 {
1998 if (p <= stop)
9af7511a 1999 {
8f924df7 2000 if (p <= endp)
9af7511a 2001 break;
8f924df7
KH
2002 p = GPT_ADDR;
2003 stop = endp;
9af7511a 2004 }
e39091b6 2005
327719ee
MB
2006 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2007 {
2008 if (negate)
2009 break;
9af7511a 2010 else
327719ee
MB
2011 goto back_unibyte_ok;
2012 }
a1bc88d4 2013
8f924df7
KH
2014 if (!fastmap[p[-1]])
2015 break;
327719ee 2016 back_unibyte_ok:
8f924df7
KH
2017 p--, pos--, pos_byte--;
2018 }
195d1361 2019 }
7e68b0ea 2020
b7dbcc19
KH
2021 SET_PT_BOTH (pos, pos_byte);
2022 immediate_quit = 0;
2023
2024 return make_number (PT - start_point);
2025 }
2026}
7e68b0ea 2027
b7dbcc19
KH
2028
2029static Lisp_Object
1fc71008 2030skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
b7dbcc19 2031{
1fc71008 2032 int c;
b7dbcc19 2033 unsigned char fastmap[0400];
1fc71008
PE
2034 bool negate = 0;
2035 ptrdiff_t i, i_byte;
2036 bool multibyte;
d311d28c 2037 ptrdiff_t size_byte;
b7dbcc19
KH
2038 unsigned char *str;
2039
2040 CHECK_STRING (string);
2041
2042 if (NILP (lim))
2043 XSETINT (lim, forwardp ? ZV : BEGV);
2044 else
2045 CHECK_NUMBER_COERCE_MARKER (lim);
2046
2047 /* In any case, don't allow scan outside bounds of buffer. */
2048 if (XINT (lim) > ZV)
2049 XSETFASTINT (lim, ZV);
2050 if (XINT (lim) < BEGV)
2051 XSETFASTINT (lim, BEGV);
2052
ade8ee9e 2053 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 2054 return make_number (0);
8c6e735b 2055
4b4deea2 2056 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 2057 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19 2058
72af86bd 2059 memset (fastmap, 0, sizeof fastmap);
b7dbcc19 2060
8f924df7
KH
2061 if (SBYTES (string) > SCHARS (string))
2062 /* As this is very rare case (syntax spec is ASCII only), don't
2063 consider efficiency. */
bc796a59
KH
2064 string = string_make_unibyte (string);
2065
8f924df7
KH
2066 str = SDATA (string);
2067 size_byte = SBYTES (string);
bc796a59 2068
b7dbcc19
KH
2069 i_byte = 0;
2070 if (i_byte < size_byte
8f924df7 2071 && SREF (string, 0) == '^')
b7dbcc19
KH
2072 {
2073 negate = 1; i_byte++;
2074 }
2075
b7dbcc19
KH
2076 /* Find the syntaxes specified and set their elements of fastmap. */
2077
2078 while (i_byte < size_byte)
2079 {
2080 c = str[i_byte++];
8f924df7 2081 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 2082 }
195d1361 2083
9690d026 2084 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
2085 if (negate)
2086 for (i = 0; i < sizeof fastmap; i++)
9690d026 2087 fastmap[i] ^= 1;
195d1361
RS
2088
2089 {
d311d28c
PE
2090 ptrdiff_t start_point = PT;
2091 ptrdiff_t pos = PT;
2092 ptrdiff_t pos_byte = PT_BYTE;
8f924df7
KH
2093 unsigned char *p = PT_ADDR, *endp, *stop;
2094
2095 if (forwardp)
2096 {
2097 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2098 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2099 }
2100 else
2101 {
2102 endp = CHAR_POS_ADDR (XINT (lim));
2103 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2104 }
195d1361
RS
2105
2106 immediate_quit = 1;
b7dbcc19
KH
2107 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2108 if (forwardp)
195d1361 2109 {
b7dbcc19 2110 if (multibyte)
195d1361 2111 {
8f924df7 2112 while (1)
8c6e735b 2113 {
8f924df7
KH
2114 int nbytes;
2115
2116 if (p >= stop)
2117 {
2118 if (p >= endp)
2119 break;
2120 p = GAP_END_ADDR;
2121 stop = endp;
2122 }
62a6e103 2123 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1fc71008 2124 if (! fastmap[SYNTAX (c)])
8c6e735b 2125 break;
8f924df7 2126 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
2127 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2128 }
195d1361
RS
2129 }
2130 else
2131 {
8c6e735b 2132 while (1)
7e68b0ea 2133 {
8f924df7
KH
2134 if (p >= stop)
2135 {
2136 if (p >= endp)
2137 break;
2138 p = GAP_END_ADDR;
2139 stop = endp;
2140 }
1fc71008 2141 if (! fastmap[SYNTAX (*p)])
8c6e735b 2142 break;
8f924df7 2143 p++, pos++, pos_byte++;
b7dbcc19 2144 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2145 }
2146 }
2147 }
2148 else
2149 {
b7dbcc19 2150 if (multibyte)
195d1361 2151 {
8c6e735b 2152 while (1)
b7dbcc19 2153 {
8f924df7
KH
2154 unsigned char *prev_p;
2155
2156 if (p <= stop)
b7dbcc19 2157 {
8f924df7
KH
2158 if (p <= endp)
2159 break;
2160 p = GPT_ADDR;
2161 stop = endp;
b7dbcc19 2162 }
262be72a 2163 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2164 prev_p = p;
2165 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2166 c = STRING_CHAR (p);
1fc71008 2167 if (! fastmap[SYNTAX (c)])
8f924df7
KH
2168 break;
2169 pos--, pos_byte -= prev_p - p;
b7dbcc19 2170 }
195d1361
RS
2171 }
2172 else
2173 {
8c6e735b
KH
2174 while (1)
2175 {
8f924df7
KH
2176 if (p <= stop)
2177 {
2178 if (p <= endp)
2179 break;
2180 p = GPT_ADDR;
2181 stop = endp;
2182 }
102f6132 2183 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1fc71008 2184 if (! fastmap[SYNTAX (p[-1])])
8c6e735b 2185 break;
8f924df7 2186 p--, pos--, pos_byte--;
8c6e735b 2187 }
195d1361
RS
2188 }
2189 }
6a140a74
RS
2190
2191 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2192 immediate_quit = 0;
2193
2194 return make_number (PT - start_point);
2195 }
2196}
a1bc88d4 2197
1fc71008 2198/* Return true if character C belongs to one of the ISO classes
a1bc88d4
RS
2199 in the list ISO_CLASSES. Each class is represented by an
2200 integer which is its type according to re_wctype. */
2201
1fc71008 2202static bool
971de7fb 2203in_classes (int c, Lisp_Object iso_classes)
a1bc88d4 2204{
1fc71008 2205 bool fits_class = 0;
a1bc88d4 2206
618db430 2207 while (CONSP (iso_classes))
a1bc88d4
RS
2208 {
2209 Lisp_Object elt;
2210 elt = XCAR (iso_classes);
2211 iso_classes = XCDR (iso_classes);
2212
2213 if (re_iswctype (c, XFASTINT (elt)))
2214 fits_class = 1;
2215 }
2216
2217 return fits_class;
2218}
195d1361 2219\f
95ff8dfc
RS
2220/* Jump over a comment, assuming we are at the beginning of one.
2221 FROM is the current position.
2222 FROM_BYTE is the bytepos corresponding to FROM.
2223 Do not move past STOP (a charpos).
2224 The comment over which we have to jump is of style STYLE
1fc71008 2225 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
95ff8dfc
RS
2226 NESTING should be positive to indicate the nesting at the beginning
2227 for nested comments and should be zero or negative else.
2228 ST_COMMENT_STYLE cannot be nested.
2229 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2230 (or 0 If the search cannot start in the middle of a two-character).
2231
1fc71008 2232 If successful, return true and store the charpos of the comment's end
95ff8dfc 2233 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1fc71008 2234 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
95ff8dfc
RS
2235 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2236 (as defined for state.incomment) in *INCOMMENT_PTR.
2237
2238 The comment end is the last character of the comment rather than the
1fc71008 2239 character just after the comment.
95ff8dfc
RS
2240
2241 Global syntax data is assumed to initially be valid for FROM and
2242 remains valid for forward search starting at the returned position. */
2243
1fc71008 2244static bool
d311d28c
PE
2245forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2246 EMACS_INT nesting, int style, int prev_syntax,
2247 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2248 EMACS_INT *incomment_ptr)
95ff8dfc
RS
2249{
2250 register int c, c1;
2251 register enum syntaxcode code;
c5683ceb 2252 register int syntax, other_syntax;
95ff8dfc
RS
2253
2254 if (nesting <= 0) nesting = -1;
2255
2256 /* Enter the loop in the middle so that we find
2257 a 2-char comment ender if we start in the middle of it. */
2258 syntax = prev_syntax;
2259 if (syntax != 0) goto forw_incomment;
2260
2261 while (1)
2262 {
2263 if (from == stop)
2264 {
2265 *incomment_ptr = nesting;
2266 *charpos_ptr = from;
2267 *bytepos_ptr = from_byte;
2268 return 0;
2269 }
8f924df7 2270 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2271 syntax = SYNTAX_WITH_FLAGS (c);
2272 code = syntax & 0xff;
2273 if (code == Sendcomment
c5683ceb 2274 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
abf8a9ff
SM
2275 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2276 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2277 /* we have encountered a comment end of the same style
2278 as the comment sequence which began this comment
2279 section */
2280 break;
2281 if (code == Scomment_fence
2282 && style == ST_COMMENT_STYLE)
2283 /* we have encountered a comment end of the same style
2284 as the comment sequence which began this comment
2285 section. */
2286 break;
2287 if (nesting > 0
2288 && code == Scomment
abf8a9ff 2289 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
c5683ceb 2290 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
95ff8dfc
RS
2291 /* we have encountered a nested comment of the same style
2292 as the comment sequence which began this comment section */
2293 nesting++;
2294 INC_BOTH (from, from_byte);
2295 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2296
95ff8dfc
RS
2297 forw_incomment:
2298 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
8f924df7 2299 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2300 other_syntax = SYNTAX_WITH_FLAGS (c1),
2301 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2302 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
abf8a9ff 2303 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb
SM
2304 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2305 ? nesting > 0 : nesting < 0))
4ffe723b
GM
2306 {
2307 if (--nesting <= 0)
2308 /* we have encountered a comment end of the same style
2309 as the comment sequence which began this comment
2310 section */
2311 break;
2312 else
2313 {
2314 INC_BOTH (from, from_byte);
2315 UPDATE_SYNTAX_TABLE_FORWARD (from);
2316 }
2317 }
95ff8dfc
RS
2318 if (nesting > 0
2319 && from < stop
2320 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2321 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2322 other_syntax = SYNTAX_WITH_FLAGS (c1),
2323 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2324 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
abf8a9ff 2325 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb 2326 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
95ff8dfc
RS
2327 /* we have encountered a nested comment of the same style
2328 as the comment sequence which began this comment
2329 section */
2330 {
2331 INC_BOTH (from, from_byte);
2332 UPDATE_SYNTAX_TABLE_FORWARD (from);
2333 nesting++;
2334 }
2335 }
2336 *charpos_ptr = from;
2337 *bytepos_ptr = from_byte;
2338 return 1;
2339}
2340
b3cfe0c8 2341DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2342 doc: /*
ee648c11 2343Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2344Stop scanning if we find something other than a comment or whitespace.
2345Set point to where scanning stops.
ee648c11 2346If COUNT comments are found as expected, with nothing except whitespace
fdb82f93 2347between them, return t; otherwise return nil. */)
5842a27b 2348 (Lisp_Object count)
b3cfe0c8 2349{
1fc71008
PE
2350 ptrdiff_t from, from_byte, stop;
2351 int c, c1;
2352 enum syntaxcode code;
b3cfe0c8 2353 int comstyle = 0; /* style of comment encountered */
1fc71008
PE
2354 bool comnested = 0; /* whether the comment is nestable or not */
2355 bool found;
2312c580 2356 EMACS_INT count1;
d311d28c 2357 ptrdiff_t out_charpos, out_bytepos;
7c85f529 2358 EMACS_INT dummy;
840f481c 2359
b7826503 2360 CHECK_NUMBER (count);
840f481c 2361 count1 = XINT (count);
195d1361 2362 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2363
2364 immediate_quit = 1;
2365 QUIT;
2366
2367 from = PT;
6a140a74 2368 from_byte = PT_BYTE;
b3cfe0c8 2369
195d1361 2370 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2371 while (count1 > 0)
b3cfe0c8 2372 {
04882296 2373 do
b3cfe0c8 2374 {
1fc71008
PE
2375 bool comstart_first;
2376 int syntax, other_syntax;
f902a008 2377
04882296
KH
2378 if (from == stop)
2379 {
ef316cf0 2380 SET_PT_BOTH (from, from_byte);
b7e6e612 2381 immediate_quit = 0;
04882296
KH
2382 return Qnil;
2383 }
b7dbcc19 2384 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2385 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8 2386 code = SYNTAX (c);
c5683ceb
SM
2387 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2388 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2389 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
6a140a74 2390 INC_BOTH (from, from_byte);
f902a008 2391 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2392 if (from < stop && comstart_first
b7dbcc19 2393 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2394 other_syntax = SYNTAX_WITH_FLAGS (c1),
2395 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
b3cfe0c8 2396 {
7d0393cf 2397 /* We have encountered a comment start sequence and we
7fc8191e 2398 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2399 the comment style this sequence begins so that later,
2400 only a comment end of the same style actually ends
7fc8191e 2401 the comment section. */
b3cfe0c8 2402 code = Scomment;
c5683ceb 2403 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
1fc71008 2404 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2405 INC_BOTH (from, from_byte);
95ff8dfc 2406 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2407 }
04882296 2408 }
abf8a9ff 2409 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2410
da71ab91
KH
2411 if (code == Scomment_fence)
2412 comstyle = ST_COMMENT_STYLE;
2413 else if (code != Scomment)
04882296
KH
2414 {
2415 immediate_quit = 0;
6a140a74 2416 DEC_BOTH (from, from_byte);
ef316cf0 2417 SET_PT_BOTH (from, from_byte);
04882296
KH
2418 return Qnil;
2419 }
2420 /* We're at the start of a comment. */
95ff8dfc
RS
2421 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2422 &out_charpos, &out_bytepos, &dummy);
2423 from = out_charpos; from_byte = out_bytepos;
2424 if (!found)
04882296 2425 {
95ff8dfc
RS
2426 immediate_quit = 0;
2427 SET_PT_BOTH (from, from_byte);
2428 return Qnil;
b3cfe0c8 2429 }
95ff8dfc
RS
2430 INC_BOTH (from, from_byte);
2431 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2432 /* We have skipped one comment. */
840f481c 2433 count1--;
b3cfe0c8
RS
2434 }
2435
840f481c 2436 while (count1 < 0)
b3cfe0c8 2437 {
b9145dbb 2438 while (1)
b3cfe0c8 2439 {
1fc71008
PE
2440 bool quoted;
2441 int syntax;
f902a008 2442
b9145dbb
RS
2443 if (from <= stop)
2444 {
6a140a74 2445 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2446 immediate_quit = 0;
2447 return Qnil;
2448 }
b3cfe0c8 2449
6a140a74 2450 DEC_BOTH (from, from_byte);
f902a008 2451 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2452 quoted = char_quoted (from, from_byte);
b7dbcc19 2453 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2454 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8
RS
2455 code = SYNTAX (c);
2456 comstyle = 0;
c5683ceb 2457 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2458 if (code == Sendcomment)
c5683ceb
SM
2459 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2460 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
f902a008 2461 && prev_char_comend_first (from, from_byte)
89c6809a 2462 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2463 {
c5683ceb 2464 int other_syntax;
7fc8191e 2465 /* We must record the comment style encountered so that
b3cfe0c8 2466 later, we can match only the proper comment begin
7fc8191e 2467 sequence of the same style. */
6a140a74 2468 DEC_BOTH (from, from_byte);
f902a008
RS
2469 code = Sendcomment;
2470 /* Calling char_quoted, above, set up global syntax position
2471 at the new value of FROM. */
b7dbcc19 2472 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb
SM
2473 other_syntax = SYNTAX_WITH_FLAGS (c1);
2474 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
1fc71008 2475 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
b3cfe0c8
RS
2476 }
2477
195d1361
RS
2478 if (code == Scomment_fence)
2479 {
2480 /* Skip until first preceding unquoted comment_fence. */
1fc71008 2481 bool fence_found = 0;
d311d28c 2482 ptrdiff_t ini = from, ini_byte = from_byte;
7d0393cf 2483
6a140a74 2484 while (1)
195d1361 2485 {
6a140a74 2486 DEC_BOTH (from, from_byte);
195d1361 2487 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2488 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2489 if (SYNTAX (c) == Scomment_fence
7d0393cf 2490 && !char_quoted (from, from_byte))
195d1361 2491 {
01f44d5a 2492 fence_found = 1;
195d1361
RS
2493 break;
2494 }
fcdd4585
SM
2495 else if (from == stop)
2496 break;
195d1361 2497 }
01f44d5a 2498 if (fence_found == 0)
195d1361
RS
2499 {
2500 from = ini; /* Set point to ini + 1. */
6a140a74 2501 from_byte = ini_byte;
195d1361
RS
2502 goto leave;
2503 }
39c41ad4
SM
2504 else
2505 /* We have skipped one comment. */
2506 break;
195d1361
RS
2507 }
2508 else if (code == Sendcomment)
b3cfe0c8 2509 {
95ff8dfc 2510 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2511 &out_charpos, &out_bytepos);
1fc71008 2512 if (!found)
1aa963c8 2513 {
abf8a9ff
SM
2514 if (c == '\n')
2515 /* This end-of-line is not an end-of-comment.
2516 Treat it like a whitespace.
2517 CC-mode (and maybe others) relies on this behavior. */
2518 ;
2519 else
2520 {
2521 /* Failure: we should go back to the end of this
2522 not-quite-endcomment. */
c5683ceb 2523 if (SYNTAX (c) != code)
abf8a9ff
SM
2524 /* It was a two-char Sendcomment. */
2525 INC_BOTH (from, from_byte);
2526 goto leave;
2527 }
1aa963c8 2528 }
2b927d02 2529 else
abf8a9ff
SM
2530 {
2531 /* We have skipped one comment. */
2532 from = out_charpos, from_byte = out_bytepos;
2533 break;
2534 }
b3cfe0c8 2535 }
bb0de084 2536 else if (code != Swhitespace || quoted)
b3cfe0c8 2537 {
195d1361 2538 leave:
b3cfe0c8 2539 immediate_quit = 0;
6a140a74 2540 INC_BOTH (from, from_byte);
ef316cf0 2541 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2542 return Qnil;
2543 }
2544 }
2545
840f481c 2546 count1++;
b3cfe0c8
RS
2547 }
2548
ef316cf0 2549 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2550 immediate_quit = 0;
2551 return Qt;
2552}
2553\f
b7dbcc19 2554/* Return syntax code of character C if C is an ASCII character
45b683a1 2555 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
bd25db08 2556
45b683a1
PE
2557static enum syntaxcode
2558syntax_multibyte (int c, bool multibyte_symbol_p)
2559{
2560 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2561}
bd25db08 2562
6a140a74 2563static Lisp_Object
1fc71008 2564scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
8489eb67
RS
2565{
2566 Lisp_Object val;
1fc71008
PE
2567 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2568 int c, c1;
93da5fff 2569 int stringterm;
1fc71008
PE
2570 bool quoted;
2571 bool mathexit = 0;
45b683a1 2572 enum syntaxcode code;
d311d28c 2573 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2574 int comstyle = 0; /* style of comment encountered */
1fc71008 2575 bool comnested = 0; /* whether the comment is nestable or not */
d311d28c 2576 ptrdiff_t temp_pos;
932e6895 2577 EMACS_INT last_good = from;
1fc71008 2578 bool found;
d311d28c
PE
2579 ptrdiff_t from_byte;
2580 ptrdiff_t out_bytepos, out_charpos;
7c85f529 2581 EMACS_INT dummy;
1fc71008 2582 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2583
2584 if (depth > 0) min_depth = 0;
2585
c3907a7e
RS
2586 if (from > ZV) from = ZV;
2587 if (from < BEGV) from = BEGV;
2588
2589 from_byte = CHAR_TO_BYTE (from);
2590
8489eb67
RS
2591 immediate_quit = 1;
2592 QUIT;
2593
195d1361 2594 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2595 while (count > 0)
2596 {
8489eb67
RS
2597 while (from < stop)
2598 {
1fc71008
PE
2599 bool comstart_first, prefix;
2600 int syntax, other_syntax;
195d1361 2601 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2602 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2603 syntax = SYNTAX_WITH_FLAGS (c);
45b683a1 2604 code = syntax_multibyte (c, multibyte_symbol_p);
c5683ceb
SM
2605 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2606 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2607 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2608 prefix = SYNTAX_FLAGS_PREFIX (syntax);
7bf5e9e4
RS
2609 if (depth == min_depth)
2610 last_good = from;
6a140a74 2611 INC_BOTH (from, from_byte);
195d1361 2612 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2613 if (from < stop && comstart_first
327719ee 2614 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2615 other_syntax = SYNTAX_WITH_FLAGS (c),
2616 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
8489eb67 2617 && parse_sexp_ignore_comments)
e5d4f4dc 2618 {
7d0393cf 2619 /* we have encountered a comment start sequence and we
195d1361 2620 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2621 the comment style this sequence begins so that later,
2622 only a comment end of the same style actually ends
2623 the comment section */
2624 code = Scomment;
c5683ceb 2625 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
1fc71008 2626 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2627 INC_BOTH (from, from_byte);
f902a008 2628 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2629 }
7d0393cf 2630
f902a008 2631 if (prefix)
8489eb67
RS
2632 continue;
2633
7393bcbb 2634 switch (code)
8489eb67
RS
2635 {
2636 case Sescape:
2637 case Scharquote:
8b8bf8e6
SM
2638 if (from == stop)
2639 goto lose;
6a140a74 2640 INC_BOTH (from, from_byte);
8489eb67
RS
2641 /* treat following character as a word constituent */
2642 case Sword:
2643 case Ssymbol:
2644 if (depth || !sexpflag) break;
195d1361 2645 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2646 while (from < stop)
2647 {
195d1361 2648 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2 2649
b7dbcc19 2650 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
45b683a1 2651 switch (syntax_multibyte (c, multibyte_symbol_p))
8489eb67
RS
2652 {
2653 case Scharquote:
2654 case Sescape:
6a140a74 2655 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2656 if (from == stop)
2657 goto lose;
8489eb67
RS
2658 break;
2659 case Sword:
2660 case Ssymbol:
2661 case Squote:
2662 break;
2663 default:
2664 goto done;
2665 }
6a140a74 2666 INC_BOTH (from, from_byte);
8489eb67
RS
2667 }
2668 goto done;
2669
195d1361 2670 case Scomment_fence:
95ff8dfc
RS
2671 comstyle = ST_COMMENT_STYLE;
2672 /* FALLTHROUGH */
2673 case Scomment:
8489eb67 2674 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2675 UPDATE_SYNTAX_TABLE_FORWARD (from);
2676 found = forw_comment (from, from_byte, stop,
2677 comnested, comstyle, 0,
2678 &out_charpos, &out_bytepos, &dummy);
2679 from = out_charpos, from_byte = out_bytepos;
2680 if (!found)
8489eb67 2681 {
95ff8dfc
RS
2682 if (depth == 0)
2683 goto done;
2684 goto lose;
8489eb67 2685 }
95ff8dfc
RS
2686 INC_BOTH (from, from_byte);
2687 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2688 break;
2689
2690 case Smath:
2691 if (!sexpflag)
2692 break;
b7dbcc19 2693 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2694 {
2695 INC_BOTH (from, from_byte);
2696 }
8489eb67
RS
2697 if (mathexit)
2698 {
2699 mathexit = 0;
2700 goto close1;
2701 }
2702 mathexit = 1;
2703
2704 case Sopen:
2705 if (!++depth) goto done;
2706 break;
2707
2708 case Sclose:
2709 close1:
2710 if (!--depth) goto done;
2711 if (depth < min_depth)
9471945b
KS
2712 xsignal3 (Qscan_error,
2713 build_string ("Containing expression ends prematurely"),
2714 make_number (last_good), make_number (from));
8489eb67
RS
2715 break;
2716
2717 case Sstring:
195d1361 2718 case Sstring_fence:
ef316cf0 2719 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2720 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2721 while (1)
2722 {
45b683a1 2723 enum syntaxcode c_code;
8b8bf8e6
SM
2724 if (from >= stop)
2725 goto lose;
195d1361 2726 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2727 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
45b683a1 2728 c_code = syntax_multibyte (c, multibyte_symbol_p);
42ebfa31 2729 if (code == Sstring
45b683a1
PE
2730 ? c == stringterm && c_code == Sstring
2731 : c_code == Sstring_fence)
195d1361 2732 break;
7c7ca3d2 2733
1fc71008 2734 switch (c_code)
8489eb67
RS
2735 {
2736 case Scharquote:
2737 case Sescape:
6a140a74 2738 INC_BOTH (from, from_byte);
8489eb67 2739 }
6a140a74 2740 INC_BOTH (from, from_byte);
8489eb67 2741 }
6a140a74 2742 INC_BOTH (from, from_byte);
8489eb67
RS
2743 if (!depth && sexpflag) goto done;
2744 break;
240c43e8
SM
2745 default:
2746 /* Ignore whitespace, punctuation, quote, endcomment. */
2747 break;
8489eb67
RS
2748 }
2749 }
2750
2751 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2752 if (depth)
2753 goto lose;
8489eb67
RS
2754
2755 immediate_quit = 0;
2756 return Qnil;
2757
2758 /* End of object reached */
2759 done:
2760 count--;
2761 }
2762
2763 while (count < 0)
2764 {
8489eb67
RS
2765 while (from > stop)
2766 {
c5683ceb 2767 int syntax;
6a140a74 2768 DEC_BOTH (from, from_byte);
195d1361 2769 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2770 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2771 syntax= SYNTAX_WITH_FLAGS (c);
45b683a1 2772 code = syntax_multibyte (c, multibyte_symbol_p);
7bf5e9e4
RS
2773 if (depth == min_depth)
2774 last_good = from;
7fc8191e 2775 comstyle = 0;
c5683ceb 2776 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2777 if (code == Sendcomment)
c5683ceb
SM
2778 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2779 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
1674d9a2 2780 && prev_char_comend_first (from, from_byte)
8489eb67 2781 && parse_sexp_ignore_comments)
e5d4f4dc 2782 {
f902a008 2783 /* We must record the comment style encountered so that
e5d4f4dc 2784 later, we can match only the proper comment begin
f902a008 2785 sequence of the same style. */
01f44d5a 2786 int c2, other_syntax;
6a140a74 2787 DEC_BOTH (from, from_byte);
f902a008
RS
2788 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2789 code = Sendcomment;
01f44d5a
PE
2790 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2791 other_syntax = SYNTAX_WITH_FLAGS (c2);
c5683ceb 2792 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
1fc71008 2793 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
e5d4f4dc 2794 }
7d0393cf 2795
9828a477 2796 /* Quoting turns anything except a comment-ender
e6365855 2797 into a word character. Note that this cannot be true
f902a008 2798 if we decremented FROM in the if-statement above. */
9828a477 2799 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2800 {
2801 DEC_BOTH (from, from_byte);
2802 code = Sword;
2803 }
c5683ceb 2804 else if (SYNTAX_FLAGS_PREFIX (syntax))
8489eb67
RS
2805 continue;
2806
7393bcbb 2807 switch (code)
8489eb67
RS
2808 {
2809 case Sword:
2810 case Ssymbol:
9828a477
KH
2811 case Sescape:
2812 case Scharquote:
8489eb67 2813 if (depth || !sexpflag) break;
195d1361
RS
2814 /* This word counts as a sexp; count object finished
2815 after passing it. */
8489eb67
RS
2816 while (from > stop)
2817 {
6a140a74 2818 temp_pos = from_byte;
4b4deea2 2819 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
2820 DEC_POS (temp_pos);
2821 else
2822 temp_pos--;
6a140a74 2823 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2824 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
9828a477 2825 /* Don't allow comment-end to be quoted. */
45b683a1 2826 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
9828a477 2827 goto done2;
6a140a74 2828 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2829 if (quoted)
93da5fff 2830 {
6a140a74 2831 DEC_BOTH (from, from_byte);
ef316cf0 2832 temp_pos = dec_bytepos (temp_pos);
6a140a74 2833 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2834 }
b7dbcc19 2835 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
45b683a1
PE
2836 if (! quoted)
2837 switch (syntax_multibyte (c1, multibyte_symbol_p))
2838 {
2839 case Sword: case Ssymbol: case Squote: break;
2840 default: goto done2;
2841 }
6a140a74 2842 DEC_BOTH (from, from_byte);
8489eb67
RS
2843 }
2844 goto done2;
2845
2846 case Smath:
2847 if (!sexpflag)
2848 break;
ef316cf0 2849 temp_pos = dec_bytepos (from_byte);
6a140a74 2850 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2851 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2852 DEC_BOTH (from, from_byte);
8489eb67
RS
2853 if (mathexit)
2854 {
2855 mathexit = 0;
2856 goto open2;
2857 }
2858 mathexit = 1;
2859
2860 case Sclose:
2861 if (!++depth) goto done2;
2862 break;
2863
2864 case Sopen:
2865 open2:
2866 if (!--depth) goto done2;
2867 if (depth < min_depth)
9471945b
KS
2868 xsignal3 (Qscan_error,
2869 build_string ("Containing expression ends prematurely"),
2870 make_number (last_good), make_number (from));
8489eb67
RS
2871 break;
2872
2873 case Sendcomment:
2874 if (!parse_sexp_ignore_comments)
2875 break;
95ff8dfc 2876 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2877 &out_charpos, &out_bytepos);
1fc71008 2878 /* FIXME: if !found, it really wasn't a comment-end.
1aa963c8
SM
2879 For single-char Sendcomment, we can't do much about it apart
2880 from skipping the char.
2881 For 2-char endcomments, we could try again, taking both
2882 chars as separate entities, but it's a lot of trouble
2883 for very little gain, so we don't bother either. -sm */
1fc71008 2884 if (found)
6a140a74 2885 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2886 break;
2887
195d1361
RS
2888 case Scomment_fence:
2889 case Sstring_fence:
2890 while (1)
2891 {
8b8bf8e6
SM
2892 if (from == stop)
2893 goto lose;
6b61353c 2894 DEC_BOTH (from, from_byte);
195d1361 2895 UPDATE_SYNTAX_TABLE_BACKWARD (from);
45b683a1
PE
2896 if (!char_quoted (from, from_byte))
2897 {
2898 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2899 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2900 break;
2901 }
195d1361
RS
2902 }
2903 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2904 break;
7d0393cf 2905
8489eb67 2906 case Sstring:
b7dbcc19 2907 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2908 while (1)
2909 {
8b8bf8e6
SM
2910 if (from == stop)
2911 goto lose;
6b61353c
KH
2912 DEC_BOTH (from, from_byte);
2913 UPDATE_SYNTAX_TABLE_BACKWARD (from);
45b683a1
PE
2914 if (!char_quoted (from, from_byte))
2915 {
2916 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2917 if (c == stringterm
2918 && (syntax_multibyte (c, multibyte_symbol_p)
2919 == Sstring))
2920 break;
2921 }
8489eb67 2922 }
8489eb67
RS
2923 if (!depth && sexpflag) goto done2;
2924 break;
240c43e8
SM
2925 default:
2926 /* Ignore whitespace, punctuation, quote, endcomment. */
2927 break;
8489eb67
RS
2928 }
2929 }
2930
2931 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2932 if (depth)
2933 goto lose;
8489eb67
RS
2934
2935 immediate_quit = 0;
2936 return Qnil;
2937
2938 done2:
2939 count++;
2940 }
2941
2942
2943 immediate_quit = 0;
1e142fb7 2944 XSETFASTINT (val, from);
8489eb67
RS
2945 return val;
2946
2947 lose:
9471945b
KS
2948 xsignal3 (Qscan_error,
2949 build_string ("Unbalanced parentheses"),
2950 make_number (last_good), make_number (from));
8489eb67
RS
2951}
2952
8489eb67 2953DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93 2954 doc: /* Scan from character number FROM by COUNT lists.
7b926f3f
CY
2955Scan forward if COUNT is positive, backward if COUNT is negative.
2956Return the character number of the position thus found.
2957
2958A \"list", in this context, refers to a balanced parenthetical
2959grouping, as determined by the syntax table.
fdb82f93 2960
7b926f3f
CY
2961If DEPTH is nonzero, treat that as the nesting depth of the starting
2962point (i.e. the starting point is DEPTH parentheses deep). This
2963function scans over parentheses until the depth goes to zero COUNT
2964times. Hence, positive DEPTH moves out that number of levels of
221cd413 2965parentheses, while negative DEPTH moves to a deeper level.
fdb82f93
PJ
2966
2967Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2968
7b926f3f
CY
2969If we reach the beginning or end of the accessible part of the buffer
2970before we have scanned over COUNT lists, return nil if the depth at
2971that point is zero, and signal a error if the depth is nonzero. */)
5842a27b 2972 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
8489eb67 2973{
b7826503
PJ
2974 CHECK_NUMBER (from);
2975 CHECK_NUMBER (count);
2976 CHECK_NUMBER (depth);
8489eb67
RS
2977
2978 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2979}
2980
2981DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2982 doc: /* Scan from character number FROM by COUNT balanced expressions.
2983If COUNT is negative, scan backwards.
2984Returns the character number of the position thus found.
2985
2986Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2987
2988If the beginning or end of (the accessible part of) the buffer is reached
2989in the middle of a parenthetical grouping, an error is signaled.
2990If the beginning or end is reached between groupings
2991but before count is used up, nil is returned. */)
5842a27b 2992 (Lisp_Object from, Lisp_Object count)
8489eb67 2993{
b7826503
PJ
2994 CHECK_NUMBER (from);
2995 CHECK_NUMBER (count);
8489eb67
RS
2996
2997 return scan_lists (XINT (from), XINT (count), 0, 1);
2998}
2999
3000DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
3001 0, 0, 0,
3002 doc: /* Move point backward over any number of chars with prefix syntax.
3003This includes chars with "quote" or "prefix" syntax (' or p). */)
5842a27b 3004 (void)
8489eb67 3005{
d311d28c
PE
3006 ptrdiff_t beg = BEGV;
3007 ptrdiff_t opoint = PT;
3008 ptrdiff_t opoint_byte = PT_BYTE;
3009 ptrdiff_t pos = PT;
3010 ptrdiff_t pos_byte = PT_BYTE;
93da5fff 3011 int c;
93da5fff 3012
7d0393cf 3013 if (pos <= beg)
195d1361 3014 {
f902a008
RS
3015 SET_PT_BOTH (opoint, opoint_byte);
3016
3017 return Qnil;
195d1361 3018 }
8489eb67 3019
f902a008
RS
3020 SETUP_SYNTAX_TABLE (pos, -1);
3021
6a140a74
RS
3022 DEC_BOTH (pos, pos_byte);
3023
1fd3172d 3024 while (!char_quoted (pos, pos_byte)
195d1361 3025 /* Previous statement updates syntax table. */
b7dbcc19 3026 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
45b683a1 3027 || syntax_prefix_flag_p (c)))
93da5fff 3028 {
1fd3172d
RS
3029 opoint = pos;
3030 opoint_byte = pos_byte;
3031
3032 if (pos + 1 > beg)
3033 DEC_BOTH (pos, pos_byte);
93da5fff 3034 }
8489eb67 3035
6a140a74 3036 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
3037
3038 return Qnil;
3039}
3040\f
6a140a74 3041/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
3042 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3043 and return a description of the state of the parse at END.
1fc71008 3044 If STOPBEFORE, stop at the start of an atom.
644ea4df
RS
3045 If COMMENTSTOP is 1, stop at the start of a comment.
3046 If COMMENTSTOP is -1, stop at the start or end of a comment,
3047 after the beginning of a string, or after the end of a string. */
8489eb67 3048
340f92b5 3049static void
d5a3eaaf 3050scan_sexps_forward (struct lisp_parse_state *stateptr,
d311d28c 3051 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
1fc71008 3052 EMACS_INT targetdepth, bool stopbefore,
d5a3eaaf 3053 Lisp_Object oldstate, int commentstop)
8489eb67
RS
3054{
3055 struct lisp_parse_state state;
1fc71008 3056 enum syntaxcode code;
95ff8dfc 3057 int c1;
1fc71008 3058 bool comnested;
d311d28c 3059 struct level { ptrdiff_t last, prev; };
8489eb67 3060 struct level levelstart[100];
1fc71008 3061 struct level *curlevel = levelstart;
8489eb67 3062 struct level *endlevel = levelstart + 100;
1fc71008 3063 EMACS_INT depth; /* Paren depth of current scanning location.
8489eb67
RS
3064 level - levelstart equals this except
3065 when the depth becomes negative. */
d311d28c 3066 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
1fc71008 3067 bool start_quoted = 0; /* True means starting after a char quote. */
8489eb67 3068 Lisp_Object tem;
d311d28c
PE
3069 ptrdiff_t prev_from; /* Keep one character before FROM. */
3070 ptrdiff_t prev_from_byte;
1fd3172d 3071 int prev_from_syntax;
1fc71008
PE
3072 bool boundary_stop = commentstop == -1;
3073 bool nofence;
3074 bool found;
d311d28c 3075 ptrdiff_t out_bytepos, out_charpos;
7c7ca3d2 3076 int temp;
93da5fff
KH
3077
3078 prev_from = from;
6a140a74
RS
3079 prev_from_byte = from_byte;
3080 if (from != BEGV)
3081 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
3082
3083 /* Use this macro instead of `from++'. */
6a140a74
RS
3084#define INC_FROM \
3085do { prev_from = from; \
3086 prev_from_byte = from_byte; \
327719ee 3087 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 3088 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 3089 INC_BOTH (from, from_byte); \
3ceb4629
SM
3090 if (from < end) \
3091 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 3092 } while (0)
8489eb67
RS
3093
3094 immediate_quit = 1;
3095 QUIT;
3096
265a9e55 3097 if (NILP (oldstate))
8489eb67
RS
3098 {
3099 depth = 0;
3100 state.instring = -1;
3101 state.incomment = 0;
195d1361
RS
3102 state.comstyle = 0; /* comment style a by default. */
3103 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
3104 }
3105 else
3106 {
3107 tem = Fcar (oldstate);
265a9e55 3108 if (!NILP (tem))
8489eb67
RS
3109 depth = XINT (tem);
3110 else
3111 depth = 0;
3112
3113 oldstate = Fcdr (oldstate);
3114 oldstate = Fcdr (oldstate);
3115 oldstate = Fcdr (oldstate);
3116 tem = Fcar (oldstate);
195d1361 3117 /* Check whether we are inside string_fence-style string: */
7d0393cf 3118 state.instring = (!NILP (tem)
d311d28c 3119 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
e76f1c44 3120 : -1);
8489eb67
RS
3121
3122 oldstate = Fcdr (oldstate);
3123 tem = Fcar (oldstate);
e76f1c44
GM
3124 state.incomment = (!NILP (tem)
3125 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 3126 : 0);
8489eb67
RS
3127
3128 oldstate = Fcdr (oldstate);
3129 tem = Fcar (oldstate);
265a9e55 3130 start_quoted = !NILP (tem);
e5d4f4dc 3131
95ff8dfc 3132 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
3133 style a. If it is non-nil, we are in comment style b */
3134 oldstate = Fcdr (oldstate);
e5d4f4dc 3135 oldstate = Fcdr (oldstate);
195d1361 3136 tem = Fcar (oldstate);
c5683ceb
SM
3137 state.comstyle = (NILP (tem)
3138 ? 0
d311d28c
PE
3139 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3140 ? XINT (tem)
3141 : ST_COMMENT_STYLE));
195d1361 3142
e5d4f4dc 3143 oldstate = Fcdr (oldstate);
e5d4f4dc 3144 tem = Fcar (oldstate);
d311d28c
PE
3145 state.comstr_start =
3146 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
1a102a58
RS
3147 oldstate = Fcdr (oldstate);
3148 tem = Fcar (oldstate);
3149 while (!NILP (tem)) /* >= second enclosing sexps. */
3150 {
d311d28c
PE
3151 Lisp_Object temhd = Fcar (tem);
3152 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3153 curlevel->last = XINT (temhd);
1a102a58 3154 if (++curlevel == endlevel)
02010917 3155 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3156 curlevel->prev = -1;
3157 curlevel->last = -1;
3158 tem = Fcdr (tem);
3159 }
8489eb67
RS
3160 }
3161 state.quoted = 0;
3162 mindepth = depth;
3163
3164 curlevel->prev = -1;
3165 curlevel->last = -1;
3166
326b283b 3167 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3168 temp = FETCH_CHAR (prev_from_byte);
3169 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3170 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3171
195d1361 3172 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3173
326b283b
RS
3174 if (state.incomment)
3175 goto startincomment;
8489eb67
RS
3176 if (state.instring >= 0)
3177 {
195d1361 3178 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3179 if (start_quoted)
3180 goto startquotedinstring;
8489eb67
RS
3181 goto startinstring;
3182 }
326b283b
RS
3183 else if (start_quoted)
3184 goto startquoted;
1fd3172d 3185
8489eb67
RS
3186 while (from < end)
3187 {
c5683ceb 3188 int syntax;
93da5fff 3189 INC_FROM;
1fd3172d 3190 code = prev_from_syntax & 0xff;
4c920633 3191
c5d4b96f
SM
3192 if (from < end
3193 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3194 && (c1 = FETCH_CHAR (from_byte),
c5683ceb
SM
3195 syntax = SYNTAX_WITH_FLAGS (c1),
3196 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
c5d4b96f
SM
3197 /* Duplicate code to avoid a complex if-expression
3198 which causes trouble for the SGI compiler. */
95ff8dfc 3199 {
c5d4b96f
SM
3200 /* Record the comment style we have entered so that only
3201 the comment-end sequence of the same style actually
3202 terminates the comment section. */
c5683ceb
SM
3203 state.comstyle
3204 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
1fc71008
PE
3205 comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3206 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
c5d4b96f 3207 state.incomment = comnested ? 1 : -1;
95ff8dfc 3208 state.comstr_start = prev_from;
c5d4b96f
SM
3209 INC_FROM;
3210 code = Scomment;
95ff8dfc 3211 }
4c920633 3212 else if (code == Scomment_fence)
e5d4f4dc
RS
3213 {
3214 /* Record the comment style we have entered so that only
3215 the comment-end sequence of the same style actually
3216 terminates the comment section. */
95ff8dfc
RS
3217 state.comstyle = ST_COMMENT_STYLE;
3218 state.incomment = -1;
195d1361 3219 state.comstr_start = prev_from;
e5d4f4dc 3220 code = Scomment;
e5d4f4dc 3221 }
c5d4b96f
SM
3222 else if (code == Scomment)
3223 {
c5683ceb 3224 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
c5d4b96f
SM
3225 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3226 1 : -1);
3227 state.comstr_start = prev_from;
3228 }
e5d4f4dc 3229
1fd3172d 3230 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3231 continue;
7393bcbb 3232 switch (code)
8489eb67
RS
3233 {
3234 case Sescape:
3235 case Scharquote:
3236 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3237 curlevel->last = prev_from;
8489eb67
RS
3238 startquoted:
3239 if (from == end) goto endquoted;
93da5fff 3240 INC_FROM;
8489eb67
RS
3241 goto symstarted;
3242 /* treat following character as a word constituent */
3243 case Sword:
3244 case Ssymbol:
3245 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3246 curlevel->last = prev_from;
8489eb67
RS
3247 symstarted:
3248 while (from < end)
3249 {
1fc71008 3250 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
45b683a1 3251 switch (SYNTAX (symchar))
8489eb67
RS
3252 {
3253 case Scharquote:
3254 case Sescape:
93da5fff 3255 INC_FROM;
8489eb67
RS
3256 if (from == end) goto endquoted;
3257 break;
3258 case Sword:
3259 case Ssymbol:
3260 case Squote:
3261 break;
3262 default:
3263 goto symdone;
3264 }
93da5fff 3265 INC_FROM;
8489eb67
RS
3266 }
3267 symdone:
3268 curlevel->prev = curlevel->last;
3269 break;
3270
240c43e8 3271 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3272 case Scomment:
195d1361 3273 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3274 startincomment:
3275 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3276 that we find a 2-char comment ender even if we start in the
e6365855 3277 middle of it. We don't want to do that if we're just at the
d355bd8a 3278 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3279 found = forw_comment (from, from_byte, end,
3280 state.incomment, state.comstyle,
e6365855
SM
3281 (from == BEGV || from < state.comstr_start + 3)
3282 ? 0 : prev_from_syntax,
95ff8dfc
RS
3283 &out_charpos, &out_bytepos, &state.incomment);
3284 from = out_charpos; from_byte = out_bytepos;
3285 /* Beware! prev_from and friends are invalid now.
3286 Luckily, the `done' doesn't use them and the INC_FROM
3287 sets them to a sane value without looking at them. */
3288 if (!found) goto done;
d355bd8a 3289 INC_FROM;
8489eb67 3290 state.incomment = 0;
e5d4f4dc 3291 state.comstyle = 0; /* reset the comment style */
195d1361 3292 if (boundary_stop) goto done;
8489eb67
RS
3293 break;
3294
3295 case Sopen:
3296 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3297 depth++;
3298 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3299 curlevel->last = prev_from;
8489eb67 3300 if (++curlevel == endlevel)
02010917 3301 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3302 curlevel->prev = -1;
3303 curlevel->last = -1;
30844415 3304 if (targetdepth == depth) goto done;
8489eb67
RS
3305 break;
3306
3307 case Sclose:
3308 depth--;
3309 if (depth < mindepth)
3310 mindepth = depth;
3311 if (curlevel != levelstart)
3312 curlevel--;
3313 curlevel->prev = curlevel->last;
30844415 3314 if (targetdepth == depth) goto done;
8489eb67
RS
3315 break;
3316
3317 case Sstring:
195d1361
RS
3318 case Sstring_fence:
3319 state.comstr_start = from - 1;
8489eb67 3320 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3321 curlevel->last = prev_from;
7d0393cf 3322 state.instring = (code == Sstring
b7dbcc19 3323 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3324 : ST_STRING_STYLE);
3325 if (boundary_stop) goto done;
8489eb67 3326 startinstring:
195d1361 3327 {
644ea4df 3328 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3329
644ea4df
RS
3330 while (1)
3331 {
3332 int c;
1fc71008 3333 enum syntaxcode c_code;
195d1361 3334
644ea4df 3335 if (from >= end) goto done;
b7dbcc19 3336 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1fc71008 3337 c_code = SYNTAX (c);
ccf89641 3338
1fc71008 3339 /* Check C_CODE here so that if the char has
ccf89641
KH
3340 a syntax-table property which says it is NOT
3341 a string character, it does not end the string. */
1fc71008 3342 if (nofence && c == state.instring && c_code == Sstring)
ccf89641
KH
3343 break;
3344
1fc71008 3345 switch (c_code)
644ea4df
RS
3346 {
3347 case Sstring_fence:
3348 if (!nofence) goto string_end;
3349 break;
3350 case Scharquote:
3351 case Sescape:
3352 INC_FROM;
3353 startquotedinstring:
3354 if (from >= end) goto endquoted;
195d1361 3355 }
644ea4df
RS
3356 INC_FROM;
3357 }
195d1361
RS
3358 }
3359 string_end:
8489eb67
RS
3360 state.instring = -1;
3361 curlevel->prev = curlevel->last;
93da5fff 3362 INC_FROM;
195d1361 3363 if (boundary_stop) goto done;
8489eb67
RS
3364 break;
3365
3366 case Smath:
240c43e8
SM
3367 /* FIXME: We should do something with it. */
3368 break;
3369 default:
3370 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3371 break;
3372 }
3373 }
3374 goto done;
3375
3376 stop: /* Here if stopping before start of sexp. */
93da5fff 3377 from = prev_from; /* We have just fetched the char that starts it; */
68643cde 3378 from_byte = prev_from_byte;
8489eb67
RS
3379 goto done; /* but return the position before it. */
3380
3381 endquoted:
3382 state.quoted = 1;
3383 done:
3384 state.depth = depth;
3385 state.mindepth = mindepth;
3386 state.thislevelstart = curlevel->prev;
3387 state.prevlevelstart
3388 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3389 state.location = from;
8ca30920 3390 state.location_byte = from_byte;
1a102a58 3391 state.levelstarts = Qnil;
eb4d412d
PE
3392 while (curlevel > levelstart)
3393 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3394 state.levelstarts);
8489eb67
RS
3395 immediate_quit = 0;
3396
e5d4f4dc 3397 *stateptr = state;
8489eb67
RS
3398}
3399
c81a3712 3400DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3401 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3402Parsing stops at TO or when certain criteria are met;
3403 point is set to where parsing stops.
3404If fifth arg OLDSTATE is omitted or nil,
3405 parsing assumes that FROM is the beginning of a function.
0e6228bc 3406Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3407 0. depth in parens.
3408 1. character address of start of innermost containing list; nil if none.
3409 2. character address of start of last complete sexp terminated.
3410 3. non-nil if inside a string.
3411 (it is the character that will terminate the string,
3412 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3413 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3414 else an integer (the current comment nesting).
3415 5. t if following a quote character.
3416 6. the minimum paren-depth encountered during this scan.
c5683ceb 3417 7. style of comment, if any.
fdb82f93
PJ
3418 8. character address of start of comment or string; nil if not in one.
3419 9. Intermediate data for continuation of parsing (subject to change).
3420If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3421in parentheses becomes equal to TARGETDEPTH.
3422Fourth arg STOPBEFORE non-nil means stop when come to
3423 any character that starts a sexp.
0e6228bc 3424Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3425 It is used to initialize the state of the parse. Elements number 1, 2, 6
d311d28c 3426 are ignored.
fdb82f93
PJ
3427Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3428 If it is symbol `syntax-table', stop after the start of a comment or a
3429 string, or after end of a comment or a string. */)
8ca30920
DA
3430 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3431 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
8489eb67
RS
3432{
3433 struct lisp_parse_state state;
d311d28c 3434 EMACS_INT target;
8489eb67 3435
265a9e55 3436 if (!NILP (targetdepth))
8489eb67 3437 {
b7826503 3438 CHECK_NUMBER (targetdepth);
8489eb67
RS
3439 target = XINT (targetdepth);
3440 }
3441 else
d311d28c 3442 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth */
8489eb67
RS
3443
3444 validate_region (&from, &to);
6a140a74
RS
3445 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3446 XINT (to),
c81a3712 3447 target, !NILP (stopbefore), oldstate,
7d0393cf 3448 (NILP (commentstop)
195d1361 3449 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67 3450
8ca30920 3451 SET_PT_BOTH (state.location, state.location_byte);
7d0393cf 3452
8489eb67 3453 return Fcons (make_number (state.depth),
c5683ceb
SM
3454 Fcons (state.prevlevelstart < 0
3455 ? Qnil : make_number (state.prevlevelstart),
3456 Fcons (state.thislevelstart < 0
3457 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3458 Fcons (state.instring >= 0
3459 ? (state.instring == ST_STRING_STYLE
195d1361 3460 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3461 Fcons (state.incomment < 0 ? Qt :
3462 (state.incomment == 0 ? Qnil :
3463 make_number (state.incomment)),
8489eb67 3464 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3465 Fcons (make_number (state.mindepth),
7d0393cf 3466 Fcons ((state.comstyle
bff37d2a 3467 ? (state.comstyle == ST_COMMENT_STYLE
c5683ceb
SM
3468 ? Qsyntax_table
3469 : make_number (state.comstyle))
3470 : Qnil),
0beaf54f
DL
3471 Fcons (((state.incomment
3472 || (state.instring >= 0))
bff37d2a
RS
3473 ? make_number (state.comstr_start)
3474 : Qnil),
1a102a58 3475 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3476}
3477\f
dfcf069d 3478void
971de7fb 3479init_syntax_once (void)
8489eb67 3480{
78f9a1f7 3481 register int i, c;
8ea151b2 3482 Lisp_Object temp;
8489eb67 3483
5ebaddf5 3484 /* This has to be done here, before we call Fmake_char_table. */
cd3520a4 3485 DEFSYM (Qsyntax_table, "syntax-table");
5ebaddf5 3486
c76826d9
DA
3487 /* This variable is DEFSYMed in alloc.c and not initialized yet, so
3488 intern it here. NOTE: you must guarantee that init_syntax_once
3489 is called before all other users of this variable. */
d67b4f80 3490 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3491
93da5fff 3492 /* Create objects which can be shared among syntax tables. */
25721f5b
DA
3493 Vsyntax_code_object = make_uninit_vector (Smax);
3494 for (i = 0; i < Smax; i++)
28be1ada 3495 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
93da5fff 3496
5ebaddf5
RS
3497 /* Now we are ready to set up this property, so we can
3498 create syntax tables. */
3499 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3500
1fc71008 3501 temp = AREF (Vsyntax_code_object, Swhitespace);
8489eb67 3502
5ebaddf5 3503 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3504
aa7b08b4 3505 /* Control characters should not be whitespace. */
1fc71008 3506 temp = AREF (Vsyntax_code_object, Spunct);
aa7b08b4
RS
3507 for (i = 0; i <= ' ' - 1; i++)
3508 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3509 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3510
3511 /* Except that a few really are whitespace. */
1fc71008 3512 temp = AREF (Vsyntax_code_object, Swhitespace);
aa7b08b4
RS
3513 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3514 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3515 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3516 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3517 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3518
1fc71008 3519 temp = AREF (Vsyntax_code_object, Sword);
8489eb67 3520 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3521 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3522 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3523 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3524 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3525 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3526
3527 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3528 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3529
3530 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3531 Fcons (make_number (Sopen), make_number (')')));
3532 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3533 Fcons (make_number (Sclose), make_number ('(')));
3534 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3535 Fcons (make_number (Sopen), make_number (']')));
3536 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3537 Fcons (make_number (Sclose), make_number ('[')));
3538 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3539 Fcons (make_number (Sopen), make_number ('}')));
3540 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3541 Fcons (make_number (Sclose), make_number ('{')));
3542 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
1fc71008 3543 Fcons (make_number (Sstring), Qnil));
8ea151b2 3544 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
1fc71008 3545 Fcons (make_number (Sescape), Qnil));
8ea151b2 3546
1fc71008 3547 temp = AREF (Vsyntax_code_object, Ssymbol);
8489eb67 3548 for (i = 0; i < 10; i++)
78f9a1f7
KH
3549 {
3550 c = "_-+*/&|<>="[i];
3551 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3552 }
8489eb67 3553
1fc71008 3554 temp = AREF (Vsyntax_code_object, Spunct);
8489eb67 3555 for (i = 0; i < 12; i++)
78f9a1f7
KH
3556 {
3557 c = ".,;:?!#@~^'`"[i];
3558 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3559 }
bd25db08
KH
3560
3561 /* All multibyte characters have syntax `word' by default. */
1fc71008 3562 temp = AREF (Vsyntax_code_object, Sword);
5c7b02ab 3563 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3564}
3565
dfcf069d 3566void
971de7fb 3567syms_of_syntax (void)
8489eb67 3568{
cd3520a4 3569 DEFSYM (Qsyntax_table_p, "syntax-table-p");
8489eb67 3570
93da5fff
KH
3571 staticpro (&Vsyntax_code_object);
3572
a2994c46
KS
3573 staticpro (&gl_state.object);
3574 staticpro (&gl_state.global_code);
3575 staticpro (&gl_state.current_syntax_table);
3576 staticpro (&gl_state.old_prop);
3577
3578 /* Defined in regex.c */
3579 staticpro (&re_match_object);
3580
cd3520a4 3581 DEFSYM (Qscan_error, "scan-error");
7bf5e9e4 3582 Fput (Qscan_error, Qerror_conditions,
3438fe21 3583 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
7bf5e9e4 3584 Fput (Qscan_error, Qerror_message,
2a0213a6 3585 build_pure_c_string ("Scan error"));
7bf5e9e4 3586
29208e82 3587 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
fdb82f93 3588 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3589
29208e82 3590 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
fdb82f93
PJ
3591 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3592Otherwise, that text property is simply ignored.
3593See the info node `(elisp)Syntax Properties' for a description of the
3594`syntax-table' property. */);
195d1361 3595
8489eb67 3596 words_include_escapes = 0;
29208e82 3597 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
fdb82f93 3598 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3599
29208e82 3600 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
fdb82f93 3601 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3602 multibyte_syntax_as_symbol = 0;
3603
f4ed767f 3604 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
29208e82 3605 open_paren_in_column_0_is_defun_start,
fb7ada5f 3606 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3607 open_paren_in_column_0_is_defun_start = 1;
3608
8f924df7
KH
3609
3610 DEFVAR_LISP ("find-word-boundary-function-table",
29208e82 3611 Vfind_word_boundary_function_table,
869bb237 3612 doc: /*
8f924df7 3613Char table of functions to search for the word boundary.
869bb237
KH
3614Each function is called with two arguments; POS and LIMIT.
3615POS and LIMIT are character positions in the current buffer.
3616
3617If POS is less than LIMIT, POS is at the first character of a word,
3618and the return value of a function is a position after the last
3619character of that word.
3620
3621If POS is not less than LIMIT, POS is at the last character of a word,
3622and the return value of a function is a position at the first
3623character of that word.
3624
3625In both cases, LIMIT bounds the search. */);
8f924df7 3626 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3627
8489eb67
RS
3628 defsubr (&Ssyntax_table_p);
3629 defsubr (&Ssyntax_table);
3630 defsubr (&Sstandard_syntax_table);
3631 defsubr (&Scopy_syntax_table);
3632 defsubr (&Sset_syntax_table);
3633 defsubr (&Schar_syntax);
beefa22e 3634 defsubr (&Smatching_paren);
c65adb44 3635 defsubr (&Sstring_to_syntax);
8489eb67 3636 defsubr (&Smodify_syntax_entry);
62abe9cb 3637 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3638
3639 defsubr (&Sforward_word);
3640
195d1361
RS
3641 defsubr (&Sskip_chars_forward);
3642 defsubr (&Sskip_chars_backward);
3643 defsubr (&Sskip_syntax_forward);
3644 defsubr (&Sskip_syntax_backward);
3645
b3cfe0c8 3646 defsubr (&Sforward_comment);
8489eb67
RS
3647 defsubr (&Sscan_lists);
3648 defsubr (&Sscan_sexps);
3649 defsubr (&Sbackward_prefix_chars);
3650 defsubr (&Sparse_partial_sexp);
3651}