* lread.c (safe_to_load_p): Make the end-of-loop test the inverse of the in-loop.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
73b0cd50 2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2011
429ab54e 3 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
8489eb67
RS
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
19
20
18160b98 21#include <config.h>
8489eb67 22#include <ctype.h>
d7306fe6 23#include <setjmp.h>
8489eb67
RS
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
5c7b02ab 27#include "character.h"
e35f6ff7 28#include "keymap.h"
a1bc88d4 29#include "regex.h"
195d1361
RS
30
31/* Make syntax table lookup grant data in gl_state. */
32#define SYNTAX_ENTRY_VIA_PROPERTY
33
8489eb67 34#include "syntax.h"
195d1361 35#include "intervals.h"
c5683ceb
SM
36#include "category.h"
37
38/* Then there are seven single-bit flags that have the following meanings:
39 1. This character is the first of a two-character comment-start sequence.
40 2. This character is the second of a two-character comment-start sequence.
41 3. This character is the first of a two-character comment-end sequence.
42 4. This character is the second of a two-character comment-end sequence.
43 5. This character is a prefix, for backward-prefix-chars.
44 6. The char is part of a delimiter for comments of style "b".
45 7. This character is part of a nestable comment sequence.
46 8. The char is part of a delimiter for comments of style "c".
47 Note that any two-character sequence whose first character has flag 1
48 and whose second character has flag 2 will be interpreted as a comment start.
49
50 bit 6 and 8 are used to discriminate between different comment styles.
51 Languages such as C++ allow two orthogonal syntax start/end pairs
52 and bit 6 is used to determine whether a comment-end or Scommentend
53 ends style a or b. Comment markers can start style a, b, c, or bc.
54 Style a is always the default.
55 For 2-char comment markers, the style b flag is only looked up on the second
56 char of the comment marker and on the first char of the comment ender.
57 For style c (like to for the nested flag), the flag can be placed on any
58 one of the chars.
59 */
60
61/* These macros extract specific flags from an integer
62 that holds the syntax code and the flags. */
63
64#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
65
66#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
67
68#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
69
70#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
71
72#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
73
74#define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
75#define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
76/* FLAGS should be the flags of the main char of the comment marker, e.g.
77 the second for comstart and the first for comend. */
78#define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
79 (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
80 | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
81 | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
82
83#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
84
85/* These macros extract a particular flag for a given character. */
86
87#define SYNTAX_COMEND_FIRST(c) \
88 (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
89#define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
195d1361
RS
90
91/* We use these constants in place for comment-style and
92 string-ender-char to distinguish comments/strings started by
93 comment_fence and string_fence codes. */
94
95#define ST_COMMENT_STYLE (256 + 1)
96#define ST_STRING_STYLE (256 + 2)
8489eb67 97
7bf5e9e4 98Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67 99
8ea151b2
RS
100/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
101 if not compiled with GCC. No need to mark it, since it is used
102 only very temporarily. */
103Lisp_Object syntax_temp;
104
e5d4f4dc
RS
105/* This is the internal form of the parse state used in parse-partial-sexp. */
106
107struct lisp_parse_state
108 {
6449674e
SM
109 int depth; /* Depth at end of parsing. */
110 int instring; /* -1 if not within string, else desired terminator. */
111 int incomment; /* -1 if in unnestable comment else comment nesting */
112 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
113 int quoted; /* Nonzero if just after an escape char at end of parsing */
114 int mindepth; /* Minimum depth seen while scanning. */
115 /* Char number of most recent start-of-expression at current level */
116 EMACS_INT thislevelstart;
117 /* Char number of start of containing expression */
118 EMACS_INT prevlevelstart;
119 EMACS_INT location; /* Char number at which parsing stopped. */
120 EMACS_INT comstr_start; /* Position of last comment/string starter. */
121 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
122 of levels (starting from outermost). */
e5d4f4dc
RS
123 };
124\f
37bef230
RS
125/* These variables are a cache for finding the start of a defun.
126 find_start_pos is the place for which the defun start was found.
127 find_start_value is the defun start position found for it.
6a140a74 128 find_start_value_byte is the corresponding byte position.
37bef230
RS
129 find_start_buffer is the buffer it was found in.
130 find_start_begv is the BEGV value when it was found.
131 find_start_modiff is the value of MODIFF when it was found. */
132
8e2911c2
AS
133static EMACS_INT find_start_pos;
134static EMACS_INT find_start_value;
135static EMACS_INT find_start_value_byte;
37bef230 136static struct buffer *find_start_buffer;
8e2911c2 137static EMACS_INT find_start_begv;
37bef230 138static int find_start_modiff;
6a140a74
RS
139
140
f57e2426
J
141static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
142static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
143static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
144static void scan_sexps_forward (struct lisp_parse_state *,
145 EMACS_INT, EMACS_INT, EMACS_INT, int,
146 int, Lisp_Object, int);
147static int in_classes (int, Lisp_Object);
195d1361 148\f
c5683ceb
SM
149/* Whether the syntax of the character C has the prefix flag set. */
150int syntax_prefix_flag_p (int c)
151{
152 return SYNTAX_PREFIX (c);
153}
195d1361
RS
154
155struct gl_state_s gl_state; /* Global state of syntax parser. */
156
195d1361
RS
157#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
158 to scan to property-change. */
159
6a140a74
RS
160/* Update gl_state to an appropriate interval which contains CHARPOS. The
161 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 162 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 163 valid at start, the rest is filled basing on OBJECT.
195d1361 164
6a140a74 165 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
166 direction than the intervals - or in an interval. We update the
167 current syntax-table basing on the property of this interval, and
6a140a74 168 update the interval to start further than CHARPOS - or be
195d1361 169 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 170 charpos to call this subroutine again - or be before/after the
195d1361
RS
171 start/end of OBJECT. */
172
173void
4f3a2f8d
EZ
174update_syntax_table (EMACS_INT charpos, int count, int init,
175 Lisp_Object object)
195d1361
RS
176{
177 Lisp_Object tmp_table;
4ffe723b 178 int cnt = 0, invalidate = 1;
5a774522 179 INTERVAL i;
195d1361
RS
180
181 if (init)
182 {
bb0de084 183 gl_state.old_prop = Qnil;
195d1361
RS
184 gl_state.start = gl_state.b_property;
185 gl_state.stop = gl_state.e_property;
bb0de084
SM
186 i = interval_of (charpos, object);
187 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
188 invalidate = 0;
189 if (NULL_INTERVAL_P (i))
190 return;
f902a008 191 /* interval_of updates only ->position of the return value, so
d80f4cc7 192 update the parents manually to speed up update_interval. */
bb0de084 193 while (!NULL_PARENT (i))
d80f4cc7
RS
194 {
195 if (AM_RIGHT_CHILD (i))
439d5cb4 196 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 197 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
198 - TOTAL_LENGTH (INTERVAL_PARENT (i))
199 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 200 else
439d5cb4 201 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 202 + TOTAL_LENGTH (i);
439d5cb4 203 i = INTERVAL_PARENT (i);
d80f4cc7
RS
204 }
205 i = gl_state.forward_i;
bb0de084 206 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 207 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
208 goto update;
209 }
5a774522 210 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 211
423e705d 212 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 213 or further off. */
195d1361
RS
214 if (NULL_INTERVAL_P (i))
215 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 216 else if (charpos < i->position) /* Move left. */
195d1361
RS
217 {
218 if (count > 0)
f902a008 219 error ("Error in syntax_table logic for intervals <-");
195d1361 220 /* Update the interval. */
6a140a74 221 i = update_interval (i, charpos);
bb0de084 222 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
223 {
224 invalidate = 0;
195d1361 225 gl_state.forward_i = i;
ee0cdb48 226 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 227 }
423e705d 228 }
6a140a74 229 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
230 {
231 if (count < 0)
f902a008 232 error ("Error in syntax_table logic for intervals ->");
195d1361 233 /* Update the interval. */
6a140a74 234 i = update_interval (i, charpos);
bb0de084 235 if (i->position != gl_state.e_property)
195d1361
RS
236 {
237 invalidate = 0;
195d1361 238 gl_state.backward_i = i;
bb0de084 239 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
240 }
241 }
195d1361
RS
242
243 update:
244 tmp_table = textget (i->plist, Qsyntax_table);
245
246 if (invalidate)
247 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 248
423e705d
SM
249 if (invalidate) /* Did not get to adjacent interval. */
250 { /* with the same table => */
251 /* invalidate the old range. */
195d1361
RS
252 if (count > 0)
253 {
254 gl_state.backward_i = i;
bb0de084
SM
255 gl_state.b_property = i->position - gl_state.offset;
256 }
257 else
195d1361 258 {
bb0de084 259 gl_state.forward_i = i;
ee0cdb48 260 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
261 }
262 }
37bef230 263
bb0de084 264 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 265 {
bb0de084
SM
266 gl_state.current_syntax_table = tmp_table;
267 gl_state.old_prop = tmp_table;
268 if (EQ (Fsyntax_table_p (tmp_table), Qt))
269 {
270 gl_state.use_global = 0;
7d0393cf 271 }
bb0de084
SM
272 else if (CONSP (tmp_table))
273 {
274 gl_state.use_global = 1;
275 gl_state.global_code = tmp_table;
276 }
7d0393cf 277 else
bb0de084
SM
278 {
279 gl_state.use_global = 0;
4b4deea2 280 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
bb0de084 281 }
195d1361
RS
282 }
283
284 while (!NULL_INTERVAL_P (i))
285 {
286 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
287 {
288 if (count > 0)
bb0de084
SM
289 {
290 gl_state.e_property = i->position - gl_state.offset;
291 gl_state.forward_i = i;
292 }
293 else
294 {
5a774522
SM
295 gl_state.b_property
296 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
297 gl_state.backward_i = i;
298 }
299 return;
195d1361 300 }
7d0393cf 301 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
302 {
303 if (count > 0)
bb0de084 304 {
5a774522
SM
305 gl_state.e_property
306 = i->position + LENGTH (i) - gl_state.offset
307 /* e_property at EOB is not set to ZV but to ZV+1, so that
308 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
309 having to check eob between the two. */
310 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
311 gl_state.forward_i = i;
312 }
313 else
314 {
315 gl_state.b_property = i->position - gl_state.offset;
316 gl_state.backward_i = i;
317 }
318 return;
195d1361
RS
319 }
320 cnt++;
321 i = count > 0 ? next_interval (i) : previous_interval (i);
322 }
bb0de084
SM
323 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
324 if (count > 0)
325 gl_state.e_property = gl_state.stop;
326 else
327 gl_state.b_property = gl_state.start;
195d1361
RS
328}
329\f
6a140a74
RS
330/* Returns TRUE if char at CHARPOS is quoted.
331 Global syntax-table data should be set up already to be good at CHARPOS
332 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
333
334static int
6449674e 335char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
195d1361
RS
336{
337 register enum syntaxcode code;
6449674e 338 register EMACS_INT beg = BEGV;
195d1361 339 register int quoted = 0;
6449674e 340 EMACS_INT orig = charpos;
6a140a74 341
02d8b017 342 while (charpos > beg)
195d1361 343 {
ab229fdd 344 int c;
02d8b017 345 DEC_BOTH (charpos, bytepos);
ab229fdd 346
6a140a74 347 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 348 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 349 code = SYNTAX (c);
6a140a74
RS
350 if (! (code == Scharquote || code == Sescape))
351 break;
352
6a140a74 353 quoted = !quoted;
195d1361 354 }
6a140a74
RS
355
356 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
357 return quoted;
358}
6a140a74 359
6a140a74
RS
360/* Return the bytepos one character before BYTEPOS.
361 We assume that BYTEPOS is not at the start of the buffer. */
362
b6557553 363static INLINE EMACS_INT
971de7fb 364dec_bytepos (EMACS_INT bytepos)
6a140a74 365{
4b4deea2 366 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
367 return bytepos - 1;
368
6a140a74
RS
369 DEC_POS (bytepos);
370 return bytepos;
371}
195d1361 372\f
2b34df4e 373/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
374 It should be the last one before POS, or nearly the last.
375
376 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 377 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
378
379 We record the information about where the scan started
380 and what its result was, so that another call in the same area
381 can return the same value very quickly.
195d1361
RS
382
383 There is no promise at which position the global syntax data is
384 valid on return from the subroutine, so the caller should explicitly
385 update the global data. */
37bef230 386
6449674e 387static EMACS_INT
971de7fb 388find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
37bef230 389{
8e2911c2 390 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
37bef230 391
1fd1cc2f
RS
392 if (!open_paren_in_column_0_is_defun_start)
393 {
394 find_start_value_byte = BEGV_BYTE;
395 return BEGV;
396 }
397
37bef230
RS
398 /* Use previous finding, if it's valid and applies to this inquiry. */
399 if (current_buffer == find_start_buffer
400 /* Reuse the defun-start even if POS is a little farther on.
401 POS might be in the next defun, but that's ok.
402 Our value may not be the best possible, but will still be usable. */
403 && pos <= find_start_pos + 1000
404 && pos >= find_start_value
405 && BEGV == find_start_begv
406 && MODIFF == find_start_modiff)
407 return find_start_value;
408
409 /* Back up to start of line. */
6a140a74 410 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 411
195d1361
RS
412 /* We optimize syntax-table lookup for rare updates. Thus we accept
413 only those `^\s(' which are good in global _and_ text-property
414 syntax-tables. */
d48cd3f4 415 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 416 while (PT > BEGV)
37bef230 417 {
ab229fdd
AS
418 int c;
419
1fd1cc2f
RS
420 /* Open-paren at start of line means we may have found our
421 defun-start. */
327719ee 422 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 423 if (SYNTAX (c) == Sopen)
195d1361 424 {
1fd1cc2f 425 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 426 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 427 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
428 break;
429 /* Now fallback to the default value. */
d48cd3f4 430 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 431 }
1fd1cc2f
RS
432 /* Move to beg of previous line. */
433 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
434 }
435
436 /* Record what we found, for the next try. */
6a140a74
RS
437 find_start_value = PT;
438 find_start_value_byte = PT_BYTE;
37bef230
RS
439 find_start_buffer = current_buffer;
440 find_start_modiff = MODIFF;
441 find_start_begv = BEGV;
442 find_start_pos = pos;
443
6a140a74
RS
444 TEMP_SET_PT_BOTH (opoint, opoint_byte);
445
37bef230
RS
446 return find_start_value;
447}
448\f
f902a008
RS
449/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
450
451static int
4f3a2f8d 452prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
f902a008
RS
453{
454 int c, val;
455
456 DEC_BOTH (pos, pos_byte);
457 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
458 c = FETCH_CHAR (pos_byte);
459 val = SYNTAX_COMEND_FIRST (c);
460 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
461 return val;
462}
463
464/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
465
3f679f55
SM
466/* static int
467 * prev_char_comstart_first (pos, pos_byte)
468 * int pos, pos_byte;
469 * {
470 * int c, val;
7d0393cf 471 *
3f679f55
SM
472 * DEC_BOTH (pos, pos_byte);
473 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
474 * c = FETCH_CHAR (pos_byte);
475 * val = SYNTAX_COMSTART_FIRST (c);
476 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
477 * return val;
478 * } */
f902a008 479
6a140a74
RS
480/* Checks whether charpos FROM is at the end of a comment.
481 FROM_BYTE is the bytepos corresponding to FROM.
482 Do not move back before STOP.
483
484 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
485 return -1 otherwise.
486
487 If successful, store the charpos of the comment's beginning
488 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
489
490 Global syntax data remains valid for backward search starting at
491 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
492
493static int
971de7fb 494back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
195d1361
RS
495{
496 /* Look back, counting the parity of string-quotes,
497 and recording the comment-starters seen.
498 When we reach a safe place, assume that's not in a string;
499 then step the main scan to the earliest comment-starter seen
500 an even number of string quotes away from the safe place.
501
502 OFROM[I] is position of the earliest comment-starter seen
503 which is I+2X quotes from the comment-end.
504 PARITY is current parity of quotes from the comment end. */
3ee5041c 505 int string_style = -1; /* Presumed outside of any string. */
195d1361 506 int string_lossage = 0;
f7c436c1 507 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 508 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
509 comment-starter we might see later could be a false positive (hidden
510 inside another comment).
511 Test case: { a (* b } c (* d *) */
512 int comment_lossage = 0;
8e2911c2
AS
513 EMACS_INT comment_end = from;
514 EMACS_INT comment_end_byte = from_byte;
515 EMACS_INT comstart_pos = 0;
4f63c6bb 516 EMACS_INT comstart_byte IF_LINT (= 0);
eb35b628
RS
517 /* Place where the containing defun starts,
518 or 0 if we didn't come across it yet. */
8e2911c2
AS
519 EMACS_INT defun_start = 0;
520 EMACS_INT defun_start_byte = 0;
195d1361 521 register enum syntaxcode code;
95ff8dfc 522 int nesting = 1; /* current comment nesting */
ea315ed6 523 int c;
3f679f55
SM
524 int syntax = 0;
525
526 /* FIXME: A }} comment-ender style leads to incorrect behavior
527 in the case of {{ c }}} because we ignore the last two chars which are
528 assumed to be comment-enders although they aren't. */
195d1361
RS
529
530 /* At beginning of range to scan, we're outside of strings;
531 that determines quote parity to the comment-end. */
532 while (from != stop)
533 {
4f3a2f8d
EZ
534 EMACS_INT temp_byte;
535 int prev_syntax, com2start, com2end;
fbb3da77 536 int comstart;
6a140a74 537
195d1361 538 /* Move back and examine a character. */
6a140a74 539 DEC_BOTH (from, from_byte);
195d1361
RS
540 UPDATE_SYNTAX_TABLE_BACKWARD (from);
541
3f679f55 542 prev_syntax = syntax;
b7dbcc19 543 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 544 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
545 code = SYNTAX (c);
546
3f679f55
SM
547 /* Check for 2-char comment markers. */
548 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
549 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
c5683ceb
SM
550 && (comstyle
551 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
3f679f55
SM
552 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
553 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
554 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
555 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
fbb3da77 556 comstart = (com2start || code == Scomment);
01f44d5a 557
3f679f55
SM
558 /* Nasty cases with overlapping 2-char comment markers:
559 - snmp-mode: -- c -- foo -- c --
560 --- c --
561 ------ c --
562 - c-mode: *||*
563 |* *|* *|
564 |*| |* |*|
565 /// */
566
567 /* If a 2-char comment sequence partly overlaps with another,
fbb3da77
SM
568 we don't try to be clever. E.g. |*| in C, or }% in modes that
569 have %..\n and %{..}%. */
570 if (from > stop && (com2end || comstart))
195d1361 571 {
4f3a2f8d
EZ
572 EMACS_INT next = from, next_byte = from_byte;
573 int next_c, next_syntax;
3f679f55
SM
574 DEC_BOTH (next, next_byte);
575 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 576 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55 577 next_syntax = SYNTAX_WITH_FLAGS (next_c);
fbb3da77 578 if (((comstart || comnested)
3f679f55
SM
579 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
580 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
581 || ((com2end || comnested)
582 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
c5683ceb
SM
583 && (comstyle
584 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
3f679f55
SM
585 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
586 goto lossage;
587 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 588 }
3f679f55
SM
589
590 if (com2start && comstart_pos == 0)
591 /* We're looking at a comment starter. But it might be a comment
592 ender as well (see snmp-mode). The first time we see one, we
593 need to consider it as a comment starter,
594 and the subsequent times as a comment ender. */
595 com2end = 0;
596
597 /* Turn a 2-char comment sequences into the appropriate syntax. */
598 if (com2end)
599 code = Sendcomment;
600 else if (com2start)
601 code = Scomment;
602 /* Ignore comment starters of a different style. */
603 else if (code == Scomment
c5683ceb 604 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
3f679f55 605 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 606 continue;
195d1361 607
9828a477
KH
608 /* Ignore escaped characters, except comment-enders. */
609 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
610 continue;
611
3ee5041c 612 switch (code)
195d1361 613 {
3ee5041c
SM
614 case Sstring_fence:
615 case Scomment_fence:
616 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
617 case Sstring:
618 /* Track parity of quotes. */
619 if (string_style == -1)
620 /* Entering a string. */
621 string_style = c;
622 else if (string_style == c)
623 /* Leaving the string. */
624 string_style = -1;
625 else
626 /* If we have two kinds of string delimiters.
627 There's no way to grok this scanning backwards. */
195d1361 628 string_lossage = 1;
3ee5041c 629 break;
7d0393cf 630
3ee5041c
SM
631 case Scomment:
632 /* We've already checked that it is the relevant comstyle. */
f7c436c1 633 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
634 /* There are odd string quotes involved, so let's be careful.
635 Test case in Pascal: " { " a { " } */
636 goto lossage;
637
f7c436c1
SM
638 if (!comnested)
639 {
640 /* Record best comment-starter so far. */
641 comstart_pos = from;
642 comstart_byte = from_byte;
643 }
644 else if (--nesting <= 0)
95ff8dfc
RS
645 /* nested comments have to be balanced, so we don't need to
646 keep looking for earlier ones. We use here the same (slightly
647 incorrect) reasoning as below: since it is followed by uniform
648 paired string quotes, this comment-start has to be outside of
649 strings, else the comment-end itself would be inside a string. */
650 goto done;
3ee5041c
SM
651 break;
652
02010917 653 case Sendcomment:
c5683ceb 654 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
d070eb22 655 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 656 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
657 /* This is the same style of comment ender as ours. */
658 {
659 if (comnested)
660 nesting++;
661 else
662 /* Anything before that can't count because it would match
663 this comment-ender rather than ours. */
664 from = stop; /* Break out of the loop. */
665 }
f7c436c1
SM
666 else if (comstart_pos != 0 || c != '\n')
667 /* We're mixing comment styles here, so we'd better be careful.
668 The (comstart_pos != 0 || c != '\n') check is not quite correct
669 (we should just always set comment_lossage), but removing it
670 would imply that any multiline comment in C would go through
671 lossage, which seems overkill.
672 The failure should only happen in the rare cases such as
673 { (* } *) */
674 comment_lossage = 1;
02010917 675 break;
195d1361 676
02010917
SM
677 case Sopen:
678 /* Assume a defun-start point is outside of strings. */
679 if (open_paren_in_column_0_is_defun_start
680 && (from == stop
681 || (temp_byte = dec_bytepos (from_byte),
682 FETCH_CHAR (temp_byte) == '\n')))
683 {
684 defun_start = from;
685 defun_start_byte = from_byte;
686 from = stop; /* Break out of the loop. */
687 }
eb35b628 688 break;
02010917
SM
689
690 default:
f7c436c1 691 break;
eb35b628 692 }
195d1361
RS
693 }
694
695 if (comstart_pos == 0)
696 {
697 from = comment_end;
6a140a74 698 from_byte = comment_end_byte;
195d1361
RS
699 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
700 }
f7c436c1
SM
701 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
702 or `done'), then we've found the beginning of the non-nested comment. */
703 else if (1) /* !comnested */
195d1361
RS
704 {
705 from = comstart_pos;
6a140a74 706 from_byte = comstart_byte;
789f3320 707 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
708 }
709 else
710 {
3ee5041c
SM
711 struct lisp_parse_state state;
712 lossage:
195d1361
RS
713 /* We had two kinds of string delimiters mixed up
714 together. Decode this going forwards.
02010917 715 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
716 to the one in question; this records where we
717 last passed a comment starter. */
eb35b628
RS
718 /* If we did not already find the defun start, find it now. */
719 if (defun_start == 0)
720 {
721 defun_start = find_defun_start (comment_end, comment_end_byte);
722 defun_start_byte = find_start_value_byte;
723 }
02010917 724 do
195d1361 725 {
02010917
SM
726 scan_sexps_forward (&state,
727 defun_start, defun_start_byte,
728 comment_end, -10000, 0, Qnil, 0);
729 defun_start = comment_end;
730 if (state.incomment == (comnested ? 1 : -1)
731 && state.comstyle == comstyle)
732 from = state.comstr_start;
733 else
734 {
735 from = comment_end;
736 if (state.incomment)
737 /* If comment_end is inside some other comment, maybe ours
738 is nested, so we need to try again from within the
739 surrounding comment. Example: { a (* " *) */
740 {
741 /* FIXME: We should advance by one or two chars. */
742 defun_start = state.comstr_start + 2;
743 defun_start_byte = CHAR_TO_BYTE (defun_start);
744 }
745 }
746 } while (defun_start < comment_end);
747
6a140a74 748 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
749 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
750 }
7d0393cf 751
95ff8dfc 752 done:
6a140a74
RS
753 *charpos_ptr = from;
754 *bytepos_ptr = from_byte;
755
1aa963c8 756 return (from == comment_end) ? -1 : from;
195d1361
RS
757}
758\f
8489eb67 759DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
760 doc: /* Return t if OBJECT is a syntax table.
761Currently, any char-table counts as a syntax table. */)
5842a27b 762 (Lisp_Object object)
8489eb67 763{
2203e1e8 764 if (CHAR_TABLE_P (object)
e704cb4b 765 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
766 return Qt;
767 return Qnil;
768}
769
8ea151b2 770static void
971de7fb 771check_syntax_table (Lisp_Object obj)
8489eb67 772{
47f5f6ae
KS
773 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
774 Qsyntax_table_p, obj);
7d0393cf 775}
8489eb67 776
8489eb67 777DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
778 doc: /* Return the current syntax table.
779This is the one specified by the current buffer. */)
5842a27b 780 (void)
8489eb67 781{
4b4deea2 782 return BVAR (current_buffer, syntax_table);
8489eb67
RS
783}
784
785DEFUN ("standard-syntax-table", Fstandard_syntax_table,
786 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
787 doc: /* Return the standard syntax table.
788This is the one used for new buffers. */)
5842a27b 789 (void)
8489eb67
RS
790{
791 return Vstandard_syntax_table;
792}
793
794DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
795 doc: /* Construct a new syntax table and return it.
796It is a copy of the TABLE, which defaults to the standard syntax table. */)
5842a27b 797 (Lisp_Object table)
8489eb67 798{
8ea151b2
RS
799 Lisp_Object copy;
800
265a9e55 801 if (!NILP (table))
8ea151b2
RS
802 check_syntax_table (table);
803 else
804 table = Vstandard_syntax_table;
805
806 copy = Fcopy_sequence (table);
0f867324
RS
807
808 /* Only the standard syntax table should have a default element.
809 Other syntax tables should inherit from parents instead. */
810 XCHAR_TABLE (copy)->defalt = Qnil;
811
812 /* Copied syntax tables should all have parents.
813 If we copied one with no parent, such as the standard syntax table,
814 use the standard syntax table as the copy's parent. */
815 if (NILP (XCHAR_TABLE (copy)->parent))
816 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 817 return copy;
8489eb67
RS
818}
819
820DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
821 doc: /* Select a new syntax table for the current buffer.
822One argument, a syntax table. */)
5842a27b 823 (Lisp_Object table)
8489eb67 824{
4e4dfa78 825 int idx;
8ea151b2 826 check_syntax_table (table);
4b4deea2 827 BVAR (current_buffer, syntax_table) = table;
8489eb67 828 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
829 idx = PER_BUFFER_VAR_IDX (syntax_table);
830 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
831 return table;
832}
833\f
834/* Convert a letter which signifies a syntax code
835 into the code it signifies.
195d1361 836 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
837
838unsigned char syntax_spec_code[0400] =
839 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
840 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
841 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
842 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 843 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
844 (char) Smath, 0377, 0377, (char) Squote,
845 (char) Sopen, (char) Sclose, 0377, 0377,
846 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
847 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
848 0377, 0377, 0377, 0377,
849 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 850 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
851 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
852 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
853 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
854 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
855 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
856 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 857 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
858 };
859
195d1361 860/* Indexed by syntax code, give the letter that describes it. */
8489eb67 861
195d1361 862char syntax_code_spec[16] =
8489eb67 863 {
195d1361
RS
864 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
865 '!', '|'
8489eb67 866 };
93da5fff
KH
867
868/* Indexed by syntax code, give the object (cons of syntax code and
869 nil) to be stored in syntax table. Since these objects can be
870 shared among syntax tables, we generate them in advance. By
871 sharing objects, the function `describe-syntax' can give a more
872 compact listing. */
873static Lisp_Object Vsyntax_code_object;
874
8489eb67
RS
875\f
876DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 877 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
878For example, if CHARACTER is a word constituent, the
879character `w' (119) is returned.
fdb82f93
PJ
880The characters that correspond to various syntax codes
881are listed in the documentation of `modify-syntax-entry'. */)
5842a27b 882 (Lisp_Object character)
8489eb67 883{
8ea151b2 884 int char_int;
774b9a60 885 CHECK_CHARACTER (character);
2203e1e8 886 char_int = XINT (character);
d48cd3f4 887 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 888 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
889}
890
891DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93 892 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
5842a27b 893 (Lisp_Object character)
beefa22e 894{
8ea151b2 895 int char_int, code;
b7826503 896 CHECK_NUMBER (character);
2203e1e8 897 char_int = XINT (character);
d48cd3f4 898 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 899 code = SYNTAX (char_int);
a8bd7cd8 900 if (code == Sopen || code == Sclose)
2e34157c 901 return SYNTAX_MATCH (char_int);
beefa22e 902 return Qnil;
8489eb67
RS
903}
904
c65adb44 905DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
906 doc: /* Convert a syntax specification STRING into syntax cell form.
907STRING should be a string as it is allowed as argument of
908`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 909\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93 910text property. */)
5842a27b 911 (Lisp_Object string)
8489eb67 912{
2e567bd3 913 register const unsigned char *p;
8489eb67 914 register enum syntaxcode code;
247e20a8 915 int val;
8ea151b2 916 Lisp_Object match;
8489eb67 917
b7826503 918 CHECK_STRING (string);
8489eb67 919
d5db4077 920 p = SDATA (string);
8489eb67
RS
921 code = (enum syntaxcode) syntax_spec_code[*p++];
922 if (((int) code & 0377) == 0377)
b764a653 923 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 924
8ea151b2 925 if (code == Sinherit)
c65adb44 926 return Qnil;
8ea151b2
RS
927
928 if (*p)
d1be9f0f 929 {
93da5fff 930 int len;
62a6e103 931 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 932 XSETINT (match, character);
d1be9f0f
RS
933 if (XFASTINT (match) == ' ')
934 match = Qnil;
93da5fff 935 p += len;
d1be9f0f
RS
936 }
937 else
8ea151b2 938 match = Qnil;
8489eb67 939
8ea151b2 940 val = (int) code;
8489eb67
RS
941 while (*p)
942 switch (*p++)
943 {
944 case '1':
247e20a8 945 val |= 1 << 16;
8489eb67
RS
946 break;
947
948 case '2':
247e20a8 949 val |= 1 << 17;
8489eb67
RS
950 break;
951
952 case '3':
247e20a8 953 val |= 1 << 18;
8489eb67
RS
954 break;
955
956 case '4':
247e20a8 957 val |= 1 << 19;
8489eb67
RS
958 break;
959
960 case 'p':
247e20a8 961 val |= 1 << 20;
8489eb67 962 break;
e5d4f4dc
RS
963
964 case 'b':
247e20a8 965 val |= 1 << 21;
e5d4f4dc 966 break;
95ff8dfc
RS
967
968 case 'n':
969 val |= 1 << 22;
970 break;
c5683ceb
SM
971
972 case 'c':
973 val |= 1 << 23;
974 break;
8489eb67 975 }
7d0393cf 976
93da5fff 977 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 978 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
979 else
980 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
981 return Fcons (make_number (val), match);
982}
983
fdb82f93 984/* I really don't know why this is interactive
2c9e1900 985 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 986DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 987 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 988 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 989The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 990 the current buffer's syntax table.
5c7b02ab 991CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 992in the range MIN to MAX are changed.
fdb82f93
PJ
993The first character of NEWENTRY should be one of the following:
994 Space or - whitespace syntax. w word constituent.
995 _ symbol constituent. . punctuation.
996 ( open-parenthesis. ) close-parenthesis.
997 " string quote. \\ escape.
998 $ paired delimiter. ' expression quote or prefix operator.
999 < comment starter. > comment ender.
1000 / character-quote. @ inherit from `standard-syntax-table'.
1001 | generic string fence. ! generic comment fence.
1002
1003Only single-character comment start and end sequences are represented thus.
1004Two-character sequences are represented as described below.
1005The second character of NEWENTRY is the matching parenthesis,
1006 used only if the first character is `(' or `)'.
1007Any additional characters are flags.
1008Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1009 1 means CHAR is the start of a two-char comment start sequence.
1010 2 means CHAR is the second character of such a sequence.
1011 3 means CHAR is the start of a two-char comment end sequence.
1012 4 means CHAR is the second character of such a sequence.
fdb82f93 1013
c5683ceb 1014There can be several orthogonal comment sequences. This is to support
fdb82f93
PJ
1015language modes such as C++. By default, all comment sequences are of style
1016a, but you can set the comment sequence style to b (on the second character
c5683ceb
SM
1017of a comment-start, and the first character of a comment-end sequence) and/or
1018c (on any of its chars) using this flag:
a50a10a0 1019 b means CHAR is part of comment sequence b.
c5683ceb 1020 c means CHAR is part of comment sequence c.
a50a10a0 1021 n means CHAR is part of a nestable comment sequence.
fdb82f93 1022
a50a10a0 1023 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1024 such characters are treated as whitespace when they occur
a50a10a0 1025 between expressions.
fedc6ab5 1026usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
5842a27b 1027 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
c65adb44 1028{
5c7b02ab
KH
1029 if (CONSP (c))
1030 {
8f924df7
KH
1031 CHECK_CHARACTER_CAR (c);
1032 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1033 }
1034 else
1035 CHECK_CHARACTER (c);
c65adb44
SM
1036
1037 if (NILP (syntax_table))
4b4deea2 1038 syntax_table = BVAR (current_buffer, syntax_table);
c65adb44
SM
1039 else
1040 check_syntax_table (syntax_table);
8489eb67 1041
5c7b02ab
KH
1042 newentry = Fstring_to_syntax (newentry);
1043 if (CONSP (c))
1044 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1045 else
1046 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1047
1048 /* We clear the regexp cache, since character classes can now have
1049 different values from those in the compiled regexps.*/
1050 clear_regexp_cache ();
1051
8489eb67
RS
1052 return Qnil;
1053}
1054\f
1055/* Dump syntax table to buffer in human-readable format */
1056
62abe9cb
SM
1057DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1058 Sinternal_describe_syntax_value, 1, 1, 0,
1059 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
5842a27b 1060 (Lisp_Object syntax)
8489eb67
RS
1061{
1062 register enum syntaxcode code;
c5683ceb
SM
1063 int syntax_code;
1064 char desc, start1, start2, end1, end2, prefix,
1065 comstyleb, comstylec, comnested;
8489eb67 1066 char str[2];
62abe9cb 1067 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1068
8ea151b2
RS
1069 if (NILP (value))
1070 {
62abe9cb
SM
1071 insert_string ("default");
1072 return syntax;
8ea151b2
RS
1073 }
1074
908b7fea
KH
1075 if (CHAR_TABLE_P (value))
1076 {
62abe9cb
SM
1077 insert_string ("deeper char-table ...");
1078 return syntax;
908b7fea
KH
1079 }
1080
8ea151b2
RS
1081 if (!CONSP (value))
1082 {
62abe9cb
SM
1083 insert_string ("invalid");
1084 return syntax;
8ea151b2
RS
1085 }
1086
c1d497be
KR
1087 first = XCAR (value);
1088 match_lisp = XCDR (value);
8ea151b2
RS
1089
1090 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1091 {
62abe9cb
SM
1092 insert_string ("invalid");
1093 return syntax;
8489eb67
RS
1094 }
1095
c5683ceb
SM
1096 syntax_code = XINT (first);
1097 code = (enum syntaxcode) (syntax_code & 0377);
1098 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1099 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1100 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1101 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1102 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1103 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1104 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1105 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
8489eb67
RS
1106
1107 if ((int) code < 0 || (int) code >= (int) Smax)
1108 {
1109 insert_string ("invalid");
62abe9cb 1110 return syntax;
8489eb67
RS
1111 }
1112 desc = syntax_code_spec[(int) code];
1113
1114 str[0] = desc, str[1] = 0;
1115 insert (str, 1);
1116
93da5fff
KH
1117 if (NILP (match_lisp))
1118 insert (" ", 1);
1119 else
1120 insert_char (XINT (match_lisp));
8489eb67 1121
8489eb67
RS
1122 if (start1)
1123 insert ("1", 1);
1124 if (start2)
1125 insert ("2", 1);
1126
1127 if (end1)
1128 insert ("3", 1);
1129 if (end2)
1130 insert ("4", 1);
1131
1132 if (prefix)
1133 insert ("p", 1);
c5683ceb 1134 if (comstyleb)
e5d4f4dc 1135 insert ("b", 1);
c5683ceb
SM
1136 if (comstylec)
1137 insert ("c", 1);
e6365855
SM
1138 if (comnested)
1139 insert ("n", 1);
8489eb67
RS
1140
1141 insert_string ("\twhich means: ");
1142
0220c518 1143 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1144 {
1145 case Swhitespace:
1146 insert_string ("whitespace"); break;
1147 case Spunct:
1148 insert_string ("punctuation"); break;
1149 case Sword:
1150 insert_string ("word"); break;
1151 case Ssymbol:
1152 insert_string ("symbol"); break;
1153 case Sopen:
1154 insert_string ("open"); break;
1155 case Sclose:
1156 insert_string ("close"); break;
1157 case Squote:
d671382e 1158 insert_string ("prefix"); break;
8489eb67
RS
1159 case Sstring:
1160 insert_string ("string"); break;
1161 case Smath:
1162 insert_string ("math"); break;
1163 case Sescape:
1164 insert_string ("escape"); break;
1165 case Scharquote:
1166 insert_string ("charquote"); break;
1167 case Scomment:
1168 insert_string ("comment"); break;
1169 case Sendcomment:
1170 insert_string ("endcomment"); break;
d671382e
SM
1171 case Sinherit:
1172 insert_string ("inherit"); break;
1173 case Scomment_fence:
1174 insert_string ("comment fence"); break;
1175 case Sstring_fence:
1176 insert_string ("string fence"); break;
8489eb67
RS
1177 default:
1178 insert_string ("invalid");
62abe9cb 1179 return syntax;
8489eb67
RS
1180 }
1181
8ea151b2 1182 if (!NILP (match_lisp))
8489eb67
RS
1183 {
1184 insert_string (", matches ");
8ea151b2 1185 insert_char (XINT (match_lisp));
8489eb67
RS
1186 }
1187
1188 if (start1)
1189 insert_string (",\n\t is the first character of a comment-start sequence");
1190 if (start2)
1191 insert_string (",\n\t is the second character of a comment-start sequence");
1192
1193 if (end1)
1194 insert_string (",\n\t is the first character of a comment-end sequence");
1195 if (end2)
1196 insert_string (",\n\t is the second character of a comment-end sequence");
c5683ceb 1197 if (comstyleb)
e5d4f4dc 1198 insert_string (" (comment style b)");
c5683ceb
SM
1199 if (comstylec)
1200 insert_string (" (comment style c)");
e6365855
SM
1201 if (comnested)
1202 insert_string (" (nestable)");
e5d4f4dc 1203
8489eb67
RS
1204 if (prefix)
1205 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1206
62abe9cb
SM
1207 return syntax;
1208}
8489eb67
RS
1209\f
1210/* Return the position across COUNT words from FROM.
1211 If that many words cannot be found before the end of the buffer, return 0.
1212 COUNT negative means scan backward and stop at word beginning. */
1213
4f3a2f8d
EZ
1214EMACS_INT
1215scan_words (register EMACS_INT from, register EMACS_INT count)
8489eb67 1216{
4f3a2f8d
EZ
1217 register EMACS_INT beg = BEGV;
1218 register EMACS_INT end = ZV;
1219 register EMACS_INT from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1220 register enum syntaxcode code;
1221 int ch0, ch1;
8f924df7 1222 Lisp_Object func, script, pos;
8489eb67
RS
1223
1224 immediate_quit = 1;
1225 QUIT;
1226
195d1361
RS
1227 SETUP_SYNTAX_TABLE (from, count);
1228
8489eb67
RS
1229 while (count > 0)
1230 {
1231 while (1)
1232 {
1233 if (from == end)
1234 {
1235 immediate_quit = 0;
1236 return 0;
1237 }
195d1361 1238 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1239 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1240 code = SYNTAX (ch0);
6a140a74 1241 INC_BOTH (from, from_byte);
8489eb67
RS
1242 if (words_include_escapes
1243 && (code == Sescape || code == Scharquote))
1244 break;
1245 if (code == Sword)
1246 break;
8489eb67 1247 }
93da5fff
KH
1248 /* Now CH0 is a character which begins a word and FROM is the
1249 position of the next character. */
8f924df7 1250 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1251 if (! NILP (Ffboundp (func)))
8489eb67 1252 {
869bb237 1253 pos = call2 (func, make_number (from - 1), make_number (end));
8f924df7
KH
1254 if (INTEGERP (pos) && XINT (pos) > from)
1255 {
1256 from = XINT (pos);
1257 from_byte = CHAR_TO_BYTE (from);
1258 }
8489eb67 1259 }
869bb237 1260 else
8cb8232a 1261 {
8cb8232a
KH
1262 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1263 while (1)
1264 {
1265 if (from == end) break;
1266 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1267 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1268 code = SYNTAX (ch1);
1269 if ((code != Sword
1270 && (! words_include_escapes
1271 || (code != Sescape && code != Scharquote)))
2b70a11c 1272 || word_boundary_p (ch0, ch1))
869bb237 1273 break;
8cb8232a
KH
1274 INC_BOTH (from, from_byte);
1275 ch0 = ch1;
1276 }
8489eb67
RS
1277 }
1278 count--;
1279 }
1280 while (count < 0)
1281 {
1282 while (1)
1283 {
1284 if (from == beg)
1285 {
1286 immediate_quit = 0;
1287 return 0;
1288 }
6a140a74 1289 DEC_BOTH (from, from_byte);
195d1361 1290 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1291 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1292 code = SYNTAX (ch1);
8489eb67
RS
1293 if (words_include_escapes
1294 && (code == Sescape || code == Scharquote))
1295 break;
1296 if (code == Sword)
1297 break;
8489eb67 1298 }
93da5fff
KH
1299 /* Now CH1 is a character which ends a word and FROM is the
1300 position of it. */
8f924df7 1301 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1302 if (! NILP (Ffboundp (func)))
8f924df7 1303 {
869bb237 1304 pos = call2 (func, make_number (from), make_number (beg));
8f924df7
KH
1305 if (INTEGERP (pos) && XINT (pos) < from)
1306 {
1307 from = XINT (pos);
1308 from_byte = CHAR_TO_BYTE (from);
1309 }
8489eb67 1310 }
869bb237 1311 else
8489eb67 1312 {
8cb8232a
KH
1313 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1314 while (1)
1315 {
8cb8232a
KH
1316 if (from == beg)
1317 break;
262be72a 1318 DEC_BOTH (from, from_byte);
8cb8232a 1319 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1320 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1321 code = SYNTAX (ch0);
1322 if ((code != Sword
1323 && (! words_include_escapes
1324 || (code != Sescape && code != Scharquote)))
2b70a11c 1325 || word_boundary_p (ch0, ch1))
262be72a
MB
1326 {
1327 INC_BOTH (from, from_byte);
1328 break;
1329 }
8cb8232a
KH
1330 ch1 = ch0;
1331 }
8489eb67
RS
1332 }
1333 count++;
1334 }
1335
1336 immediate_quit = 0;
1337
1338 return from;
1339}
1340
1e9dbb5f 1341DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1342 doc: /* Move point forward ARG words (backward if ARG is negative).
1343Normally returns t.
1344If an edge of the buffer or a field boundary is reached, point is left there
1345and the function returns nil. Field boundaries are not noticed if
1346`inhibit-field-text-motion' is non-nil. */)
5842a27b 1347 (Lisp_Object arg)
8489eb67 1348{
c096ae4d 1349 Lisp_Object tmp;
2c6ea900 1350 int orig_val, val;
8489eb67 1351
839966f3
KH
1352 if (NILP (arg))
1353 XSETFASTINT (arg, 1);
1354 else
1355 CHECK_NUMBER (arg);
1356
1357 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1358 if (! orig_val)
839966f3 1359 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1360
b8855607 1361 /* Avoid jumping out of an input field. */
c096ae4d
SM
1362 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1363 Qt, Qnil, Qnil);
1364 val = XFASTINT (tmp);
7d0393cf 1365
8489eb67 1366 SET_PT (val);
e6d8341f 1367 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1368}
1369\f
dd4c5104 1370Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
195d1361
RS
1371
1372DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1373 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1374STRING is like the inside of a `[...]' in a regular expression
1375except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1376 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1377Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1378With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1379Char classes, e.g. `[:alpha:]', are supported.
1380
1381Returns the distance traveled, either zero or positive. */)
5842a27b 1382 (Lisp_Object string, Lisp_Object lim)
195d1361 1383{
327719ee 1384 return skip_chars (1, string, lim, 1);
195d1361
RS
1385}
1386
1387DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1388 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1389See `skip-chars-forward' for details.
1390Returns the distance traveled, either zero or negative. */)
5842a27b 1391 (Lisp_Object string, Lisp_Object lim)
195d1361 1392{
327719ee 1393 return skip_chars (0, string, lim, 1);
195d1361
RS
1394}
1395
1396DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1397 doc: /* Move point forward across chars in specified syntax classes.
1398SYNTAX is a string of syntax code characters.
1399Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1400If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1401This function returns the distance traveled, either zero or positive. */)
5842a27b 1402 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1403{
b7dbcc19 1404 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1405}
1406
1407DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1408 doc: /* Move point backward across chars in specified syntax classes.
1409SYNTAX is a string of syntax code characters.
1410Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1411If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1412This function returns the distance traveled, either zero or negative. */)
5842a27b 1413 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1414{
b7dbcc19 1415 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1416}
1417
6a140a74 1418static Lisp_Object
971de7fb 1419skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
195d1361 1420{
195d1361
RS
1421 register unsigned int c;
1422 unsigned char fastmap[0400];
b7dbcc19 1423 /* Store the ranges of non-ASCII characters. */
4f63c6bb 1424 int *char_ranges IF_LINT (= NULL);
7e68b0ea 1425 int n_char_ranges = 0;
195d1361 1426 int negate = 0;
4f3a2f8d 1427 register EMACS_INT i, i_byte;
b7dbcc19
KH
1428 /* Set to 1 if the current buffer is multibyte and the region
1429 contains non-ASCII chars. */
1430 int multibyte;
1431 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1432 chars. */
1674d9a2 1433 int string_multibyte;
4f3a2f8d 1434 EMACS_INT size_byte;
2e567bd3 1435 const unsigned char *str;
82d497fc 1436 int len;
a1bc88d4 1437 Lisp_Object iso_classes;
195d1361 1438
b7826503 1439 CHECK_STRING (string);
a1bc88d4 1440 iso_classes = Qnil;
82d497fc 1441
195d1361
RS
1442 if (NILP (lim))
1443 XSETINT (lim, forwardp ? ZV : BEGV);
1444 else
b7826503 1445 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1446
1447 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1448 if (XINT (lim) > ZV)
1449 XSETFASTINT (lim, ZV);
1450 if (XINT (lim) < BEGV)
1451 XSETFASTINT (lim, BEGV);
1452
4b4deea2 1453 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1454 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1455 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1456
72af86bd 1457 memset (fastmap, 0, sizeof fastmap);
195d1361 1458
8f924df7
KH
1459 str = SDATA (string);
1460 size_byte = SBYTES (string);
4101e6fe 1461
82d497fc 1462 i_byte = 0;
13090112 1463 if (i_byte < size_byte
d5db4077 1464 && SREF (string, 0) == '^')
195d1361 1465 {
82d497fc 1466 negate = 1; i_byte++;
195d1361
RS
1467 }
1468
1469 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1470 Handle backslashes and ranges specially.
195d1361 1471
b7dbcc19
KH
1472 If STRING contains non-ASCII characters, setup char_ranges for
1473 them and use fastmap only for their leading codes. */
4101e6fe 1474
b7dbcc19 1475 if (! string_multibyte)
195d1361 1476 {
b7dbcc19 1477 int string_has_eight_bit = 0;
4101e6fe 1478
b7dbcc19
KH
1479 /* At first setup fastmap. */
1480 while (i_byte < size_byte)
1481 {
1482 c = str[i_byte++];
1483
a1bc88d4
RS
1484 if (handle_iso_classes && c == '['
1485 && i_byte < size_byte
327719ee 1486 && str[i_byte] == ':')
a1bc88d4
RS
1487 {
1488 const unsigned char *class_beg = str + i_byte + 1;
1489 const unsigned char *class_end = class_beg;
b3bda4fd 1490 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1491 /* Leave room for the null. */
a1bc88d4
RS
1492 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1493 re_wctype_t cc;
1494
1495 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1496 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1497
b3bda4fd
KS
1498 while (class_end < class_limit
1499 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1500 class_end++;
1501
b3bda4fd
KS
1502 if (class_end == class_beg
1503 || *class_end != ':' || class_end[1] != ']')
1504 goto not_a_class_name;
a1bc88d4 1505
72af86bd 1506 memcpy (class_name, class_beg, class_end - class_beg);
a1bc88d4
RS
1507 class_name[class_end - class_beg] = 0;
1508
1509 cc = re_wctype (class_name);
1510 if (cc == 0)
1511 error ("Invalid ISO C character class");
1512
1513 iso_classes = Fcons (make_number (cc), iso_classes);
1514
1515 i_byte = class_end + 2 - str;
1516 continue;
1517 }
1518
b3bda4fd 1519 not_a_class_name:
b7dbcc19
KH
1520 if (c == '\\')
1521 {
13090112 1522 if (i_byte == size_byte)
4101e6fe
RS
1523 break;
1524
5c7b02ab 1525 c = str[i_byte++];
195d1361 1526 }
839966f3
KH
1527 /* Treat `-' as range character only if another character
1528 follows. */
1529 if (i_byte + 1 < size_byte
82d497fc 1530 && str[i_byte] == '-')
195d1361 1531 {
9690d026 1532 unsigned int c2;
4101e6fe
RS
1533
1534 /* Skip over the dash. */
82d497fc 1535 i_byte++;
4101e6fe 1536
4101e6fe 1537 /* Get the end of the range. */
5c7b02ab 1538 c2 = str[i_byte++];
b7dbcc19
KH
1539 if (c2 == '\\'
1540 && i_byte < size_byte)
1541 c2 = str[i_byte++];
1542
8f924df7
KH
1543 if (c <= c2)
1544 {
1545 while (c <= c2)
1546 fastmap[c++] = 1;
1547 if (! ASCII_CHAR_P (c2))
1548 string_has_eight_bit = 1;
1549 }
195d1361
RS
1550 }
1551 else
b7dbcc19
KH
1552 {
1553 fastmap[c] = 1;
1554 if (! ASCII_CHAR_P (c))
1555 string_has_eight_bit = 1;
1556 }
1557 }
1558
1559 /* If the current range is multibyte and STRING contains
1560 eight-bit chars, arrange fastmap and setup char_ranges for
1561 the corresponding multibyte chars. */
1562 if (multibyte && string_has_eight_bit)
1563 {
1564 unsigned char fastmap2[0400];
1565 int range_start_byte, range_start_char;
1566
72af86bd
AS
1567 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1568 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1569 /* We are sure that this loop stops. */
1570 for (i = 0200; ! fastmap2[i]; i++);
4c0354d7 1571 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1572 fastmap[CHAR_LEADING_CODE (c)] = 1;
1573 range_start_byte = i;
1574 range_start_char = c;
f003ca54 1575 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1576 for (i = 129; i < 0400; i++)
1577 {
4c0354d7 1578 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1579 fastmap[CHAR_LEADING_CODE (c)] = 1;
1580 if (i - range_start_byte != c - range_start_char)
1581 {
1582 char_ranges[n_char_ranges++] = range_start_char;
1583 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1584 + range_start_char);
1585 range_start_byte = i;
1586 range_start_char = c;
8f924df7 1587 }
b7dbcc19
KH
1588 }
1589 char_ranges[n_char_ranges++] = range_start_char;
1590 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1591 + range_start_char);
195d1361
RS
1592 }
1593 }
f003ca54 1594 else /* STRING is multibyte */
b7dbcc19 1595 {
f003ca54
KH
1596 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1597
b7dbcc19 1598 while (i_byte < size_byte)
195d1361 1599 {
b7dbcc19 1600 unsigned char leading_code;
5c7b02ab 1601
b7dbcc19 1602 leading_code = str[i_byte];
62a6e103 1603 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1604 i_byte += len;
5c7b02ab 1605
327719ee
MB
1606 if (handle_iso_classes && c == '['
1607 && i_byte < size_byte
62a6e103 1608 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1609 {
1610 const unsigned char *class_beg = str + i_byte + 1;
1611 const unsigned char *class_end = class_beg;
1612 const unsigned char *class_limit = str + size_byte - 2;
1613 /* Leave room for the null. */
1614 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1615 re_wctype_t cc;
1616
1617 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1618 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1619
1620 while (class_end < class_limit
1621 && *class_end >= 'a' && *class_end <= 'z')
1622 class_end++;
1623
1624 if (class_end == class_beg
1625 || *class_end != ':' || class_end[1] != ']')
1626 goto not_a_class_name_multibyte;
1627
72af86bd 1628 memcpy (class_name, class_beg, class_end - class_beg);
327719ee
MB
1629 class_name[class_end - class_beg] = 0;
1630
1631 cc = re_wctype (class_name);
1632 if (cc == 0)
1633 error ("Invalid ISO C character class");
1634
1635 iso_classes = Fcons (make_number (cc), iso_classes);
1636
1637 i_byte = class_end + 2 - str;
1638 continue;
1639 }
1640
1641 not_a_class_name_multibyte:
195d1361
RS
1642 if (c == '\\')
1643 {
13090112 1644 if (i_byte == size_byte)
4101e6fe
RS
1645 break;
1646
b7dbcc19 1647 leading_code = str[i_byte];
62a6e103 1648 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1649 i_byte += len;
195d1361 1650 }
839966f3
KH
1651 /* Treat `-' as range character only if another character
1652 follows. */
1653 if (i_byte + 1 < size_byte
82d497fc 1654 && str[i_byte] == '-')
195d1361 1655 {
9690d026 1656 unsigned int c2;
b7dbcc19 1657 unsigned char leading_code2;
4101e6fe
RS
1658
1659 /* Skip over the dash. */
82d497fc 1660 i_byte++;
4101e6fe 1661
4101e6fe 1662 /* Get the end of the range. */
b7dbcc19 1663 leading_code2 = str[i_byte];
62a6e103 1664 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1665 i_byte += len;
7e68b0ea 1666
b7dbcc19
KH
1667 if (c2 == '\\'
1668 && i_byte < size_byte)
1669 {
1670 leading_code2 = str[i_byte];
62a6e103 1671 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1672 i_byte += len;
1673 }
1674
f003ca54
KH
1675 if (c > c2)
1676 continue;
b7dbcc19 1677 if (ASCII_CHAR_P (c))
06274af5 1678 {
5c7b02ab
KH
1679 while (c <= c2 && c < 0x80)
1680 fastmap[c++] = 1;
b7dbcc19
KH
1681 leading_code = CHAR_LEADING_CODE (c);
1682 }
1683 if (! ASCII_CHAR_P (c))
1684 {
1685 while (leading_code <= leading_code2)
1686 fastmap[leading_code++] = 1;
1687 if (c <= c2)
e39091b6 1688 {
b7dbcc19 1689 char_ranges[n_char_ranges++] = c;
e39091b6 1690 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1691 }
1692 }
195d1361
RS
1693 }
1694 else
7e68b0ea 1695 {
b7dbcc19 1696 if (ASCII_CHAR_P (c))
e39091b6
KH
1697 fastmap[c] = 1;
1698 else
7e68b0ea 1699 {
b7dbcc19 1700 fastmap[leading_code] = 1;
4101e6fe
RS
1701 char_ranges[n_char_ranges++] = c;
1702 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1703 }
1704 }
195d1361 1705 }
b7dbcc19
KH
1706
1707 /* If the current range is unibyte and STRING contains non-ASCII
1708 chars, arrange fastmap for the corresponding unibyte
1709 chars. */
1710
1711 if (! multibyte && n_char_ranges > 0)
1712 {
72af86bd 1713 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1714 for (i = 0; i < n_char_ranges; i += 2)
1715 {
1716 int c1 = char_ranges[i];
1717 int c2 = char_ranges[i + 1];
1718
1719 for (; c1 <= c2; c1++)
2afc21f5
SM
1720 {
1721 int b = CHAR_TO_BYTE_SAFE (c1);
1722 if (b >= 0)
1723 fastmap[b] = 1;
1724 }
b7dbcc19
KH
1725 }
1726 }
195d1361
RS
1727 }
1728
9690d026 1729 /* If ^ was the first character, complement the fastmap. */
195d1361 1730 if (negate)
b7dbcc19
KH
1731 {
1732 if (! multibyte)
1733 for (i = 0; i < sizeof fastmap; i++)
1734 fastmap[i] ^= 1;
1735 else
1736 {
1737 for (i = 0; i < 0200; i++)
1738 fastmap[i] ^= 1;
1739 /* All non-ASCII chars possibly match. */
1740 for (; i < sizeof fastmap; i++)
1741 fastmap[i] = 1;
1742 }
1743 }
195d1361
RS
1744
1745 {
4f3a2f8d
EZ
1746 EMACS_INT start_point = PT;
1747 EMACS_INT pos = PT;
1748 EMACS_INT pos_byte = PT_BYTE;
9af7511a
KH
1749 unsigned char *p = PT_ADDR, *endp, *stop;
1750
1751 if (forwardp)
1752 {
1753 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1754 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1755 }
1756 else
1757 {
1758 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1759 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1760 }
195d1361
RS
1761
1762 immediate_quit = 1;
618db430
SM
1763 /* This code may look up syntax tables using macros that rely on the
1764 gl_state object. To make sure this object is not out of date,
1765 let's initialize it manually.
1766 We ignore syntax-table text-properties for now, since that's
1767 what we've done in the past. */
d48cd3f4 1768 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1769 if (forwardp)
195d1361 1770 {
b7dbcc19 1771 if (multibyte)
8f924df7 1772 while (1)
b7dbcc19 1773 {
8f924df7 1774 int nbytes;
9af7511a 1775
8f924df7 1776 if (p >= stop)
9af7511a 1777 {
8f924df7 1778 if (p >= endp)
9af7511a 1779 break;
8f924df7
KH
1780 p = GAP_END_ADDR;
1781 stop = endp;
9af7511a 1782 }
62a6e103 1783 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1784 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1785 {
327719ee 1786 if (negate)
9af7511a 1787 break;
327719ee
MB
1788 else
1789 goto fwd_ok;
9af7511a 1790 }
9af7511a 1791
8f924df7 1792 if (! fastmap[*p])
b7dbcc19
KH
1793 break;
1794 if (! ASCII_CHAR_P (c))
9af7511a 1795 {
b7dbcc19
KH
1796 /* As we are looking at a multibyte character, we
1797 must look up the character in the table
1798 CHAR_RANGES. If there's no data in the table,
1799 that character is not what we want to skip. */
1800
1801 /* The following code do the right thing even if
1802 n_char_ranges is zero (i.e. no data in
1803 CHAR_RANGES). */
1804 for (i = 0; i < n_char_ranges; i += 2)
1805 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1806 break;
b7dbcc19
KH
1807 if (!(negate ^ (i < n_char_ranges)))
1808 break;
9af7511a 1809 }
327719ee 1810 fwd_ok:
8f924df7 1811 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1812 }
1813 else
8f924df7
KH
1814 while (1)
1815 {
1816 if (p >= stop)
9af7511a 1817 {
8f924df7 1818 if (p >= endp)
9af7511a 1819 break;
8f924df7
KH
1820 p = GAP_END_ADDR;
1821 stop = endp;
9af7511a 1822 }
327719ee
MB
1823
1824 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1825 {
1826 if (negate)
1827 break;
1828 else
1829 goto fwd_unibyte_ok;
1830 }
1831
8f924df7
KH
1832 if (!fastmap[*p])
1833 break;
327719ee 1834 fwd_unibyte_ok:
8f924df7
KH
1835 p++, pos++, pos_byte++;
1836 }
195d1361
RS
1837 }
1838 else
1839 {
b7dbcc19 1840 if (multibyte)
8f924df7 1841 while (1)
b7dbcc19 1842 {
8f924df7 1843 unsigned char *prev_p;
9af7511a 1844
8f924df7 1845 if (p <= stop)
9af7511a 1846 {
8f924df7 1847 if (p <= endp)
9af7511a 1848 break;
8f924df7
KH
1849 p = GPT_ADDR;
1850 stop = endp;
9af7511a 1851 }
8f924df7
KH
1852 prev_p = p;
1853 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1854 c = STRING_CHAR (p);
a1bc88d4 1855
327719ee
MB
1856 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1857 {
1858 if (negate)
1859 break;
9690d026 1860 else
327719ee 1861 goto back_ok;
7e68b0ea 1862 }
a1bc88d4 1863
8f924df7 1864 if (! fastmap[*p])
b7dbcc19
KH
1865 break;
1866 if (! ASCII_CHAR_P (c))
7e68b0ea 1867 {
b7dbcc19
KH
1868 /* See the comment in the previous similar code. */
1869 for (i = 0; i < n_char_ranges; i += 2)
1870 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1871 break;
1872 if (!(negate ^ (i < n_char_ranges)))
1873 break;
7e68b0ea 1874 }
327719ee 1875 back_ok:
8f924df7 1876 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1877 }
1878 else
8f924df7
KH
1879 while (1)
1880 {
1881 if (p <= stop)
9af7511a 1882 {
8f924df7 1883 if (p <= endp)
9af7511a 1884 break;
8f924df7
KH
1885 p = GPT_ADDR;
1886 stop = endp;
9af7511a 1887 }
e39091b6 1888
327719ee
MB
1889 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1890 {
1891 if (negate)
1892 break;
9af7511a 1893 else
327719ee
MB
1894 goto back_unibyte_ok;
1895 }
a1bc88d4 1896
8f924df7
KH
1897 if (!fastmap[p[-1]])
1898 break;
327719ee 1899 back_unibyte_ok:
8f924df7
KH
1900 p--, pos--, pos_byte--;
1901 }
195d1361 1902 }
7e68b0ea 1903
b7dbcc19
KH
1904 SET_PT_BOTH (pos, pos_byte);
1905 immediate_quit = 0;
1906
1907 return make_number (PT - start_point);
1908 }
1909}
7e68b0ea 1910
b7dbcc19
KH
1911
1912static Lisp_Object
971de7fb 1913skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
b7dbcc19
KH
1914{
1915 register unsigned int c;
1916 unsigned char fastmap[0400];
1917 int negate = 0;
4f3a2f8d 1918 register EMACS_INT i, i_byte;
b7dbcc19 1919 int multibyte;
4f3a2f8d 1920 EMACS_INT size_byte;
b7dbcc19
KH
1921 unsigned char *str;
1922
1923 CHECK_STRING (string);
1924
1925 if (NILP (lim))
1926 XSETINT (lim, forwardp ? ZV : BEGV);
1927 else
1928 CHECK_NUMBER_COERCE_MARKER (lim);
1929
1930 /* In any case, don't allow scan outside bounds of buffer. */
1931 if (XINT (lim) > ZV)
1932 XSETFASTINT (lim, ZV);
1933 if (XINT (lim) < BEGV)
1934 XSETFASTINT (lim, BEGV);
1935
ade8ee9e 1936 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1937 return make_number (0);
8c6e735b 1938
4b4deea2 1939 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1940 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19 1941
72af86bd 1942 memset (fastmap, 0, sizeof fastmap);
b7dbcc19 1943
8f924df7
KH
1944 if (SBYTES (string) > SCHARS (string))
1945 /* As this is very rare case (syntax spec is ASCII only), don't
1946 consider efficiency. */
bc796a59
KH
1947 string = string_make_unibyte (string);
1948
8f924df7
KH
1949 str = SDATA (string);
1950 size_byte = SBYTES (string);
bc796a59 1951
b7dbcc19
KH
1952 i_byte = 0;
1953 if (i_byte < size_byte
8f924df7 1954 && SREF (string, 0) == '^')
b7dbcc19
KH
1955 {
1956 negate = 1; i_byte++;
1957 }
1958
b7dbcc19
KH
1959 /* Find the syntaxes specified and set their elements of fastmap. */
1960
1961 while (i_byte < size_byte)
1962 {
1963 c = str[i_byte++];
8f924df7 1964 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1965 }
195d1361 1966
9690d026 1967 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1968 if (negate)
1969 for (i = 0; i < sizeof fastmap; i++)
9690d026 1970 fastmap[i] ^= 1;
195d1361
RS
1971
1972 {
4f3a2f8d
EZ
1973 EMACS_INT start_point = PT;
1974 EMACS_INT pos = PT;
1975 EMACS_INT pos_byte = PT_BYTE;
8f924df7
KH
1976 unsigned char *p = PT_ADDR, *endp, *stop;
1977
1978 if (forwardp)
1979 {
1980 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1981 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1982 }
1983 else
1984 {
1985 endp = CHAR_POS_ADDR (XINT (lim));
1986 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1987 }
195d1361
RS
1988
1989 immediate_quit = 1;
b7dbcc19
KH
1990 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1991 if (forwardp)
195d1361 1992 {
b7dbcc19 1993 if (multibyte)
195d1361 1994 {
8f924df7 1995 while (1)
8c6e735b 1996 {
8f924df7
KH
1997 int nbytes;
1998
1999 if (p >= stop)
2000 {
2001 if (p >= endp)
2002 break;
2003 p = GAP_END_ADDR;
2004 stop = endp;
2005 }
62a6e103 2006 c = STRING_CHAR_AND_LENGTH (p, nbytes);
8f924df7 2007 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 2008 break;
8f924df7 2009 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
2010 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2011 }
195d1361
RS
2012 }
2013 else
2014 {
8c6e735b 2015 while (1)
7e68b0ea 2016 {
8f924df7
KH
2017 if (p >= stop)
2018 {
2019 if (p >= endp)
2020 break;
2021 p = GAP_END_ADDR;
2022 stop = endp;
2023 }
2024 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 2025 break;
8f924df7 2026 p++, pos++, pos_byte++;
b7dbcc19 2027 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2028 }
2029 }
2030 }
2031 else
2032 {
b7dbcc19 2033 if (multibyte)
195d1361 2034 {
8c6e735b 2035 while (1)
b7dbcc19 2036 {
8f924df7
KH
2037 unsigned char *prev_p;
2038
2039 if (p <= stop)
b7dbcc19 2040 {
8f924df7
KH
2041 if (p <= endp)
2042 break;
2043 p = GPT_ADDR;
2044 stop = endp;
b7dbcc19 2045 }
262be72a 2046 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2047 prev_p = p;
2048 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2049 c = STRING_CHAR (p);
8f924df7
KH
2050 if (! fastmap[(int) SYNTAX (c)])
2051 break;
2052 pos--, pos_byte -= prev_p - p;
b7dbcc19 2053 }
195d1361
RS
2054 }
2055 else
2056 {
8c6e735b
KH
2057 while (1)
2058 {
8f924df7
KH
2059 if (p <= stop)
2060 {
2061 if (p <= endp)
2062 break;
2063 p = GPT_ADDR;
2064 stop = endp;
2065 }
102f6132 2066 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2067 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2068 break;
8f924df7 2069 p--, pos--, pos_byte--;
8c6e735b 2070 }
195d1361
RS
2071 }
2072 }
6a140a74
RS
2073
2074 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2075 immediate_quit = 0;
2076
2077 return make_number (PT - start_point);
2078 }
2079}
a1bc88d4
RS
2080
2081/* Return 1 if character C belongs to one of the ISO classes
2082 in the list ISO_CLASSES. Each class is represented by an
2083 integer which is its type according to re_wctype. */
2084
2085static int
971de7fb 2086in_classes (int c, Lisp_Object iso_classes)
a1bc88d4
RS
2087{
2088 int fits_class = 0;
2089
618db430 2090 while (CONSP (iso_classes))
a1bc88d4
RS
2091 {
2092 Lisp_Object elt;
2093 elt = XCAR (iso_classes);
2094 iso_classes = XCDR (iso_classes);
2095
2096 if (re_iswctype (c, XFASTINT (elt)))
2097 fits_class = 1;
2098 }
2099
2100 return fits_class;
2101}
195d1361 2102\f
95ff8dfc
RS
2103/* Jump over a comment, assuming we are at the beginning of one.
2104 FROM is the current position.
2105 FROM_BYTE is the bytepos corresponding to FROM.
2106 Do not move past STOP (a charpos).
2107 The comment over which we have to jump is of style STYLE
c5683ceb 2108 (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
95ff8dfc
RS
2109 NESTING should be positive to indicate the nesting at the beginning
2110 for nested comments and should be zero or negative else.
2111 ST_COMMENT_STYLE cannot be nested.
2112 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2113 (or 0 If the search cannot start in the middle of a two-character).
2114
2115 If successful, return 1 and store the charpos of the comment's end
2116 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2117 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2118 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2119 (as defined for state.incomment) in *INCOMMENT_PTR.
2120
2121 The comment end is the last character of the comment rather than the
2122 character just after the comment.
2123
2124 Global syntax data is assumed to initially be valid for FROM and
2125 remains valid for forward search starting at the returned position. */
2126
2127static int
dd4c5104
DN
2128forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2129 int nesting, int style, int prev_syntax,
2130 EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2131 int *incomment_ptr)
95ff8dfc
RS
2132{
2133 register int c, c1;
2134 register enum syntaxcode code;
c5683ceb 2135 register int syntax, other_syntax;
95ff8dfc
RS
2136
2137 if (nesting <= 0) nesting = -1;
2138
2139 /* Enter the loop in the middle so that we find
2140 a 2-char comment ender if we start in the middle of it. */
2141 syntax = prev_syntax;
2142 if (syntax != 0) goto forw_incomment;
2143
2144 while (1)
2145 {
2146 if (from == stop)
2147 {
2148 *incomment_ptr = nesting;
2149 *charpos_ptr = from;
2150 *bytepos_ptr = from_byte;
2151 return 0;
2152 }
8f924df7 2153 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2154 syntax = SYNTAX_WITH_FLAGS (c);
2155 code = syntax & 0xff;
2156 if (code == Sendcomment
c5683ceb 2157 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
abf8a9ff
SM
2158 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2159 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2160 /* we have encountered a comment end of the same style
2161 as the comment sequence which began this comment
2162 section */
2163 break;
2164 if (code == Scomment_fence
2165 && style == ST_COMMENT_STYLE)
2166 /* we have encountered a comment end of the same style
2167 as the comment sequence which began this comment
2168 section. */
2169 break;
2170 if (nesting > 0
2171 && code == Scomment
abf8a9ff 2172 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
c5683ceb 2173 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
95ff8dfc
RS
2174 /* we have encountered a nested comment of the same style
2175 as the comment sequence which began this comment section */
2176 nesting++;
2177 INC_BOTH (from, from_byte);
2178 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2179
95ff8dfc
RS
2180 forw_incomment:
2181 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
8f924df7 2182 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2183 other_syntax = SYNTAX_WITH_FLAGS (c1),
2184 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2185 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
abf8a9ff 2186 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb
SM
2187 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2188 ? nesting > 0 : nesting < 0))
4ffe723b
GM
2189 {
2190 if (--nesting <= 0)
2191 /* we have encountered a comment end of the same style
2192 as the comment sequence which began this comment
2193 section */
2194 break;
2195 else
2196 {
2197 INC_BOTH (from, from_byte);
2198 UPDATE_SYNTAX_TABLE_FORWARD (from);
2199 }
2200 }
95ff8dfc
RS
2201 if (nesting > 0
2202 && from < stop
2203 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2204 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2205 other_syntax = SYNTAX_WITH_FLAGS (c1),
2206 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2207 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
abf8a9ff 2208 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb 2209 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
95ff8dfc
RS
2210 /* we have encountered a nested comment of the same style
2211 as the comment sequence which began this comment
2212 section */
2213 {
2214 INC_BOTH (from, from_byte);
2215 UPDATE_SYNTAX_TABLE_FORWARD (from);
2216 nesting++;
2217 }
2218 }
2219 *charpos_ptr = from;
2220 *bytepos_ptr = from_byte;
2221 return 1;
2222}
2223
b3cfe0c8 2224DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2225 doc: /*
ee648c11 2226Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2227Stop scanning if we find something other than a comment or whitespace.
2228Set point to where scanning stops.
ee648c11 2229If COUNT comments are found as expected, with nothing except whitespace
fdb82f93 2230between them, return t; otherwise return nil. */)
5842a27b 2231 (Lisp_Object count)
b3cfe0c8 2232{
2312c580
SM
2233 register EMACS_INT from;
2234 EMACS_INT from_byte;
2235 register EMACS_INT stop;
8ea151b2 2236 register int c, c1;
b3cfe0c8
RS
2237 register enum syntaxcode code;
2238 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2239 int comnested = 0; /* whether the comment is nestable or not */
be720845 2240 int found;
2312c580
SM
2241 EMACS_INT count1;
2242 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2243 int dummy;
840f481c 2244
b7826503 2245 CHECK_NUMBER (count);
840f481c 2246 count1 = XINT (count);
195d1361 2247 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2248
2249 immediate_quit = 1;
2250 QUIT;
2251
2252 from = PT;
6a140a74 2253 from_byte = PT_BYTE;
b3cfe0c8 2254
195d1361 2255 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2256 while (count1 > 0)
b3cfe0c8 2257 {
04882296 2258 do
b3cfe0c8 2259 {
c5683ceb 2260 int comstart_first, syntax, other_syntax;
f902a008 2261
04882296
KH
2262 if (from == stop)
2263 {
ef316cf0 2264 SET_PT_BOTH (from, from_byte);
b7e6e612 2265 immediate_quit = 0;
04882296
KH
2266 return Qnil;
2267 }
b7dbcc19 2268 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2269 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8 2270 code = SYNTAX (c);
c5683ceb
SM
2271 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2272 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2273 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
6a140a74 2274 INC_BOTH (from, from_byte);
f902a008 2275 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2276 if (from < stop && comstart_first
b7dbcc19 2277 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2278 other_syntax = SYNTAX_WITH_FLAGS (c1),
2279 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
b3cfe0c8 2280 {
7d0393cf 2281 /* We have encountered a comment start sequence and we
7fc8191e 2282 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2283 the comment style this sequence begins so that later,
2284 only a comment end of the same style actually ends
7fc8191e 2285 the comment section. */
b3cfe0c8 2286 code = Scomment;
c5683ceb
SM
2287 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2288 comnested
2289 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2290 INC_BOTH (from, from_byte);
95ff8dfc 2291 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2292 }
04882296 2293 }
abf8a9ff 2294 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2295
da71ab91
KH
2296 if (code == Scomment_fence)
2297 comstyle = ST_COMMENT_STYLE;
2298 else if (code != Scomment)
04882296
KH
2299 {
2300 immediate_quit = 0;
6a140a74 2301 DEC_BOTH (from, from_byte);
ef316cf0 2302 SET_PT_BOTH (from, from_byte);
04882296
KH
2303 return Qnil;
2304 }
2305 /* We're at the start of a comment. */
95ff8dfc
RS
2306 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2307 &out_charpos, &out_bytepos, &dummy);
2308 from = out_charpos; from_byte = out_bytepos;
2309 if (!found)
04882296 2310 {
95ff8dfc
RS
2311 immediate_quit = 0;
2312 SET_PT_BOTH (from, from_byte);
2313 return Qnil;
b3cfe0c8 2314 }
95ff8dfc
RS
2315 INC_BOTH (from, from_byte);
2316 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2317 /* We have skipped one comment. */
840f481c 2318 count1--;
b3cfe0c8
RS
2319 }
2320
840f481c 2321 while (count1 < 0)
b3cfe0c8 2322 {
b9145dbb 2323 while (1)
b3cfe0c8 2324 {
c5683ceb 2325 int quoted, syntax;
f902a008 2326
b9145dbb
RS
2327 if (from <= stop)
2328 {
6a140a74 2329 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2330 immediate_quit = 0;
2331 return Qnil;
2332 }
b3cfe0c8 2333
6a140a74 2334 DEC_BOTH (from, from_byte);
f902a008 2335 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2336 quoted = char_quoted (from, from_byte);
b7dbcc19 2337 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2338 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8
RS
2339 code = SYNTAX (c);
2340 comstyle = 0;
c5683ceb 2341 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2342 if (code == Sendcomment)
c5683ceb
SM
2343 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2344 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
f902a008 2345 && prev_char_comend_first (from, from_byte)
89c6809a 2346 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2347 {
c5683ceb 2348 int other_syntax;
7fc8191e 2349 /* We must record the comment style encountered so that
b3cfe0c8 2350 later, we can match only the proper comment begin
7fc8191e 2351 sequence of the same style. */
6a140a74 2352 DEC_BOTH (from, from_byte);
f902a008
RS
2353 code = Sendcomment;
2354 /* Calling char_quoted, above, set up global syntax position
2355 at the new value of FROM. */
b7dbcc19 2356 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb
SM
2357 other_syntax = SYNTAX_WITH_FLAGS (c1);
2358 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2359 comnested
2360 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
b3cfe0c8
RS
2361 }
2362
195d1361
RS
2363 if (code == Scomment_fence)
2364 {
2365 /* Skip until first preceding unquoted comment_fence. */
01f44d5a 2366 int fence_found = 0;
4f3a2f8d 2367 EMACS_INT ini = from, ini_byte = from_byte;
7d0393cf 2368
6a140a74 2369 while (1)
195d1361 2370 {
6a140a74 2371 DEC_BOTH (from, from_byte);
195d1361 2372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2373 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2374 if (SYNTAX (c) == Scomment_fence
7d0393cf 2375 && !char_quoted (from, from_byte))
195d1361 2376 {
01f44d5a 2377 fence_found = 1;
195d1361
RS
2378 break;
2379 }
fcdd4585
SM
2380 else if (from == stop)
2381 break;
195d1361 2382 }
01f44d5a 2383 if (fence_found == 0)
195d1361
RS
2384 {
2385 from = ini; /* Set point to ini + 1. */
6a140a74 2386 from_byte = ini_byte;
195d1361
RS
2387 goto leave;
2388 }
39c41ad4
SM
2389 else
2390 /* We have skipped one comment. */
2391 break;
195d1361
RS
2392 }
2393 else if (code == Sendcomment)
b3cfe0c8 2394 {
95ff8dfc 2395 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2396 &out_charpos, &out_bytepos);
1aa963c8
SM
2397 if (found == -1)
2398 {
abf8a9ff
SM
2399 if (c == '\n')
2400 /* This end-of-line is not an end-of-comment.
2401 Treat it like a whitespace.
2402 CC-mode (and maybe others) relies on this behavior. */
2403 ;
2404 else
2405 {
2406 /* Failure: we should go back to the end of this
2407 not-quite-endcomment. */
c5683ceb 2408 if (SYNTAX (c) != code)
abf8a9ff
SM
2409 /* It was a two-char Sendcomment. */
2410 INC_BOTH (from, from_byte);
2411 goto leave;
2412 }
1aa963c8 2413 }
2b927d02 2414 else
abf8a9ff
SM
2415 {
2416 /* We have skipped one comment. */
2417 from = out_charpos, from_byte = out_bytepos;
2418 break;
2419 }
b3cfe0c8 2420 }
bb0de084 2421 else if (code != Swhitespace || quoted)
b3cfe0c8 2422 {
195d1361 2423 leave:
b3cfe0c8 2424 immediate_quit = 0;
6a140a74 2425 INC_BOTH (from, from_byte);
ef316cf0 2426 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2427 return Qnil;
2428 }
2429 }
2430
840f481c 2431 count1++;
b3cfe0c8
RS
2432 }
2433
ef316cf0 2434 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2435 immediate_quit = 0;
2436 return Qt;
2437}
2438\f
b7dbcc19 2439/* Return syntax code of character C if C is an ASCII character
db9f48c3 2440 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2441
b7dbcc19
KH
2442#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2443 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2444 ? SYNTAX (c) : Ssymbol)
2445
6a140a74 2446static Lisp_Object
971de7fb 2447scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
8489eb67
RS
2448{
2449 Lisp_Object val;
932e6895 2450 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2451 register int c, c1;
2452 int stringterm;
8489eb67
RS
2453 int quoted;
2454 int mathexit = 0;
93da5fff 2455 register enum syntaxcode code, temp_code;
195d1361 2456 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2457 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2458 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2459 EMACS_INT temp_pos;
2460 EMACS_INT last_good = from;
195d1361 2461 int found;
932e6895
SM
2462 EMACS_INT from_byte;
2463 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2464 int temp, dummy;
bd25db08 2465 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2466
2467 if (depth > 0) min_depth = 0;
2468
c3907a7e
RS
2469 if (from > ZV) from = ZV;
2470 if (from < BEGV) from = BEGV;
2471
2472 from_byte = CHAR_TO_BYTE (from);
2473
8489eb67
RS
2474 immediate_quit = 1;
2475 QUIT;
2476
195d1361 2477 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2478 while (count > 0)
2479 {
8489eb67
RS
2480 while (from < stop)
2481 {
c5683ceb 2482 int comstart_first, prefix, syntax, other_syntax;
195d1361 2483 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2484 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2485 syntax = SYNTAX_WITH_FLAGS (c);
bd25db08 2486 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
c5683ceb
SM
2487 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2488 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2489 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2490 prefix = SYNTAX_FLAGS_PREFIX (syntax);
7bf5e9e4
RS
2491 if (depth == min_depth)
2492 last_good = from;
6a140a74 2493 INC_BOTH (from, from_byte);
195d1361 2494 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2495 if (from < stop && comstart_first
327719ee 2496 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2497 other_syntax = SYNTAX_WITH_FLAGS (c),
2498 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
8489eb67 2499 && parse_sexp_ignore_comments)
e5d4f4dc 2500 {
7d0393cf 2501 /* we have encountered a comment start sequence and we
195d1361 2502 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2503 the comment style this sequence begins so that later,
2504 only a comment end of the same style actually ends
2505 the comment section */
2506 code = Scomment;
c5683ceb
SM
2507 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2508 comnested
2509 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2510 INC_BOTH (from, from_byte);
f902a008 2511 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2512 }
7d0393cf 2513
f902a008 2514 if (prefix)
8489eb67
RS
2515 continue;
2516
0220c518 2517 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2518 {
2519 case Sescape:
2520 case Scharquote:
8b8bf8e6
SM
2521 if (from == stop)
2522 goto lose;
6a140a74 2523 INC_BOTH (from, from_byte);
8489eb67
RS
2524 /* treat following character as a word constituent */
2525 case Sword:
2526 case Ssymbol:
2527 if (depth || !sexpflag) break;
195d1361 2528 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2529 while (from < stop)
2530 {
195d1361 2531 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2532
2533 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2534 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2535 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2536 switch (temp)
8489eb67
RS
2537 {
2538 case Scharquote:
2539 case Sescape:
6a140a74 2540 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2541 if (from == stop)
2542 goto lose;
8489eb67
RS
2543 break;
2544 case Sword:
2545 case Ssymbol:
2546 case Squote:
2547 break;
2548 default:
2549 goto done;
2550 }
6a140a74 2551 INC_BOTH (from, from_byte);
8489eb67
RS
2552 }
2553 goto done;
2554
195d1361 2555 case Scomment_fence:
95ff8dfc
RS
2556 comstyle = ST_COMMENT_STYLE;
2557 /* FALLTHROUGH */
2558 case Scomment:
8489eb67 2559 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2560 UPDATE_SYNTAX_TABLE_FORWARD (from);
2561 found = forw_comment (from, from_byte, stop,
2562 comnested, comstyle, 0,
2563 &out_charpos, &out_bytepos, &dummy);
2564 from = out_charpos, from_byte = out_bytepos;
2565 if (!found)
8489eb67 2566 {
95ff8dfc
RS
2567 if (depth == 0)
2568 goto done;
2569 goto lose;
8489eb67 2570 }
95ff8dfc
RS
2571 INC_BOTH (from, from_byte);
2572 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2573 break;
2574
2575 case Smath:
2576 if (!sexpflag)
2577 break;
b7dbcc19 2578 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2579 {
2580 INC_BOTH (from, from_byte);
2581 }
8489eb67
RS
2582 if (mathexit)
2583 {
2584 mathexit = 0;
2585 goto close1;
2586 }
2587 mathexit = 1;
2588
2589 case Sopen:
2590 if (!++depth) goto done;
2591 break;
2592
2593 case Sclose:
2594 close1:
2595 if (!--depth) goto done;
2596 if (depth < min_depth)
9471945b
KS
2597 xsignal3 (Qscan_error,
2598 build_string ("Containing expression ends prematurely"),
2599 make_number (last_good), make_number (from));
8489eb67
RS
2600 break;
2601
2602 case Sstring:
195d1361 2603 case Sstring_fence:
ef316cf0 2604 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2605 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2606 while (1)
2607 {
8b8bf8e6
SM
2608 if (from >= stop)
2609 goto lose;
195d1361 2610 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2611 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2612 if (code == Sstring
2613 ? (c == stringterm
2614 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2615 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2616 break;
7c7ca3d2
RS
2617
2618 /* Some compilers can't handle this inside the switch. */
bd25db08 2619 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2620 switch (temp)
8489eb67
RS
2621 {
2622 case Scharquote:
2623 case Sescape:
6a140a74 2624 INC_BOTH (from, from_byte);
8489eb67 2625 }
6a140a74 2626 INC_BOTH (from, from_byte);
8489eb67 2627 }
6a140a74 2628 INC_BOTH (from, from_byte);
8489eb67
RS
2629 if (!depth && sexpflag) goto done;
2630 break;
240c43e8
SM
2631 default:
2632 /* Ignore whitespace, punctuation, quote, endcomment. */
2633 break;
8489eb67
RS
2634 }
2635 }
2636
2637 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2638 if (depth)
2639 goto lose;
8489eb67
RS
2640
2641 immediate_quit = 0;
2642 return Qnil;
2643
2644 /* End of object reached */
2645 done:
2646 count--;
2647 }
2648
2649 while (count < 0)
2650 {
8489eb67
RS
2651 while (from > stop)
2652 {
c5683ceb 2653 int syntax;
6a140a74 2654 DEC_BOTH (from, from_byte);
195d1361 2655 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2656 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2657 syntax= SYNTAX_WITH_FLAGS (c);
bd25db08 2658 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2659 if (depth == min_depth)
2660 last_good = from;
7fc8191e 2661 comstyle = 0;
c5683ceb 2662 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2663 if (code == Sendcomment)
c5683ceb
SM
2664 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2665 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
1674d9a2 2666 && prev_char_comend_first (from, from_byte)
8489eb67 2667 && parse_sexp_ignore_comments)
e5d4f4dc 2668 {
f902a008 2669 /* We must record the comment style encountered so that
e5d4f4dc 2670 later, we can match only the proper comment begin
f902a008 2671 sequence of the same style. */
01f44d5a 2672 int c2, other_syntax;
6a140a74 2673 DEC_BOTH (from, from_byte);
f902a008
RS
2674 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2675 code = Sendcomment;
01f44d5a
PE
2676 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2677 other_syntax = SYNTAX_WITH_FLAGS (c2);
c5683ceb
SM
2678 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2679 comnested
2680 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
e5d4f4dc 2681 }
7d0393cf 2682
9828a477 2683 /* Quoting turns anything except a comment-ender
e6365855 2684 into a word character. Note that this cannot be true
f902a008 2685 if we decremented FROM in the if-statement above. */
9828a477 2686 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2687 {
2688 DEC_BOTH (from, from_byte);
2689 code = Sword;
2690 }
c5683ceb 2691 else if (SYNTAX_FLAGS_PREFIX (syntax))
8489eb67
RS
2692 continue;
2693
9828a477 2694 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2695 {
2696 case Sword:
2697 case Ssymbol:
9828a477
KH
2698 case Sescape:
2699 case Scharquote:
8489eb67 2700 if (depth || !sexpflag) break;
195d1361
RS
2701 /* This word counts as a sexp; count object finished
2702 after passing it. */
8489eb67
RS
2703 while (from > stop)
2704 {
6a140a74 2705 temp_pos = from_byte;
4b4deea2 2706 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
2707 DEC_POS (temp_pos);
2708 else
2709 temp_pos--;
6a140a74 2710 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2711 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2712 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2713 /* Don't allow comment-end to be quoted. */
2714 if (temp_code == Sendcomment)
2715 goto done2;
6a140a74 2716 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2717 if (quoted)
93da5fff 2718 {
6a140a74 2719 DEC_BOTH (from, from_byte);
ef316cf0 2720 temp_pos = dec_bytepos (temp_pos);
6a140a74 2721 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2722 }
b7dbcc19 2723 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2724 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2725 if (! (quoted || temp_code == Sword
2726 || temp_code == Ssymbol
2727 || temp_code == Squote))
8489eb67 2728 goto done2;
6a140a74 2729 DEC_BOTH (from, from_byte);
8489eb67
RS
2730 }
2731 goto done2;
2732
2733 case Smath:
2734 if (!sexpflag)
2735 break;
ef316cf0 2736 temp_pos = dec_bytepos (from_byte);
6a140a74 2737 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2738 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2739 DEC_BOTH (from, from_byte);
8489eb67
RS
2740 if (mathexit)
2741 {
2742 mathexit = 0;
2743 goto open2;
2744 }
2745 mathexit = 1;
2746
2747 case Sclose:
2748 if (!++depth) goto done2;
2749 break;
2750
2751 case Sopen:
2752 open2:
2753 if (!--depth) goto done2;
2754 if (depth < min_depth)
9471945b
KS
2755 xsignal3 (Qscan_error,
2756 build_string ("Containing expression ends prematurely"),
2757 make_number (last_good), make_number (from));
8489eb67
RS
2758 break;
2759
2760 case Sendcomment:
2761 if (!parse_sexp_ignore_comments)
2762 break;
95ff8dfc 2763 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2764 &out_charpos, &out_bytepos);
1aa963c8
SM
2765 /* FIXME: if found == -1, then it really wasn't a comment-end.
2766 For single-char Sendcomment, we can't do much about it apart
2767 from skipping the char.
2768 For 2-char endcomments, we could try again, taking both
2769 chars as separate entities, but it's a lot of trouble
2770 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2771 if (found != -1)
2772 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2773 break;
2774
195d1361
RS
2775 case Scomment_fence:
2776 case Sstring_fence:
2777 while (1)
2778 {
8b8bf8e6
SM
2779 if (from == stop)
2780 goto lose;
6b61353c 2781 DEC_BOTH (from, from_byte);
195d1361 2782 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2783 if (!char_quoted (from, from_byte)
b7dbcc19 2784 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2785 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2786 break;
2787 }
2788 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2789 break;
7d0393cf 2790
8489eb67 2791 case Sstring:
b7dbcc19 2792 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2793 while (1)
2794 {
8b8bf8e6
SM
2795 if (from == stop)
2796 goto lose;
6b61353c
KH
2797 DEC_BOTH (from, from_byte);
2798 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2799 if (!char_quoted (from, from_byte)
2800 && (stringterm
2801 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2802 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2803 break;
8489eb67 2804 }
8489eb67
RS
2805 if (!depth && sexpflag) goto done2;
2806 break;
240c43e8
SM
2807 default:
2808 /* Ignore whitespace, punctuation, quote, endcomment. */
2809 break;
8489eb67
RS
2810 }
2811 }
2812
2813 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2814 if (depth)
2815 goto lose;
8489eb67
RS
2816
2817 immediate_quit = 0;
2818 return Qnil;
2819
2820 done2:
2821 count++;
2822 }
2823
2824
2825 immediate_quit = 0;
1e142fb7 2826 XSETFASTINT (val, from);
8489eb67
RS
2827 return val;
2828
2829 lose:
9471945b
KS
2830 xsignal3 (Qscan_error,
2831 build_string ("Unbalanced parentheses"),
2832 make_number (last_good), make_number (from));
8489eb67
RS
2833}
2834
8489eb67 2835DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2836 doc: /* Scan from character number FROM by COUNT lists.
2837Returns the character number of the position thus found.
2838
2839If DEPTH is nonzero, paren depth begins counting from that value,
2840only places where the depth in parentheses becomes zero
2841are candidates for stopping; COUNT such places are counted.
2842Thus, a positive value for DEPTH means go out levels.
2843
2844Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2845
2846If the beginning or end of (the accessible part of) the buffer is reached
2847and the depth is wrong, an error is signaled.
2848If the depth is right but the count is not used up, nil is returned. */)
5842a27b 2849 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
8489eb67 2850{
b7826503
PJ
2851 CHECK_NUMBER (from);
2852 CHECK_NUMBER (count);
2853 CHECK_NUMBER (depth);
8489eb67
RS
2854
2855 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2856}
2857
2858DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2859 doc: /* Scan from character number FROM by COUNT balanced expressions.
2860If COUNT is negative, scan backwards.
2861Returns the character number of the position thus found.
2862
2863Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2864
2865If the beginning or end of (the accessible part of) the buffer is reached
2866in the middle of a parenthetical grouping, an error is signaled.
2867If the beginning or end is reached between groupings
2868but before count is used up, nil is returned. */)
5842a27b 2869 (Lisp_Object from, Lisp_Object count)
8489eb67 2870{
b7826503
PJ
2871 CHECK_NUMBER (from);
2872 CHECK_NUMBER (count);
8489eb67
RS
2873
2874 return scan_lists (XINT (from), XINT (count), 0, 1);
2875}
2876
2877DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2878 0, 0, 0,
2879 doc: /* Move point backward over any number of chars with prefix syntax.
2880This includes chars with "quote" or "prefix" syntax (' or p). */)
5842a27b 2881 (void)
8489eb67 2882{
4f3a2f8d
EZ
2883 EMACS_INT beg = BEGV;
2884 EMACS_INT opoint = PT;
2885 EMACS_INT opoint_byte = PT_BYTE;
2886 EMACS_INT pos = PT;
2887 EMACS_INT pos_byte = PT_BYTE;
93da5fff 2888 int c;
93da5fff 2889
7d0393cf 2890 if (pos <= beg)
195d1361 2891 {
f902a008
RS
2892 SET_PT_BOTH (opoint, opoint_byte);
2893
2894 return Qnil;
195d1361 2895 }
8489eb67 2896
f902a008
RS
2897 SETUP_SYNTAX_TABLE (pos, -1);
2898
6a140a74
RS
2899 DEC_BOTH (pos, pos_byte);
2900
1fd3172d 2901 while (!char_quoted (pos, pos_byte)
195d1361 2902 /* Previous statement updates syntax table. */
b7dbcc19 2903 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2904 || SYNTAX_PREFIX (c)))
2905 {
1fd3172d
RS
2906 opoint = pos;
2907 opoint_byte = pos_byte;
2908
2909 if (pos + 1 > beg)
2910 DEC_BOTH (pos, pos_byte);
93da5fff 2911 }
8489eb67 2912
6a140a74 2913 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2914
2915 return Qnil;
2916}
2917\f
6a140a74 2918/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2919 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2920 and return a description of the state of the parse at END.
c81a3712 2921 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2922 If COMMENTSTOP is 1, stop at the start of a comment.
2923 If COMMENTSTOP is -1, stop at the start or end of a comment,
2924 after the beginning of a string, or after the end of a string. */
8489eb67 2925
340f92b5 2926static void
d5a3eaaf
AS
2927scan_sexps_forward (struct lisp_parse_state *stateptr,
2928 EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2929 int targetdepth, int stopbefore,
2930 Lisp_Object oldstate, int commentstop)
8489eb67
RS
2931{
2932 struct lisp_parse_state state;
2933
2934 register enum syntaxcode code;
95ff8dfc
RS
2935 int c1;
2936 int comnested;
8489eb67
RS
2937 struct level { int last, prev; };
2938 struct level levelstart[100];
2939 register struct level *curlevel = levelstart;
2940 struct level *endlevel = levelstart + 100;
8489eb67
RS
2941 register int depth; /* Paren depth of current scanning location.
2942 level - levelstart equals this except
2943 when the depth becomes negative. */
2944 int mindepth; /* Lowest DEPTH value seen. */
2945 int start_quoted = 0; /* Nonzero means starting after a char quote */
2946 Lisp_Object tem;
8e2911c2
AS
2947 EMACS_INT prev_from; /* Keep one character before FROM. */
2948 EMACS_INT prev_from_byte;
1fd3172d 2949 int prev_from_syntax;
195d1361
RS
2950 int boundary_stop = commentstop == -1;
2951 int nofence;
95ff8dfc 2952 int found;
60c86a83 2953 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2954 int temp;
93da5fff
KH
2955
2956 prev_from = from;
6a140a74
RS
2957 prev_from_byte = from_byte;
2958 if (from != BEGV)
2959 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2960
2961 /* Use this macro instead of `from++'. */
6a140a74
RS
2962#define INC_FROM \
2963do { prev_from = from; \
2964 prev_from_byte = from_byte; \
327719ee 2965 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2966 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2967 INC_BOTH (from, from_byte); \
3ceb4629
SM
2968 if (from < end) \
2969 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2970 } while (0)
8489eb67
RS
2971
2972 immediate_quit = 1;
2973 QUIT;
2974
265a9e55 2975 if (NILP (oldstate))
8489eb67
RS
2976 {
2977 depth = 0;
2978 state.instring = -1;
2979 state.incomment = 0;
195d1361
RS
2980 state.comstyle = 0; /* comment style a by default. */
2981 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2982 }
2983 else
2984 {
2985 tem = Fcar (oldstate);
265a9e55 2986 if (!NILP (tem))
8489eb67
RS
2987 depth = XINT (tem);
2988 else
2989 depth = 0;
2990
2991 oldstate = Fcdr (oldstate);
2992 oldstate = Fcdr (oldstate);
2993 oldstate = Fcdr (oldstate);
2994 tem = Fcar (oldstate);
195d1361 2995 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2996 state.instring = (!NILP (tem)
2997 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2998 : -1);
8489eb67
RS
2999
3000 oldstate = Fcdr (oldstate);
3001 tem = Fcar (oldstate);
e76f1c44
GM
3002 state.incomment = (!NILP (tem)
3003 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 3004 : 0);
8489eb67
RS
3005
3006 oldstate = Fcdr (oldstate);
3007 tem = Fcar (oldstate);
265a9e55 3008 start_quoted = !NILP (tem);
e5d4f4dc 3009
95ff8dfc 3010 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
3011 style a. If it is non-nil, we are in comment style b */
3012 oldstate = Fcdr (oldstate);
e5d4f4dc 3013 oldstate = Fcdr (oldstate);
195d1361 3014 tem = Fcar (oldstate);
c5683ceb
SM
3015 state.comstyle = (NILP (tem)
3016 ? 0
3017 : (EQ (tem, Qsyntax_table)
3018 ? ST_COMMENT_STYLE
3019 : INTEGERP (tem) ? XINT (tem) : 1));
195d1361 3020
e5d4f4dc 3021 oldstate = Fcdr (oldstate);
e5d4f4dc 3022 tem = Fcar (oldstate);
195d1361 3023 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
3024 oldstate = Fcdr (oldstate);
3025 tem = Fcar (oldstate);
3026 while (!NILP (tem)) /* >= second enclosing sexps. */
3027 {
3028 /* curlevel++->last ran into compiler bug on Apollo */
3029 curlevel->last = XINT (Fcar (tem));
3030 if (++curlevel == endlevel)
02010917 3031 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3032 curlevel->prev = -1;
3033 curlevel->last = -1;
3034 tem = Fcdr (tem);
3035 }
8489eb67
RS
3036 }
3037 state.quoted = 0;
3038 mindepth = depth;
3039
3040 curlevel->prev = -1;
3041 curlevel->last = -1;
3042
326b283b 3043 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3044 temp = FETCH_CHAR (prev_from_byte);
3045 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3046 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3047
195d1361 3048 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3049
326b283b
RS
3050 if (state.incomment)
3051 goto startincomment;
8489eb67
RS
3052 if (state.instring >= 0)
3053 {
195d1361 3054 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3055 if (start_quoted)
3056 goto startquotedinstring;
8489eb67
RS
3057 goto startinstring;
3058 }
326b283b
RS
3059 else if (start_quoted)
3060 goto startquoted;
1fd3172d 3061
8489eb67
RS
3062 while (from < end)
3063 {
c5683ceb 3064 int syntax;
93da5fff 3065 INC_FROM;
1fd3172d 3066 code = prev_from_syntax & 0xff;
4c920633 3067
c5d4b96f
SM
3068 if (from < end
3069 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3070 && (c1 = FETCH_CHAR (from_byte),
c5683ceb
SM
3071 syntax = SYNTAX_WITH_FLAGS (c1),
3072 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
c5d4b96f
SM
3073 /* Duplicate code to avoid a complex if-expression
3074 which causes trouble for the SGI compiler. */
95ff8dfc 3075 {
c5d4b96f
SM
3076 /* Record the comment style we have entered so that only
3077 the comment-end sequence of the same style actually
3078 terminates the comment section. */
c5683ceb
SM
3079 state.comstyle
3080 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
c5d4b96f 3081 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
c5683ceb 3082 comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
c5d4b96f 3083 state.incomment = comnested ? 1 : -1;
95ff8dfc 3084 state.comstr_start = prev_from;
c5d4b96f
SM
3085 INC_FROM;
3086 code = Scomment;
95ff8dfc 3087 }
4c920633 3088 else if (code == Scomment_fence)
e5d4f4dc
RS
3089 {
3090 /* Record the comment style we have entered so that only
3091 the comment-end sequence of the same style actually
3092 terminates the comment section. */
95ff8dfc
RS
3093 state.comstyle = ST_COMMENT_STYLE;
3094 state.incomment = -1;
195d1361 3095 state.comstr_start = prev_from;
e5d4f4dc 3096 code = Scomment;
e5d4f4dc 3097 }
c5d4b96f
SM
3098 else if (code == Scomment)
3099 {
c5683ceb 3100 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
c5d4b96f
SM
3101 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3102 1 : -1);
3103 state.comstr_start = prev_from;
3104 }
e5d4f4dc 3105
1fd3172d 3106 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3107 continue;
0220c518 3108 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3109 {
3110 case Sescape:
3111 case Scharquote:
3112 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3113 curlevel->last = prev_from;
8489eb67
RS
3114 startquoted:
3115 if (from == end) goto endquoted;
93da5fff 3116 INC_FROM;
8489eb67
RS
3117 goto symstarted;
3118 /* treat following character as a word constituent */
3119 case Sword:
3120 case Ssymbol:
3121 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3122 curlevel->last = prev_from;
8489eb67
RS
3123 symstarted:
3124 while (from < end)
3125 {
7c7ca3d2 3126 /* Some compilers can't handle this inside the switch. */
327719ee 3127 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3128 temp = SYNTAX (temp);
7c7ca3d2 3129 switch (temp)
8489eb67
RS
3130 {
3131 case Scharquote:
3132 case Sescape:
93da5fff 3133 INC_FROM;
8489eb67
RS
3134 if (from == end) goto endquoted;
3135 break;
3136 case Sword:
3137 case Ssymbol:
3138 case Squote:
3139 break;
3140 default:
3141 goto symdone;
3142 }
93da5fff 3143 INC_FROM;
8489eb67
RS
3144 }
3145 symdone:
3146 curlevel->prev = curlevel->last;
3147 break;
3148
240c43e8 3149 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3150 case Scomment:
195d1361 3151 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3152 startincomment:
3153 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3154 that we find a 2-char comment ender even if we start in the
e6365855 3155 middle of it. We don't want to do that if we're just at the
d355bd8a 3156 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3157 found = forw_comment (from, from_byte, end,
3158 state.incomment, state.comstyle,
e6365855
SM
3159 (from == BEGV || from < state.comstr_start + 3)
3160 ? 0 : prev_from_syntax,
95ff8dfc
RS
3161 &out_charpos, &out_bytepos, &state.incomment);
3162 from = out_charpos; from_byte = out_bytepos;
3163 /* Beware! prev_from and friends are invalid now.
3164 Luckily, the `done' doesn't use them and the INC_FROM
3165 sets them to a sane value without looking at them. */
3166 if (!found) goto done;
d355bd8a 3167 INC_FROM;
8489eb67 3168 state.incomment = 0;
e5d4f4dc 3169 state.comstyle = 0; /* reset the comment style */
195d1361 3170 if (boundary_stop) goto done;
8489eb67
RS
3171 break;
3172
3173 case Sopen:
3174 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3175 depth++;
3176 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3177 curlevel->last = prev_from;
8489eb67 3178 if (++curlevel == endlevel)
02010917 3179 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3180 curlevel->prev = -1;
3181 curlevel->last = -1;
30844415 3182 if (targetdepth == depth) goto done;
8489eb67
RS
3183 break;
3184
3185 case Sclose:
3186 depth--;
3187 if (depth < mindepth)
3188 mindepth = depth;
3189 if (curlevel != levelstart)
3190 curlevel--;
3191 curlevel->prev = curlevel->last;
30844415 3192 if (targetdepth == depth) goto done;
8489eb67
RS
3193 break;
3194
3195 case Sstring:
195d1361
RS
3196 case Sstring_fence:
3197 state.comstr_start = from - 1;
8489eb67 3198 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3199 curlevel->last = prev_from;
7d0393cf 3200 state.instring = (code == Sstring
b7dbcc19 3201 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3202 : ST_STRING_STYLE);
3203 if (boundary_stop) goto done;
8489eb67 3204 startinstring:
195d1361 3205 {
644ea4df 3206 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3207
644ea4df
RS
3208 while (1)
3209 {
3210 int c;
195d1361 3211
644ea4df 3212 if (from >= end) goto done;
b7dbcc19 3213 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3214 /* Some compilers can't handle this inside the switch. */
3215 temp = SYNTAX (c);
ccf89641
KH
3216
3217 /* Check TEMP here so that if the char has
3218 a syntax-table property which says it is NOT
3219 a string character, it does not end the string. */
3220 if (nofence && c == state.instring && temp == Sstring)
3221 break;
3222
7c7ca3d2 3223 switch (temp)
644ea4df
RS
3224 {
3225 case Sstring_fence:
3226 if (!nofence) goto string_end;
3227 break;
3228 case Scharquote:
3229 case Sescape:
3230 INC_FROM;
3231 startquotedinstring:
3232 if (from >= end) goto endquoted;
195d1361 3233 }
644ea4df
RS
3234 INC_FROM;
3235 }
195d1361
RS
3236 }
3237 string_end:
8489eb67
RS
3238 state.instring = -1;
3239 curlevel->prev = curlevel->last;
93da5fff 3240 INC_FROM;
195d1361 3241 if (boundary_stop) goto done;
8489eb67
RS
3242 break;
3243
3244 case Smath:
240c43e8
SM
3245 /* FIXME: We should do something with it. */
3246 break;
3247 default:
3248 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3249 break;
3250 }
3251 }
3252 goto done;
3253
3254 stop: /* Here if stopping before start of sexp. */
93da5fff 3255 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3256 goto done; /* but return the position before it. */
3257
3258 endquoted:
3259 state.quoted = 1;
3260 done:
3261 state.depth = depth;
3262 state.mindepth = mindepth;
3263 state.thislevelstart = curlevel->prev;
3264 state.prevlevelstart
3265 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3266 state.location = from;
1a102a58 3267 state.levelstarts = Qnil;
eb4d412d
PE
3268 while (curlevel > levelstart)
3269 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3270 state.levelstarts);
8489eb67
RS
3271 immediate_quit = 0;
3272
e5d4f4dc 3273 *stateptr = state;
8489eb67
RS
3274}
3275
c81a3712 3276DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3277 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3278Parsing stops at TO or when certain criteria are met;
3279 point is set to where parsing stops.
3280If fifth arg OLDSTATE is omitted or nil,
3281 parsing assumes that FROM is the beginning of a function.
0e6228bc 3282Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3283 0. depth in parens.
3284 1. character address of start of innermost containing list; nil if none.
3285 2. character address of start of last complete sexp terminated.
3286 3. non-nil if inside a string.
3287 (it is the character that will terminate the string,
3288 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3289 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3290 else an integer (the current comment nesting).
3291 5. t if following a quote character.
3292 6. the minimum paren-depth encountered during this scan.
c5683ceb 3293 7. style of comment, if any.
fdb82f93
PJ
3294 8. character address of start of comment or string; nil if not in one.
3295 9. Intermediate data for continuation of parsing (subject to change).
3296If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3297in parentheses becomes equal to TARGETDEPTH.
3298Fourth arg STOPBEFORE non-nil means stop when come to
3299 any character that starts a sexp.
0e6228bc 3300Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3301 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3302 and 8 are ignored.
fdb82f93
PJ
3303Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3304 If it is symbol `syntax-table', stop after the start of a comment or a
3305 string, or after end of a comment or a string. */)
5842a27b 3306 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
8489eb67
RS
3307{
3308 struct lisp_parse_state state;
3309 int target;
3310
265a9e55 3311 if (!NILP (targetdepth))
8489eb67 3312 {
b7826503 3313 CHECK_NUMBER (targetdepth);
8489eb67
RS
3314 target = XINT (targetdepth);
3315 }
3316 else
3317 target = -100000; /* We won't reach this depth */
3318
3319 validate_region (&from, &to);
6a140a74
RS
3320 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3321 XINT (to),
c81a3712 3322 target, !NILP (stopbefore), oldstate,
7d0393cf 3323 (NILP (commentstop)
195d1361 3324 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3325
3326 SET_PT (state.location);
7d0393cf 3327
8489eb67 3328 return Fcons (make_number (state.depth),
c5683ceb
SM
3329 Fcons (state.prevlevelstart < 0
3330 ? Qnil : make_number (state.prevlevelstart),
3331 Fcons (state.thislevelstart < 0
3332 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3333 Fcons (state.instring >= 0
3334 ? (state.instring == ST_STRING_STYLE
195d1361 3335 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3336 Fcons (state.incomment < 0 ? Qt :
3337 (state.incomment == 0 ? Qnil :
3338 make_number (state.incomment)),
8489eb67 3339 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3340 Fcons (make_number (state.mindepth),
7d0393cf 3341 Fcons ((state.comstyle
bff37d2a 3342 ? (state.comstyle == ST_COMMENT_STYLE
c5683ceb
SM
3343 ? Qsyntax_table
3344 : make_number (state.comstyle))
3345 : Qnil),
0beaf54f
DL
3346 Fcons (((state.incomment
3347 || (state.instring >= 0))
bff37d2a
RS
3348 ? make_number (state.comstr_start)
3349 : Qnil),
1a102a58 3350 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3351}
3352\f
dfcf069d 3353void
971de7fb 3354init_syntax_once (void)
8489eb67 3355{
78f9a1f7 3356 register int i, c;
8ea151b2 3357 Lisp_Object temp;
8489eb67 3358
5ebaddf5 3359 /* This has to be done here, before we call Fmake_char_table. */
d67b4f80 3360 Qsyntax_table = intern_c_string ("syntax-table");
5ebaddf5
RS
3361 staticpro (&Qsyntax_table);
3362
d67b4f80 3363 /* Intern_C_String this now in case it isn't already done.
5ebaddf5
RS
3364 Setting this variable twice is harmless.
3365 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 3366 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3367
93da5fff 3368 /* Create objects which can be shared among syntax tables. */
42ebfa31 3369 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3370 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3371 XVECTOR (Vsyntax_code_object)->contents[i]
3372 = Fcons (make_number (i), Qnil);
3373
5ebaddf5
RS
3374 /* Now we are ready to set up this property, so we can
3375 create syntax tables. */
3376 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3377
93da5fff 3378 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3379
5ebaddf5 3380 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3381
aa7b08b4
RS
3382 /* Control characters should not be whitespace. */
3383 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3384 for (i = 0; i <= ' ' - 1; i++)
3385 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3386 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3387
3388 /* Except that a few really are whitespace. */
3389 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3390 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3391 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3392 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3393 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3394 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3395
93da5fff 3396 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3397 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3398 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3399 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3400 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3401 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3402 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3403
3404 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3405 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3406
3407 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3408 Fcons (make_number (Sopen), make_number (')')));
3409 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3410 Fcons (make_number (Sclose), make_number ('(')));
3411 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3412 Fcons (make_number (Sopen), make_number (']')));
3413 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3414 Fcons (make_number (Sclose), make_number ('[')));
3415 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3416 Fcons (make_number (Sopen), make_number ('}')));
3417 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3418 Fcons (make_number (Sclose), make_number ('{')));
3419 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3420 Fcons (make_number ((int) Sstring), Qnil));
3421 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3422 Fcons (make_number ((int) Sescape), Qnil));
3423
93da5fff 3424 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3425 for (i = 0; i < 10; i++)
78f9a1f7
KH
3426 {
3427 c = "_-+*/&|<>="[i];
3428 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3429 }
8489eb67 3430
93da5fff 3431 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3432 for (i = 0; i < 12; i++)
78f9a1f7
KH
3433 {
3434 c = ".,;:?!#@~^'`"[i];
3435 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3436 }
bd25db08
KH
3437
3438 /* All multibyte characters have syntax `word' by default. */
3439 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3440 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3441}
3442
dfcf069d 3443void
971de7fb 3444syms_of_syntax (void)
8489eb67 3445{
d67b4f80 3446 Qsyntax_table_p = intern_c_string ("syntax-table-p");
8489eb67
RS
3447 staticpro (&Qsyntax_table_p);
3448
93da5fff
KH
3449 staticpro (&Vsyntax_code_object);
3450
a2994c46
KS
3451 staticpro (&gl_state.object);
3452 staticpro (&gl_state.global_code);
3453 staticpro (&gl_state.current_syntax_table);
3454 staticpro (&gl_state.old_prop);
3455
3456 /* Defined in regex.c */
3457 staticpro (&re_match_object);
3458
d67b4f80 3459 Qscan_error = intern_c_string ("scan-error");
7bf5e9e4
RS
3460 staticpro (&Qscan_error);
3461 Fput (Qscan_error, Qerror_conditions,
d67b4f80 3462 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
7bf5e9e4 3463 Fput (Qscan_error, Qerror_message,
d67b4f80 3464 make_pure_c_string ("Scan error"));
7bf5e9e4 3465
29208e82 3466 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
fdb82f93 3467 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3468
29208e82 3469 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
fdb82f93
PJ
3470 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3471Otherwise, that text property is simply ignored.
3472See the info node `(elisp)Syntax Properties' for a description of the
3473`syntax-table' property. */);
195d1361 3474
8489eb67 3475 words_include_escapes = 0;
29208e82 3476 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
fdb82f93 3477 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3478
29208e82 3479 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
fdb82f93 3480 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3481 multibyte_syntax_as_symbol = 0;
3482
f4ed767f 3483 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
29208e82 3484 open_paren_in_column_0_is_defun_start,
b222e415 3485 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3486 open_paren_in_column_0_is_defun_start = 1;
3487
8f924df7
KH
3488
3489 DEFVAR_LISP ("find-word-boundary-function-table",
29208e82 3490 Vfind_word_boundary_function_table,
869bb237 3491 doc: /*
8f924df7 3492Char table of functions to search for the word boundary.
869bb237
KH
3493Each function is called with two arguments; POS and LIMIT.
3494POS and LIMIT are character positions in the current buffer.
3495
3496If POS is less than LIMIT, POS is at the first character of a word,
3497and the return value of a function is a position after the last
3498character of that word.
3499
3500If POS is not less than LIMIT, POS is at the last character of a word,
3501and the return value of a function is a position at the first
3502character of that word.
3503
3504In both cases, LIMIT bounds the search. */);
8f924df7 3505 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3506
8489eb67
RS
3507 defsubr (&Ssyntax_table_p);
3508 defsubr (&Ssyntax_table);
3509 defsubr (&Sstandard_syntax_table);
3510 defsubr (&Scopy_syntax_table);
3511 defsubr (&Sset_syntax_table);
3512 defsubr (&Schar_syntax);
beefa22e 3513 defsubr (&Smatching_paren);
c65adb44 3514 defsubr (&Sstring_to_syntax);
8489eb67 3515 defsubr (&Smodify_syntax_entry);
62abe9cb 3516 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3517
3518 defsubr (&Sforward_word);
3519
195d1361
RS
3520 defsubr (&Sskip_chars_forward);
3521 defsubr (&Sskip_chars_backward);
3522 defsubr (&Sskip_syntax_forward);
3523 defsubr (&Sskip_syntax_backward);
3524
b3cfe0c8 3525 defsubr (&Sforward_comment);
8489eb67
RS
3526 defsubr (&Sscan_lists);
3527 defsubr (&Sscan_sexps);
3528 defsubr (&Sbackward_prefix_chars);
3529 defsubr (&Sparse_partial_sexp);
3530}