Declare Lisp_Object Q* variables to be 'static' if not exproted.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
73b0cd50 2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2011
429ab54e 3 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
8489eb67
RS
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
19
20
18160b98 21#include <config.h>
8489eb67 22#include <ctype.h>
d7306fe6 23#include <setjmp.h>
8489eb67
RS
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
5c7b02ab 27#include "character.h"
e35f6ff7 28#include "keymap.h"
a1bc88d4 29#include "regex.h"
195d1361
RS
30
31/* Make syntax table lookup grant data in gl_state. */
32#define SYNTAX_ENTRY_VIA_PROPERTY
33
8489eb67 34#include "syntax.h"
195d1361 35#include "intervals.h"
c5683ceb
SM
36#include "category.h"
37
38/* Then there are seven single-bit flags that have the following meanings:
39 1. This character is the first of a two-character comment-start sequence.
40 2. This character is the second of a two-character comment-start sequence.
41 3. This character is the first of a two-character comment-end sequence.
42 4. This character is the second of a two-character comment-end sequence.
43 5. This character is a prefix, for backward-prefix-chars.
44 6. The char is part of a delimiter for comments of style "b".
45 7. This character is part of a nestable comment sequence.
46 8. The char is part of a delimiter for comments of style "c".
47 Note that any two-character sequence whose first character has flag 1
48 and whose second character has flag 2 will be interpreted as a comment start.
49
50 bit 6 and 8 are used to discriminate between different comment styles.
51 Languages such as C++ allow two orthogonal syntax start/end pairs
52 and bit 6 is used to determine whether a comment-end or Scommentend
53 ends style a or b. Comment markers can start style a, b, c, or bc.
54 Style a is always the default.
55 For 2-char comment markers, the style b flag is only looked up on the second
56 char of the comment marker and on the first char of the comment ender.
57 For style c (like to for the nested flag), the flag can be placed on any
58 one of the chars.
59 */
60
61/* These macros extract specific flags from an integer
62 that holds the syntax code and the flags. */
63
64#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
65
66#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
67
68#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
69
70#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
71
72#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
73
74#define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
75#define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
76/* FLAGS should be the flags of the main char of the comment marker, e.g.
77 the second for comstart and the first for comend. */
78#define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
79 (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
80 | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
81 | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
82
83#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
84
85/* These macros extract a particular flag for a given character. */
86
87#define SYNTAX_COMEND_FIRST(c) \
88 (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
89#define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
195d1361
RS
90
91/* We use these constants in place for comment-style and
92 string-ender-char to distinguish comments/strings started by
93 comment_fence and string_fence codes. */
94
95#define ST_COMMENT_STYLE (256 + 1)
96#define ST_STRING_STYLE (256 + 2)
8489eb67 97
955cbe7b
PE
98static Lisp_Object Qsyntax_table_p;
99static Lisp_Object Qsyntax_table, Qscan_error;
8489eb67 100
8ea151b2
RS
101/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
102 if not compiled with GCC. No need to mark it, since it is used
103 only very temporarily. */
104Lisp_Object syntax_temp;
105
e5d4f4dc
RS
106/* This is the internal form of the parse state used in parse-partial-sexp. */
107
108struct lisp_parse_state
109 {
6449674e
SM
110 int depth; /* Depth at end of parsing. */
111 int instring; /* -1 if not within string, else desired terminator. */
112 int incomment; /* -1 if in unnestable comment else comment nesting */
113 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
114 int quoted; /* Nonzero if just after an escape char at end of parsing */
115 int mindepth; /* Minimum depth seen while scanning. */
116 /* Char number of most recent start-of-expression at current level */
117 EMACS_INT thislevelstart;
118 /* Char number of start of containing expression */
119 EMACS_INT prevlevelstart;
120 EMACS_INT location; /* Char number at which parsing stopped. */
121 EMACS_INT comstr_start; /* Position of last comment/string starter. */
122 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
123 of levels (starting from outermost). */
e5d4f4dc
RS
124 };
125\f
37bef230
RS
126/* These variables are a cache for finding the start of a defun.
127 find_start_pos is the place for which the defun start was found.
128 find_start_value is the defun start position found for it.
6a140a74 129 find_start_value_byte is the corresponding byte position.
37bef230
RS
130 find_start_buffer is the buffer it was found in.
131 find_start_begv is the BEGV value when it was found.
132 find_start_modiff is the value of MODIFF when it was found. */
133
8e2911c2
AS
134static EMACS_INT find_start_pos;
135static EMACS_INT find_start_value;
136static EMACS_INT find_start_value_byte;
37bef230 137static struct buffer *find_start_buffer;
8e2911c2 138static EMACS_INT find_start_begv;
37bef230 139static int find_start_modiff;
6a140a74
RS
140
141
16a97296 142INFUN (Fsyntax_table_p, 1);
f57e2426
J
143static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
144static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
145static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
146static void scan_sexps_forward (struct lisp_parse_state *,
147 EMACS_INT, EMACS_INT, EMACS_INT, int,
148 int, Lisp_Object, int);
149static int in_classes (int, Lisp_Object);
195d1361 150\f
c5683ceb
SM
151/* Whether the syntax of the character C has the prefix flag set. */
152int syntax_prefix_flag_p (int c)
153{
154 return SYNTAX_PREFIX (c);
155}
195d1361
RS
156
157struct gl_state_s gl_state; /* Global state of syntax parser. */
158
195d1361
RS
159#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
160 to scan to property-change. */
161
6a140a74
RS
162/* Update gl_state to an appropriate interval which contains CHARPOS. The
163 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 164 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 165 valid at start, the rest is filled basing on OBJECT.
195d1361 166
6a140a74 167 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
168 direction than the intervals - or in an interval. We update the
169 current syntax-table basing on the property of this interval, and
6a140a74 170 update the interval to start further than CHARPOS - or be
195d1361 171 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 172 charpos to call this subroutine again - or be before/after the
195d1361
RS
173 start/end of OBJECT. */
174
175void
4f3a2f8d
EZ
176update_syntax_table (EMACS_INT charpos, int count, int init,
177 Lisp_Object object)
195d1361
RS
178{
179 Lisp_Object tmp_table;
12cbf13f
PE
180 unsigned cnt = 0;
181 int invalidate = 1;
5a774522 182 INTERVAL i;
195d1361
RS
183
184 if (init)
185 {
bb0de084 186 gl_state.old_prop = Qnil;
195d1361
RS
187 gl_state.start = gl_state.b_property;
188 gl_state.stop = gl_state.e_property;
bb0de084
SM
189 i = interval_of (charpos, object);
190 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
191 invalidate = 0;
192 if (NULL_INTERVAL_P (i))
193 return;
f902a008 194 /* interval_of updates only ->position of the return value, so
d80f4cc7 195 update the parents manually to speed up update_interval. */
bb0de084 196 while (!NULL_PARENT (i))
d80f4cc7
RS
197 {
198 if (AM_RIGHT_CHILD (i))
439d5cb4 199 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 200 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
201 - TOTAL_LENGTH (INTERVAL_PARENT (i))
202 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 203 else
439d5cb4 204 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 205 + TOTAL_LENGTH (i);
439d5cb4 206 i = INTERVAL_PARENT (i);
d80f4cc7
RS
207 }
208 i = gl_state.forward_i;
bb0de084 209 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 210 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
211 goto update;
212 }
5a774522 213 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 214
423e705d 215 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 216 or further off. */
195d1361
RS
217 if (NULL_INTERVAL_P (i))
218 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 219 else if (charpos < i->position) /* Move left. */
195d1361
RS
220 {
221 if (count > 0)
f902a008 222 error ("Error in syntax_table logic for intervals <-");
195d1361 223 /* Update the interval. */
6a140a74 224 i = update_interval (i, charpos);
bb0de084 225 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
226 {
227 invalidate = 0;
195d1361 228 gl_state.forward_i = i;
ee0cdb48 229 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 230 }
423e705d 231 }
6a140a74 232 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
233 {
234 if (count < 0)
f902a008 235 error ("Error in syntax_table logic for intervals ->");
195d1361 236 /* Update the interval. */
6a140a74 237 i = update_interval (i, charpos);
bb0de084 238 if (i->position != gl_state.e_property)
195d1361
RS
239 {
240 invalidate = 0;
195d1361 241 gl_state.backward_i = i;
bb0de084 242 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
243 }
244 }
195d1361
RS
245
246 update:
247 tmp_table = textget (i->plist, Qsyntax_table);
248
249 if (invalidate)
250 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 251
423e705d
SM
252 if (invalidate) /* Did not get to adjacent interval. */
253 { /* with the same table => */
254 /* invalidate the old range. */
195d1361
RS
255 if (count > 0)
256 {
257 gl_state.backward_i = i;
bb0de084
SM
258 gl_state.b_property = i->position - gl_state.offset;
259 }
260 else
195d1361 261 {
bb0de084 262 gl_state.forward_i = i;
ee0cdb48 263 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
264 }
265 }
37bef230 266
bb0de084 267 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 268 {
bb0de084
SM
269 gl_state.current_syntax_table = tmp_table;
270 gl_state.old_prop = tmp_table;
271 if (EQ (Fsyntax_table_p (tmp_table), Qt))
272 {
273 gl_state.use_global = 0;
7d0393cf 274 }
bb0de084
SM
275 else if (CONSP (tmp_table))
276 {
277 gl_state.use_global = 1;
278 gl_state.global_code = tmp_table;
279 }
7d0393cf 280 else
bb0de084
SM
281 {
282 gl_state.use_global = 0;
4b4deea2 283 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
bb0de084 284 }
195d1361
RS
285 }
286
287 while (!NULL_INTERVAL_P (i))
288 {
289 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
290 {
291 if (count > 0)
bb0de084
SM
292 {
293 gl_state.e_property = i->position - gl_state.offset;
294 gl_state.forward_i = i;
295 }
296 else
297 {
5a774522
SM
298 gl_state.b_property
299 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
300 gl_state.backward_i = i;
301 }
302 return;
195d1361 303 }
7d0393cf 304 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
305 {
306 if (count > 0)
bb0de084 307 {
5a774522
SM
308 gl_state.e_property
309 = i->position + LENGTH (i) - gl_state.offset
310 /* e_property at EOB is not set to ZV but to ZV+1, so that
311 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
312 having to check eob between the two. */
313 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
314 gl_state.forward_i = i;
315 }
316 else
317 {
318 gl_state.b_property = i->position - gl_state.offset;
319 gl_state.backward_i = i;
320 }
321 return;
195d1361
RS
322 }
323 cnt++;
324 i = count > 0 ? next_interval (i) : previous_interval (i);
325 }
bb0de084
SM
326 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
327 if (count > 0)
328 gl_state.e_property = gl_state.stop;
329 else
330 gl_state.b_property = gl_state.start;
195d1361
RS
331}
332\f
6a140a74
RS
333/* Returns TRUE if char at CHARPOS is quoted.
334 Global syntax-table data should be set up already to be good at CHARPOS
335 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
336
337static int
6449674e 338char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
195d1361
RS
339{
340 register enum syntaxcode code;
6449674e 341 register EMACS_INT beg = BEGV;
195d1361 342 register int quoted = 0;
6449674e 343 EMACS_INT orig = charpos;
6a140a74 344
02d8b017 345 while (charpos > beg)
195d1361 346 {
ab229fdd 347 int c;
02d8b017 348 DEC_BOTH (charpos, bytepos);
ab229fdd 349
6a140a74 350 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 351 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 352 code = SYNTAX (c);
6a140a74
RS
353 if (! (code == Scharquote || code == Sescape))
354 break;
355
6a140a74 356 quoted = !quoted;
195d1361 357 }
6a140a74
RS
358
359 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
360 return quoted;
361}
6a140a74 362
6a140a74
RS
363/* Return the bytepos one character before BYTEPOS.
364 We assume that BYTEPOS is not at the start of the buffer. */
365
b6557553 366static INLINE EMACS_INT
971de7fb 367dec_bytepos (EMACS_INT bytepos)
6a140a74 368{
4b4deea2 369 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
370 return bytepos - 1;
371
6a140a74
RS
372 DEC_POS (bytepos);
373 return bytepos;
374}
195d1361 375\f
2b34df4e 376/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
377 It should be the last one before POS, or nearly the last.
378
379 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 380 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
381
382 We record the information about where the scan started
383 and what its result was, so that another call in the same area
384 can return the same value very quickly.
195d1361
RS
385
386 There is no promise at which position the global syntax data is
387 valid on return from the subroutine, so the caller should explicitly
388 update the global data. */
37bef230 389
6449674e 390static EMACS_INT
971de7fb 391find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
37bef230 392{
8e2911c2 393 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
37bef230 394
1fd1cc2f
RS
395 if (!open_paren_in_column_0_is_defun_start)
396 {
397 find_start_value_byte = BEGV_BYTE;
398 return BEGV;
399 }
400
37bef230
RS
401 /* Use previous finding, if it's valid and applies to this inquiry. */
402 if (current_buffer == find_start_buffer
403 /* Reuse the defun-start even if POS is a little farther on.
404 POS might be in the next defun, but that's ok.
405 Our value may not be the best possible, but will still be usable. */
406 && pos <= find_start_pos + 1000
407 && pos >= find_start_value
408 && BEGV == find_start_begv
409 && MODIFF == find_start_modiff)
410 return find_start_value;
411
412 /* Back up to start of line. */
6a140a74 413 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 414
195d1361
RS
415 /* We optimize syntax-table lookup for rare updates. Thus we accept
416 only those `^\s(' which are good in global _and_ text-property
417 syntax-tables. */
d48cd3f4 418 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 419 while (PT > BEGV)
37bef230 420 {
ab229fdd
AS
421 int c;
422
1fd1cc2f
RS
423 /* Open-paren at start of line means we may have found our
424 defun-start. */
327719ee 425 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 426 if (SYNTAX (c) == Sopen)
195d1361 427 {
1fd1cc2f 428 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 429 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 430 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
431 break;
432 /* Now fallback to the default value. */
d48cd3f4 433 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 434 }
1fd1cc2f
RS
435 /* Move to beg of previous line. */
436 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
437 }
438
439 /* Record what we found, for the next try. */
6a140a74
RS
440 find_start_value = PT;
441 find_start_value_byte = PT_BYTE;
37bef230
RS
442 find_start_buffer = current_buffer;
443 find_start_modiff = MODIFF;
444 find_start_begv = BEGV;
445 find_start_pos = pos;
446
6a140a74
RS
447 TEMP_SET_PT_BOTH (opoint, opoint_byte);
448
37bef230
RS
449 return find_start_value;
450}
451\f
f902a008
RS
452/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
453
454static int
4f3a2f8d 455prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
f902a008
RS
456{
457 int c, val;
458
459 DEC_BOTH (pos, pos_byte);
460 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
461 c = FETCH_CHAR (pos_byte);
462 val = SYNTAX_COMEND_FIRST (c);
463 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
464 return val;
465}
466
467/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
468
3f679f55
SM
469/* static int
470 * prev_char_comstart_first (pos, pos_byte)
471 * int pos, pos_byte;
472 * {
473 * int c, val;
7d0393cf 474 *
3f679f55
SM
475 * DEC_BOTH (pos, pos_byte);
476 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
477 * c = FETCH_CHAR (pos_byte);
478 * val = SYNTAX_COMSTART_FIRST (c);
479 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
480 * return val;
481 * } */
f902a008 482
6a140a74
RS
483/* Checks whether charpos FROM is at the end of a comment.
484 FROM_BYTE is the bytepos corresponding to FROM.
485 Do not move back before STOP.
486
487 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
488 return -1 otherwise.
489
490 If successful, store the charpos of the comment's beginning
491 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
492
493 Global syntax data remains valid for backward search starting at
494 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
495
496static int
971de7fb 497back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
195d1361
RS
498{
499 /* Look back, counting the parity of string-quotes,
500 and recording the comment-starters seen.
501 When we reach a safe place, assume that's not in a string;
502 then step the main scan to the earliest comment-starter seen
503 an even number of string quotes away from the safe place.
504
505 OFROM[I] is position of the earliest comment-starter seen
506 which is I+2X quotes from the comment-end.
507 PARITY is current parity of quotes from the comment end. */
3ee5041c 508 int string_style = -1; /* Presumed outside of any string. */
195d1361 509 int string_lossage = 0;
f7c436c1 510 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 511 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
512 comment-starter we might see later could be a false positive (hidden
513 inside another comment).
514 Test case: { a (* b } c (* d *) */
515 int comment_lossage = 0;
8e2911c2
AS
516 EMACS_INT comment_end = from;
517 EMACS_INT comment_end_byte = from_byte;
518 EMACS_INT comstart_pos = 0;
4f63c6bb 519 EMACS_INT comstart_byte IF_LINT (= 0);
eb35b628
RS
520 /* Place where the containing defun starts,
521 or 0 if we didn't come across it yet. */
8e2911c2
AS
522 EMACS_INT defun_start = 0;
523 EMACS_INT defun_start_byte = 0;
195d1361 524 register enum syntaxcode code;
95ff8dfc 525 int nesting = 1; /* current comment nesting */
ea315ed6 526 int c;
3f679f55
SM
527 int syntax = 0;
528
529 /* FIXME: A }} comment-ender style leads to incorrect behavior
530 in the case of {{ c }}} because we ignore the last two chars which are
531 assumed to be comment-enders although they aren't. */
195d1361
RS
532
533 /* At beginning of range to scan, we're outside of strings;
534 that determines quote parity to the comment-end. */
535 while (from != stop)
536 {
4f3a2f8d
EZ
537 EMACS_INT temp_byte;
538 int prev_syntax, com2start, com2end;
fbb3da77 539 int comstart;
6a140a74 540
195d1361 541 /* Move back and examine a character. */
6a140a74 542 DEC_BOTH (from, from_byte);
195d1361
RS
543 UPDATE_SYNTAX_TABLE_BACKWARD (from);
544
3f679f55 545 prev_syntax = syntax;
b7dbcc19 546 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 547 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
548 code = SYNTAX (c);
549
3f679f55
SM
550 /* Check for 2-char comment markers. */
551 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
552 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
c5683ceb
SM
553 && (comstyle
554 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
3f679f55
SM
555 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
556 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
557 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
558 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
fbb3da77 559 comstart = (com2start || code == Scomment);
01f44d5a 560
3f679f55
SM
561 /* Nasty cases with overlapping 2-char comment markers:
562 - snmp-mode: -- c -- foo -- c --
563 --- c --
564 ------ c --
565 - c-mode: *||*
566 |* *|* *|
567 |*| |* |*|
568 /// */
569
570 /* If a 2-char comment sequence partly overlaps with another,
fbb3da77
SM
571 we don't try to be clever. E.g. |*| in C, or }% in modes that
572 have %..\n and %{..}%. */
573 if (from > stop && (com2end || comstart))
195d1361 574 {
4f3a2f8d
EZ
575 EMACS_INT next = from, next_byte = from_byte;
576 int next_c, next_syntax;
3f679f55
SM
577 DEC_BOTH (next, next_byte);
578 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 579 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55 580 next_syntax = SYNTAX_WITH_FLAGS (next_c);
fbb3da77 581 if (((comstart || comnested)
3f679f55
SM
582 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
583 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
584 || ((com2end || comnested)
585 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
c5683ceb
SM
586 && (comstyle
587 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
3f679f55
SM
588 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
589 goto lossage;
590 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 591 }
3f679f55
SM
592
593 if (com2start && comstart_pos == 0)
594 /* We're looking at a comment starter. But it might be a comment
595 ender as well (see snmp-mode). The first time we see one, we
596 need to consider it as a comment starter,
597 and the subsequent times as a comment ender. */
598 com2end = 0;
599
600 /* Turn a 2-char comment sequences into the appropriate syntax. */
601 if (com2end)
602 code = Sendcomment;
603 else if (com2start)
604 code = Scomment;
605 /* Ignore comment starters of a different style. */
606 else if (code == Scomment
c5683ceb 607 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
3f679f55 608 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 609 continue;
195d1361 610
9828a477
KH
611 /* Ignore escaped characters, except comment-enders. */
612 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
613 continue;
614
3ee5041c 615 switch (code)
195d1361 616 {
3ee5041c
SM
617 case Sstring_fence:
618 case Scomment_fence:
619 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
620 case Sstring:
621 /* Track parity of quotes. */
622 if (string_style == -1)
623 /* Entering a string. */
624 string_style = c;
625 else if (string_style == c)
626 /* Leaving the string. */
627 string_style = -1;
628 else
629 /* If we have two kinds of string delimiters.
630 There's no way to grok this scanning backwards. */
195d1361 631 string_lossage = 1;
3ee5041c 632 break;
7d0393cf 633
3ee5041c
SM
634 case Scomment:
635 /* We've already checked that it is the relevant comstyle. */
f7c436c1 636 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
637 /* There are odd string quotes involved, so let's be careful.
638 Test case in Pascal: " { " a { " } */
639 goto lossage;
640
f7c436c1
SM
641 if (!comnested)
642 {
643 /* Record best comment-starter so far. */
644 comstart_pos = from;
645 comstart_byte = from_byte;
646 }
647 else if (--nesting <= 0)
95ff8dfc
RS
648 /* nested comments have to be balanced, so we don't need to
649 keep looking for earlier ones. We use here the same (slightly
650 incorrect) reasoning as below: since it is followed by uniform
651 paired string quotes, this comment-start has to be outside of
652 strings, else the comment-end itself would be inside a string. */
653 goto done;
3ee5041c
SM
654 break;
655
02010917 656 case Sendcomment:
c5683ceb 657 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
d070eb22 658 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 659 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
660 /* This is the same style of comment ender as ours. */
661 {
662 if (comnested)
663 nesting++;
664 else
665 /* Anything before that can't count because it would match
666 this comment-ender rather than ours. */
667 from = stop; /* Break out of the loop. */
668 }
f7c436c1
SM
669 else if (comstart_pos != 0 || c != '\n')
670 /* We're mixing comment styles here, so we'd better be careful.
671 The (comstart_pos != 0 || c != '\n') check is not quite correct
672 (we should just always set comment_lossage), but removing it
673 would imply that any multiline comment in C would go through
674 lossage, which seems overkill.
675 The failure should only happen in the rare cases such as
676 { (* } *) */
677 comment_lossage = 1;
02010917 678 break;
195d1361 679
02010917
SM
680 case Sopen:
681 /* Assume a defun-start point is outside of strings. */
682 if (open_paren_in_column_0_is_defun_start
683 && (from == stop
684 || (temp_byte = dec_bytepos (from_byte),
685 FETCH_CHAR (temp_byte) == '\n')))
686 {
687 defun_start = from;
688 defun_start_byte = from_byte;
689 from = stop; /* Break out of the loop. */
690 }
eb35b628 691 break;
02010917
SM
692
693 default:
f7c436c1 694 break;
eb35b628 695 }
195d1361
RS
696 }
697
698 if (comstart_pos == 0)
699 {
700 from = comment_end;
6a140a74 701 from_byte = comment_end_byte;
195d1361
RS
702 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
703 }
f7c436c1
SM
704 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
705 or `done'), then we've found the beginning of the non-nested comment. */
706 else if (1) /* !comnested */
195d1361
RS
707 {
708 from = comstart_pos;
6a140a74 709 from_byte = comstart_byte;
789f3320 710 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
711 }
712 else
713 {
3ee5041c
SM
714 struct lisp_parse_state state;
715 lossage:
195d1361
RS
716 /* We had two kinds of string delimiters mixed up
717 together. Decode this going forwards.
02010917 718 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
719 to the one in question; this records where we
720 last passed a comment starter. */
eb35b628
RS
721 /* If we did not already find the defun start, find it now. */
722 if (defun_start == 0)
723 {
724 defun_start = find_defun_start (comment_end, comment_end_byte);
725 defun_start_byte = find_start_value_byte;
726 }
02010917 727 do
195d1361 728 {
02010917
SM
729 scan_sexps_forward (&state,
730 defun_start, defun_start_byte,
731 comment_end, -10000, 0, Qnil, 0);
732 defun_start = comment_end;
733 if (state.incomment == (comnested ? 1 : -1)
734 && state.comstyle == comstyle)
735 from = state.comstr_start;
736 else
737 {
738 from = comment_end;
739 if (state.incomment)
740 /* If comment_end is inside some other comment, maybe ours
741 is nested, so we need to try again from within the
742 surrounding comment. Example: { a (* " *) */
743 {
744 /* FIXME: We should advance by one or two chars. */
745 defun_start = state.comstr_start + 2;
746 defun_start_byte = CHAR_TO_BYTE (defun_start);
747 }
748 }
749 } while (defun_start < comment_end);
750
6a140a74 751 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
752 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
753 }
7d0393cf 754
95ff8dfc 755 done:
6a140a74
RS
756 *charpos_ptr = from;
757 *bytepos_ptr = from_byte;
758
1aa963c8 759 return (from == comment_end) ? -1 : from;
195d1361
RS
760}
761\f
8489eb67 762DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
763 doc: /* Return t if OBJECT is a syntax table.
764Currently, any char-table counts as a syntax table. */)
5842a27b 765 (Lisp_Object object)
8489eb67 766{
2203e1e8 767 if (CHAR_TABLE_P (object)
e704cb4b 768 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
769 return Qt;
770 return Qnil;
771}
772
8ea151b2 773static void
971de7fb 774check_syntax_table (Lisp_Object obj)
8489eb67 775{
47f5f6ae
KS
776 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
777 Qsyntax_table_p, obj);
7d0393cf 778}
8489eb67 779
8489eb67 780DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
781 doc: /* Return the current syntax table.
782This is the one specified by the current buffer. */)
5842a27b 783 (void)
8489eb67 784{
4b4deea2 785 return BVAR (current_buffer, syntax_table);
8489eb67
RS
786}
787
788DEFUN ("standard-syntax-table", Fstandard_syntax_table,
789 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
790 doc: /* Return the standard syntax table.
791This is the one used for new buffers. */)
5842a27b 792 (void)
8489eb67
RS
793{
794 return Vstandard_syntax_table;
795}
796
797DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
798 doc: /* Construct a new syntax table and return it.
799It is a copy of the TABLE, which defaults to the standard syntax table. */)
5842a27b 800 (Lisp_Object table)
8489eb67 801{
8ea151b2
RS
802 Lisp_Object copy;
803
265a9e55 804 if (!NILP (table))
8ea151b2
RS
805 check_syntax_table (table);
806 else
807 table = Vstandard_syntax_table;
808
809 copy = Fcopy_sequence (table);
0f867324
RS
810
811 /* Only the standard syntax table should have a default element.
812 Other syntax tables should inherit from parents instead. */
813 XCHAR_TABLE (copy)->defalt = Qnil;
814
815 /* Copied syntax tables should all have parents.
816 If we copied one with no parent, such as the standard syntax table,
817 use the standard syntax table as the copy's parent. */
818 if (NILP (XCHAR_TABLE (copy)->parent))
819 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 820 return copy;
8489eb67
RS
821}
822
823DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
824 doc: /* Select a new syntax table for the current buffer.
825One argument, a syntax table. */)
5842a27b 826 (Lisp_Object table)
8489eb67 827{
4e4dfa78 828 int idx;
8ea151b2 829 check_syntax_table (table);
4b4deea2 830 BVAR (current_buffer, syntax_table) = table;
8489eb67 831 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
832 idx = PER_BUFFER_VAR_IDX (syntax_table);
833 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
834 return table;
835}
836\f
837/* Convert a letter which signifies a syntax code
838 into the code it signifies.
195d1361 839 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
840
841unsigned char syntax_spec_code[0400] =
842 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
843 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
844 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
845 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 846 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
847 (char) Smath, 0377, 0377, (char) Squote,
848 (char) Sopen, (char) Sclose, 0377, 0377,
849 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
850 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
851 0377, 0377, 0377, 0377,
852 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 853 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
854 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
855 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
856 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
857 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
858 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
859 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 860 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
861 };
862
195d1361 863/* Indexed by syntax code, give the letter that describes it. */
8489eb67 864
195d1361 865char syntax_code_spec[16] =
8489eb67 866 {
195d1361
RS
867 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
868 '!', '|'
8489eb67 869 };
93da5fff
KH
870
871/* Indexed by syntax code, give the object (cons of syntax code and
872 nil) to be stored in syntax table. Since these objects can be
873 shared among syntax tables, we generate them in advance. By
874 sharing objects, the function `describe-syntax' can give a more
875 compact listing. */
876static Lisp_Object Vsyntax_code_object;
877
8489eb67
RS
878\f
879DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 880 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
881For example, if CHARACTER is a word constituent, the
882character `w' (119) is returned.
fdb82f93
PJ
883The characters that correspond to various syntax codes
884are listed in the documentation of `modify-syntax-entry'. */)
5842a27b 885 (Lisp_Object character)
8489eb67 886{
8ea151b2 887 int char_int;
774b9a60 888 CHECK_CHARACTER (character);
2203e1e8 889 char_int = XINT (character);
d48cd3f4 890 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 891 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
892}
893
894DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93 895 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
5842a27b 896 (Lisp_Object character)
beefa22e 897{
8ea151b2 898 int char_int, code;
b7826503 899 CHECK_NUMBER (character);
2203e1e8 900 char_int = XINT (character);
d48cd3f4 901 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 902 code = SYNTAX (char_int);
a8bd7cd8 903 if (code == Sopen || code == Sclose)
2e34157c 904 return SYNTAX_MATCH (char_int);
beefa22e 905 return Qnil;
8489eb67
RS
906}
907
c65adb44 908DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
909 doc: /* Convert a syntax specification STRING into syntax cell form.
910STRING should be a string as it is allowed as argument of
911`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 912\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93 913text property. */)
5842a27b 914 (Lisp_Object string)
8489eb67 915{
2e567bd3 916 register const unsigned char *p;
8489eb67 917 register enum syntaxcode code;
247e20a8 918 int val;
8ea151b2 919 Lisp_Object match;
8489eb67 920
b7826503 921 CHECK_STRING (string);
8489eb67 922
d5db4077 923 p = SDATA (string);
8489eb67
RS
924 code = (enum syntaxcode) syntax_spec_code[*p++];
925 if (((int) code & 0377) == 0377)
b764a653 926 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 927
8ea151b2 928 if (code == Sinherit)
c65adb44 929 return Qnil;
8ea151b2
RS
930
931 if (*p)
d1be9f0f 932 {
93da5fff 933 int len;
62a6e103 934 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 935 XSETINT (match, character);
d1be9f0f
RS
936 if (XFASTINT (match) == ' ')
937 match = Qnil;
93da5fff 938 p += len;
d1be9f0f
RS
939 }
940 else
8ea151b2 941 match = Qnil;
8489eb67 942
8ea151b2 943 val = (int) code;
8489eb67
RS
944 while (*p)
945 switch (*p++)
946 {
947 case '1':
247e20a8 948 val |= 1 << 16;
8489eb67
RS
949 break;
950
951 case '2':
247e20a8 952 val |= 1 << 17;
8489eb67
RS
953 break;
954
955 case '3':
247e20a8 956 val |= 1 << 18;
8489eb67
RS
957 break;
958
959 case '4':
247e20a8 960 val |= 1 << 19;
8489eb67
RS
961 break;
962
963 case 'p':
247e20a8 964 val |= 1 << 20;
8489eb67 965 break;
e5d4f4dc
RS
966
967 case 'b':
247e20a8 968 val |= 1 << 21;
e5d4f4dc 969 break;
95ff8dfc
RS
970
971 case 'n':
972 val |= 1 << 22;
973 break;
c5683ceb
SM
974
975 case 'c':
976 val |= 1 << 23;
977 break;
8489eb67 978 }
7d0393cf 979
93da5fff 980 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 981 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
982 else
983 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
984 return Fcons (make_number (val), match);
985}
986
fdb82f93 987/* I really don't know why this is interactive
2c9e1900 988 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 989DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 990 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 991 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 992The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 993 the current buffer's syntax table.
5c7b02ab 994CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 995in the range MIN to MAX are changed.
fdb82f93
PJ
996The first character of NEWENTRY should be one of the following:
997 Space or - whitespace syntax. w word constituent.
998 _ symbol constituent. . punctuation.
999 ( open-parenthesis. ) close-parenthesis.
1000 " string quote. \\ escape.
1001 $ paired delimiter. ' expression quote or prefix operator.
1002 < comment starter. > comment ender.
1003 / character-quote. @ inherit from `standard-syntax-table'.
1004 | generic string fence. ! generic comment fence.
1005
1006Only single-character comment start and end sequences are represented thus.
1007Two-character sequences are represented as described below.
1008The second character of NEWENTRY is the matching parenthesis,
1009 used only if the first character is `(' or `)'.
1010Any additional characters are flags.
1011Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1012 1 means CHAR is the start of a two-char comment start sequence.
1013 2 means CHAR is the second character of such a sequence.
1014 3 means CHAR is the start of a two-char comment end sequence.
1015 4 means CHAR is the second character of such a sequence.
fdb82f93 1016
c5683ceb 1017There can be several orthogonal comment sequences. This is to support
fdb82f93
PJ
1018language modes such as C++. By default, all comment sequences are of style
1019a, but you can set the comment sequence style to b (on the second character
c5683ceb
SM
1020of a comment-start, and the first character of a comment-end sequence) and/or
1021c (on any of its chars) using this flag:
a50a10a0 1022 b means CHAR is part of comment sequence b.
c5683ceb 1023 c means CHAR is part of comment sequence c.
a50a10a0 1024 n means CHAR is part of a nestable comment sequence.
fdb82f93 1025
a50a10a0 1026 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1027 such characters are treated as whitespace when they occur
a50a10a0 1028 between expressions.
fedc6ab5 1029usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
5842a27b 1030 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
c65adb44 1031{
5c7b02ab
KH
1032 if (CONSP (c))
1033 {
8f924df7
KH
1034 CHECK_CHARACTER_CAR (c);
1035 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1036 }
1037 else
1038 CHECK_CHARACTER (c);
c65adb44
SM
1039
1040 if (NILP (syntax_table))
4b4deea2 1041 syntax_table = BVAR (current_buffer, syntax_table);
c65adb44
SM
1042 else
1043 check_syntax_table (syntax_table);
8489eb67 1044
5c7b02ab
KH
1045 newentry = Fstring_to_syntax (newentry);
1046 if (CONSP (c))
1047 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1048 else
1049 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1050
1051 /* We clear the regexp cache, since character classes can now have
1052 different values from those in the compiled regexps.*/
1053 clear_regexp_cache ();
1054
8489eb67
RS
1055 return Qnil;
1056}
1057\f
1058/* Dump syntax table to buffer in human-readable format */
1059
62abe9cb
SM
1060DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1061 Sinternal_describe_syntax_value, 1, 1, 0,
1062 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
5842a27b 1063 (Lisp_Object syntax)
8489eb67
RS
1064{
1065 register enum syntaxcode code;
c5683ceb
SM
1066 int syntax_code;
1067 char desc, start1, start2, end1, end2, prefix,
1068 comstyleb, comstylec, comnested;
8489eb67 1069 char str[2];
62abe9cb 1070 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1071
8ea151b2
RS
1072 if (NILP (value))
1073 {
62abe9cb
SM
1074 insert_string ("default");
1075 return syntax;
8ea151b2
RS
1076 }
1077
908b7fea
KH
1078 if (CHAR_TABLE_P (value))
1079 {
62abe9cb
SM
1080 insert_string ("deeper char-table ...");
1081 return syntax;
908b7fea
KH
1082 }
1083
8ea151b2
RS
1084 if (!CONSP (value))
1085 {
62abe9cb
SM
1086 insert_string ("invalid");
1087 return syntax;
8ea151b2
RS
1088 }
1089
c1d497be
KR
1090 first = XCAR (value);
1091 match_lisp = XCDR (value);
8ea151b2
RS
1092
1093 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1094 {
62abe9cb
SM
1095 insert_string ("invalid");
1096 return syntax;
8489eb67
RS
1097 }
1098
c5683ceb
SM
1099 syntax_code = XINT (first);
1100 code = (enum syntaxcode) (syntax_code & 0377);
1101 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1102 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1103 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1104 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1105 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1106 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1107 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1108 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
8489eb67
RS
1109
1110 if ((int) code < 0 || (int) code >= (int) Smax)
1111 {
1112 insert_string ("invalid");
62abe9cb 1113 return syntax;
8489eb67
RS
1114 }
1115 desc = syntax_code_spec[(int) code];
1116
1117 str[0] = desc, str[1] = 0;
1118 insert (str, 1);
1119
93da5fff
KH
1120 if (NILP (match_lisp))
1121 insert (" ", 1);
1122 else
1123 insert_char (XINT (match_lisp));
8489eb67 1124
8489eb67
RS
1125 if (start1)
1126 insert ("1", 1);
1127 if (start2)
1128 insert ("2", 1);
1129
1130 if (end1)
1131 insert ("3", 1);
1132 if (end2)
1133 insert ("4", 1);
1134
1135 if (prefix)
1136 insert ("p", 1);
c5683ceb 1137 if (comstyleb)
e5d4f4dc 1138 insert ("b", 1);
c5683ceb
SM
1139 if (comstylec)
1140 insert ("c", 1);
e6365855
SM
1141 if (comnested)
1142 insert ("n", 1);
8489eb67
RS
1143
1144 insert_string ("\twhich means: ");
1145
0220c518 1146 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1147 {
1148 case Swhitespace:
1149 insert_string ("whitespace"); break;
1150 case Spunct:
1151 insert_string ("punctuation"); break;
1152 case Sword:
1153 insert_string ("word"); break;
1154 case Ssymbol:
1155 insert_string ("symbol"); break;
1156 case Sopen:
1157 insert_string ("open"); break;
1158 case Sclose:
1159 insert_string ("close"); break;
1160 case Squote:
d671382e 1161 insert_string ("prefix"); break;
8489eb67
RS
1162 case Sstring:
1163 insert_string ("string"); break;
1164 case Smath:
1165 insert_string ("math"); break;
1166 case Sescape:
1167 insert_string ("escape"); break;
1168 case Scharquote:
1169 insert_string ("charquote"); break;
1170 case Scomment:
1171 insert_string ("comment"); break;
1172 case Sendcomment:
1173 insert_string ("endcomment"); break;
d671382e
SM
1174 case Sinherit:
1175 insert_string ("inherit"); break;
1176 case Scomment_fence:
1177 insert_string ("comment fence"); break;
1178 case Sstring_fence:
1179 insert_string ("string fence"); break;
8489eb67
RS
1180 default:
1181 insert_string ("invalid");
62abe9cb 1182 return syntax;
8489eb67
RS
1183 }
1184
8ea151b2 1185 if (!NILP (match_lisp))
8489eb67
RS
1186 {
1187 insert_string (", matches ");
8ea151b2 1188 insert_char (XINT (match_lisp));
8489eb67
RS
1189 }
1190
1191 if (start1)
1192 insert_string (",\n\t is the first character of a comment-start sequence");
1193 if (start2)
1194 insert_string (",\n\t is the second character of a comment-start sequence");
1195
1196 if (end1)
1197 insert_string (",\n\t is the first character of a comment-end sequence");
1198 if (end2)
1199 insert_string (",\n\t is the second character of a comment-end sequence");
c5683ceb 1200 if (comstyleb)
e5d4f4dc 1201 insert_string (" (comment style b)");
c5683ceb
SM
1202 if (comstylec)
1203 insert_string (" (comment style c)");
e6365855
SM
1204 if (comnested)
1205 insert_string (" (nestable)");
e5d4f4dc 1206
8489eb67
RS
1207 if (prefix)
1208 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1209
62abe9cb
SM
1210 return syntax;
1211}
8489eb67
RS
1212\f
1213/* Return the position across COUNT words from FROM.
1214 If that many words cannot be found before the end of the buffer, return 0.
1215 COUNT negative means scan backward and stop at word beginning. */
1216
4f3a2f8d
EZ
1217EMACS_INT
1218scan_words (register EMACS_INT from, register EMACS_INT count)
8489eb67 1219{
4f3a2f8d
EZ
1220 register EMACS_INT beg = BEGV;
1221 register EMACS_INT end = ZV;
1222 register EMACS_INT from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1223 register enum syntaxcode code;
1224 int ch0, ch1;
fca8fe46 1225 Lisp_Object func, pos;
8489eb67
RS
1226
1227 immediate_quit = 1;
1228 QUIT;
1229
195d1361
RS
1230 SETUP_SYNTAX_TABLE (from, count);
1231
8489eb67
RS
1232 while (count > 0)
1233 {
1234 while (1)
1235 {
1236 if (from == end)
1237 {
1238 immediate_quit = 0;
1239 return 0;
1240 }
195d1361 1241 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1242 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1243 code = SYNTAX (ch0);
6a140a74 1244 INC_BOTH (from, from_byte);
8489eb67
RS
1245 if (words_include_escapes
1246 && (code == Sescape || code == Scharquote))
1247 break;
1248 if (code == Sword)
1249 break;
8489eb67 1250 }
93da5fff
KH
1251 /* Now CH0 is a character which begins a word and FROM is the
1252 position of the next character. */
8f924df7 1253 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1254 if (! NILP (Ffboundp (func)))
8489eb67 1255 {
869bb237 1256 pos = call2 (func, make_number (from - 1), make_number (end));
8f924df7
KH
1257 if (INTEGERP (pos) && XINT (pos) > from)
1258 {
1259 from = XINT (pos);
1260 from_byte = CHAR_TO_BYTE (from);
1261 }
8489eb67 1262 }
869bb237 1263 else
8cb8232a 1264 {
8cb8232a
KH
1265 while (1)
1266 {
1267 if (from == end) break;
1268 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1269 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1270 code = SYNTAX (ch1);
1271 if ((code != Sword
1272 && (! words_include_escapes
1273 || (code != Sescape && code != Scharquote)))
2b70a11c 1274 || word_boundary_p (ch0, ch1))
869bb237 1275 break;
8cb8232a
KH
1276 INC_BOTH (from, from_byte);
1277 ch0 = ch1;
1278 }
8489eb67
RS
1279 }
1280 count--;
1281 }
1282 while (count < 0)
1283 {
1284 while (1)
1285 {
1286 if (from == beg)
1287 {
1288 immediate_quit = 0;
1289 return 0;
1290 }
6a140a74 1291 DEC_BOTH (from, from_byte);
195d1361 1292 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1293 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1294 code = SYNTAX (ch1);
8489eb67
RS
1295 if (words_include_escapes
1296 && (code == Sescape || code == Scharquote))
1297 break;
1298 if (code == Sword)
1299 break;
8489eb67 1300 }
93da5fff
KH
1301 /* Now CH1 is a character which ends a word and FROM is the
1302 position of it. */
8f924df7 1303 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1304 if (! NILP (Ffboundp (func)))
8f924df7 1305 {
869bb237 1306 pos = call2 (func, make_number (from), make_number (beg));
8f924df7
KH
1307 if (INTEGERP (pos) && XINT (pos) < from)
1308 {
1309 from = XINT (pos);
1310 from_byte = CHAR_TO_BYTE (from);
1311 }
8489eb67 1312 }
869bb237 1313 else
8489eb67 1314 {
8cb8232a
KH
1315 while (1)
1316 {
8cb8232a
KH
1317 if (from == beg)
1318 break;
262be72a 1319 DEC_BOTH (from, from_byte);
8cb8232a 1320 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1321 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1322 code = SYNTAX (ch0);
1323 if ((code != Sword
1324 && (! words_include_escapes
1325 || (code != Sescape && code != Scharquote)))
2b70a11c 1326 || word_boundary_p (ch0, ch1))
262be72a
MB
1327 {
1328 INC_BOTH (from, from_byte);
1329 break;
1330 }
8cb8232a
KH
1331 ch1 = ch0;
1332 }
8489eb67
RS
1333 }
1334 count++;
1335 }
1336
1337 immediate_quit = 0;
1338
1339 return from;
1340}
1341
16a97296 1342DEFUE ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1343 doc: /* Move point forward ARG words (backward if ARG is negative).
1344Normally returns t.
1345If an edge of the buffer or a field boundary is reached, point is left there
1346and the function returns nil. Field boundaries are not noticed if
1347`inhibit-field-text-motion' is non-nil. */)
5842a27b 1348 (Lisp_Object arg)
8489eb67 1349{
c096ae4d 1350 Lisp_Object tmp;
2c6ea900 1351 int orig_val, val;
8489eb67 1352
839966f3
KH
1353 if (NILP (arg))
1354 XSETFASTINT (arg, 1);
1355 else
1356 CHECK_NUMBER (arg);
1357
1358 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1359 if (! orig_val)
839966f3 1360 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1361
b8855607 1362 /* Avoid jumping out of an input field. */
c096ae4d
SM
1363 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1364 Qt, Qnil, Qnil);
1365 val = XFASTINT (tmp);
7d0393cf 1366
8489eb67 1367 SET_PT (val);
e6d8341f 1368 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1369}
1370\f
16a97296 1371DEFUE ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1372 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1373STRING is like the inside of a `[...]' in a regular expression
1374except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1375 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1376Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1377With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1378Char classes, e.g. `[:alpha:]', are supported.
1379
1380Returns the distance traveled, either zero or positive. */)
5842a27b 1381 (Lisp_Object string, Lisp_Object lim)
195d1361 1382{
327719ee 1383 return skip_chars (1, string, lim, 1);
195d1361
RS
1384}
1385
16a97296 1386DEFUE ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1387 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1388See `skip-chars-forward' for details.
1389Returns the distance traveled, either zero or negative. */)
5842a27b 1390 (Lisp_Object string, Lisp_Object lim)
195d1361 1391{
327719ee 1392 return skip_chars (0, string, lim, 1);
195d1361
RS
1393}
1394
1395DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1396 doc: /* Move point forward across chars in specified syntax classes.
1397SYNTAX is a string of syntax code characters.
1398Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1399If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1400This function returns the distance traveled, either zero or positive. */)
5842a27b 1401 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1402{
b7dbcc19 1403 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1404}
1405
1406DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1407 doc: /* Move point backward across chars in specified syntax classes.
1408SYNTAX is a string of syntax code characters.
1409Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1410If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1411This function returns the distance traveled, either zero or negative. */)
5842a27b 1412 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1413{
b7dbcc19 1414 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1415}
1416
6a140a74 1417static Lisp_Object
971de7fb 1418skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
195d1361 1419{
195d1361
RS
1420 register unsigned int c;
1421 unsigned char fastmap[0400];
b7dbcc19 1422 /* Store the ranges of non-ASCII characters. */
4f63c6bb 1423 int *char_ranges IF_LINT (= NULL);
7e68b0ea 1424 int n_char_ranges = 0;
195d1361 1425 int negate = 0;
4f3a2f8d 1426 register EMACS_INT i, i_byte;
b7dbcc19
KH
1427 /* Set to 1 if the current buffer is multibyte and the region
1428 contains non-ASCII chars. */
1429 int multibyte;
1430 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1431 chars. */
1674d9a2 1432 int string_multibyte;
4f3a2f8d 1433 EMACS_INT size_byte;
2e567bd3 1434 const unsigned char *str;
82d497fc 1435 int len;
a1bc88d4 1436 Lisp_Object iso_classes;
195d1361 1437
b7826503 1438 CHECK_STRING (string);
a1bc88d4 1439 iso_classes = Qnil;
82d497fc 1440
195d1361
RS
1441 if (NILP (lim))
1442 XSETINT (lim, forwardp ? ZV : BEGV);
1443 else
b7826503 1444 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1445
1446 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1447 if (XINT (lim) > ZV)
1448 XSETFASTINT (lim, ZV);
1449 if (XINT (lim) < BEGV)
1450 XSETFASTINT (lim, BEGV);
1451
4b4deea2 1452 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1453 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1454 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1455
72af86bd 1456 memset (fastmap, 0, sizeof fastmap);
195d1361 1457
8f924df7
KH
1458 str = SDATA (string);
1459 size_byte = SBYTES (string);
4101e6fe 1460
82d497fc 1461 i_byte = 0;
13090112 1462 if (i_byte < size_byte
d5db4077 1463 && SREF (string, 0) == '^')
195d1361 1464 {
82d497fc 1465 negate = 1; i_byte++;
195d1361
RS
1466 }
1467
1468 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1469 Handle backslashes and ranges specially.
195d1361 1470
b7dbcc19
KH
1471 If STRING contains non-ASCII characters, setup char_ranges for
1472 them and use fastmap only for their leading codes. */
4101e6fe 1473
b7dbcc19 1474 if (! string_multibyte)
195d1361 1475 {
b7dbcc19 1476 int string_has_eight_bit = 0;
4101e6fe 1477
b7dbcc19
KH
1478 /* At first setup fastmap. */
1479 while (i_byte < size_byte)
1480 {
1481 c = str[i_byte++];
1482
a1bc88d4
RS
1483 if (handle_iso_classes && c == '['
1484 && i_byte < size_byte
327719ee 1485 && str[i_byte] == ':')
a1bc88d4
RS
1486 {
1487 const unsigned char *class_beg = str + i_byte + 1;
1488 const unsigned char *class_end = class_beg;
b3bda4fd 1489 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1490 /* Leave room for the null. */
a1bc88d4
RS
1491 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1492 re_wctype_t cc;
1493
1494 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1495 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1496
b3bda4fd
KS
1497 while (class_end < class_limit
1498 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1499 class_end++;
1500
b3bda4fd
KS
1501 if (class_end == class_beg
1502 || *class_end != ':' || class_end[1] != ']')
1503 goto not_a_class_name;
a1bc88d4 1504
72af86bd 1505 memcpy (class_name, class_beg, class_end - class_beg);
a1bc88d4
RS
1506 class_name[class_end - class_beg] = 0;
1507
1508 cc = re_wctype (class_name);
1509 if (cc == 0)
1510 error ("Invalid ISO C character class");
1511
1512 iso_classes = Fcons (make_number (cc), iso_classes);
1513
1514 i_byte = class_end + 2 - str;
1515 continue;
1516 }
1517
b3bda4fd 1518 not_a_class_name:
b7dbcc19
KH
1519 if (c == '\\')
1520 {
13090112 1521 if (i_byte == size_byte)
4101e6fe
RS
1522 break;
1523
5c7b02ab 1524 c = str[i_byte++];
195d1361 1525 }
839966f3
KH
1526 /* Treat `-' as range character only if another character
1527 follows. */
1528 if (i_byte + 1 < size_byte
82d497fc 1529 && str[i_byte] == '-')
195d1361 1530 {
9690d026 1531 unsigned int c2;
4101e6fe
RS
1532
1533 /* Skip over the dash. */
82d497fc 1534 i_byte++;
4101e6fe 1535
4101e6fe 1536 /* Get the end of the range. */
5c7b02ab 1537 c2 = str[i_byte++];
b7dbcc19
KH
1538 if (c2 == '\\'
1539 && i_byte < size_byte)
1540 c2 = str[i_byte++];
1541
8f924df7
KH
1542 if (c <= c2)
1543 {
1e69125e
PE
1544 unsigned lim2 = c2 + 1;
1545 while (c < lim2)
8f924df7
KH
1546 fastmap[c++] = 1;
1547 if (! ASCII_CHAR_P (c2))
1548 string_has_eight_bit = 1;
1549 }
195d1361
RS
1550 }
1551 else
b7dbcc19
KH
1552 {
1553 fastmap[c] = 1;
1554 if (! ASCII_CHAR_P (c))
1555 string_has_eight_bit = 1;
1556 }
1557 }
1558
1559 /* If the current range is multibyte and STRING contains
1560 eight-bit chars, arrange fastmap and setup char_ranges for
1561 the corresponding multibyte chars. */
1562 if (multibyte && string_has_eight_bit)
1563 {
1564 unsigned char fastmap2[0400];
1565 int range_start_byte, range_start_char;
1566
72af86bd
AS
1567 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1568 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1569 /* We are sure that this loop stops. */
1570 for (i = 0200; ! fastmap2[i]; i++);
4c0354d7 1571 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1572 fastmap[CHAR_LEADING_CODE (c)] = 1;
1573 range_start_byte = i;
1574 range_start_char = c;
f003ca54 1575 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1576 for (i = 129; i < 0400; i++)
1577 {
4c0354d7 1578 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1579 fastmap[CHAR_LEADING_CODE (c)] = 1;
1580 if (i - range_start_byte != c - range_start_char)
1581 {
1582 char_ranges[n_char_ranges++] = range_start_char;
1583 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1584 + range_start_char);
1585 range_start_byte = i;
1586 range_start_char = c;
8f924df7 1587 }
b7dbcc19
KH
1588 }
1589 char_ranges[n_char_ranges++] = range_start_char;
1590 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1591 + range_start_char);
195d1361
RS
1592 }
1593 }
f003ca54 1594 else /* STRING is multibyte */
b7dbcc19 1595 {
f003ca54
KH
1596 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1597
b7dbcc19 1598 while (i_byte < size_byte)
195d1361 1599 {
b7dbcc19 1600 unsigned char leading_code;
5c7b02ab 1601
b7dbcc19 1602 leading_code = str[i_byte];
62a6e103 1603 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1604 i_byte += len;
5c7b02ab 1605
327719ee
MB
1606 if (handle_iso_classes && c == '['
1607 && i_byte < size_byte
62a6e103 1608 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1609 {
1610 const unsigned char *class_beg = str + i_byte + 1;
1611 const unsigned char *class_end = class_beg;
1612 const unsigned char *class_limit = str + size_byte - 2;
1613 /* Leave room for the null. */
1614 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1615 re_wctype_t cc;
1616
1617 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1618 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1619
1620 while (class_end < class_limit
1621 && *class_end >= 'a' && *class_end <= 'z')
1622 class_end++;
1623
1624 if (class_end == class_beg
1625 || *class_end != ':' || class_end[1] != ']')
1626 goto not_a_class_name_multibyte;
1627
72af86bd 1628 memcpy (class_name, class_beg, class_end - class_beg);
327719ee
MB
1629 class_name[class_end - class_beg] = 0;
1630
1631 cc = re_wctype (class_name);
1632 if (cc == 0)
1633 error ("Invalid ISO C character class");
1634
1635 iso_classes = Fcons (make_number (cc), iso_classes);
1636
1637 i_byte = class_end + 2 - str;
1638 continue;
1639 }
1640
1641 not_a_class_name_multibyte:
195d1361
RS
1642 if (c == '\\')
1643 {
13090112 1644 if (i_byte == size_byte)
4101e6fe
RS
1645 break;
1646
b7dbcc19 1647 leading_code = str[i_byte];
62a6e103 1648 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1649 i_byte += len;
195d1361 1650 }
839966f3
KH
1651 /* Treat `-' as range character only if another character
1652 follows. */
1653 if (i_byte + 1 < size_byte
82d497fc 1654 && str[i_byte] == '-')
195d1361 1655 {
9690d026 1656 unsigned int c2;
b7dbcc19 1657 unsigned char leading_code2;
4101e6fe
RS
1658
1659 /* Skip over the dash. */
82d497fc 1660 i_byte++;
4101e6fe 1661
4101e6fe 1662 /* Get the end of the range. */
b7dbcc19 1663 leading_code2 = str[i_byte];
62a6e103 1664 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1665 i_byte += len;
7e68b0ea 1666
b7dbcc19
KH
1667 if (c2 == '\\'
1668 && i_byte < size_byte)
1669 {
1670 leading_code2 = str[i_byte];
62a6e103 1671 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1672 i_byte += len;
1673 }
1674
f003ca54
KH
1675 if (c > c2)
1676 continue;
b7dbcc19 1677 if (ASCII_CHAR_P (c))
06274af5 1678 {
5c7b02ab
KH
1679 while (c <= c2 && c < 0x80)
1680 fastmap[c++] = 1;
b7dbcc19
KH
1681 leading_code = CHAR_LEADING_CODE (c);
1682 }
1683 if (! ASCII_CHAR_P (c))
1684 {
1e69125e
PE
1685 unsigned lim2 = leading_code2 + 1;
1686 while (leading_code < lim2)
b7dbcc19
KH
1687 fastmap[leading_code++] = 1;
1688 if (c <= c2)
e39091b6 1689 {
b7dbcc19 1690 char_ranges[n_char_ranges++] = c;
e39091b6 1691 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1692 }
1693 }
195d1361
RS
1694 }
1695 else
7e68b0ea 1696 {
b7dbcc19 1697 if (ASCII_CHAR_P (c))
e39091b6
KH
1698 fastmap[c] = 1;
1699 else
7e68b0ea 1700 {
b7dbcc19 1701 fastmap[leading_code] = 1;
4101e6fe
RS
1702 char_ranges[n_char_ranges++] = c;
1703 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1704 }
1705 }
195d1361 1706 }
b7dbcc19
KH
1707
1708 /* If the current range is unibyte and STRING contains non-ASCII
1709 chars, arrange fastmap for the corresponding unibyte
1710 chars. */
1711
1712 if (! multibyte && n_char_ranges > 0)
1713 {
72af86bd 1714 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1715 for (i = 0; i < n_char_ranges; i += 2)
1716 {
1717 int c1 = char_ranges[i];
1e69125e 1718 unsigned lim2 = char_ranges[i + 1] + 1;
b7dbcc19 1719
1e69125e 1720 for (; c1 < lim2; c1++)
2afc21f5
SM
1721 {
1722 int b = CHAR_TO_BYTE_SAFE (c1);
1723 if (b >= 0)
1724 fastmap[b] = 1;
1725 }
b7dbcc19
KH
1726 }
1727 }
195d1361
RS
1728 }
1729
9690d026 1730 /* If ^ was the first character, complement the fastmap. */
195d1361 1731 if (negate)
b7dbcc19
KH
1732 {
1733 if (! multibyte)
1734 for (i = 0; i < sizeof fastmap; i++)
1735 fastmap[i] ^= 1;
1736 else
1737 {
1738 for (i = 0; i < 0200; i++)
1739 fastmap[i] ^= 1;
1740 /* All non-ASCII chars possibly match. */
1741 for (; i < sizeof fastmap; i++)
1742 fastmap[i] = 1;
1743 }
1744 }
195d1361
RS
1745
1746 {
4f3a2f8d
EZ
1747 EMACS_INT start_point = PT;
1748 EMACS_INT pos = PT;
1749 EMACS_INT pos_byte = PT_BYTE;
9af7511a
KH
1750 unsigned char *p = PT_ADDR, *endp, *stop;
1751
1752 if (forwardp)
1753 {
1754 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1755 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1756 }
1757 else
1758 {
1759 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1760 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1761 }
195d1361
RS
1762
1763 immediate_quit = 1;
618db430
SM
1764 /* This code may look up syntax tables using macros that rely on the
1765 gl_state object. To make sure this object is not out of date,
1766 let's initialize it manually.
1767 We ignore syntax-table text-properties for now, since that's
1768 what we've done in the past. */
d48cd3f4 1769 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1770 if (forwardp)
195d1361 1771 {
b7dbcc19 1772 if (multibyte)
8f924df7 1773 while (1)
b7dbcc19 1774 {
8f924df7 1775 int nbytes;
9af7511a 1776
8f924df7 1777 if (p >= stop)
9af7511a 1778 {
8f924df7 1779 if (p >= endp)
9af7511a 1780 break;
8f924df7
KH
1781 p = GAP_END_ADDR;
1782 stop = endp;
9af7511a 1783 }
62a6e103 1784 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1785 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1786 {
327719ee 1787 if (negate)
9af7511a 1788 break;
327719ee
MB
1789 else
1790 goto fwd_ok;
9af7511a 1791 }
9af7511a 1792
8f924df7 1793 if (! fastmap[*p])
b7dbcc19
KH
1794 break;
1795 if (! ASCII_CHAR_P (c))
9af7511a 1796 {
b7dbcc19
KH
1797 /* As we are looking at a multibyte character, we
1798 must look up the character in the table
1799 CHAR_RANGES. If there's no data in the table,
1800 that character is not what we want to skip. */
1801
1802 /* The following code do the right thing even if
1803 n_char_ranges is zero (i.e. no data in
1804 CHAR_RANGES). */
1805 for (i = 0; i < n_char_ranges; i += 2)
1806 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1807 break;
b7dbcc19
KH
1808 if (!(negate ^ (i < n_char_ranges)))
1809 break;
9af7511a 1810 }
327719ee 1811 fwd_ok:
8f924df7 1812 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1813 }
1814 else
8f924df7
KH
1815 while (1)
1816 {
1817 if (p >= stop)
9af7511a 1818 {
8f924df7 1819 if (p >= endp)
9af7511a 1820 break;
8f924df7
KH
1821 p = GAP_END_ADDR;
1822 stop = endp;
9af7511a 1823 }
327719ee
MB
1824
1825 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1826 {
1827 if (negate)
1828 break;
1829 else
1830 goto fwd_unibyte_ok;
1831 }
1832
8f924df7
KH
1833 if (!fastmap[*p])
1834 break;
327719ee 1835 fwd_unibyte_ok:
8f924df7
KH
1836 p++, pos++, pos_byte++;
1837 }
195d1361
RS
1838 }
1839 else
1840 {
b7dbcc19 1841 if (multibyte)
8f924df7 1842 while (1)
b7dbcc19 1843 {
8f924df7 1844 unsigned char *prev_p;
9af7511a 1845
8f924df7 1846 if (p <= stop)
9af7511a 1847 {
8f924df7 1848 if (p <= endp)
9af7511a 1849 break;
8f924df7
KH
1850 p = GPT_ADDR;
1851 stop = endp;
9af7511a 1852 }
8f924df7
KH
1853 prev_p = p;
1854 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1855 c = STRING_CHAR (p);
a1bc88d4 1856
327719ee
MB
1857 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1858 {
1859 if (negate)
1860 break;
9690d026 1861 else
327719ee 1862 goto back_ok;
7e68b0ea 1863 }
a1bc88d4 1864
8f924df7 1865 if (! fastmap[*p])
b7dbcc19
KH
1866 break;
1867 if (! ASCII_CHAR_P (c))
7e68b0ea 1868 {
b7dbcc19
KH
1869 /* See the comment in the previous similar code. */
1870 for (i = 0; i < n_char_ranges; i += 2)
1871 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1872 break;
1873 if (!(negate ^ (i < n_char_ranges)))
1874 break;
7e68b0ea 1875 }
327719ee 1876 back_ok:
8f924df7 1877 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1878 }
1879 else
8f924df7
KH
1880 while (1)
1881 {
1882 if (p <= stop)
9af7511a 1883 {
8f924df7 1884 if (p <= endp)
9af7511a 1885 break;
8f924df7
KH
1886 p = GPT_ADDR;
1887 stop = endp;
9af7511a 1888 }
e39091b6 1889
327719ee
MB
1890 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1891 {
1892 if (negate)
1893 break;
9af7511a 1894 else
327719ee
MB
1895 goto back_unibyte_ok;
1896 }
a1bc88d4 1897
8f924df7
KH
1898 if (!fastmap[p[-1]])
1899 break;
327719ee 1900 back_unibyte_ok:
8f924df7
KH
1901 p--, pos--, pos_byte--;
1902 }
195d1361 1903 }
7e68b0ea 1904
b7dbcc19
KH
1905 SET_PT_BOTH (pos, pos_byte);
1906 immediate_quit = 0;
1907
1908 return make_number (PT - start_point);
1909 }
1910}
7e68b0ea 1911
b7dbcc19
KH
1912
1913static Lisp_Object
971de7fb 1914skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
b7dbcc19
KH
1915{
1916 register unsigned int c;
1917 unsigned char fastmap[0400];
1918 int negate = 0;
4f3a2f8d 1919 register EMACS_INT i, i_byte;
b7dbcc19 1920 int multibyte;
4f3a2f8d 1921 EMACS_INT size_byte;
b7dbcc19
KH
1922 unsigned char *str;
1923
1924 CHECK_STRING (string);
1925
1926 if (NILP (lim))
1927 XSETINT (lim, forwardp ? ZV : BEGV);
1928 else
1929 CHECK_NUMBER_COERCE_MARKER (lim);
1930
1931 /* In any case, don't allow scan outside bounds of buffer. */
1932 if (XINT (lim) > ZV)
1933 XSETFASTINT (lim, ZV);
1934 if (XINT (lim) < BEGV)
1935 XSETFASTINT (lim, BEGV);
1936
ade8ee9e 1937 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1938 return make_number (0);
8c6e735b 1939
4b4deea2 1940 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
92eaa22e 1941 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19 1942
72af86bd 1943 memset (fastmap, 0, sizeof fastmap);
b7dbcc19 1944
8f924df7
KH
1945 if (SBYTES (string) > SCHARS (string))
1946 /* As this is very rare case (syntax spec is ASCII only), don't
1947 consider efficiency. */
bc796a59
KH
1948 string = string_make_unibyte (string);
1949
8f924df7
KH
1950 str = SDATA (string);
1951 size_byte = SBYTES (string);
bc796a59 1952
b7dbcc19
KH
1953 i_byte = 0;
1954 if (i_byte < size_byte
8f924df7 1955 && SREF (string, 0) == '^')
b7dbcc19
KH
1956 {
1957 negate = 1; i_byte++;
1958 }
1959
b7dbcc19
KH
1960 /* Find the syntaxes specified and set their elements of fastmap. */
1961
1962 while (i_byte < size_byte)
1963 {
1964 c = str[i_byte++];
8f924df7 1965 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1966 }
195d1361 1967
9690d026 1968 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1969 if (negate)
1970 for (i = 0; i < sizeof fastmap; i++)
9690d026 1971 fastmap[i] ^= 1;
195d1361
RS
1972
1973 {
4f3a2f8d
EZ
1974 EMACS_INT start_point = PT;
1975 EMACS_INT pos = PT;
1976 EMACS_INT pos_byte = PT_BYTE;
8f924df7
KH
1977 unsigned char *p = PT_ADDR, *endp, *stop;
1978
1979 if (forwardp)
1980 {
1981 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1982 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1983 }
1984 else
1985 {
1986 endp = CHAR_POS_ADDR (XINT (lim));
1987 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1988 }
195d1361
RS
1989
1990 immediate_quit = 1;
b7dbcc19
KH
1991 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1992 if (forwardp)
195d1361 1993 {
b7dbcc19 1994 if (multibyte)
195d1361 1995 {
8f924df7 1996 while (1)
8c6e735b 1997 {
8f924df7
KH
1998 int nbytes;
1999
2000 if (p >= stop)
2001 {
2002 if (p >= endp)
2003 break;
2004 p = GAP_END_ADDR;
2005 stop = endp;
2006 }
62a6e103 2007 c = STRING_CHAR_AND_LENGTH (p, nbytes);
8f924df7 2008 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 2009 break;
8f924df7 2010 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
2011 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2012 }
195d1361
RS
2013 }
2014 else
2015 {
8c6e735b 2016 while (1)
7e68b0ea 2017 {
8f924df7
KH
2018 if (p >= stop)
2019 {
2020 if (p >= endp)
2021 break;
2022 p = GAP_END_ADDR;
2023 stop = endp;
2024 }
2025 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 2026 break;
8f924df7 2027 p++, pos++, pos_byte++;
b7dbcc19 2028 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2029 }
2030 }
2031 }
2032 else
2033 {
b7dbcc19 2034 if (multibyte)
195d1361 2035 {
8c6e735b 2036 while (1)
b7dbcc19 2037 {
8f924df7
KH
2038 unsigned char *prev_p;
2039
2040 if (p <= stop)
b7dbcc19 2041 {
8f924df7
KH
2042 if (p <= endp)
2043 break;
2044 p = GPT_ADDR;
2045 stop = endp;
b7dbcc19 2046 }
262be72a 2047 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2048 prev_p = p;
2049 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2050 c = STRING_CHAR (p);
8f924df7
KH
2051 if (! fastmap[(int) SYNTAX (c)])
2052 break;
2053 pos--, pos_byte -= prev_p - p;
b7dbcc19 2054 }
195d1361
RS
2055 }
2056 else
2057 {
8c6e735b
KH
2058 while (1)
2059 {
8f924df7
KH
2060 if (p <= stop)
2061 {
2062 if (p <= endp)
2063 break;
2064 p = GPT_ADDR;
2065 stop = endp;
2066 }
102f6132 2067 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2068 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2069 break;
8f924df7 2070 p--, pos--, pos_byte--;
8c6e735b 2071 }
195d1361
RS
2072 }
2073 }
6a140a74
RS
2074
2075 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2076 immediate_quit = 0;
2077
2078 return make_number (PT - start_point);
2079 }
2080}
a1bc88d4
RS
2081
2082/* Return 1 if character C belongs to one of the ISO classes
2083 in the list ISO_CLASSES. Each class is represented by an
2084 integer which is its type according to re_wctype. */
2085
2086static int
971de7fb 2087in_classes (int c, Lisp_Object iso_classes)
a1bc88d4
RS
2088{
2089 int fits_class = 0;
2090
618db430 2091 while (CONSP (iso_classes))
a1bc88d4
RS
2092 {
2093 Lisp_Object elt;
2094 elt = XCAR (iso_classes);
2095 iso_classes = XCDR (iso_classes);
2096
2097 if (re_iswctype (c, XFASTINT (elt)))
2098 fits_class = 1;
2099 }
2100
2101 return fits_class;
2102}
195d1361 2103\f
95ff8dfc
RS
2104/* Jump over a comment, assuming we are at the beginning of one.
2105 FROM is the current position.
2106 FROM_BYTE is the bytepos corresponding to FROM.
2107 Do not move past STOP (a charpos).
2108 The comment over which we have to jump is of style STYLE
c5683ceb 2109 (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
95ff8dfc
RS
2110 NESTING should be positive to indicate the nesting at the beginning
2111 for nested comments and should be zero or negative else.
2112 ST_COMMENT_STYLE cannot be nested.
2113 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2114 (or 0 If the search cannot start in the middle of a two-character).
2115
2116 If successful, return 1 and store the charpos of the comment's end
2117 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2118 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2119 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2120 (as defined for state.incomment) in *INCOMMENT_PTR.
2121
2122 The comment end is the last character of the comment rather than the
2123 character just after the comment.
2124
2125 Global syntax data is assumed to initially be valid for FROM and
2126 remains valid for forward search starting at the returned position. */
2127
2128static int
dd4c5104
DN
2129forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2130 int nesting, int style, int prev_syntax,
2131 EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2132 int *incomment_ptr)
95ff8dfc
RS
2133{
2134 register int c, c1;
2135 register enum syntaxcode code;
c5683ceb 2136 register int syntax, other_syntax;
95ff8dfc
RS
2137
2138 if (nesting <= 0) nesting = -1;
2139
2140 /* Enter the loop in the middle so that we find
2141 a 2-char comment ender if we start in the middle of it. */
2142 syntax = prev_syntax;
2143 if (syntax != 0) goto forw_incomment;
2144
2145 while (1)
2146 {
2147 if (from == stop)
2148 {
2149 *incomment_ptr = nesting;
2150 *charpos_ptr = from;
2151 *bytepos_ptr = from_byte;
2152 return 0;
2153 }
8f924df7 2154 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2155 syntax = SYNTAX_WITH_FLAGS (c);
2156 code = syntax & 0xff;
2157 if (code == Sendcomment
c5683ceb 2158 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
abf8a9ff
SM
2159 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2160 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2161 /* we have encountered a comment end of the same style
2162 as the comment sequence which began this comment
2163 section */
2164 break;
2165 if (code == Scomment_fence
2166 && style == ST_COMMENT_STYLE)
2167 /* we have encountered a comment end of the same style
2168 as the comment sequence which began this comment
2169 section. */
2170 break;
2171 if (nesting > 0
2172 && code == Scomment
abf8a9ff 2173 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
c5683ceb 2174 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
95ff8dfc
RS
2175 /* we have encountered a nested comment of the same style
2176 as the comment sequence which began this comment section */
2177 nesting++;
2178 INC_BOTH (from, from_byte);
2179 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2180
95ff8dfc
RS
2181 forw_incomment:
2182 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
8f924df7 2183 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2184 other_syntax = SYNTAX_WITH_FLAGS (c1),
2185 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2186 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
abf8a9ff 2187 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb
SM
2188 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2189 ? nesting > 0 : nesting < 0))
4ffe723b
GM
2190 {
2191 if (--nesting <= 0)
2192 /* we have encountered a comment end of the same style
2193 as the comment sequence which began this comment
2194 section */
2195 break;
2196 else
2197 {
2198 INC_BOTH (from, from_byte);
2199 UPDATE_SYNTAX_TABLE_FORWARD (from);
2200 }
2201 }
95ff8dfc
RS
2202 if (nesting > 0
2203 && from < stop
2204 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2205 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2206 other_syntax = SYNTAX_WITH_FLAGS (c1),
2207 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2208 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
abf8a9ff 2209 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
c5683ceb 2210 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
95ff8dfc
RS
2211 /* we have encountered a nested comment of the same style
2212 as the comment sequence which began this comment
2213 section */
2214 {
2215 INC_BOTH (from, from_byte);
2216 UPDATE_SYNTAX_TABLE_FORWARD (from);
2217 nesting++;
2218 }
2219 }
2220 *charpos_ptr = from;
2221 *bytepos_ptr = from_byte;
2222 return 1;
2223}
2224
b3cfe0c8 2225DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2226 doc: /*
ee648c11 2227Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2228Stop scanning if we find something other than a comment or whitespace.
2229Set point to where scanning stops.
ee648c11 2230If COUNT comments are found as expected, with nothing except whitespace
fdb82f93 2231between them, return t; otherwise return nil. */)
5842a27b 2232 (Lisp_Object count)
b3cfe0c8 2233{
2312c580
SM
2234 register EMACS_INT from;
2235 EMACS_INT from_byte;
2236 register EMACS_INT stop;
8ea151b2 2237 register int c, c1;
b3cfe0c8
RS
2238 register enum syntaxcode code;
2239 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2240 int comnested = 0; /* whether the comment is nestable or not */
be720845 2241 int found;
2312c580
SM
2242 EMACS_INT count1;
2243 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2244 int dummy;
840f481c 2245
b7826503 2246 CHECK_NUMBER (count);
840f481c 2247 count1 = XINT (count);
195d1361 2248 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2249
2250 immediate_quit = 1;
2251 QUIT;
2252
2253 from = PT;
6a140a74 2254 from_byte = PT_BYTE;
b3cfe0c8 2255
195d1361 2256 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2257 while (count1 > 0)
b3cfe0c8 2258 {
04882296 2259 do
b3cfe0c8 2260 {
c5683ceb 2261 int comstart_first, syntax, other_syntax;
f902a008 2262
04882296
KH
2263 if (from == stop)
2264 {
ef316cf0 2265 SET_PT_BOTH (from, from_byte);
b7e6e612 2266 immediate_quit = 0;
04882296
KH
2267 return Qnil;
2268 }
b7dbcc19 2269 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2270 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8 2271 code = SYNTAX (c);
c5683ceb
SM
2272 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2273 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2274 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
6a140a74 2275 INC_BOTH (from, from_byte);
f902a008 2276 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2277 if (from < stop && comstart_first
b7dbcc19 2278 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2279 other_syntax = SYNTAX_WITH_FLAGS (c1),
2280 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
b3cfe0c8 2281 {
7d0393cf 2282 /* We have encountered a comment start sequence and we
7fc8191e 2283 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2284 the comment style this sequence begins so that later,
2285 only a comment end of the same style actually ends
7fc8191e 2286 the comment section. */
b3cfe0c8 2287 code = Scomment;
c5683ceb
SM
2288 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2289 comnested
2290 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2291 INC_BOTH (from, from_byte);
95ff8dfc 2292 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2293 }
04882296 2294 }
abf8a9ff 2295 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2296
da71ab91
KH
2297 if (code == Scomment_fence)
2298 comstyle = ST_COMMENT_STYLE;
2299 else if (code != Scomment)
04882296
KH
2300 {
2301 immediate_quit = 0;
6a140a74 2302 DEC_BOTH (from, from_byte);
ef316cf0 2303 SET_PT_BOTH (from, from_byte);
04882296
KH
2304 return Qnil;
2305 }
2306 /* We're at the start of a comment. */
95ff8dfc
RS
2307 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2308 &out_charpos, &out_bytepos, &dummy);
2309 from = out_charpos; from_byte = out_bytepos;
2310 if (!found)
04882296 2311 {
95ff8dfc
RS
2312 immediate_quit = 0;
2313 SET_PT_BOTH (from, from_byte);
2314 return Qnil;
b3cfe0c8 2315 }
95ff8dfc
RS
2316 INC_BOTH (from, from_byte);
2317 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2318 /* We have skipped one comment. */
840f481c 2319 count1--;
b3cfe0c8
RS
2320 }
2321
840f481c 2322 while (count1 < 0)
b3cfe0c8 2323 {
b9145dbb 2324 while (1)
b3cfe0c8 2325 {
c5683ceb 2326 int quoted, syntax;
f902a008 2327
b9145dbb
RS
2328 if (from <= stop)
2329 {
6a140a74 2330 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2331 immediate_quit = 0;
2332 return Qnil;
2333 }
b3cfe0c8 2334
6a140a74 2335 DEC_BOTH (from, from_byte);
f902a008 2336 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2337 quoted = char_quoted (from, from_byte);
b7dbcc19 2338 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2339 syntax = SYNTAX_WITH_FLAGS (c);
b3cfe0c8
RS
2340 code = SYNTAX (c);
2341 comstyle = 0;
c5683ceb 2342 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2343 if (code == Sendcomment)
c5683ceb
SM
2344 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2345 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
f902a008 2346 && prev_char_comend_first (from, from_byte)
89c6809a 2347 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2348 {
c5683ceb 2349 int other_syntax;
7fc8191e 2350 /* We must record the comment style encountered so that
b3cfe0c8 2351 later, we can match only the proper comment begin
7fc8191e 2352 sequence of the same style. */
6a140a74 2353 DEC_BOTH (from, from_byte);
f902a008
RS
2354 code = Sendcomment;
2355 /* Calling char_quoted, above, set up global syntax position
2356 at the new value of FROM. */
b7dbcc19 2357 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb
SM
2358 other_syntax = SYNTAX_WITH_FLAGS (c1);
2359 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2360 comnested
2361 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
b3cfe0c8
RS
2362 }
2363
195d1361
RS
2364 if (code == Scomment_fence)
2365 {
2366 /* Skip until first preceding unquoted comment_fence. */
01f44d5a 2367 int fence_found = 0;
4f3a2f8d 2368 EMACS_INT ini = from, ini_byte = from_byte;
7d0393cf 2369
6a140a74 2370 while (1)
195d1361 2371 {
6a140a74 2372 DEC_BOTH (from, from_byte);
195d1361 2373 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2374 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2375 if (SYNTAX (c) == Scomment_fence
7d0393cf 2376 && !char_quoted (from, from_byte))
195d1361 2377 {
01f44d5a 2378 fence_found = 1;
195d1361
RS
2379 break;
2380 }
fcdd4585
SM
2381 else if (from == stop)
2382 break;
195d1361 2383 }
01f44d5a 2384 if (fence_found == 0)
195d1361
RS
2385 {
2386 from = ini; /* Set point to ini + 1. */
6a140a74 2387 from_byte = ini_byte;
195d1361
RS
2388 goto leave;
2389 }
39c41ad4
SM
2390 else
2391 /* We have skipped one comment. */
2392 break;
195d1361
RS
2393 }
2394 else if (code == Sendcomment)
b3cfe0c8 2395 {
95ff8dfc 2396 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2397 &out_charpos, &out_bytepos);
1aa963c8
SM
2398 if (found == -1)
2399 {
abf8a9ff
SM
2400 if (c == '\n')
2401 /* This end-of-line is not an end-of-comment.
2402 Treat it like a whitespace.
2403 CC-mode (and maybe others) relies on this behavior. */
2404 ;
2405 else
2406 {
2407 /* Failure: we should go back to the end of this
2408 not-quite-endcomment. */
c5683ceb 2409 if (SYNTAX (c) != code)
abf8a9ff
SM
2410 /* It was a two-char Sendcomment. */
2411 INC_BOTH (from, from_byte);
2412 goto leave;
2413 }
1aa963c8 2414 }
2b927d02 2415 else
abf8a9ff
SM
2416 {
2417 /* We have skipped one comment. */
2418 from = out_charpos, from_byte = out_bytepos;
2419 break;
2420 }
b3cfe0c8 2421 }
bb0de084 2422 else if (code != Swhitespace || quoted)
b3cfe0c8 2423 {
195d1361 2424 leave:
b3cfe0c8 2425 immediate_quit = 0;
6a140a74 2426 INC_BOTH (from, from_byte);
ef316cf0 2427 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2428 return Qnil;
2429 }
2430 }
2431
840f481c 2432 count1++;
b3cfe0c8
RS
2433 }
2434
ef316cf0 2435 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2436 immediate_quit = 0;
2437 return Qt;
2438}
2439\f
b7dbcc19 2440/* Return syntax code of character C if C is an ASCII character
db9f48c3 2441 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2442
b7dbcc19
KH
2443#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2444 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2445 ? SYNTAX (c) : Ssymbol)
2446
6a140a74 2447static Lisp_Object
971de7fb 2448scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
8489eb67
RS
2449{
2450 Lisp_Object val;
932e6895 2451 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2452 register int c, c1;
2453 int stringterm;
8489eb67
RS
2454 int quoted;
2455 int mathexit = 0;
93da5fff 2456 register enum syntaxcode code, temp_code;
195d1361 2457 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2458 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2459 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2460 EMACS_INT temp_pos;
2461 EMACS_INT last_good = from;
195d1361 2462 int found;
932e6895
SM
2463 EMACS_INT from_byte;
2464 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2465 int temp, dummy;
bd25db08 2466 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2467
2468 if (depth > 0) min_depth = 0;
2469
c3907a7e
RS
2470 if (from > ZV) from = ZV;
2471 if (from < BEGV) from = BEGV;
2472
2473 from_byte = CHAR_TO_BYTE (from);
2474
8489eb67
RS
2475 immediate_quit = 1;
2476 QUIT;
2477
195d1361 2478 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2479 while (count > 0)
2480 {
8489eb67
RS
2481 while (from < stop)
2482 {
c5683ceb 2483 int comstart_first, prefix, syntax, other_syntax;
195d1361 2484 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2485 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2486 syntax = SYNTAX_WITH_FLAGS (c);
bd25db08 2487 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
c5683ceb
SM
2488 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2489 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2490 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2491 prefix = SYNTAX_FLAGS_PREFIX (syntax);
7bf5e9e4
RS
2492 if (depth == min_depth)
2493 last_good = from;
6a140a74 2494 INC_BOTH (from, from_byte);
195d1361 2495 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2496 if (from < stop && comstart_first
327719ee 2497 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
c5683ceb
SM
2498 other_syntax = SYNTAX_WITH_FLAGS (c),
2499 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
8489eb67 2500 && parse_sexp_ignore_comments)
e5d4f4dc 2501 {
7d0393cf 2502 /* we have encountered a comment start sequence and we
195d1361 2503 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2504 the comment style this sequence begins so that later,
2505 only a comment end of the same style actually ends
2506 the comment section */
2507 code = Scomment;
c5683ceb
SM
2508 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2509 comnested
2510 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
6a140a74 2511 INC_BOTH (from, from_byte);
f902a008 2512 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2513 }
7d0393cf 2514
f902a008 2515 if (prefix)
8489eb67
RS
2516 continue;
2517
0220c518 2518 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2519 {
2520 case Sescape:
2521 case Scharquote:
8b8bf8e6
SM
2522 if (from == stop)
2523 goto lose;
6a140a74 2524 INC_BOTH (from, from_byte);
8489eb67
RS
2525 /* treat following character as a word constituent */
2526 case Sword:
2527 case Ssymbol:
2528 if (depth || !sexpflag) break;
195d1361 2529 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2530 while (from < stop)
2531 {
195d1361 2532 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2533
2534 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2535 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2536 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2537 switch (temp)
8489eb67
RS
2538 {
2539 case Scharquote:
2540 case Sescape:
6a140a74 2541 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2542 if (from == stop)
2543 goto lose;
8489eb67
RS
2544 break;
2545 case Sword:
2546 case Ssymbol:
2547 case Squote:
2548 break;
2549 default:
2550 goto done;
2551 }
6a140a74 2552 INC_BOTH (from, from_byte);
8489eb67
RS
2553 }
2554 goto done;
2555
195d1361 2556 case Scomment_fence:
95ff8dfc
RS
2557 comstyle = ST_COMMENT_STYLE;
2558 /* FALLTHROUGH */
2559 case Scomment:
8489eb67 2560 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2561 UPDATE_SYNTAX_TABLE_FORWARD (from);
2562 found = forw_comment (from, from_byte, stop,
2563 comnested, comstyle, 0,
2564 &out_charpos, &out_bytepos, &dummy);
2565 from = out_charpos, from_byte = out_bytepos;
2566 if (!found)
8489eb67 2567 {
95ff8dfc
RS
2568 if (depth == 0)
2569 goto done;
2570 goto lose;
8489eb67 2571 }
95ff8dfc
RS
2572 INC_BOTH (from, from_byte);
2573 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2574 break;
2575
2576 case Smath:
2577 if (!sexpflag)
2578 break;
b7dbcc19 2579 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2580 {
2581 INC_BOTH (from, from_byte);
2582 }
8489eb67
RS
2583 if (mathexit)
2584 {
2585 mathexit = 0;
2586 goto close1;
2587 }
2588 mathexit = 1;
2589
2590 case Sopen:
2591 if (!++depth) goto done;
2592 break;
2593
2594 case Sclose:
2595 close1:
2596 if (!--depth) goto done;
2597 if (depth < min_depth)
9471945b
KS
2598 xsignal3 (Qscan_error,
2599 build_string ("Containing expression ends prematurely"),
2600 make_number (last_good), make_number (from));
8489eb67
RS
2601 break;
2602
2603 case Sstring:
195d1361 2604 case Sstring_fence:
ef316cf0 2605 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2606 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2607 while (1)
2608 {
8b8bf8e6
SM
2609 if (from >= stop)
2610 goto lose;
195d1361 2611 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2612 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2613 if (code == Sstring
2614 ? (c == stringterm
2615 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2616 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2617 break;
7c7ca3d2
RS
2618
2619 /* Some compilers can't handle this inside the switch. */
bd25db08 2620 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2621 switch (temp)
8489eb67
RS
2622 {
2623 case Scharquote:
2624 case Sescape:
6a140a74 2625 INC_BOTH (from, from_byte);
8489eb67 2626 }
6a140a74 2627 INC_BOTH (from, from_byte);
8489eb67 2628 }
6a140a74 2629 INC_BOTH (from, from_byte);
8489eb67
RS
2630 if (!depth && sexpflag) goto done;
2631 break;
240c43e8
SM
2632 default:
2633 /* Ignore whitespace, punctuation, quote, endcomment. */
2634 break;
8489eb67
RS
2635 }
2636 }
2637
2638 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2639 if (depth)
2640 goto lose;
8489eb67
RS
2641
2642 immediate_quit = 0;
2643 return Qnil;
2644
2645 /* End of object reached */
2646 done:
2647 count--;
2648 }
2649
2650 while (count < 0)
2651 {
8489eb67
RS
2652 while (from > stop)
2653 {
c5683ceb 2654 int syntax;
6a140a74 2655 DEC_BOTH (from, from_byte);
195d1361 2656 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2657 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c5683ceb 2658 syntax= SYNTAX_WITH_FLAGS (c);
bd25db08 2659 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2660 if (depth == min_depth)
2661 last_good = from;
7fc8191e 2662 comstyle = 0;
c5683ceb 2663 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
7fc8191e 2664 if (code == Sendcomment)
c5683ceb
SM
2665 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2666 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
1674d9a2 2667 && prev_char_comend_first (from, from_byte)
8489eb67 2668 && parse_sexp_ignore_comments)
e5d4f4dc 2669 {
f902a008 2670 /* We must record the comment style encountered so that
e5d4f4dc 2671 later, we can match only the proper comment begin
f902a008 2672 sequence of the same style. */
01f44d5a 2673 int c2, other_syntax;
6a140a74 2674 DEC_BOTH (from, from_byte);
f902a008
RS
2675 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2676 code = Sendcomment;
01f44d5a
PE
2677 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2678 other_syntax = SYNTAX_WITH_FLAGS (c2);
c5683ceb
SM
2679 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2680 comnested
2681 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
e5d4f4dc 2682 }
7d0393cf 2683
9828a477 2684 /* Quoting turns anything except a comment-ender
e6365855 2685 into a word character. Note that this cannot be true
f902a008 2686 if we decremented FROM in the if-statement above. */
9828a477 2687 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2688 {
2689 DEC_BOTH (from, from_byte);
2690 code = Sword;
2691 }
c5683ceb 2692 else if (SYNTAX_FLAGS_PREFIX (syntax))
8489eb67
RS
2693 continue;
2694
9828a477 2695 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2696 {
2697 case Sword:
2698 case Ssymbol:
9828a477
KH
2699 case Sescape:
2700 case Scharquote:
8489eb67 2701 if (depth || !sexpflag) break;
195d1361
RS
2702 /* This word counts as a sexp; count object finished
2703 after passing it. */
8489eb67
RS
2704 while (from > stop)
2705 {
6a140a74 2706 temp_pos = from_byte;
4b4deea2 2707 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
ef316cf0
RS
2708 DEC_POS (temp_pos);
2709 else
2710 temp_pos--;
6a140a74 2711 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2712 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2713 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2714 /* Don't allow comment-end to be quoted. */
2715 if (temp_code == Sendcomment)
2716 goto done2;
6a140a74 2717 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2718 if (quoted)
93da5fff 2719 {
6a140a74 2720 DEC_BOTH (from, from_byte);
ef316cf0 2721 temp_pos = dec_bytepos (temp_pos);
6a140a74 2722 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2723 }
b7dbcc19 2724 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2725 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2726 if (! (quoted || temp_code == Sword
2727 || temp_code == Ssymbol
2728 || temp_code == Squote))
8489eb67 2729 goto done2;
6a140a74 2730 DEC_BOTH (from, from_byte);
8489eb67
RS
2731 }
2732 goto done2;
2733
2734 case Smath:
2735 if (!sexpflag)
2736 break;
ef316cf0 2737 temp_pos = dec_bytepos (from_byte);
6a140a74 2738 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2739 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2740 DEC_BOTH (from, from_byte);
8489eb67
RS
2741 if (mathexit)
2742 {
2743 mathexit = 0;
2744 goto open2;
2745 }
2746 mathexit = 1;
2747
2748 case Sclose:
2749 if (!++depth) goto done2;
2750 break;
2751
2752 case Sopen:
2753 open2:
2754 if (!--depth) goto done2;
2755 if (depth < min_depth)
9471945b
KS
2756 xsignal3 (Qscan_error,
2757 build_string ("Containing expression ends prematurely"),
2758 make_number (last_good), make_number (from));
8489eb67
RS
2759 break;
2760
2761 case Sendcomment:
2762 if (!parse_sexp_ignore_comments)
2763 break;
95ff8dfc 2764 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2765 &out_charpos, &out_bytepos);
1aa963c8
SM
2766 /* FIXME: if found == -1, then it really wasn't a comment-end.
2767 For single-char Sendcomment, we can't do much about it apart
2768 from skipping the char.
2769 For 2-char endcomments, we could try again, taking both
2770 chars as separate entities, but it's a lot of trouble
2771 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2772 if (found != -1)
2773 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2774 break;
2775
195d1361
RS
2776 case Scomment_fence:
2777 case Sstring_fence:
2778 while (1)
2779 {
8b8bf8e6
SM
2780 if (from == stop)
2781 goto lose;
6b61353c 2782 DEC_BOTH (from, from_byte);
195d1361 2783 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2784 if (!char_quoted (from, from_byte)
b7dbcc19 2785 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2786 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2787 break;
2788 }
2789 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2790 break;
7d0393cf 2791
8489eb67 2792 case Sstring:
b7dbcc19 2793 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2794 while (1)
2795 {
8b8bf8e6
SM
2796 if (from == stop)
2797 goto lose;
6b61353c
KH
2798 DEC_BOTH (from, from_byte);
2799 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2800 if (!char_quoted (from, from_byte)
2801 && (stringterm
2802 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2803 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2804 break;
8489eb67 2805 }
8489eb67
RS
2806 if (!depth && sexpflag) goto done2;
2807 break;
240c43e8
SM
2808 default:
2809 /* Ignore whitespace, punctuation, quote, endcomment. */
2810 break;
8489eb67
RS
2811 }
2812 }
2813
2814 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2815 if (depth)
2816 goto lose;
8489eb67
RS
2817
2818 immediate_quit = 0;
2819 return Qnil;
2820
2821 done2:
2822 count++;
2823 }
2824
2825
2826 immediate_quit = 0;
1e142fb7 2827 XSETFASTINT (val, from);
8489eb67
RS
2828 return val;
2829
2830 lose:
9471945b
KS
2831 xsignal3 (Qscan_error,
2832 build_string ("Unbalanced parentheses"),
2833 make_number (last_good), make_number (from));
8489eb67
RS
2834}
2835
8489eb67 2836DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2837 doc: /* Scan from character number FROM by COUNT lists.
2838Returns the character number of the position thus found.
2839
2840If DEPTH is nonzero, paren depth begins counting from that value,
2841only places where the depth in parentheses becomes zero
2842are candidates for stopping; COUNT such places are counted.
2843Thus, a positive value for DEPTH means go out levels.
2844
2845Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2846
2847If the beginning or end of (the accessible part of) the buffer is reached
2848and the depth is wrong, an error is signaled.
2849If the depth is right but the count is not used up, nil is returned. */)
5842a27b 2850 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
8489eb67 2851{
b7826503
PJ
2852 CHECK_NUMBER (from);
2853 CHECK_NUMBER (count);
2854 CHECK_NUMBER (depth);
8489eb67
RS
2855
2856 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2857}
2858
2859DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2860 doc: /* Scan from character number FROM by COUNT balanced expressions.
2861If COUNT is negative, scan backwards.
2862Returns the character number of the position thus found.
2863
2864Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2865
2866If the beginning or end of (the accessible part of) the buffer is reached
2867in the middle of a parenthetical grouping, an error is signaled.
2868If the beginning or end is reached between groupings
2869but before count is used up, nil is returned. */)
5842a27b 2870 (Lisp_Object from, Lisp_Object count)
8489eb67 2871{
b7826503
PJ
2872 CHECK_NUMBER (from);
2873 CHECK_NUMBER (count);
8489eb67
RS
2874
2875 return scan_lists (XINT (from), XINT (count), 0, 1);
2876}
2877
2878DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2879 0, 0, 0,
2880 doc: /* Move point backward over any number of chars with prefix syntax.
2881This includes chars with "quote" or "prefix" syntax (' or p). */)
5842a27b 2882 (void)
8489eb67 2883{
4f3a2f8d
EZ
2884 EMACS_INT beg = BEGV;
2885 EMACS_INT opoint = PT;
2886 EMACS_INT opoint_byte = PT_BYTE;
2887 EMACS_INT pos = PT;
2888 EMACS_INT pos_byte = PT_BYTE;
93da5fff 2889 int c;
93da5fff 2890
7d0393cf 2891 if (pos <= beg)
195d1361 2892 {
f902a008
RS
2893 SET_PT_BOTH (opoint, opoint_byte);
2894
2895 return Qnil;
195d1361 2896 }
8489eb67 2897
f902a008
RS
2898 SETUP_SYNTAX_TABLE (pos, -1);
2899
6a140a74
RS
2900 DEC_BOTH (pos, pos_byte);
2901
1fd3172d 2902 while (!char_quoted (pos, pos_byte)
195d1361 2903 /* Previous statement updates syntax table. */
b7dbcc19 2904 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2905 || SYNTAX_PREFIX (c)))
2906 {
1fd3172d
RS
2907 opoint = pos;
2908 opoint_byte = pos_byte;
2909
2910 if (pos + 1 > beg)
2911 DEC_BOTH (pos, pos_byte);
93da5fff 2912 }
8489eb67 2913
6a140a74 2914 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2915
2916 return Qnil;
2917}
2918\f
6a140a74 2919/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2920 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2921 and return a description of the state of the parse at END.
c81a3712 2922 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2923 If COMMENTSTOP is 1, stop at the start of a comment.
2924 If COMMENTSTOP is -1, stop at the start or end of a comment,
2925 after the beginning of a string, or after the end of a string. */
8489eb67 2926
340f92b5 2927static void
d5a3eaaf
AS
2928scan_sexps_forward (struct lisp_parse_state *stateptr,
2929 EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2930 int targetdepth, int stopbefore,
2931 Lisp_Object oldstate, int commentstop)
8489eb67
RS
2932{
2933 struct lisp_parse_state state;
2934
2935 register enum syntaxcode code;
95ff8dfc
RS
2936 int c1;
2937 int comnested;
8489eb67
RS
2938 struct level { int last, prev; };
2939 struct level levelstart[100];
2940 register struct level *curlevel = levelstart;
2941 struct level *endlevel = levelstart + 100;
8489eb67
RS
2942 register int depth; /* Paren depth of current scanning location.
2943 level - levelstart equals this except
2944 when the depth becomes negative. */
2945 int mindepth; /* Lowest DEPTH value seen. */
2946 int start_quoted = 0; /* Nonzero means starting after a char quote */
2947 Lisp_Object tem;
8e2911c2
AS
2948 EMACS_INT prev_from; /* Keep one character before FROM. */
2949 EMACS_INT prev_from_byte;
1fd3172d 2950 int prev_from_syntax;
195d1361
RS
2951 int boundary_stop = commentstop == -1;
2952 int nofence;
95ff8dfc 2953 int found;
60c86a83 2954 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2955 int temp;
93da5fff
KH
2956
2957 prev_from = from;
6a140a74
RS
2958 prev_from_byte = from_byte;
2959 if (from != BEGV)
2960 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2961
2962 /* Use this macro instead of `from++'. */
6a140a74
RS
2963#define INC_FROM \
2964do { prev_from = from; \
2965 prev_from_byte = from_byte; \
327719ee 2966 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2967 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2968 INC_BOTH (from, from_byte); \
3ceb4629
SM
2969 if (from < end) \
2970 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2971 } while (0)
8489eb67
RS
2972
2973 immediate_quit = 1;
2974 QUIT;
2975
265a9e55 2976 if (NILP (oldstate))
8489eb67
RS
2977 {
2978 depth = 0;
2979 state.instring = -1;
2980 state.incomment = 0;
195d1361
RS
2981 state.comstyle = 0; /* comment style a by default. */
2982 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2983 }
2984 else
2985 {
2986 tem = Fcar (oldstate);
265a9e55 2987 if (!NILP (tem))
8489eb67
RS
2988 depth = XINT (tem);
2989 else
2990 depth = 0;
2991
2992 oldstate = Fcdr (oldstate);
2993 oldstate = Fcdr (oldstate);
2994 oldstate = Fcdr (oldstate);
2995 tem = Fcar (oldstate);
195d1361 2996 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2997 state.instring = (!NILP (tem)
2998 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2999 : -1);
8489eb67
RS
3000
3001 oldstate = Fcdr (oldstate);
3002 tem = Fcar (oldstate);
e76f1c44
GM
3003 state.incomment = (!NILP (tem)
3004 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 3005 : 0);
8489eb67
RS
3006
3007 oldstate = Fcdr (oldstate);
3008 tem = Fcar (oldstate);
265a9e55 3009 start_quoted = !NILP (tem);
e5d4f4dc 3010
95ff8dfc 3011 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
3012 style a. If it is non-nil, we are in comment style b */
3013 oldstate = Fcdr (oldstate);
e5d4f4dc 3014 oldstate = Fcdr (oldstate);
195d1361 3015 tem = Fcar (oldstate);
c5683ceb
SM
3016 state.comstyle = (NILP (tem)
3017 ? 0
3018 : (EQ (tem, Qsyntax_table)
3019 ? ST_COMMENT_STYLE
3020 : INTEGERP (tem) ? XINT (tem) : 1));
195d1361 3021
e5d4f4dc 3022 oldstate = Fcdr (oldstate);
e5d4f4dc 3023 tem = Fcar (oldstate);
195d1361 3024 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
3025 oldstate = Fcdr (oldstate);
3026 tem = Fcar (oldstate);
3027 while (!NILP (tem)) /* >= second enclosing sexps. */
3028 {
3029 /* curlevel++->last ran into compiler bug on Apollo */
3030 curlevel->last = XINT (Fcar (tem));
3031 if (++curlevel == endlevel)
02010917 3032 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3033 curlevel->prev = -1;
3034 curlevel->last = -1;
3035 tem = Fcdr (tem);
3036 }
8489eb67
RS
3037 }
3038 state.quoted = 0;
3039 mindepth = depth;
3040
3041 curlevel->prev = -1;
3042 curlevel->last = -1;
3043
326b283b 3044 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3045 temp = FETCH_CHAR (prev_from_byte);
3046 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3047 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3048
195d1361 3049 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3050
326b283b
RS
3051 if (state.incomment)
3052 goto startincomment;
8489eb67
RS
3053 if (state.instring >= 0)
3054 {
195d1361 3055 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3056 if (start_quoted)
3057 goto startquotedinstring;
8489eb67
RS
3058 goto startinstring;
3059 }
326b283b
RS
3060 else if (start_quoted)
3061 goto startquoted;
1fd3172d 3062
8489eb67
RS
3063 while (from < end)
3064 {
c5683ceb 3065 int syntax;
93da5fff 3066 INC_FROM;
1fd3172d 3067 code = prev_from_syntax & 0xff;
4c920633 3068
c5d4b96f
SM
3069 if (from < end
3070 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3071 && (c1 = FETCH_CHAR (from_byte),
c5683ceb
SM
3072 syntax = SYNTAX_WITH_FLAGS (c1),
3073 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
c5d4b96f
SM
3074 /* Duplicate code to avoid a complex if-expression
3075 which causes trouble for the SGI compiler. */
95ff8dfc 3076 {
c5d4b96f
SM
3077 /* Record the comment style we have entered so that only
3078 the comment-end sequence of the same style actually
3079 terminates the comment section. */
c5683ceb
SM
3080 state.comstyle
3081 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
c5d4b96f 3082 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
c5683ceb 3083 comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
c5d4b96f 3084 state.incomment = comnested ? 1 : -1;
95ff8dfc 3085 state.comstr_start = prev_from;
c5d4b96f
SM
3086 INC_FROM;
3087 code = Scomment;
95ff8dfc 3088 }
4c920633 3089 else if (code == Scomment_fence)
e5d4f4dc
RS
3090 {
3091 /* Record the comment style we have entered so that only
3092 the comment-end sequence of the same style actually
3093 terminates the comment section. */
95ff8dfc
RS
3094 state.comstyle = ST_COMMENT_STYLE;
3095 state.incomment = -1;
195d1361 3096 state.comstr_start = prev_from;
e5d4f4dc 3097 code = Scomment;
e5d4f4dc 3098 }
c5d4b96f
SM
3099 else if (code == Scomment)
3100 {
c5683ceb 3101 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
c5d4b96f
SM
3102 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3103 1 : -1);
3104 state.comstr_start = prev_from;
3105 }
e5d4f4dc 3106
1fd3172d 3107 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3108 continue;
0220c518 3109 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3110 {
3111 case Sescape:
3112 case Scharquote:
3113 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3114 curlevel->last = prev_from;
8489eb67
RS
3115 startquoted:
3116 if (from == end) goto endquoted;
93da5fff 3117 INC_FROM;
8489eb67
RS
3118 goto symstarted;
3119 /* treat following character as a word constituent */
3120 case Sword:
3121 case Ssymbol:
3122 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3123 curlevel->last = prev_from;
8489eb67
RS
3124 symstarted:
3125 while (from < end)
3126 {
7c7ca3d2 3127 /* Some compilers can't handle this inside the switch. */
327719ee 3128 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3129 temp = SYNTAX (temp);
7c7ca3d2 3130 switch (temp)
8489eb67
RS
3131 {
3132 case Scharquote:
3133 case Sescape:
93da5fff 3134 INC_FROM;
8489eb67
RS
3135 if (from == end) goto endquoted;
3136 break;
3137 case Sword:
3138 case Ssymbol:
3139 case Squote:
3140 break;
3141 default:
3142 goto symdone;
3143 }
93da5fff 3144 INC_FROM;
8489eb67
RS
3145 }
3146 symdone:
3147 curlevel->prev = curlevel->last;
3148 break;
3149
240c43e8 3150 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3151 case Scomment:
195d1361 3152 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3153 startincomment:
3154 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3155 that we find a 2-char comment ender even if we start in the
e6365855 3156 middle of it. We don't want to do that if we're just at the
d355bd8a 3157 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3158 found = forw_comment (from, from_byte, end,
3159 state.incomment, state.comstyle,
e6365855
SM
3160 (from == BEGV || from < state.comstr_start + 3)
3161 ? 0 : prev_from_syntax,
95ff8dfc
RS
3162 &out_charpos, &out_bytepos, &state.incomment);
3163 from = out_charpos; from_byte = out_bytepos;
3164 /* Beware! prev_from and friends are invalid now.
3165 Luckily, the `done' doesn't use them and the INC_FROM
3166 sets them to a sane value without looking at them. */
3167 if (!found) goto done;
d355bd8a 3168 INC_FROM;
8489eb67 3169 state.incomment = 0;
e5d4f4dc 3170 state.comstyle = 0; /* reset the comment style */
195d1361 3171 if (boundary_stop) goto done;
8489eb67
RS
3172 break;
3173
3174 case Sopen:
3175 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3176 depth++;
3177 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3178 curlevel->last = prev_from;
8489eb67 3179 if (++curlevel == endlevel)
02010917 3180 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3181 curlevel->prev = -1;
3182 curlevel->last = -1;
30844415 3183 if (targetdepth == depth) goto done;
8489eb67
RS
3184 break;
3185
3186 case Sclose:
3187 depth--;
3188 if (depth < mindepth)
3189 mindepth = depth;
3190 if (curlevel != levelstart)
3191 curlevel--;
3192 curlevel->prev = curlevel->last;
30844415 3193 if (targetdepth == depth) goto done;
8489eb67
RS
3194 break;
3195
3196 case Sstring:
195d1361
RS
3197 case Sstring_fence:
3198 state.comstr_start = from - 1;
8489eb67 3199 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3200 curlevel->last = prev_from;
7d0393cf 3201 state.instring = (code == Sstring
b7dbcc19 3202 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3203 : ST_STRING_STYLE);
3204 if (boundary_stop) goto done;
8489eb67 3205 startinstring:
195d1361 3206 {
644ea4df 3207 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3208
644ea4df
RS
3209 while (1)
3210 {
3211 int c;
195d1361 3212
644ea4df 3213 if (from >= end) goto done;
b7dbcc19 3214 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3215 /* Some compilers can't handle this inside the switch. */
3216 temp = SYNTAX (c);
ccf89641
KH
3217
3218 /* Check TEMP here so that if the char has
3219 a syntax-table property which says it is NOT
3220 a string character, it does not end the string. */
3221 if (nofence && c == state.instring && temp == Sstring)
3222 break;
3223
7c7ca3d2 3224 switch (temp)
644ea4df
RS
3225 {
3226 case Sstring_fence:
3227 if (!nofence) goto string_end;
3228 break;
3229 case Scharquote:
3230 case Sescape:
3231 INC_FROM;
3232 startquotedinstring:
3233 if (from >= end) goto endquoted;
195d1361 3234 }
644ea4df
RS
3235 INC_FROM;
3236 }
195d1361
RS
3237 }
3238 string_end:
8489eb67
RS
3239 state.instring = -1;
3240 curlevel->prev = curlevel->last;
93da5fff 3241 INC_FROM;
195d1361 3242 if (boundary_stop) goto done;
8489eb67
RS
3243 break;
3244
3245 case Smath:
240c43e8
SM
3246 /* FIXME: We should do something with it. */
3247 break;
3248 default:
3249 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3250 break;
3251 }
3252 }
3253 goto done;
3254
3255 stop: /* Here if stopping before start of sexp. */
93da5fff 3256 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3257 goto done; /* but return the position before it. */
3258
3259 endquoted:
3260 state.quoted = 1;
3261 done:
3262 state.depth = depth;
3263 state.mindepth = mindepth;
3264 state.thislevelstart = curlevel->prev;
3265 state.prevlevelstart
3266 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3267 state.location = from;
1a102a58 3268 state.levelstarts = Qnil;
eb4d412d
PE
3269 while (curlevel > levelstart)
3270 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3271 state.levelstarts);
8489eb67
RS
3272 immediate_quit = 0;
3273
e5d4f4dc 3274 *stateptr = state;
8489eb67
RS
3275}
3276
c81a3712 3277DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3278 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3279Parsing stops at TO or when certain criteria are met;
3280 point is set to where parsing stops.
3281If fifth arg OLDSTATE is omitted or nil,
3282 parsing assumes that FROM is the beginning of a function.
0e6228bc 3283Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3284 0. depth in parens.
3285 1. character address of start of innermost containing list; nil if none.
3286 2. character address of start of last complete sexp terminated.
3287 3. non-nil if inside a string.
3288 (it is the character that will terminate the string,
3289 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3290 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3291 else an integer (the current comment nesting).
3292 5. t if following a quote character.
3293 6. the minimum paren-depth encountered during this scan.
c5683ceb 3294 7. style of comment, if any.
fdb82f93
PJ
3295 8. character address of start of comment or string; nil if not in one.
3296 9. Intermediate data for continuation of parsing (subject to change).
3297If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3298in parentheses becomes equal to TARGETDEPTH.
3299Fourth arg STOPBEFORE non-nil means stop when come to
3300 any character that starts a sexp.
0e6228bc 3301Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3302 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3303 and 8 are ignored.
fdb82f93
PJ
3304Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3305 If it is symbol `syntax-table', stop after the start of a comment or a
3306 string, or after end of a comment or a string. */)
5842a27b 3307 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
8489eb67
RS
3308{
3309 struct lisp_parse_state state;
3310 int target;
3311
265a9e55 3312 if (!NILP (targetdepth))
8489eb67 3313 {
b7826503 3314 CHECK_NUMBER (targetdepth);
8489eb67
RS
3315 target = XINT (targetdepth);
3316 }
3317 else
3318 target = -100000; /* We won't reach this depth */
3319
3320 validate_region (&from, &to);
6a140a74
RS
3321 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3322 XINT (to),
c81a3712 3323 target, !NILP (stopbefore), oldstate,
7d0393cf 3324 (NILP (commentstop)
195d1361 3325 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3326
3327 SET_PT (state.location);
7d0393cf 3328
8489eb67 3329 return Fcons (make_number (state.depth),
c5683ceb
SM
3330 Fcons (state.prevlevelstart < 0
3331 ? Qnil : make_number (state.prevlevelstart),
3332 Fcons (state.thislevelstart < 0
3333 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3334 Fcons (state.instring >= 0
3335 ? (state.instring == ST_STRING_STYLE
195d1361 3336 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3337 Fcons (state.incomment < 0 ? Qt :
3338 (state.incomment == 0 ? Qnil :
3339 make_number (state.incomment)),
8489eb67 3340 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3341 Fcons (make_number (state.mindepth),
7d0393cf 3342 Fcons ((state.comstyle
bff37d2a 3343 ? (state.comstyle == ST_COMMENT_STYLE
c5683ceb
SM
3344 ? Qsyntax_table
3345 : make_number (state.comstyle))
3346 : Qnil),
0beaf54f
DL
3347 Fcons (((state.incomment
3348 || (state.instring >= 0))
bff37d2a
RS
3349 ? make_number (state.comstr_start)
3350 : Qnil),
1a102a58 3351 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3352}
3353\f
dfcf069d 3354void
971de7fb 3355init_syntax_once (void)
8489eb67 3356{
78f9a1f7 3357 register int i, c;
8ea151b2 3358 Lisp_Object temp;
8489eb67 3359
5ebaddf5 3360 /* This has to be done here, before we call Fmake_char_table. */
d67b4f80 3361 Qsyntax_table = intern_c_string ("syntax-table");
5ebaddf5
RS
3362 staticpro (&Qsyntax_table);
3363
d67b4f80 3364 /* Intern_C_String this now in case it isn't already done.
5ebaddf5
RS
3365 Setting this variable twice is harmless.
3366 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 3367 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3368
93da5fff 3369 /* Create objects which can be shared among syntax tables. */
42ebfa31 3370 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3371 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3372 XVECTOR (Vsyntax_code_object)->contents[i]
3373 = Fcons (make_number (i), Qnil);
3374
5ebaddf5
RS
3375 /* Now we are ready to set up this property, so we can
3376 create syntax tables. */
3377 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3378
93da5fff 3379 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3380
5ebaddf5 3381 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3382
aa7b08b4
RS
3383 /* Control characters should not be whitespace. */
3384 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3385 for (i = 0; i <= ' ' - 1; i++)
3386 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3387 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3388
3389 /* Except that a few really are whitespace. */
3390 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3391 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3392 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3393 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3394 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3395 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3396
93da5fff 3397 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3398 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3399 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3400 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3401 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3402 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3403 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3404
3405 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3406 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3407
3408 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3409 Fcons (make_number (Sopen), make_number (')')));
3410 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3411 Fcons (make_number (Sclose), make_number ('(')));
3412 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3413 Fcons (make_number (Sopen), make_number (']')));
3414 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3415 Fcons (make_number (Sclose), make_number ('[')));
3416 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3417 Fcons (make_number (Sopen), make_number ('}')));
3418 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3419 Fcons (make_number (Sclose), make_number ('{')));
3420 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3421 Fcons (make_number ((int) Sstring), Qnil));
3422 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3423 Fcons (make_number ((int) Sescape), Qnil));
3424
93da5fff 3425 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3426 for (i = 0; i < 10; i++)
78f9a1f7
KH
3427 {
3428 c = "_-+*/&|<>="[i];
3429 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3430 }
8489eb67 3431
93da5fff 3432 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3433 for (i = 0; i < 12; i++)
78f9a1f7
KH
3434 {
3435 c = ".,;:?!#@~^'`"[i];
3436 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3437 }
bd25db08
KH
3438
3439 /* All multibyte characters have syntax `word' by default. */
3440 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3441 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3442}
3443
dfcf069d 3444void
971de7fb 3445syms_of_syntax (void)
8489eb67 3446{
d67b4f80 3447 Qsyntax_table_p = intern_c_string ("syntax-table-p");
8489eb67
RS
3448 staticpro (&Qsyntax_table_p);
3449
93da5fff
KH
3450 staticpro (&Vsyntax_code_object);
3451
a2994c46
KS
3452 staticpro (&gl_state.object);
3453 staticpro (&gl_state.global_code);
3454 staticpro (&gl_state.current_syntax_table);
3455 staticpro (&gl_state.old_prop);
3456
3457 /* Defined in regex.c */
3458 staticpro (&re_match_object);
3459
d67b4f80 3460 Qscan_error = intern_c_string ("scan-error");
7bf5e9e4
RS
3461 staticpro (&Qscan_error);
3462 Fput (Qscan_error, Qerror_conditions,
d67b4f80 3463 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
7bf5e9e4 3464 Fput (Qscan_error, Qerror_message,
d67b4f80 3465 make_pure_c_string ("Scan error"));
7bf5e9e4 3466
29208e82 3467 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
fdb82f93 3468 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3469
29208e82 3470 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
fdb82f93
PJ
3471 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3472Otherwise, that text property is simply ignored.
3473See the info node `(elisp)Syntax Properties' for a description of the
3474`syntax-table' property. */);
195d1361 3475
8489eb67 3476 words_include_escapes = 0;
29208e82 3477 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
fdb82f93 3478 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3479
29208e82 3480 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
fdb82f93 3481 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3482 multibyte_syntax_as_symbol = 0;
3483
f4ed767f 3484 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
29208e82 3485 open_paren_in_column_0_is_defun_start,
b222e415 3486 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3487 open_paren_in_column_0_is_defun_start = 1;
3488
8f924df7
KH
3489
3490 DEFVAR_LISP ("find-word-boundary-function-table",
29208e82 3491 Vfind_word_boundary_function_table,
869bb237 3492 doc: /*
8f924df7 3493Char table of functions to search for the word boundary.
869bb237
KH
3494Each function is called with two arguments; POS and LIMIT.
3495POS and LIMIT are character positions in the current buffer.
3496
3497If POS is less than LIMIT, POS is at the first character of a word,
3498and the return value of a function is a position after the last
3499character of that word.
3500
3501If POS is not less than LIMIT, POS is at the last character of a word,
3502and the return value of a function is a position at the first
3503character of that word.
3504
3505In both cases, LIMIT bounds the search. */);
8f924df7 3506 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3507
8489eb67
RS
3508 defsubr (&Ssyntax_table_p);
3509 defsubr (&Ssyntax_table);
3510 defsubr (&Sstandard_syntax_table);
3511 defsubr (&Scopy_syntax_table);
3512 defsubr (&Sset_syntax_table);
3513 defsubr (&Schar_syntax);
beefa22e 3514 defsubr (&Smatching_paren);
c65adb44 3515 defsubr (&Sstring_to_syntax);
8489eb67 3516 defsubr (&Smodify_syntax_entry);
62abe9cb 3517 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3518
3519 defsubr (&Sforward_word);
3520
195d1361
RS
3521 defsubr (&Sskip_chars_forward);
3522 defsubr (&Sskip_chars_backward);
3523 defsubr (&Sskip_syntax_forward);
3524 defsubr (&Sskip_syntax_backward);
3525
b3cfe0c8 3526 defsubr (&Sforward_comment);
8489eb67
RS
3527 defsubr (&Sscan_lists);
3528 defsubr (&Sscan_sexps);
3529 defsubr (&Sbackward_prefix_chars);
3530 defsubr (&Sparse_partial_sexp);
3531}