(Fformat): Fix calculation of text property positions
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
0b5538bd 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2002,
aaef169d 3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
a4275ad1 9the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
8489eb67
RS
21
22
18160b98 23#include <config.h>
8489eb67
RS
24#include <ctype.h>
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
93da5fff 28#include "charset.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
195d1361
RS
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 71 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int thislevelstart; /* Char number of most recent start-of-expression at current level */
75 int prevlevelstart; /* Char number of start of containing expression */
195d1361 76 int location; /* Char number at which parsing stopped. */
e5d4f4dc 77 int mindepth; /* Minimum depth seen while scanning. */
195d1361 78 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
79 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
80 of levels (starting from outermost). */
e5d4f4dc
RS
81 };
82\f
37bef230
RS
83/* These variables are a cache for finding the start of a defun.
84 find_start_pos is the place for which the defun start was found.
85 find_start_value is the defun start position found for it.
6a140a74 86 find_start_value_byte is the corresponding byte position.
37bef230
RS
87 find_start_buffer is the buffer it was found in.
88 find_start_begv is the BEGV value when it was found.
89 find_start_modiff is the value of MODIFF when it was found. */
90
91static int find_start_pos;
92static int find_start_value;
6a140a74 93static int find_start_value_byte;
37bef230
RS
94static struct buffer *find_start_buffer;
95static int find_start_begv;
96static int find_start_modiff;
6a140a74
RS
97
98
99static int find_defun_start P_ ((int, int));
932e6895
SM
100static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
101 EMACS_INT *, EMACS_INT *));
6a140a74 102static int char_quoted P_ ((int, int));
a1bc88d4 103static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
932e6895 104static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
6a140a74
RS
105static void scan_sexps_forward P_ ((struct lisp_parse_state *,
106 int, int, int, int,
107 int, Lisp_Object, int));
a1bc88d4 108static int in_classes P_ ((int, Lisp_Object));
195d1361
RS
109\f
110
111struct gl_state_s gl_state; /* Global state of syntax parser. */
112
113INTERVAL interval_of ();
114#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
115 to scan to property-change. */
116
6a140a74
RS
117/* Update gl_state to an appropriate interval which contains CHARPOS. The
118 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 119 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 120 valid at start, the rest is filled basing on OBJECT.
195d1361 121
6a140a74 122 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
123 direction than the intervals - or in an interval. We update the
124 current syntax-table basing on the property of this interval, and
6a140a74 125 update the interval to start further than CHARPOS - or be
195d1361 126 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 127 charpos to call this subroutine again - or be before/after the
195d1361
RS
128 start/end of OBJECT. */
129
130void
6a140a74
RS
131update_syntax_table (charpos, count, init, object)
132 int charpos, count, init;
195d1361
RS
133 Lisp_Object object;
134{
135 Lisp_Object tmp_table;
4ffe723b 136 int cnt = 0, invalidate = 1;
5a774522 137 INTERVAL i;
195d1361
RS
138
139 if (init)
140 {
bb0de084 141 gl_state.old_prop = Qnil;
195d1361
RS
142 gl_state.start = gl_state.b_property;
143 gl_state.stop = gl_state.e_property;
bb0de084
SM
144 i = interval_of (charpos, object);
145 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
146 invalidate = 0;
147 if (NULL_INTERVAL_P (i))
148 return;
f902a008 149 /* interval_of updates only ->position of the return value, so
d80f4cc7 150 update the parents manually to speed up update_interval. */
bb0de084 151 while (!NULL_PARENT (i))
d80f4cc7
RS
152 {
153 if (AM_RIGHT_CHILD (i))
439d5cb4 154 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 155 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
156 - TOTAL_LENGTH (INTERVAL_PARENT (i))
157 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 158 else
439d5cb4 159 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 160 + TOTAL_LENGTH (i);
439d5cb4 161 i = INTERVAL_PARENT (i);
d80f4cc7
RS
162 }
163 i = gl_state.forward_i;
bb0de084 164 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 165 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
166 goto update;
167 }
5a774522 168 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 169
423e705d 170 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 171 or further off. */
195d1361
RS
172 if (NULL_INTERVAL_P (i))
173 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 174 else if (charpos < i->position) /* Move left. */
195d1361
RS
175 {
176 if (count > 0)
f902a008 177 error ("Error in syntax_table logic for intervals <-");
195d1361 178 /* Update the interval. */
6a140a74 179 i = update_interval (i, charpos);
bb0de084 180 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
181 {
182 invalidate = 0;
195d1361 183 gl_state.forward_i = i;
ee0cdb48 184 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 185 }
423e705d 186 }
6a140a74 187 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
188 {
189 if (count < 0)
f902a008 190 error ("Error in syntax_table logic for intervals ->");
195d1361 191 /* Update the interval. */
6a140a74 192 i = update_interval (i, charpos);
bb0de084 193 if (i->position != gl_state.e_property)
195d1361
RS
194 {
195 invalidate = 0;
195d1361 196 gl_state.backward_i = i;
bb0de084 197 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
198 }
199 }
195d1361
RS
200
201 update:
202 tmp_table = textget (i->plist, Qsyntax_table);
203
204 if (invalidate)
205 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 206
423e705d
SM
207 if (invalidate) /* Did not get to adjacent interval. */
208 { /* with the same table => */
209 /* invalidate the old range. */
195d1361
RS
210 if (count > 0)
211 {
212 gl_state.backward_i = i;
bb0de084
SM
213 gl_state.b_property = i->position - gl_state.offset;
214 }
215 else
195d1361 216 {
bb0de084 217 gl_state.forward_i = i;
ee0cdb48 218 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
219 }
220 }
37bef230 221
bb0de084 222 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 223 {
bb0de084
SM
224 gl_state.current_syntax_table = tmp_table;
225 gl_state.old_prop = tmp_table;
226 if (EQ (Fsyntax_table_p (tmp_table), Qt))
227 {
228 gl_state.use_global = 0;
7d0393cf 229 }
bb0de084
SM
230 else if (CONSP (tmp_table))
231 {
232 gl_state.use_global = 1;
233 gl_state.global_code = tmp_table;
234 }
7d0393cf 235 else
bb0de084
SM
236 {
237 gl_state.use_global = 0;
238 gl_state.current_syntax_table = current_buffer->syntax_table;
239 }
195d1361
RS
240 }
241
242 while (!NULL_INTERVAL_P (i))
243 {
244 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
245 {
246 if (count > 0)
bb0de084
SM
247 {
248 gl_state.e_property = i->position - gl_state.offset;
249 gl_state.forward_i = i;
250 }
251 else
252 {
5a774522
SM
253 gl_state.b_property
254 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
255 gl_state.backward_i = i;
256 }
257 return;
195d1361 258 }
7d0393cf 259 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
260 {
261 if (count > 0)
bb0de084 262 {
5a774522
SM
263 gl_state.e_property
264 = i->position + LENGTH (i) - gl_state.offset
265 /* e_property at EOB is not set to ZV but to ZV+1, so that
266 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
267 having to check eob between the two. */
268 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
269 gl_state.forward_i = i;
270 }
271 else
272 {
273 gl_state.b_property = i->position - gl_state.offset;
274 gl_state.backward_i = i;
275 }
276 return;
195d1361
RS
277 }
278 cnt++;
279 i = count > 0 ? next_interval (i) : previous_interval (i);
280 }
bb0de084
SM
281 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
282 if (count > 0)
283 gl_state.e_property = gl_state.stop;
284 else
285 gl_state.b_property = gl_state.start;
195d1361
RS
286}
287\f
6a140a74
RS
288/* Returns TRUE if char at CHARPOS is quoted.
289 Global syntax-table data should be set up already to be good at CHARPOS
290 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
291
292static int
6a140a74
RS
293char_quoted (charpos, bytepos)
294 register int charpos, bytepos;
195d1361
RS
295{
296 register enum syntaxcode code;
297 register int beg = BEGV;
298 register int quoted = 0;
6a140a74
RS
299 int orig = charpos;
300
301 DEC_BOTH (charpos, bytepos);
195d1361 302
709a47ce 303 while (charpos >= beg)
195d1361 304 {
ab229fdd
AS
305 int c;
306
6a140a74 307 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
ab229fdd
AS
308 c = FETCH_CHAR (bytepos);
309 code = SYNTAX (c);
6a140a74
RS
310 if (! (code == Scharquote || code == Sescape))
311 break;
312
313 DEC_BOTH (charpos, bytepos);
314 quoted = !quoted;
195d1361 315 }
6a140a74
RS
316
317 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
318 return quoted;
319}
6a140a74
RS
320
321/* Return the bytepos one character after BYTEPOS.
322 We assume that BYTEPOS is not at the end of the buffer. */
323
324INLINE int
325inc_bytepos (bytepos)
326 int bytepos;
327{
ef316cf0
RS
328 if (NILP (current_buffer->enable_multibyte_characters))
329 return bytepos + 1;
330
6a140a74
RS
331 INC_POS (bytepos);
332 return bytepos;
333}
334
335/* Return the bytepos one character before BYTEPOS.
336 We assume that BYTEPOS is not at the start of the buffer. */
337
338INLINE int
339dec_bytepos (bytepos)
340 int bytepos;
341{
ef316cf0
RS
342 if (NILP (current_buffer->enable_multibyte_characters))
343 return bytepos - 1;
344
6a140a74
RS
345 DEC_POS (bytepos);
346 return bytepos;
347}
195d1361 348\f
7cc80f0a
RS
349/* Return a defun-start position before before POS and not too far before.
350 It should be the last one before POS, or nearly the last.
351
352 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 353 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
354
355 We record the information about where the scan started
356 and what its result was, so that another call in the same area
357 can return the same value very quickly.
195d1361
RS
358
359 There is no promise at which position the global syntax data is
360 valid on return from the subroutine, so the caller should explicitly
361 update the global data. */
37bef230
RS
362
363static int
6a140a74
RS
364find_defun_start (pos, pos_byte)
365 int pos, pos_byte;
37bef230 366{
6a140a74 367 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 368
1fd1cc2f
RS
369 if (!open_paren_in_column_0_is_defun_start)
370 {
371 find_start_value_byte = BEGV_BYTE;
372 return BEGV;
373 }
374
37bef230
RS
375 /* Use previous finding, if it's valid and applies to this inquiry. */
376 if (current_buffer == find_start_buffer
377 /* Reuse the defun-start even if POS is a little farther on.
378 POS might be in the next defun, but that's ok.
379 Our value may not be the best possible, but will still be usable. */
380 && pos <= find_start_pos + 1000
381 && pos >= find_start_value
382 && BEGV == find_start_begv
383 && MODIFF == find_start_modiff)
384 return find_start_value;
385
386 /* Back up to start of line. */
6a140a74 387 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 388
195d1361
RS
389 /* We optimize syntax-table lookup for rare updates. Thus we accept
390 only those `^\s(' which are good in global _and_ text-property
391 syntax-tables. */
392 gl_state.current_syntax_table = current_buffer->syntax_table;
393 gl_state.use_global = 0;
1fd1cc2f 394 while (PT > BEGV)
37bef230 395 {
ab229fdd
AS
396 int c;
397
1fd1cc2f
RS
398 /* Open-paren at start of line means we may have found our
399 defun-start. */
ab229fdd
AS
400 c = FETCH_CHAR (PT_BYTE);
401 if (SYNTAX (c) == Sopen)
195d1361 402 {
1fd1cc2f 403 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
ab229fdd
AS
404 c = FETCH_CHAR (PT_BYTE);
405 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
406 break;
407 /* Now fallback to the default value. */
408 gl_state.current_syntax_table = current_buffer->syntax_table;
409 gl_state.use_global = 0;
195d1361 410 }
1fd1cc2f
RS
411 /* Move to beg of previous line. */
412 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
413 }
414
415 /* Record what we found, for the next try. */
6a140a74
RS
416 find_start_value = PT;
417 find_start_value_byte = PT_BYTE;
37bef230
RS
418 find_start_buffer = current_buffer;
419 find_start_modiff = MODIFF;
420 find_start_begv = BEGV;
421 find_start_pos = pos;
422
6a140a74
RS
423 TEMP_SET_PT_BOTH (opoint, opoint_byte);
424
37bef230
RS
425 return find_start_value;
426}
427\f
f902a008
RS
428/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
429
430static int
431prev_char_comend_first (pos, pos_byte)
432 int pos, pos_byte;
433{
434 int c, val;
435
436 DEC_BOTH (pos, pos_byte);
437 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
438 c = FETCH_CHAR (pos_byte);
439 val = SYNTAX_COMEND_FIRST (c);
440 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
441 return val;
442}
443
444/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
445
3f679f55
SM
446/* static int
447 * prev_char_comstart_first (pos, pos_byte)
448 * int pos, pos_byte;
449 * {
450 * int c, val;
7d0393cf 451 *
3f679f55
SM
452 * DEC_BOTH (pos, pos_byte);
453 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
454 * c = FETCH_CHAR (pos_byte);
455 * val = SYNTAX_COMSTART_FIRST (c);
456 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
457 * return val;
458 * } */
f902a008 459
6a140a74
RS
460/* Checks whether charpos FROM is at the end of a comment.
461 FROM_BYTE is the bytepos corresponding to FROM.
462 Do not move back before STOP.
463
464 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
465 return -1 otherwise.
466
467 If successful, store the charpos of the comment's beginning
468 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
469
470 Global syntax data remains valid for backward search starting at
471 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
472
473static int
95ff8dfc 474back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
932e6895 475 EMACS_INT from, from_byte, stop;
95ff8dfc 476 int comnested, comstyle;
932e6895 477 EMACS_INT *charpos_ptr, *bytepos_ptr;
195d1361
RS
478{
479 /* Look back, counting the parity of string-quotes,
480 and recording the comment-starters seen.
481 When we reach a safe place, assume that's not in a string;
482 then step the main scan to the earliest comment-starter seen
483 an even number of string quotes away from the safe place.
484
485 OFROM[I] is position of the earliest comment-starter seen
486 which is I+2X quotes from the comment-end.
487 PARITY is current parity of quotes from the comment end. */
3ee5041c 488 int string_style = -1; /* Presumed outside of any string. */
195d1361 489 int string_lossage = 0;
f7c436c1 490 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 491 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
492 comment-starter we might see later could be a false positive (hidden
493 inside another comment).
494 Test case: { a (* b } c (* d *) */
495 int comment_lossage = 0;
195d1361 496 int comment_end = from;
6a140a74 497 int comment_end_byte = from_byte;
195d1361 498 int comstart_pos = 0;
6a140a74 499 int comstart_byte;
eb35b628
RS
500 /* Place where the containing defun starts,
501 or 0 if we didn't come across it yet. */
502 int defun_start = 0;
503 int defun_start_byte = 0;
195d1361 504 register enum syntaxcode code;
95ff8dfc 505 int nesting = 1; /* current comment nesting */
ea315ed6 506 int c;
3f679f55
SM
507 int syntax = 0;
508
509 /* FIXME: A }} comment-ender style leads to incorrect behavior
510 in the case of {{ c }}} because we ignore the last two chars which are
511 assumed to be comment-enders although they aren't. */
195d1361
RS
512
513 /* At beginning of range to scan, we're outside of strings;
514 that determines quote parity to the comment-end. */
515 while (from != stop)
516 {
3f679f55
SM
517 int temp_byte, prev_syntax;
518 int com2start, com2end;
6a140a74 519
195d1361 520 /* Move back and examine a character. */
6a140a74 521 DEC_BOTH (from, from_byte);
195d1361
RS
522 UPDATE_SYNTAX_TABLE_BACKWARD (from);
523
3f679f55 524 prev_syntax = syntax;
6a140a74 525 c = FETCH_CHAR (from_byte);
3f679f55 526 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
527 code = SYNTAX (c);
528
3f679f55
SM
529 /* Check for 2-char comment markers. */
530 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
531 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
532 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
533 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
534 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
535 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
536 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
537
538 /* Nasty cases with overlapping 2-char comment markers:
539 - snmp-mode: -- c -- foo -- c --
540 --- c --
541 ------ c --
542 - c-mode: *||*
543 |* *|* *|
544 |*| |* |*|
545 /// */
546
547 /* If a 2-char comment sequence partly overlaps with another,
548 we don't try to be clever. */
549 if (from > stop && (com2end || com2start))
195d1361 550 {
3f679f55
SM
551 int next = from, next_byte = from_byte, next_c, next_syntax;
552 DEC_BOTH (next, next_byte);
553 UPDATE_SYNTAX_TABLE_BACKWARD (next);
554 next_c = FETCH_CHAR (next_byte);
555 next_syntax = SYNTAX_WITH_FLAGS (next_c);
556 if (((com2start || comnested)
557 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
558 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
559 || ((com2end || comnested)
560 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
561 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
562 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
563 goto lossage;
564 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 565 }
3f679f55
SM
566
567 if (com2start && comstart_pos == 0)
568 /* We're looking at a comment starter. But it might be a comment
569 ender as well (see snmp-mode). The first time we see one, we
570 need to consider it as a comment starter,
571 and the subsequent times as a comment ender. */
572 com2end = 0;
573
574 /* Turn a 2-char comment sequences into the appropriate syntax. */
575 if (com2end)
576 code = Sendcomment;
577 else if (com2start)
578 code = Scomment;
579 /* Ignore comment starters of a different style. */
580 else if (code == Scomment
581 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
582 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 583 continue;
195d1361 584
9828a477
KH
585 /* Ignore escaped characters, except comment-enders. */
586 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
587 continue;
588
3ee5041c 589 switch (code)
195d1361 590 {
3ee5041c
SM
591 case Sstring_fence:
592 case Scomment_fence:
593 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
594 case Sstring:
595 /* Track parity of quotes. */
596 if (string_style == -1)
597 /* Entering a string. */
598 string_style = c;
599 else if (string_style == c)
600 /* Leaving the string. */
601 string_style = -1;
602 else
603 /* If we have two kinds of string delimiters.
604 There's no way to grok this scanning backwards. */
195d1361 605 string_lossage = 1;
3ee5041c 606 break;
7d0393cf 607
3ee5041c
SM
608 case Scomment:
609 /* We've already checked that it is the relevant comstyle. */
f7c436c1 610 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
611 /* There are odd string quotes involved, so let's be careful.
612 Test case in Pascal: " { " a { " } */
613 goto lossage;
614
f7c436c1
SM
615 if (!comnested)
616 {
617 /* Record best comment-starter so far. */
618 comstart_pos = from;
619 comstart_byte = from_byte;
620 }
621 else if (--nesting <= 0)
95ff8dfc
RS
622 /* nested comments have to be balanced, so we don't need to
623 keep looking for earlier ones. We use here the same (slightly
624 incorrect) reasoning as below: since it is followed by uniform
625 paired string quotes, this comment-start has to be outside of
626 strings, else the comment-end itself would be inside a string. */
627 goto done;
3ee5041c
SM
628 break;
629
02010917 630 case Sendcomment:
3f679f55 631 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 632 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 633 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
634 /* This is the same style of comment ender as ours. */
635 {
636 if (comnested)
637 nesting++;
638 else
639 /* Anything before that can't count because it would match
640 this comment-ender rather than ours. */
641 from = stop; /* Break out of the loop. */
642 }
f7c436c1
SM
643 else if (comstart_pos != 0 || c != '\n')
644 /* We're mixing comment styles here, so we'd better be careful.
645 The (comstart_pos != 0 || c != '\n') check is not quite correct
646 (we should just always set comment_lossage), but removing it
647 would imply that any multiline comment in C would go through
648 lossage, which seems overkill.
649 The failure should only happen in the rare cases such as
650 { (* } *) */
651 comment_lossage = 1;
02010917 652 break;
195d1361 653
02010917
SM
654 case Sopen:
655 /* Assume a defun-start point is outside of strings. */
656 if (open_paren_in_column_0_is_defun_start
657 && (from == stop
658 || (temp_byte = dec_bytepos (from_byte),
659 FETCH_CHAR (temp_byte) == '\n')))
660 {
661 defun_start = from;
662 defun_start_byte = from_byte;
663 from = stop; /* Break out of the loop. */
664 }
eb35b628 665 break;
02010917
SM
666
667 default:
f7c436c1 668 break;
eb35b628 669 }
195d1361
RS
670 }
671
672 if (comstart_pos == 0)
673 {
674 from = comment_end;
6a140a74 675 from_byte = comment_end_byte;
195d1361
RS
676 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
677 }
f7c436c1
SM
678 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
679 or `done'), then we've found the beginning of the non-nested comment. */
680 else if (1) /* !comnested */
195d1361
RS
681 {
682 from = comstart_pos;
6a140a74 683 from_byte = comstart_byte;
789f3320 684 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
685 }
686 else
687 {
3ee5041c
SM
688 struct lisp_parse_state state;
689 lossage:
195d1361
RS
690 /* We had two kinds of string delimiters mixed up
691 together. Decode this going forwards.
02010917 692 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
693 to the one in question; this records where we
694 last passed a comment starter. */
eb35b628
RS
695 /* If we did not already find the defun start, find it now. */
696 if (defun_start == 0)
697 {
698 defun_start = find_defun_start (comment_end, comment_end_byte);
699 defun_start_byte = find_start_value_byte;
700 }
02010917 701 do
195d1361 702 {
02010917
SM
703 scan_sexps_forward (&state,
704 defun_start, defun_start_byte,
705 comment_end, -10000, 0, Qnil, 0);
706 defun_start = comment_end;
707 if (state.incomment == (comnested ? 1 : -1)
708 && state.comstyle == comstyle)
709 from = state.comstr_start;
710 else
711 {
712 from = comment_end;
713 if (state.incomment)
714 /* If comment_end is inside some other comment, maybe ours
715 is nested, so we need to try again from within the
716 surrounding comment. Example: { a (* " *) */
717 {
718 /* FIXME: We should advance by one or two chars. */
719 defun_start = state.comstr_start + 2;
720 defun_start_byte = CHAR_TO_BYTE (defun_start);
721 }
722 }
723 } while (defun_start < comment_end);
724
6a140a74 725 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
726 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
727 }
7d0393cf 728
95ff8dfc 729 done:
6a140a74
RS
730 *charpos_ptr = from;
731 *bytepos_ptr = from_byte;
732
1aa963c8 733 return (from == comment_end) ? -1 : from;
195d1361
RS
734}
735\f
8489eb67 736DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
737 doc: /* Return t if OBJECT is a syntax table.
738Currently, any char-table counts as a syntax table. */)
739 (object)
2203e1e8 740 Lisp_Object object;
8489eb67 741{
2203e1e8 742 if (CHAR_TABLE_P (object)
e704cb4b 743 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
744 return Qt;
745 return Qnil;
746}
747
8ea151b2 748static void
8489eb67
RS
749check_syntax_table (obj)
750 Lisp_Object obj;
751{
47f5f6ae
KS
752 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
753 Qsyntax_table_p, obj);
7d0393cf 754}
8489eb67 755
8489eb67 756DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
757 doc: /* Return the current syntax table.
758This is the one specified by the current buffer. */)
759 ()
8489eb67
RS
760{
761 return current_buffer->syntax_table;
762}
763
764DEFUN ("standard-syntax-table", Fstandard_syntax_table,
765 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
766 doc: /* Return the standard syntax table.
767This is the one used for new buffers. */)
768 ()
8489eb67
RS
769{
770 return Vstandard_syntax_table;
771}
772
773DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
774 doc: /* Construct a new syntax table and return it.
775It is a copy of the TABLE, which defaults to the standard syntax table. */)
776 (table)
8489eb67
RS
777 Lisp_Object table;
778{
8ea151b2
RS
779 Lisp_Object copy;
780
265a9e55 781 if (!NILP (table))
8ea151b2
RS
782 check_syntax_table (table);
783 else
784 table = Vstandard_syntax_table;
785
786 copy = Fcopy_sequence (table);
0f867324
RS
787
788 /* Only the standard syntax table should have a default element.
789 Other syntax tables should inherit from parents instead. */
790 XCHAR_TABLE (copy)->defalt = Qnil;
791
792 /* Copied syntax tables should all have parents.
793 If we copied one with no parent, such as the standard syntax table,
794 use the standard syntax table as the copy's parent. */
795 if (NILP (XCHAR_TABLE (copy)->parent))
796 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 797 return copy;
8489eb67
RS
798}
799
800DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
801 doc: /* Select a new syntax table for the current buffer.
802One argument, a syntax table. */)
803 (table)
8489eb67
RS
804 Lisp_Object table;
805{
4e4dfa78 806 int idx;
8ea151b2 807 check_syntax_table (table);
8489eb67
RS
808 current_buffer->syntax_table = table;
809 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
810 idx = PER_BUFFER_VAR_IDX (syntax_table);
811 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
812 return table;
813}
814\f
815/* Convert a letter which signifies a syntax code
816 into the code it signifies.
195d1361 817 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
818
819unsigned char syntax_spec_code[0400] =
820 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 824 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
825 (char) Smath, 0377, 0377, (char) Squote,
826 (char) Sopen, (char) Sclose, 0377, 0377,
827 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
828 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
829 0377, 0377, 0377, 0377,
830 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 831 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
832 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
833 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
834 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
835 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
836 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
837 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 838 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
839 };
840
195d1361 841/* Indexed by syntax code, give the letter that describes it. */
8489eb67 842
195d1361 843char syntax_code_spec[16] =
8489eb67 844 {
195d1361
RS
845 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
846 '!', '|'
8489eb67 847 };
93da5fff
KH
848
849/* Indexed by syntax code, give the object (cons of syntax code and
850 nil) to be stored in syntax table. Since these objects can be
851 shared among syntax tables, we generate them in advance. By
852 sharing objects, the function `describe-syntax' can give a more
853 compact listing. */
854static Lisp_Object Vsyntax_code_object;
855
8489eb67 856\f
8ea151b2
RS
857/* Look up the value for CHARACTER in syntax table TABLE's parent
858 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
859 for CHARACTER. It's actually used only when not compiled with GCC. */
860
861Lisp_Object
862syntax_parent_lookup (table, character)
863 Lisp_Object table;
864 int character;
865{
866 Lisp_Object value;
867
868 while (1)
869 {
870 table = XCHAR_TABLE (table)->parent;
871 if (NILP (table))
872 return Qnil;
873
874 value = XCHAR_TABLE (table)->contents[character];
875 if (!NILP (value))
876 return value;
877 }
878}
879
8489eb67 880DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
881 doc: /* Return the syntax code of CHARACTER, described by a character.
882For example, if CHARACTER is a word constituent,
883the character `w' is returned.
884The characters that correspond to various syntax codes
885are listed in the documentation of `modify-syntax-entry'. */)
886 (character)
2203e1e8 887 Lisp_Object character;
8489eb67 888{
8ea151b2 889 int char_int;
195d1361
RS
890 gl_state.current_syntax_table = current_buffer->syntax_table;
891
892 gl_state.use_global = 0;
b7826503 893 CHECK_NUMBER (character);
2203e1e8 894 char_int = XINT (character);
8ea151b2 895 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
896}
897
898DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
899 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
900 (character)
2203e1e8 901 Lisp_Object character;
beefa22e 902{
8ea151b2 903 int char_int, code;
195d1361
RS
904 gl_state.current_syntax_table = current_buffer->syntax_table;
905 gl_state.use_global = 0;
b7826503 906 CHECK_NUMBER (character);
2203e1e8 907 char_int = XINT (character);
8ea151b2 908 code = SYNTAX (char_int);
a8bd7cd8 909 if (code == Sopen || code == Sclose)
2e34157c 910 return SYNTAX_MATCH (char_int);
beefa22e 911 return Qnil;
8489eb67
RS
912}
913
c65adb44 914DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
915 doc: /* Convert a syntax specification STRING into syntax cell form.
916STRING should be a string as it is allowed as argument of
917`modify-syntax-entry'. Value is the equivalent cons cell
918(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
919text property. */)
920 (string)
c65adb44 921 Lisp_Object string;
8489eb67 922{
2e567bd3 923 register const unsigned char *p;
8489eb67 924 register enum syntaxcode code;
247e20a8 925 int val;
8ea151b2 926 Lisp_Object match;
8489eb67 927
b7826503 928 CHECK_STRING (string);
8489eb67 929
d5db4077 930 p = SDATA (string);
8489eb67
RS
931 code = (enum syntaxcode) syntax_spec_code[*p++];
932 if (((int) code & 0377) == 0377)
b764a653 933 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 934
8ea151b2 935 if (code == Sinherit)
c65adb44 936 return Qnil;
8ea151b2
RS
937
938 if (*p)
d1be9f0f 939 {
93da5fff 940 int len;
7c19e8e1 941 int character = (STRING_CHAR_AND_LENGTH
d5db4077 942 (p, SBYTES (string) - 1, len));
93da5fff 943 XSETINT (match, character);
d1be9f0f
RS
944 if (XFASTINT (match) == ' ')
945 match = Qnil;
93da5fff 946 p += len;
d1be9f0f
RS
947 }
948 else
8ea151b2 949 match = Qnil;
8489eb67 950
8ea151b2 951 val = (int) code;
8489eb67
RS
952 while (*p)
953 switch (*p++)
954 {
955 case '1':
247e20a8 956 val |= 1 << 16;
8489eb67
RS
957 break;
958
959 case '2':
247e20a8 960 val |= 1 << 17;
8489eb67
RS
961 break;
962
963 case '3':
247e20a8 964 val |= 1 << 18;
8489eb67
RS
965 break;
966
967 case '4':
247e20a8 968 val |= 1 << 19;
8489eb67
RS
969 break;
970
971 case 'p':
247e20a8 972 val |= 1 << 20;
8489eb67 973 break;
e5d4f4dc
RS
974
975 case 'b':
247e20a8 976 val |= 1 << 21;
e5d4f4dc 977 break;
95ff8dfc
RS
978
979 case 'n':
980 val |= 1 << 22;
981 break;
8489eb67 982 }
7d0393cf 983
93da5fff 984 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 985 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
986 else
987 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
988 return Fcons (make_number (val), match);
989}
990
fdb82f93 991/* I really don't know why this is interactive
2c9e1900 992 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 993DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 994 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 995 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 996The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93
PJ
997 the current buffer's syntax table.
998The first character of NEWENTRY should be one of the following:
999 Space or - whitespace syntax. w word constituent.
1000 _ symbol constituent. . punctuation.
1001 ( open-parenthesis. ) close-parenthesis.
1002 " string quote. \\ escape.
1003 $ paired delimiter. ' expression quote or prefix operator.
1004 < comment starter. > comment ender.
1005 / character-quote. @ inherit from `standard-syntax-table'.
1006 | generic string fence. ! generic comment fence.
1007
1008Only single-character comment start and end sequences are represented thus.
1009Two-character sequences are represented as described below.
1010The second character of NEWENTRY is the matching parenthesis,
1011 used only if the first character is `(' or `)'.
1012Any additional characters are flags.
1013Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1014 1 means CHAR is the start of a two-char comment start sequence.
1015 2 means CHAR is the second character of such a sequence.
1016 3 means CHAR is the start of a two-char comment end sequence.
1017 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1018
1019There can be up to two orthogonal comment sequences. This is to support
1020language modes such as C++. By default, all comment sequences are of style
1021a, but you can set the comment sequence style to b (on the second character
1022of a comment-start, or the first character of a comment-end sequence) using
1023this flag:
a50a10a0
PJ
1024 b means CHAR is part of comment sequence b.
1025 n means CHAR is part of a nestable comment sequence.
fdb82f93 1026
a50a10a0 1027 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1028 such characters are treated as whitespace when they occur
a50a10a0
PJ
1029 between expressions.
1030usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1031 (c, newentry, syntax_table)
c65adb44
SM
1032 Lisp_Object c, newentry, syntax_table;
1033{
b7826503 1034 CHECK_NUMBER (c);
c65adb44
SM
1035
1036 if (NILP (syntax_table))
1037 syntax_table = current_buffer->syntax_table;
1038 else
1039 check_syntax_table (syntax_table);
8489eb67 1040
c65adb44 1041 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
8489eb67
RS
1042 return Qnil;
1043}
1044\f
1045/* Dump syntax table to buffer in human-readable format */
1046
62abe9cb
SM
1047DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1048 Sinternal_describe_syntax_value, 1, 1, 0,
1049 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1050 (syntax)
1051 Lisp_Object syntax;
8489eb67
RS
1052{
1053 register enum syntaxcode code;
e6365855 1054 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1055 char str[2];
62abe9cb 1056 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1057
8ea151b2
RS
1058 if (NILP (value))
1059 {
62abe9cb
SM
1060 insert_string ("default");
1061 return syntax;
8ea151b2
RS
1062 }
1063
908b7fea
KH
1064 if (CHAR_TABLE_P (value))
1065 {
62abe9cb
SM
1066 insert_string ("deeper char-table ...");
1067 return syntax;
908b7fea
KH
1068 }
1069
8ea151b2
RS
1070 if (!CONSP (value))
1071 {
62abe9cb
SM
1072 insert_string ("invalid");
1073 return syntax;
8ea151b2
RS
1074 }
1075
c1d497be
KR
1076 first = XCAR (value);
1077 match_lisp = XCDR (value);
8ea151b2
RS
1078
1079 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1080 {
62abe9cb
SM
1081 insert_string ("invalid");
1082 return syntax;
8489eb67
RS
1083 }
1084
e704cb4b 1085 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1086 start1 = (XINT (first) >> 16) & 1;
1087 start2 = (XINT (first) >> 17) & 1;
1088 end1 = (XINT (first) >> 18) & 1;
1089 end2 = (XINT (first) >> 19) & 1;
1090 prefix = (XINT (first) >> 20) & 1;
1091 comstyle = (XINT (first) >> 21) & 1;
e6365855 1092 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1093
1094 if ((int) code < 0 || (int) code >= (int) Smax)
1095 {
1096 insert_string ("invalid");
62abe9cb 1097 return syntax;
8489eb67
RS
1098 }
1099 desc = syntax_code_spec[(int) code];
1100
1101 str[0] = desc, str[1] = 0;
1102 insert (str, 1);
1103
93da5fff
KH
1104 if (NILP (match_lisp))
1105 insert (" ", 1);
1106 else
1107 insert_char (XINT (match_lisp));
8489eb67 1108
8489eb67
RS
1109 if (start1)
1110 insert ("1", 1);
1111 if (start2)
1112 insert ("2", 1);
1113
1114 if (end1)
1115 insert ("3", 1);
1116 if (end2)
1117 insert ("4", 1);
1118
1119 if (prefix)
1120 insert ("p", 1);
e5d4f4dc
RS
1121 if (comstyle)
1122 insert ("b", 1);
e6365855
SM
1123 if (comnested)
1124 insert ("n", 1);
8489eb67
RS
1125
1126 insert_string ("\twhich means: ");
1127
0220c518 1128 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1129 {
1130 case Swhitespace:
1131 insert_string ("whitespace"); break;
1132 case Spunct:
1133 insert_string ("punctuation"); break;
1134 case Sword:
1135 insert_string ("word"); break;
1136 case Ssymbol:
1137 insert_string ("symbol"); break;
1138 case Sopen:
1139 insert_string ("open"); break;
1140 case Sclose:
1141 insert_string ("close"); break;
1142 case Squote:
d671382e 1143 insert_string ("prefix"); break;
8489eb67
RS
1144 case Sstring:
1145 insert_string ("string"); break;
1146 case Smath:
1147 insert_string ("math"); break;
1148 case Sescape:
1149 insert_string ("escape"); break;
1150 case Scharquote:
1151 insert_string ("charquote"); break;
1152 case Scomment:
1153 insert_string ("comment"); break;
1154 case Sendcomment:
1155 insert_string ("endcomment"); break;
d671382e
SM
1156 case Sinherit:
1157 insert_string ("inherit"); break;
1158 case Scomment_fence:
1159 insert_string ("comment fence"); break;
1160 case Sstring_fence:
1161 insert_string ("string fence"); break;
8489eb67
RS
1162 default:
1163 insert_string ("invalid");
62abe9cb 1164 return syntax;
8489eb67
RS
1165 }
1166
8ea151b2 1167 if (!NILP (match_lisp))
8489eb67
RS
1168 {
1169 insert_string (", matches ");
8ea151b2 1170 insert_char (XINT (match_lisp));
8489eb67
RS
1171 }
1172
1173 if (start1)
1174 insert_string (",\n\t is the first character of a comment-start sequence");
1175 if (start2)
1176 insert_string (",\n\t is the second character of a comment-start sequence");
1177
1178 if (end1)
1179 insert_string (",\n\t is the first character of a comment-end sequence");
1180 if (end2)
1181 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1182 if (comstyle)
1183 insert_string (" (comment style b)");
e6365855
SM
1184 if (comnested)
1185 insert_string (" (nestable)");
e5d4f4dc 1186
8489eb67
RS
1187 if (prefix)
1188 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1189
62abe9cb
SM
1190 return syntax;
1191}
8489eb67 1192\f
195d1361
RS
1193int parse_sexp_ignore_comments;
1194
8489eb67
RS
1195/* Return the position across COUNT words from FROM.
1196 If that many words cannot be found before the end of the buffer, return 0.
1197 COUNT negative means scan backward and stop at word beginning. */
1198
dfcf069d 1199int
8489eb67
RS
1200scan_words (from, count)
1201 register int from, count;
1202{
1203 register int beg = BEGV;
1204 register int end = ZV;
6a140a74 1205 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1206 register enum syntaxcode code;
1207 int ch0, ch1;
8489eb67
RS
1208
1209 immediate_quit = 1;
1210 QUIT;
1211
195d1361
RS
1212 SETUP_SYNTAX_TABLE (from, count);
1213
8489eb67
RS
1214 while (count > 0)
1215 {
1216 while (1)
1217 {
1218 if (from == end)
1219 {
1220 immediate_quit = 0;
1221 return 0;
1222 }
195d1361 1223 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1224 ch0 = FETCH_CHAR (from_byte);
93da5fff 1225 code = SYNTAX (ch0);
6a140a74 1226 INC_BOTH (from, from_byte);
8489eb67
RS
1227 if (words_include_escapes
1228 && (code == Sescape || code == Scharquote))
1229 break;
1230 if (code == Sword)
1231 break;
8489eb67 1232 }
93da5fff
KH
1233 /* Now CH0 is a character which begins a word and FROM is the
1234 position of the next character. */
8489eb67
RS
1235 while (1)
1236 {
1237 if (from == end) break;
195d1361 1238 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1239 ch1 = FETCH_CHAR (from_byte);
93da5fff 1240 code = SYNTAX (ch1);
8489eb67
RS
1241 if (!(words_include_escapes
1242 && (code == Sescape || code == Scharquote)))
93da5fff 1243 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1244 break;
6a140a74 1245 INC_BOTH (from, from_byte);
93da5fff 1246 ch0 = ch1;
8489eb67
RS
1247 }
1248 count--;
1249 }
1250 while (count < 0)
1251 {
1252 while (1)
1253 {
1254 if (from == beg)
1255 {
1256 immediate_quit = 0;
1257 return 0;
1258 }
6a140a74 1259 DEC_BOTH (from, from_byte);
195d1361 1260 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1261 ch1 = FETCH_CHAR (from_byte);
93da5fff 1262 code = SYNTAX (ch1);
8489eb67
RS
1263 if (words_include_escapes
1264 && (code == Sescape || code == Scharquote))
1265 break;
1266 if (code == Sword)
1267 break;
8489eb67 1268 }
93da5fff
KH
1269 /* Now CH1 is a character which ends a word and FROM is the
1270 position of it. */
8489eb67
RS
1271 while (1)
1272 {
6a140a74
RS
1273 int temp_byte;
1274
1275 if (from == beg)
1276 break;
1277 temp_byte = dec_bytepos (from_byte);
195d1361 1278 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1279 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1280 code = SYNTAX (ch0);
8489eb67
RS
1281 if (!(words_include_escapes
1282 && (code == Sescape || code == Scharquote)))
93da5fff 1283 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1284 break;
6a140a74 1285 DEC_BOTH (from, from_byte);
93da5fff 1286 ch1 = ch0;
8489eb67
RS
1287 }
1288 count++;
1289 }
1290
1291 immediate_quit = 0;
1292
1293 return from;
1294}
1295
29e16385 1296DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
fdb82f93
PJ
1297 doc: /* Move point forward ARG words (backward if ARG is negative).
1298Normally returns t.
1299If an edge of the buffer or a field boundary is reached, point is left there
1300and the function returns nil. Field boundaries are not noticed if
1301`inhibit-field-text-motion' is non-nil. */)
29e16385
RS
1302 (arg)
1303 Lisp_Object arg;
8489eb67 1304{
c096ae4d 1305 Lisp_Object tmp;
2c6ea900 1306 int orig_val, val;
8489eb67 1307
29e16385
RS
1308 if (NILP (arg))
1309 XSETFASTINT (arg, 1);
1310 else
1311 CHECK_NUMBER (arg);
1312
1313 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1314 if (! orig_val)
29e16385 1315 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1316
b8855607 1317 /* Avoid jumping out of an input field. */
c096ae4d
SM
1318 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1319 Qt, Qnil, Qnil);
1320 val = XFASTINT (tmp);
7d0393cf 1321
8489eb67 1322 SET_PT (val);
e6d8341f 1323 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1324}
1325\f
195d1361
RS
1326Lisp_Object skip_chars ();
1327
1328DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1329 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1330STRING is like the inside of a `[...]' in a regular expression
1331except that `]' is never special and `\\' quotes `^', `-' or `\\'
1332 (but not as the end of a range; quoting is never needed there).
1333Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1334With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1335Char classes, e.g. `[:alpha:]', are supported.
1336
1337Returns the distance traveled, either zero or positive. */)
fdb82f93 1338 (string, lim)
195d1361
RS
1339 Lisp_Object string, lim;
1340{
a1bc88d4 1341 return skip_chars (1, 0, string, lim, 1);
195d1361
RS
1342}
1343
1344DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1345 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1346See `skip-chars-forward' for details.
1347Returns the distance traveled, either zero or negative. */)
1348 (string, lim)
195d1361
RS
1349 Lisp_Object string, lim;
1350{
a1bc88d4 1351 return skip_chars (0, 0, string, lim, 1);
195d1361
RS
1352}
1353
1354DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1355 doc: /* Move point forward across chars in specified syntax classes.
1356SYNTAX is a string of syntax code characters.
1357Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1358If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1359This function returns the distance traveled, either zero or positive. */)
1360 (syntax, lim)
195d1361
RS
1361 Lisp_Object syntax, lim;
1362{
a1bc88d4 1363 return skip_chars (1, 1, syntax, lim, 0);
195d1361
RS
1364}
1365
1366DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1367 doc: /* Move point backward across chars in specified syntax classes.
1368SYNTAX is a string of syntax code characters.
1369Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1370If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1371This function returns the distance traveled, either zero or negative. */)
1372 (syntax, lim)
195d1361
RS
1373 Lisp_Object syntax, lim;
1374{
a1bc88d4 1375 return skip_chars (0, 1, syntax, lim, 0);
195d1361
RS
1376}
1377
6a140a74 1378static Lisp_Object
a1bc88d4 1379skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
195d1361
RS
1380 int forwardp, syntaxp;
1381 Lisp_Object string, lim;
a1bc88d4 1382 int handle_iso_classes;
195d1361 1383{
195d1361
RS
1384 register unsigned int c;
1385 unsigned char fastmap[0400];
7e68b0ea 1386 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1387 of which codes don't fit in FASTMAP. In that case, set the
1388 ranges of characters in CHAR_RANGES. */
5df4982e 1389 int *char_ranges;
7e68b0ea 1390 int n_char_ranges = 0;
195d1361 1391 int negate = 0;
4101e6fe 1392 register int i, i_byte;
7e68b0ea 1393 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1394 int string_multibyte;
1395 int size_byte;
2e567bd3 1396 const unsigned char *str;
82d497fc 1397 int len;
a1bc88d4 1398 Lisp_Object iso_classes;
195d1361 1399
b7826503 1400 CHECK_STRING (string);
d5db4077 1401 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1674d9a2 1402 string_multibyte = STRING_MULTIBYTE (string);
d5db4077
KR
1403 str = SDATA (string);
1404 size_byte = SBYTES (string);
a1bc88d4 1405 iso_classes = Qnil;
195d1361 1406
82d497fc
KH
1407 /* Adjust the multibyteness of the string to that of the buffer. */
1408 if (multibyte != string_multibyte)
1409 {
1410 int nbytes;
1411
1412 if (multibyte)
d5db4077
KR
1413 nbytes = count_size_as_multibyte (SDATA (string),
1414 SCHARS (string));
82d497fc 1415 else
d5db4077 1416 nbytes = SCHARS (string);
82d497fc
KH
1417 if (nbytes != size_byte)
1418 {
2e567bd3
KR
1419 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1420 copy_text (SDATA (string), tmp, size_byte,
82d497fc
KH
1421 string_multibyte, multibyte);
1422 size_byte = nbytes;
2e567bd3 1423 str = tmp;
82d497fc
KH
1424 }
1425 }
1426
195d1361
RS
1427 if (NILP (lim))
1428 XSETINT (lim, forwardp ? ZV : BEGV);
1429 else
b7826503 1430 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1431
1432 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1433 if (XINT (lim) > ZV)
1434 XSETFASTINT (lim, ZV);
1435 if (XINT (lim) < BEGV)
1436 XSETFASTINT (lim, BEGV);
1437
195d1361
RS
1438 bzero (fastmap, sizeof fastmap);
1439
82d497fc 1440 i_byte = 0;
4101e6fe 1441
13090112 1442 if (i_byte < size_byte
d5db4077 1443 && SREF (string, 0) == '^')
195d1361 1444 {
82d497fc 1445 negate = 1; i_byte++;
195d1361
RS
1446 }
1447
1448 /* Find the characters specified and set their elements of fastmap.
1449 If syntaxp, each character counts as itself.
1450 Otherwise, handle backslashes and ranges specially. */
1451
13090112 1452 while (i_byte < size_byte)
195d1361 1453 {
82d497fc
KH
1454 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1455 i_byte += len;
4101e6fe 1456
195d1361 1457 if (syntaxp)
4101e6fe 1458 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1459 else
1460 {
a1bc88d4
RS
1461 if (handle_iso_classes && c == '['
1462 && i_byte < size_byte
1463 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1464 {
1465 const unsigned char *class_beg = str + i_byte + 1;
1466 const unsigned char *class_end = class_beg;
b3bda4fd 1467 const unsigned char *class_limit = str + size_byte - 2;
a1bc88d4
RS
1468 /* Leave room for the null. */
1469 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1470 re_wctype_t cc;
1471
1472 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1473 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1474
b3bda4fd
KS
1475 while (class_end < class_limit
1476 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1477 class_end++;
1478
b3bda4fd
KS
1479 if (class_end == class_beg
1480 || *class_end != ':' || class_end[1] != ']')
1481 goto not_a_class_name;
a1bc88d4
RS
1482
1483 bcopy (class_beg, class_name, class_end - class_beg);
1484 class_name[class_end - class_beg] = 0;
1485
1486 cc = re_wctype (class_name);
1487 if (cc == 0)
1488 error ("Invalid ISO C character class");
1489
1490 iso_classes = Fcons (make_number (cc), iso_classes);
1491
1492 i_byte = class_end + 2 - str;
1493 continue;
1494 }
1495
b3bda4fd 1496 not_a_class_name:
195d1361
RS
1497 if (c == '\\')
1498 {
13090112 1499 if (i_byte == size_byte)
4101e6fe
RS
1500 break;
1501
57951c2e
AS
1502 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1503 size_byte - i_byte, len);
82d497fc 1504 i_byte += len;
195d1361 1505 }
57951c2e
AS
1506 /* Treat `-' as range character only if another character
1507 follows. */
1508 if (i_byte + 1 < size_byte
82d497fc 1509 && str[i_byte] == '-')
195d1361 1510 {
9690d026 1511 unsigned int c2;
4101e6fe
RS
1512
1513 /* Skip over the dash. */
82d497fc 1514 i_byte++;
4101e6fe 1515
4101e6fe 1516 /* Get the end of the range. */
57951c2e
AS
1517 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1518 size_byte - i_byte, len);
82d497fc 1519 i_byte += len;
7e68b0ea 1520
4101e6fe 1521 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1522 {
1523 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1524 {
7ae339a7
KH
1525 /* Handle a range starting with a character of
1526 less than 256, and ending with a character of
1527 not less than 256. Split that into two
1528 ranges, the low one ending at 0377, and the
1529 high one starting at the smallest character
1530 in the charset of C2 and ending at C2. */
e39091b6
KH
1531 int charset = CHAR_CHARSET (c2);
1532 int c1 = MAKE_CHAR (charset, 0, 0);
1533
e39091b6
KH
1534 char_ranges[n_char_ranges++] = c1;
1535 char_ranges[n_char_ranges++] = c2;
465cf84c 1536 c2 = 0377;
e39091b6 1537 }
06274af5
KH
1538 while (c <= c2)
1539 {
1540 fastmap[c] = 1;
1541 c++;
1542 }
1543 }
9690d026 1544 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1545 {
9690d026
KH
1546 char_ranges[n_char_ranges++] = c;
1547 char_ranges[n_char_ranges++] = c2;
195d1361 1548 }
195d1361
RS
1549 }
1550 else
7e68b0ea 1551 {
e39091b6
KH
1552 if (SINGLE_BYTE_CHAR_P (c))
1553 fastmap[c] = 1;
1554 else
7e68b0ea 1555 {
4101e6fe
RS
1556 char_ranges[n_char_ranges++] = c;
1557 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1558 }
1559 }
195d1361
RS
1560 }
1561 }
1562
9690d026 1563 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1564 if (negate)
1565 for (i = 0; i < sizeof fastmap; i++)
9690d026 1566 fastmap[i] ^= 1;
195d1361
RS
1567
1568 {
1569 int start_point = PT;
1570 int pos = PT;
6a140a74 1571 int pos_byte = PT_BYTE;
9af7511a
KH
1572 unsigned char *p = PT_ADDR, *endp, *stop;
1573
1574 if (forwardp)
1575 {
1576 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1577 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1578 }
1579 else
1580 {
1581 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1582 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1583 }
195d1361
RS
1584
1585 immediate_quit = 1;
1586 if (syntaxp)
1587 {
1588 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1589 if (forwardp)
1590 {
7e68b0ea 1591 if (multibyte)
9af7511a
KH
1592 while (1)
1593 {
1594 int nbytes;
1595
1596 if (p >= stop)
ee0cdb48 1597 {
9af7511a
KH
1598 if (p >= endp)
1599 break;
1600 p = GAP_END_ADDR;
1601 stop = endp;
ee0cdb48 1602 }
9af7511a
KH
1603 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1604 if (! fastmap[(int) SYNTAX (c)])
1605 break;
1606 p += nbytes, pos++, pos_byte += nbytes;
1607 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1608 }
7e68b0ea 1609 else
9af7511a
KH
1610 while (1)
1611 {
1612 if (p >= stop)
1613 {
1614 if (p >= endp)
1615 break;
1616 p = GAP_END_ADDR;
1617 stop = endp;
1618 }
1619 if (! fastmap[(int) SYNTAX (*p)])
1620 break;
1621 p++, pos++;
1622 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1623 }
195d1361
RS
1624 }
1625 else
1626 {
7e68b0ea 1627 if (multibyte)
9af7511a
KH
1628 while (1)
1629 {
1630 unsigned char *prev_p;
1631 int nbytes;
1632
1633 if (p <= stop)
1634 {
1635 if (p <= endp)
7e68b0ea 1636 break;
9af7511a
KH
1637 p = GPT_ADDR;
1638 stop = endp;
1639 }
1640 prev_p = p;
1641 while (--p >= stop && ! CHAR_HEAD_P (*p));
1642 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1643 if (prev_p - p > nbytes)
1644 p = prev_p - 1, c = *p, nbytes = 1;
1645 else
1646 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1647 pos--, pos_byte -= nbytes;
1648 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1649 if (! fastmap[(int) SYNTAX (c)])
1650 {
1651 pos++;
1652 pos_byte += nbytes;
1653 break;
1654 }
1655 }
7e68b0ea 1656 else
9af7511a
KH
1657 while (1)
1658 {
1659 if (p <= stop)
ee0cdb48 1660 {
9af7511a 1661 if (p <= endp)
4c8535eb 1662 break;
9af7511a
KH
1663 p = GPT_ADDR;
1664 stop = endp;
ee0cdb48 1665 }
9af7511a
KH
1666 if (! fastmap[(int) SYNTAX (p[-1])])
1667 break;
1668 p--, pos--;
1669 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1670 }
195d1361
RS
1671 }
1672 }
1673 else
1674 {
1675 if (forwardp)
1676 {
7e68b0ea 1677 if (multibyte)
9af7511a 1678 while (1)
7e68b0ea 1679 {
9af7511a
KH
1680 int nbytes;
1681
1682 if (p >= stop)
1683 {
1684 if (p >= endp)
1685 break;
1686 p = GAP_END_ADDR;
1687 stop = endp;
1688 }
1689 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
a1bc88d4
RS
1690
1691 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1692 {
1693 if (negate)
1694 break;
1695 else
1696 goto fwd_ok;
1697 }
1698
9690d026 1699 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1700 {
9690d026
KH
1701 if (!fastmap[c])
1702 break;
1703 }
1704 else
1705 {
1706 /* If we are looking at a multibyte character,
1707 we must look up the character in the table
1708 CHAR_RANGES. If there's no data in the
1709 table, that character is not what we want to
1710 skip. */
1711
e39091b6
KH
1712 /* The following code do the right thing even if
1713 n_char_ranges is zero (i.e. no data in
1714 CHAR_RANGES). */
7e68b0ea 1715 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1716 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1717 break;
1718 if (!(negate ^ (i < n_char_ranges)))
1719 break;
7e68b0ea 1720 }
a1bc88d4 1721 fwd_ok:
9af7511a 1722 p += nbytes, pos++, pos_byte += nbytes;
7e68b0ea
RS
1723 }
1724 else
9af7511a
KH
1725 while (1)
1726 {
1727 if (p >= stop)
1728 {
1729 if (p >= endp)
1730 break;
1731 p = GAP_END_ADDR;
1732 stop = endp;
1733 }
a1bc88d4
RS
1734
1735 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1736 {
1737 if (negate)
1738 break;
1739 else
1ad6e4db 1740 goto fwd_unibyte_ok;
a1bc88d4
RS
1741 }
1742
4e837cd0 1743 if (!fastmap[*p])
9af7511a 1744 break;
1ad6e4db 1745 fwd_unibyte_ok:
9af7511a
KH
1746 p++, pos++;
1747 }
195d1361
RS
1748 }
1749 else
1750 {
7e68b0ea 1751 if (multibyte)
9af7511a 1752 while (1)
7e68b0ea 1753 {
9af7511a
KH
1754 unsigned char *prev_p;
1755 int nbytes;
e39091b6 1756
9af7511a
KH
1757 if (p <= stop)
1758 {
1759 if (p <= endp)
1760 break;
1761 p = GPT_ADDR;
1762 stop = endp;
1763 }
1764 prev_p = p;
1765 while (--p >= stop && ! CHAR_HEAD_P (*p));
1766 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1767 if (prev_p - p > nbytes)
1768 p = prev_p - 1, c = *p, nbytes = 1;
1769 else
1770 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
a1bc88d4
RS
1771
1772 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1773 {
1774 if (negate)
1775 break;
1776 else
1777 goto back_ok;
1778 }
1779
9690d026
KH
1780 if (SINGLE_BYTE_CHAR_P (c))
1781 {
1782 if (!fastmap[c])
1783 break;
1784 }
1785 else
7e68b0ea 1786 {
9690d026 1787 /* See the comment in the previous similar code. */
e39091b6
KH
1788 for (i = 0; i < n_char_ranges; i += 2)
1789 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1790 break;
1791 if (!(negate ^ (i < n_char_ranges)))
1792 break;
7e68b0ea 1793 }
a1bc88d4 1794 back_ok:
9af7511a 1795 pos--, pos_byte -= nbytes;
7e68b0ea
RS
1796 }
1797 else
9af7511a
KH
1798 while (1)
1799 {
1800 if (p <= stop)
1801 {
1802 if (p <= endp)
1803 break;
1804 p = GPT_ADDR;
1805 stop = endp;
1806 }
a1bc88d4
RS
1807
1808 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1809 {
1810 if (negate)
1811 break;
1812 else
1ad6e4db 1813 goto back_unibyte_ok;
a1bc88d4
RS
1814 }
1815
4e837cd0 1816 if (!fastmap[p[-1]])
9af7511a 1817 break;
1ad6e4db 1818 back_unibyte_ok:
9af7511a
KH
1819 p--, pos--;
1820 }
195d1361
RS
1821 }
1822 }
7e68b0ea 1823
6a140a74
RS
1824#if 0 /* Not needed now that a position in mid-character
1825 cannot be specified in Lisp. */
7e68b0ea
RS
1826 if (multibyte
1827 /* INC_POS or DEC_POS might have moved POS over LIM. */
1828 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1829 pos = XINT (lim);
6a140a74 1830#endif
7e68b0ea 1831
6a140a74
RS
1832 if (! multibyte)
1833 pos_byte = pos;
1834
1835 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1836 immediate_quit = 0;
1837
1838 return make_number (PT - start_point);
1839 }
1840}
a1bc88d4
RS
1841
1842/* Return 1 if character C belongs to one of the ISO classes
1843 in the list ISO_CLASSES. Each class is represented by an
1844 integer which is its type according to re_wctype. */
1845
1846static int
1847in_classes (c, iso_classes)
1848 int c;
1849 Lisp_Object iso_classes;
1850{
1851 int fits_class = 0;
1852
1853 while (! NILP (iso_classes))
1854 {
1855 Lisp_Object elt;
1856 elt = XCAR (iso_classes);
1857 iso_classes = XCDR (iso_classes);
1858
1859 if (re_iswctype (c, XFASTINT (elt)))
1860 fits_class = 1;
1861 }
1862
1863 return fits_class;
1864}
195d1361 1865\f
95ff8dfc
RS
1866/* Jump over a comment, assuming we are at the beginning of one.
1867 FROM is the current position.
1868 FROM_BYTE is the bytepos corresponding to FROM.
1869 Do not move past STOP (a charpos).
1870 The comment over which we have to jump is of style STYLE
1871 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1872 NESTING should be positive to indicate the nesting at the beginning
1873 for nested comments and should be zero or negative else.
1874 ST_COMMENT_STYLE cannot be nested.
1875 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1876 (or 0 If the search cannot start in the middle of a two-character).
1877
1878 If successful, return 1 and store the charpos of the comment's end
1879 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1880 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1881 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1882 (as defined for state.incomment) in *INCOMMENT_PTR.
1883
1884 The comment end is the last character of the comment rather than the
1885 character just after the comment.
1886
1887 Global syntax data is assumed to initially be valid for FROM and
1888 remains valid for forward search starting at the returned position. */
1889
1890static int
1891forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1892 charpos_ptr, bytepos_ptr, incomment_ptr)
a0aa7fcf 1893 EMACS_INT from, from_byte, stop;
95ff8dfc 1894 int nesting, style, prev_syntax;
a0aa7fcf
SM
1895 EMACS_INT *charpos_ptr, *bytepos_ptr;
1896 int *incomment_ptr;
95ff8dfc
RS
1897{
1898 register int c, c1;
1899 register enum syntaxcode code;
1900 register int syntax;
1901
1902 if (nesting <= 0) nesting = -1;
1903
1904 /* Enter the loop in the middle so that we find
1905 a 2-char comment ender if we start in the middle of it. */
1906 syntax = prev_syntax;
1907 if (syntax != 0) goto forw_incomment;
1908
1909 while (1)
1910 {
1911 if (from == stop)
1912 {
1913 *incomment_ptr = nesting;
1914 *charpos_ptr = from;
1915 *bytepos_ptr = from_byte;
1916 return 0;
1917 }
1918 c = FETCH_CHAR (from_byte);
1919 syntax = SYNTAX_WITH_FLAGS (c);
1920 code = syntax & 0xff;
1921 if (code == Sendcomment
1922 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1923 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1924 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1925 /* we have encountered a comment end of the same style
1926 as the comment sequence which began this comment
1927 section */
1928 break;
1929 if (code == Scomment_fence
1930 && style == ST_COMMENT_STYLE)
1931 /* we have encountered a comment end of the same style
1932 as the comment sequence which began this comment
1933 section. */
1934 break;
1935 if (nesting > 0
1936 && code == Scomment
abf8a9ff 1937 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1938 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1939 /* we have encountered a nested comment of the same style
1940 as the comment sequence which began this comment section */
1941 nesting++;
1942 INC_BOTH (from, from_byte);
1943 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 1944
95ff8dfc
RS
1945 forw_incomment:
1946 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1947 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1948 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1949 SYNTAX_COMEND_SECOND (c1))
1950 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1951 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1952 {
1953 if (--nesting <= 0)
1954 /* we have encountered a comment end of the same style
1955 as the comment sequence which began this comment
1956 section */
1957 break;
1958 else
1959 {
1960 INC_BOTH (from, from_byte);
1961 UPDATE_SYNTAX_TABLE_FORWARD (from);
1962 }
1963 }
95ff8dfc
RS
1964 if (nesting > 0
1965 && from < stop
1966 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1967 && (c1 = FETCH_CHAR (from_byte),
1968 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1969 && SYNTAX_COMSTART_SECOND (c1))
1970 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1971 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1972 /* we have encountered a nested comment of the same style
1973 as the comment sequence which began this comment
1974 section */
1975 {
1976 INC_BOTH (from, from_byte);
1977 UPDATE_SYNTAX_TABLE_FORWARD (from);
1978 nesting++;
1979 }
1980 }
1981 *charpos_ptr = from;
1982 *bytepos_ptr = from_byte;
1983 return 1;
1984}
1985
b3cfe0c8 1986DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 1987 doc: /*
ee648c11 1988Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
1989Stop scanning if we find something other than a comment or whitespace.
1990Set point to where scanning stops.
ee648c11 1991If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
1992between them, return t; otherwise return nil. */)
1993 (count)
840f481c 1994 Lisp_Object count;
b3cfe0c8 1995{
2312c580
SM
1996 register EMACS_INT from;
1997 EMACS_INT from_byte;
1998 register EMACS_INT stop;
8ea151b2 1999 register int c, c1;
b3cfe0c8
RS
2000 register enum syntaxcode code;
2001 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2002 int comnested = 0; /* whether the comment is nestable or not */
be720845 2003 int found;
2312c580
SM
2004 EMACS_INT count1;
2005 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2006 int dummy;
840f481c 2007
b7826503 2008 CHECK_NUMBER (count);
840f481c 2009 count1 = XINT (count);
195d1361 2010 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2011
2012 immediate_quit = 1;
2013 QUIT;
2014
2015 from = PT;
6a140a74 2016 from_byte = PT_BYTE;
b3cfe0c8 2017
195d1361 2018 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2019 while (count1 > 0)
b3cfe0c8 2020 {
04882296 2021 do
b3cfe0c8 2022 {
f902a008
RS
2023 int comstart_first;
2024
04882296
KH
2025 if (from == stop)
2026 {
ef316cf0 2027 SET_PT_BOTH (from, from_byte);
b7e6e612 2028 immediate_quit = 0;
04882296
KH
2029 return Qnil;
2030 }
6a140a74 2031 c = FETCH_CHAR (from_byte);
b3cfe0c8 2032 code = SYNTAX (c);
f902a008 2033 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2034 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2035 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2036 INC_BOTH (from, from_byte);
f902a008 2037 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2038 if (from < stop && comstart_first
6a140a74 2039 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 2040 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2041 {
7d0393cf 2042 /* We have encountered a comment start sequence and we
7fc8191e 2043 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2044 the comment style this sequence begins so that later,
2045 only a comment end of the same style actually ends
7fc8191e 2046 the comment section. */
b3cfe0c8 2047 code = Scomment;
8ea151b2 2048 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2049 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2050 INC_BOTH (from, from_byte);
95ff8dfc 2051 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2052 }
04882296 2053 }
abf8a9ff 2054 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2055
da71ab91
KH
2056 if (code == Scomment_fence)
2057 comstyle = ST_COMMENT_STYLE;
2058 else if (code != Scomment)
04882296
KH
2059 {
2060 immediate_quit = 0;
6a140a74 2061 DEC_BOTH (from, from_byte);
ef316cf0 2062 SET_PT_BOTH (from, from_byte);
04882296
KH
2063 return Qnil;
2064 }
2065 /* We're at the start of a comment. */
95ff8dfc
RS
2066 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2067 &out_charpos, &out_bytepos, &dummy);
2068 from = out_charpos; from_byte = out_bytepos;
2069 if (!found)
04882296 2070 {
95ff8dfc
RS
2071 immediate_quit = 0;
2072 SET_PT_BOTH (from, from_byte);
2073 return Qnil;
b3cfe0c8 2074 }
95ff8dfc
RS
2075 INC_BOTH (from, from_byte);
2076 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2077 /* We have skipped one comment. */
840f481c 2078 count1--;
b3cfe0c8
RS
2079 }
2080
840f481c 2081 while (count1 < 0)
b3cfe0c8 2082 {
b9145dbb 2083 while (1)
b3cfe0c8 2084 {
c65adb44 2085 int quoted;
f902a008 2086
b9145dbb
RS
2087 if (from <= stop)
2088 {
6a140a74 2089 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2090 immediate_quit = 0;
2091 return Qnil;
2092 }
b3cfe0c8 2093
6a140a74 2094 DEC_BOTH (from, from_byte);
f902a008 2095 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2096 quoted = char_quoted (from, from_byte);
6a140a74 2097 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
2098 code = SYNTAX (c);
2099 comstyle = 0;
95ff8dfc 2100 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2101 if (code == Sendcomment)
2102 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2103 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2104 && prev_char_comend_first (from, from_byte)
89c6809a 2105 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2106 {
7fc8191e 2107 /* We must record the comment style encountered so that
b3cfe0c8 2108 later, we can match only the proper comment begin
7fc8191e 2109 sequence of the same style. */
6a140a74 2110 DEC_BOTH (from, from_byte);
f902a008
RS
2111 code = Sendcomment;
2112 /* Calling char_quoted, above, set up global syntax position
2113 at the new value of FROM. */
95ff8dfc
RS
2114 c1 = FETCH_CHAR (from_byte);
2115 comstyle = SYNTAX_COMMENT_STYLE (c1);
2116 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2117 }
2118
195d1361
RS
2119 if (code == Scomment_fence)
2120 {
2121 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2122 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2123
6a140a74 2124 while (1)
195d1361 2125 {
6a140a74 2126 DEC_BOTH (from, from_byte);
195d1361 2127 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2128 c = FETCH_CHAR (from_byte);
2129 if (SYNTAX (c) == Scomment_fence
7d0393cf 2130 && !char_quoted (from, from_byte))
195d1361 2131 {
7d0393cf 2132 found = 1;
195d1361
RS
2133 break;
2134 }
fcdd4585
SM
2135 else if (from == stop)
2136 break;
195d1361
RS
2137 }
2138 if (found == 0)
2139 {
2140 from = ini; /* Set point to ini + 1. */
6a140a74 2141 from_byte = ini_byte;
195d1361
RS
2142 goto leave;
2143 }
39c41ad4
SM
2144 else
2145 /* We have skipped one comment. */
2146 break;
195d1361
RS
2147 }
2148 else if (code == Sendcomment)
b3cfe0c8 2149 {
95ff8dfc 2150 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2151 &out_charpos, &out_bytepos);
1aa963c8
SM
2152 if (found == -1)
2153 {
abf8a9ff
SM
2154 if (c == '\n')
2155 /* This end-of-line is not an end-of-comment.
2156 Treat it like a whitespace.
2157 CC-mode (and maybe others) relies on this behavior. */
2158 ;
2159 else
2160 {
2161 /* Failure: we should go back to the end of this
2162 not-quite-endcomment. */
2163 if (SYNTAX(c) != code)
2164 /* It was a two-char Sendcomment. */
2165 INC_BOTH (from, from_byte);
2166 goto leave;
2167 }
1aa963c8 2168 }
2b927d02 2169 else
abf8a9ff
SM
2170 {
2171 /* We have skipped one comment. */
2172 from = out_charpos, from_byte = out_bytepos;
2173 break;
2174 }
b3cfe0c8 2175 }
bb0de084 2176 else if (code != Swhitespace || quoted)
b3cfe0c8 2177 {
195d1361 2178 leave:
b3cfe0c8 2179 immediate_quit = 0;
6a140a74 2180 INC_BOTH (from, from_byte);
ef316cf0 2181 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2182 return Qnil;
2183 }
2184 }
2185
840f481c 2186 count1++;
b3cfe0c8
RS
2187 }
2188
ef316cf0 2189 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2190 immediate_quit = 0;
2191 return Qt;
2192}
2193\f
bd25db08 2194/* Return syntax code of character C if C is a single byte character
db9f48c3 2195 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2196
2197#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2198 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2199 ? SYNTAX (c) : Ssymbol)
2200
6a140a74 2201static Lisp_Object
8489eb67 2202scan_lists (from, count, depth, sexpflag)
932e6895
SM
2203 register EMACS_INT from;
2204 EMACS_INT count, depth;
2205 int sexpflag;
8489eb67
RS
2206{
2207 Lisp_Object val;
932e6895 2208 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2209 register int c, c1;
2210 int stringterm;
8489eb67
RS
2211 int quoted;
2212 int mathexit = 0;
93da5fff 2213 register enum syntaxcode code, temp_code;
195d1361 2214 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2215 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2216 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2217 EMACS_INT temp_pos;
2218 EMACS_INT last_good = from;
195d1361 2219 int found;
932e6895
SM
2220 EMACS_INT from_byte;
2221 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2222 int temp, dummy;
bd25db08 2223 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2224
2225 if (depth > 0) min_depth = 0;
2226
c3907a7e
RS
2227 if (from > ZV) from = ZV;
2228 if (from < BEGV) from = BEGV;
2229
2230 from_byte = CHAR_TO_BYTE (from);
2231
8489eb67
RS
2232 immediate_quit = 1;
2233 QUIT;
2234
195d1361 2235 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2236 while (count > 0)
2237 {
8489eb67
RS
2238 while (from < stop)
2239 {
f902a008 2240 int comstart_first, prefix;
195d1361 2241 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2242 c = FETCH_CHAR (from_byte);
bd25db08 2243 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2244 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2245 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2246 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2247 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2248 if (depth == min_depth)
2249 last_good = from;
6a140a74 2250 INC_BOTH (from, from_byte);
195d1361 2251 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2252 if (from < stop && comstart_first
ab229fdd 2253 && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
8489eb67 2254 && parse_sexp_ignore_comments)
e5d4f4dc 2255 {
7d0393cf 2256 /* we have encountered a comment start sequence and we
195d1361 2257 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2258 the comment style this sequence begins so that later,
2259 only a comment end of the same style actually ends
2260 the comment section */
2261 code = Scomment;
95ff8dfc
RS
2262 c1 = FETCH_CHAR (from_byte);
2263 comstyle = SYNTAX_COMMENT_STYLE (c1);
2264 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2265 INC_BOTH (from, from_byte);
f902a008 2266 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2267 }
7d0393cf 2268
f902a008 2269 if (prefix)
8489eb67
RS
2270 continue;
2271
0220c518 2272 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2273 {
2274 case Sescape:
2275 case Scharquote:
2276 if (from == stop) goto lose;
6a140a74 2277 INC_BOTH (from, from_byte);
8489eb67
RS
2278 /* treat following character as a word constituent */
2279 case Sword:
2280 case Ssymbol:
2281 if (depth || !sexpflag) break;
195d1361 2282 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2283 while (from < stop)
2284 {
195d1361 2285 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2286
2287 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2288 c = FETCH_CHAR (from_byte);
2289 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2290 switch (temp)
8489eb67
RS
2291 {
2292 case Scharquote:
2293 case Sescape:
6a140a74 2294 INC_BOTH (from, from_byte);
8489eb67
RS
2295 if (from == stop) goto lose;
2296 break;
2297 case Sword:
2298 case Ssymbol:
2299 case Squote:
2300 break;
2301 default:
2302 goto done;
2303 }
6a140a74 2304 INC_BOTH (from, from_byte);
8489eb67
RS
2305 }
2306 goto done;
2307
195d1361 2308 case Scomment_fence:
95ff8dfc
RS
2309 comstyle = ST_COMMENT_STYLE;
2310 /* FALLTHROUGH */
2311 case Scomment:
8489eb67 2312 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2313 UPDATE_SYNTAX_TABLE_FORWARD (from);
2314 found = forw_comment (from, from_byte, stop,
2315 comnested, comstyle, 0,
2316 &out_charpos, &out_bytepos, &dummy);
2317 from = out_charpos, from_byte = out_bytepos;
2318 if (!found)
8489eb67 2319 {
95ff8dfc
RS
2320 if (depth == 0)
2321 goto done;
2322 goto lose;
8489eb67 2323 }
95ff8dfc
RS
2324 INC_BOTH (from, from_byte);
2325 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2326 break;
2327
2328 case Smath:
2329 if (!sexpflag)
2330 break;
6a140a74
RS
2331 if (from != stop && c == FETCH_CHAR (from_byte))
2332 {
2333 INC_BOTH (from, from_byte);
2334 }
8489eb67
RS
2335 if (mathexit)
2336 {
2337 mathexit = 0;
2338 goto close1;
2339 }
2340 mathexit = 1;
2341
2342 case Sopen:
2343 if (!++depth) goto done;
2344 break;
2345
2346 case Sclose:
2347 close1:
2348 if (!--depth) goto done;
2349 if (depth < min_depth)
7bf5e9e4
RS
2350 Fsignal (Qscan_error,
2351 Fcons (build_string ("Containing expression ends prematurely"),
2352 Fcons (make_number (last_good),
2353 Fcons (make_number (from), Qnil))));
8489eb67
RS
2354 break;
2355
2356 case Sstring:
195d1361 2357 case Sstring_fence:
ef316cf0 2358 temp_pos = dec_bytepos (from_byte);
93da5fff 2359 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2360 while (1)
2361 {
2362 if (from >= stop) goto lose;
195d1361 2363 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2364 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2365 if (code == Sstring
2366 ? (c == stringterm
2367 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2368 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2369 break;
7c7ca3d2
RS
2370
2371 /* Some compilers can't handle this inside the switch. */
bd25db08 2372 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2373 switch (temp)
8489eb67
RS
2374 {
2375 case Scharquote:
2376 case Sescape:
6a140a74 2377 INC_BOTH (from, from_byte);
8489eb67 2378 }
6a140a74 2379 INC_BOTH (from, from_byte);
8489eb67 2380 }
6a140a74 2381 INC_BOTH (from, from_byte);
8489eb67
RS
2382 if (!depth && sexpflag) goto done;
2383 break;
240c43e8
SM
2384 default:
2385 /* Ignore whitespace, punctuation, quote, endcomment. */
2386 break;
8489eb67
RS
2387 }
2388 }
2389
2390 /* Reached end of buffer. Error if within object, return nil if between */
2391 if (depth) goto lose;
2392
2393 immediate_quit = 0;
2394 return Qnil;
2395
2396 /* End of object reached */
2397 done:
2398 count--;
2399 }
2400
2401 while (count < 0)
2402 {
8489eb67
RS
2403 while (from > stop)
2404 {
6a140a74 2405 DEC_BOTH (from, from_byte);
195d1361 2406 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2407 c = FETCH_CHAR (from_byte);
bd25db08 2408 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2409 if (depth == min_depth)
2410 last_good = from;
7fc8191e 2411 comstyle = 0;
95ff8dfc 2412 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2413 if (code == Sendcomment)
2414 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2415 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2416 && prev_char_comend_first (from, from_byte)
8489eb67 2417 && parse_sexp_ignore_comments)
e5d4f4dc 2418 {
f902a008 2419 /* We must record the comment style encountered so that
e5d4f4dc 2420 later, we can match only the proper comment begin
f902a008 2421 sequence of the same style. */
6a140a74 2422 DEC_BOTH (from, from_byte);
f902a008
RS
2423 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2424 code = Sendcomment;
95ff8dfc
RS
2425 c1 = FETCH_CHAR (from_byte);
2426 comstyle = SYNTAX_COMMENT_STYLE (c1);
2427 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2428 }
7d0393cf 2429
9828a477 2430 /* Quoting turns anything except a comment-ender
e6365855 2431 into a word character. Note that this cannot be true
f902a008 2432 if we decremented FROM in the if-statement above. */
9828a477 2433 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2434 {
2435 DEC_BOTH (from, from_byte);
2436 code = Sword;
2437 }
9828a477 2438 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2439 continue;
2440
9828a477 2441 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2442 {
2443 case Sword:
2444 case Ssymbol:
9828a477
KH
2445 case Sescape:
2446 case Scharquote:
8489eb67 2447 if (depth || !sexpflag) break;
195d1361
RS
2448 /* This word counts as a sexp; count object finished
2449 after passing it. */
8489eb67
RS
2450 while (from > stop)
2451 {
6a140a74 2452 temp_pos = from_byte;
ef316cf0
RS
2453 if (! NILP (current_buffer->enable_multibyte_characters))
2454 DEC_POS (temp_pos);
2455 else
2456 temp_pos--;
6a140a74 2457 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2458 c1 = FETCH_CHAR (temp_pos);
bd25db08 2459 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2460 /* Don't allow comment-end to be quoted. */
2461 if (temp_code == Sendcomment)
2462 goto done2;
6a140a74 2463 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2464 if (quoted)
93da5fff 2465 {
6a140a74 2466 DEC_BOTH (from, from_byte);
ef316cf0 2467 temp_pos = dec_bytepos (temp_pos);
6a140a74 2468 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2469 }
2470 c1 = FETCH_CHAR (temp_pos);
bd25db08 2471 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2472 if (! (quoted || temp_code == Sword
2473 || temp_code == Ssymbol
2474 || temp_code == Squote))
8489eb67 2475 goto done2;
6a140a74 2476 DEC_BOTH (from, from_byte);
8489eb67
RS
2477 }
2478 goto done2;
2479
2480 case Smath:
2481 if (!sexpflag)
2482 break;
ef316cf0 2483 temp_pos = dec_bytepos (from_byte);
6a140a74 2484 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2485 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2486 DEC_BOTH (from, from_byte);
8489eb67
RS
2487 if (mathexit)
2488 {
2489 mathexit = 0;
2490 goto open2;
2491 }
2492 mathexit = 1;
2493
2494 case Sclose:
2495 if (!++depth) goto done2;
2496 break;
2497
2498 case Sopen:
2499 open2:
2500 if (!--depth) goto done2;
2501 if (depth < min_depth)
7bf5e9e4
RS
2502 Fsignal (Qscan_error,
2503 Fcons (build_string ("Containing expression ends prematurely"),
2504 Fcons (make_number (last_good),
2505 Fcons (make_number (from), Qnil))));
8489eb67
RS
2506 break;
2507
2508 case Sendcomment:
2509 if (!parse_sexp_ignore_comments)
2510 break;
95ff8dfc 2511 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2512 &out_charpos, &out_bytepos);
1aa963c8
SM
2513 /* FIXME: if found == -1, then it really wasn't a comment-end.
2514 For single-char Sendcomment, we can't do much about it apart
2515 from skipping the char.
2516 For 2-char endcomments, we could try again, taking both
2517 chars as separate entities, but it's a lot of trouble
2518 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2519 if (found != -1)
2520 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2521 break;
2522
195d1361
RS
2523 case Scomment_fence:
2524 case Sstring_fence:
2525 while (1)
2526 {
195d1361 2527 if (from == stop) goto lose;
d0abdf7e 2528 DEC_BOTH (from, from_byte);
195d1361 2529 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2530 if (!char_quoted (from, from_byte)
bd25db08
KH
2531 && (c = FETCH_CHAR (from_byte),
2532 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2533 break;
2534 }
2535 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2536 break;
7d0393cf 2537
8489eb67 2538 case Sstring:
6a140a74 2539 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2540 while (1)
2541 {
2542 if (from == stop) goto lose;
d0abdf7e
SM
2543 DEC_BOTH (from, from_byte);
2544 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2545 if (!char_quoted (from, from_byte)
2546 && stringterm == (c = FETCH_CHAR (from_byte))
42ebfa31 2547 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2548 break;
8489eb67 2549 }
8489eb67
RS
2550 if (!depth && sexpflag) goto done2;
2551 break;
240c43e8
SM
2552 default:
2553 /* Ignore whitespace, punctuation, quote, endcomment. */
2554 break;
8489eb67
RS
2555 }
2556 }
2557
2558 /* Reached start of buffer. Error if within object, return nil if between */
2559 if (depth) goto lose;
2560
2561 immediate_quit = 0;
2562 return Qnil;
2563
2564 done2:
2565 count++;
2566 }
2567
2568
2569 immediate_quit = 0;
1e142fb7 2570 XSETFASTINT (val, from);
8489eb67
RS
2571 return val;
2572
2573 lose:
7bf5e9e4
RS
2574 Fsignal (Qscan_error,
2575 Fcons (build_string ("Unbalanced parentheses"),
2576 Fcons (make_number (last_good),
2577 Fcons (make_number (from), Qnil))));
ab229fdd 2578 abort ();
8489eb67
RS
2579 /* NOTREACHED */
2580}
2581
8489eb67 2582DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2583 doc: /* Scan from character number FROM by COUNT lists.
2584Returns the character number of the position thus found.
2585
2586If DEPTH is nonzero, paren depth begins counting from that value,
2587only places where the depth in parentheses becomes zero
2588are candidates for stopping; COUNT such places are counted.
2589Thus, a positive value for DEPTH means go out levels.
2590
2591Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2592
2593If the beginning or end of (the accessible part of) the buffer is reached
2594and the depth is wrong, an error is signaled.
2595If the depth is right but the count is not used up, nil is returned. */)
2596 (from, count, depth)
8489eb67
RS
2597 Lisp_Object from, count, depth;
2598{
b7826503
PJ
2599 CHECK_NUMBER (from);
2600 CHECK_NUMBER (count);
2601 CHECK_NUMBER (depth);
8489eb67
RS
2602
2603 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2604}
2605
2606DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2607 doc: /* Scan from character number FROM by COUNT balanced expressions.
2608If COUNT is negative, scan backwards.
2609Returns the character number of the position thus found.
2610
2611Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2612
2613If the beginning or end of (the accessible part of) the buffer is reached
2614in the middle of a parenthetical grouping, an error is signaled.
2615If the beginning or end is reached between groupings
2616but before count is used up, nil is returned. */)
2617 (from, count)
8489eb67
RS
2618 Lisp_Object from, count;
2619{
b7826503
PJ
2620 CHECK_NUMBER (from);
2621 CHECK_NUMBER (count);
8489eb67
RS
2622
2623 return scan_lists (XINT (from), XINT (count), 0, 1);
2624}
2625
2626DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2627 0, 0, 0,
2628 doc: /* Move point backward over any number of chars with prefix syntax.
2629This includes chars with "quote" or "prefix" syntax (' or p). */)
2630 ()
8489eb67
RS
2631{
2632 int beg = BEGV;
6a140a74
RS
2633 int opoint = PT;
2634 int opoint_byte = PT_BYTE;
6ec8bbd2 2635 int pos = PT;
6a140a74 2636 int pos_byte = PT_BYTE;
93da5fff 2637 int c;
93da5fff 2638
7d0393cf 2639 if (pos <= beg)
195d1361 2640 {
f902a008
RS
2641 SET_PT_BOTH (opoint, opoint_byte);
2642
2643 return Qnil;
195d1361 2644 }
8489eb67 2645
f902a008
RS
2646 SETUP_SYNTAX_TABLE (pos, -1);
2647
6a140a74
RS
2648 DEC_BOTH (pos, pos_byte);
2649
1fd3172d 2650 while (!char_quoted (pos, pos_byte)
195d1361 2651 /* Previous statement updates syntax table. */
6a140a74 2652 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2653 || SYNTAX_PREFIX (c)))
2654 {
1fd3172d
RS
2655 opoint = pos;
2656 opoint_byte = pos_byte;
2657
2658 if (pos + 1 > beg)
2659 DEC_BOTH (pos, pos_byte);
93da5fff 2660 }
8489eb67 2661
6a140a74 2662 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2663
2664 return Qnil;
2665}
2666\f
6a140a74 2667/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2668 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2669 and return a description of the state of the parse at END.
c81a3712 2670 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2671 If COMMENTSTOP is 1, stop at the start of a comment.
2672 If COMMENTSTOP is -1, stop at the start or end of a comment,
2673 after the beginning of a string, or after the end of a string. */
8489eb67 2674
340f92b5 2675static void
6a140a74 2676scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2677 stopbefore, oldstate, commentstop)
e5d4f4dc 2678 struct lisp_parse_state *stateptr;
8489eb67 2679 register int from;
ddc447fc 2680 int end, targetdepth, stopbefore, from_byte;
8489eb67 2681 Lisp_Object oldstate;
c81a3712 2682 int commentstop;
8489eb67
RS
2683{
2684 struct lisp_parse_state state;
2685
2686 register enum syntaxcode code;
95ff8dfc
RS
2687 int c1;
2688 int comnested;
8489eb67
RS
2689 struct level { int last, prev; };
2690 struct level levelstart[100];
2691 register struct level *curlevel = levelstart;
2692 struct level *endlevel = levelstart + 100;
8489eb67
RS
2693 register int depth; /* Paren depth of current scanning location.
2694 level - levelstart equals this except
2695 when the depth becomes negative. */
2696 int mindepth; /* Lowest DEPTH value seen. */
2697 int start_quoted = 0; /* Nonzero means starting after a char quote */
2698 Lisp_Object tem;
93da5fff 2699 int prev_from; /* Keep one character before FROM. */
6a140a74 2700 int prev_from_byte;
1fd3172d 2701 int prev_from_syntax;
195d1361
RS
2702 int boundary_stop = commentstop == -1;
2703 int nofence;
95ff8dfc
RS
2704 int found;
2705 int out_bytepos, out_charpos;
7c7ca3d2 2706 int temp;
93da5fff
KH
2707
2708 prev_from = from;
6a140a74
RS
2709 prev_from_byte = from_byte;
2710 if (from != BEGV)
2711 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2712
2713 /* Use this macro instead of `from++'. */
6a140a74
RS
2714#define INC_FROM \
2715do { prev_from = from; \
2716 prev_from_byte = from_byte; \
ab229fdd
AS
2717 temp = FETCH_CHAR (prev_from_byte); \
2718 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2719 INC_BOTH (from, from_byte); \
3ceb4629
SM
2720 if (from < end) \
2721 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2722 } while (0)
8489eb67
RS
2723
2724 immediate_quit = 1;
2725 QUIT;
2726
265a9e55 2727 if (NILP (oldstate))
8489eb67
RS
2728 {
2729 depth = 0;
2730 state.instring = -1;
2731 state.incomment = 0;
195d1361
RS
2732 state.comstyle = 0; /* comment style a by default. */
2733 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2734 }
2735 else
2736 {
2737 tem = Fcar (oldstate);
265a9e55 2738 if (!NILP (tem))
8489eb67
RS
2739 depth = XINT (tem);
2740 else
2741 depth = 0;
2742
2743 oldstate = Fcdr (oldstate);
2744 oldstate = Fcdr (oldstate);
2745 oldstate = Fcdr (oldstate);
2746 tem = Fcar (oldstate);
195d1361 2747 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2748 state.instring = (!NILP (tem)
2749 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2750 : -1);
8489eb67
RS
2751
2752 oldstate = Fcdr (oldstate);
2753 tem = Fcar (oldstate);
e76f1c44
GM
2754 state.incomment = (!NILP (tem)
2755 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2756 : 0);
8489eb67
RS
2757
2758 oldstate = Fcdr (oldstate);
2759 tem = Fcar (oldstate);
265a9e55 2760 start_quoted = !NILP (tem);
e5d4f4dc 2761
95ff8dfc 2762 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2763 style a. If it is non-nil, we are in comment style b */
2764 oldstate = Fcdr (oldstate);
e5d4f4dc 2765 oldstate = Fcdr (oldstate);
195d1361 2766 tem = Fcar (oldstate);
7d0393cf 2767 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2768 ? ST_COMMENT_STYLE : 1);
195d1361 2769
e5d4f4dc 2770 oldstate = Fcdr (oldstate);
e5d4f4dc 2771 tem = Fcar (oldstate);
195d1361 2772 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2773 oldstate = Fcdr (oldstate);
2774 tem = Fcar (oldstate);
2775 while (!NILP (tem)) /* >= second enclosing sexps. */
2776 {
2777 /* curlevel++->last ran into compiler bug on Apollo */
2778 curlevel->last = XINT (Fcar (tem));
2779 if (++curlevel == endlevel)
02010917 2780 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2781 curlevel->prev = -1;
2782 curlevel->last = -1;
2783 tem = Fcdr (tem);
2784 }
8489eb67
RS
2785 }
2786 state.quoted = 0;
2787 mindepth = depth;
2788
2789 curlevel->prev = -1;
2790 curlevel->last = -1;
2791
326b283b 2792 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
2793 temp = FETCH_CHAR (prev_from_byte);
2794 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 2795 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2796
195d1361 2797 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2798
326b283b
RS
2799 if (state.incomment)
2800 goto startincomment;
8489eb67
RS
2801 if (state.instring >= 0)
2802 {
195d1361 2803 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2804 if (start_quoted)
2805 goto startquotedinstring;
8489eb67
RS
2806 goto startinstring;
2807 }
326b283b
RS
2808 else if (start_quoted)
2809 goto startquoted;
1fd3172d 2810
8489eb67
RS
2811 while (from < end)
2812 {
93da5fff 2813 INC_FROM;
1fd3172d 2814 code = prev_from_syntax & 0xff;
4c920633 2815
c5d4b96f
SM
2816 if (from < end
2817 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
2818 && (c1 = FETCH_CHAR (from_byte),
2819 SYNTAX_COMSTART_SECOND (c1)))
2820 /* Duplicate code to avoid a complex if-expression
2821 which causes trouble for the SGI compiler. */
95ff8dfc 2822 {
c5d4b96f
SM
2823 /* Record the comment style we have entered so that only
2824 the comment-end sequence of the same style actually
2825 terminates the comment section. */
2826 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2827 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2828 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2829 state.incomment = comnested ? 1 : -1;
95ff8dfc 2830 state.comstr_start = prev_from;
c5d4b96f
SM
2831 INC_FROM;
2832 code = Scomment;
95ff8dfc 2833 }
4c920633 2834 else if (code == Scomment_fence)
e5d4f4dc
RS
2835 {
2836 /* Record the comment style we have entered so that only
2837 the comment-end sequence of the same style actually
2838 terminates the comment section. */
95ff8dfc
RS
2839 state.comstyle = ST_COMMENT_STYLE;
2840 state.incomment = -1;
195d1361 2841 state.comstr_start = prev_from;
e5d4f4dc 2842 code = Scomment;
e5d4f4dc 2843 }
c5d4b96f
SM
2844 else if (code == Scomment)
2845 {
2846 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2847 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2848 1 : -1);
2849 state.comstr_start = prev_from;
2850 }
e5d4f4dc 2851
1fd3172d 2852 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2853 continue;
0220c518 2854 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2855 {
2856 case Sescape:
2857 case Scharquote:
2858 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2859 curlevel->last = prev_from;
8489eb67
RS
2860 startquoted:
2861 if (from == end) goto endquoted;
93da5fff 2862 INC_FROM;
8489eb67
RS
2863 goto symstarted;
2864 /* treat following character as a word constituent */
2865 case Sword:
2866 case Ssymbol:
2867 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2868 curlevel->last = prev_from;
8489eb67
RS
2869 symstarted:
2870 while (from < end)
2871 {
7c7ca3d2 2872 /* Some compilers can't handle this inside the switch. */
ab229fdd
AS
2873 temp = FETCH_CHAR (from_byte);
2874 temp = SYNTAX (temp);
7c7ca3d2 2875 switch (temp)
8489eb67
RS
2876 {
2877 case Scharquote:
2878 case Sescape:
93da5fff 2879 INC_FROM;
8489eb67
RS
2880 if (from == end) goto endquoted;
2881 break;
2882 case Sword:
2883 case Ssymbol:
2884 case Squote:
2885 break;
2886 default:
2887 goto symdone;
2888 }
93da5fff 2889 INC_FROM;
8489eb67
RS
2890 }
2891 symdone:
2892 curlevel->prev = curlevel->last;
2893 break;
2894
240c43e8 2895 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 2896 case Scomment:
195d1361 2897 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2898 startincomment:
2899 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2900 that we find a 2-char comment ender even if we start in the
e6365855 2901 middle of it. We don't want to do that if we're just at the
d355bd8a 2902 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2903 found = forw_comment (from, from_byte, end,
2904 state.incomment, state.comstyle,
e6365855
SM
2905 (from == BEGV || from < state.comstr_start + 3)
2906 ? 0 : prev_from_syntax,
95ff8dfc
RS
2907 &out_charpos, &out_bytepos, &state.incomment);
2908 from = out_charpos; from_byte = out_bytepos;
2909 /* Beware! prev_from and friends are invalid now.
2910 Luckily, the `done' doesn't use them and the INC_FROM
2911 sets them to a sane value without looking at them. */
2912 if (!found) goto done;
d355bd8a 2913 INC_FROM;
8489eb67 2914 state.incomment = 0;
e5d4f4dc 2915 state.comstyle = 0; /* reset the comment style */
195d1361 2916 if (boundary_stop) goto done;
8489eb67
RS
2917 break;
2918
2919 case Sopen:
2920 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2921 depth++;
2922 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2923 curlevel->last = prev_from;
8489eb67 2924 if (++curlevel == endlevel)
02010917 2925 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2926 curlevel->prev = -1;
2927 curlevel->last = -1;
30844415 2928 if (targetdepth == depth) goto done;
8489eb67
RS
2929 break;
2930
2931 case Sclose:
2932 depth--;
2933 if (depth < mindepth)
2934 mindepth = depth;
2935 if (curlevel != levelstart)
2936 curlevel--;
2937 curlevel->prev = curlevel->last;
30844415 2938 if (targetdepth == depth) goto done;
8489eb67
RS
2939 break;
2940
2941 case Sstring:
195d1361
RS
2942 case Sstring_fence:
2943 state.comstr_start = from - 1;
8489eb67 2944 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2945 curlevel->last = prev_from;
7d0393cf 2946 state.instring = (code == Sstring
6a140a74 2947 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2948 : ST_STRING_STYLE);
2949 if (boundary_stop) goto done;
8489eb67 2950 startinstring:
195d1361 2951 {
644ea4df 2952 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 2953
644ea4df
RS
2954 while (1)
2955 {
2956 int c;
195d1361 2957
644ea4df 2958 if (from >= end) goto done;
6a140a74 2959 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2960 /* Some compilers can't handle this inside the switch. */
2961 temp = SYNTAX (c);
ccf89641
KH
2962
2963 /* Check TEMP here so that if the char has
2964 a syntax-table property which says it is NOT
2965 a string character, it does not end the string. */
2966 if (nofence && c == state.instring && temp == Sstring)
2967 break;
2968
7c7ca3d2 2969 switch (temp)
644ea4df
RS
2970 {
2971 case Sstring_fence:
2972 if (!nofence) goto string_end;
2973 break;
2974 case Scharquote:
2975 case Sescape:
2976 INC_FROM;
2977 startquotedinstring:
2978 if (from >= end) goto endquoted;
195d1361 2979 }
644ea4df
RS
2980 INC_FROM;
2981 }
195d1361
RS
2982 }
2983 string_end:
8489eb67
RS
2984 state.instring = -1;
2985 curlevel->prev = curlevel->last;
93da5fff 2986 INC_FROM;
195d1361 2987 if (boundary_stop) goto done;
8489eb67
RS
2988 break;
2989
2990 case Smath:
240c43e8
SM
2991 /* FIXME: We should do something with it. */
2992 break;
2993 default:
2994 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
2995 break;
2996 }
2997 }
2998 goto done;
2999
3000 stop: /* Here if stopping before start of sexp. */
93da5fff 3001 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3002 goto done; /* but return the position before it. */
3003
3004 endquoted:
3005 state.quoted = 1;
3006 done:
3007 state.depth = depth;
3008 state.mindepth = mindepth;
3009 state.thislevelstart = curlevel->prev;
3010 state.prevlevelstart
3011 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3012 state.location = from;
1a102a58
RS
3013 state.levelstarts = Qnil;
3014 while (--curlevel >= levelstart)
3015 state.levelstarts = Fcons (make_number (curlevel->last),
3016 state.levelstarts);
8489eb67
RS
3017 immediate_quit = 0;
3018
e5d4f4dc 3019 *stateptr = state;
8489eb67
RS
3020}
3021
c81a3712 3022DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3023 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3024Parsing stops at TO or when certain criteria are met;
3025 point is set to where parsing stops.
3026If fifth arg OLDSTATE is omitted or nil,
3027 parsing assumes that FROM is the beginning of a function.
0e6228bc 3028Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3029 0. depth in parens.
3030 1. character address of start of innermost containing list; nil if none.
3031 2. character address of start of last complete sexp terminated.
3032 3. non-nil if inside a string.
3033 (it is the character that will terminate the string,
3034 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3035 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3036 else an integer (the current comment nesting).
3037 5. t if following a quote character.
3038 6. the minimum paren-depth encountered during this scan.
3039 7. t if in a comment of style b; symbol `syntax-table' if the comment
3040 should be terminated by a generic comment delimiter.
3041 8. character address of start of comment or string; nil if not in one.
3042 9. Intermediate data for continuation of parsing (subject to change).
3043If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3044in parentheses becomes equal to TARGETDEPTH.
3045Fourth arg STOPBEFORE non-nil means stop when come to
3046 any character that starts a sexp.
0e6228bc 3047Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93
PJ
3048 It is used to initialize the state of the parse. Elements number 1, 2, 6
3049 and 8 are ignored; you can leave off element 8 (the last) entirely.
3050Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3051 If it is symbol `syntax-table', stop after the start of a comment or a
3052 string, or after end of a comment or a string. */)
3053 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3054 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3055{
3056 struct lisp_parse_state state;
3057 int target;
3058
265a9e55 3059 if (!NILP (targetdepth))
8489eb67 3060 {
b7826503 3061 CHECK_NUMBER (targetdepth);
8489eb67
RS
3062 target = XINT (targetdepth);
3063 }
3064 else
3065 target = -100000; /* We won't reach this depth */
3066
3067 validate_region (&from, &to);
6a140a74
RS
3068 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3069 XINT (to),
c81a3712 3070 target, !NILP (stopbefore), oldstate,
7d0393cf 3071 (NILP (commentstop)
195d1361 3072 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3073
3074 SET_PT (state.location);
7d0393cf 3075
8489eb67
RS
3076 return Fcons (make_number (state.depth),
3077 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3078 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3079 Fcons (state.instring >= 0
3080 ? (state.instring == ST_STRING_STYLE
195d1361 3081 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3082 Fcons (state.incomment < 0 ? Qt :
3083 (state.incomment == 0 ? Qnil :
3084 make_number (state.incomment)),
8489eb67 3085 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3086 Fcons (make_number (state.mindepth),
7d0393cf 3087 Fcons ((state.comstyle
bff37d2a
RS
3088 ? (state.comstyle == ST_COMMENT_STYLE
3089 ? Qsyntax_table : Qt) :
3090 Qnil),
0beaf54f
DL
3091 Fcons (((state.incomment
3092 || (state.instring >= 0))
bff37d2a
RS
3093 ? make_number (state.comstr_start)
3094 : Qnil),
1a102a58 3095 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3096}
3097\f
dfcf069d 3098void
8489eb67
RS
3099init_syntax_once ()
3100{
78f9a1f7 3101 register int i, c;
8ea151b2 3102 Lisp_Object temp;
8489eb67 3103
5ebaddf5
RS
3104 /* This has to be done here, before we call Fmake_char_table. */
3105 Qsyntax_table = intern ("syntax-table");
3106 staticpro (&Qsyntax_table);
3107
3108 /* Intern this now in case it isn't already done.
3109 Setting this variable twice is harmless.
3110 But don't staticpro it here--that is done in alloc.c. */
3111 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3112
93da5fff 3113 /* Create objects which can be shared among syntax tables. */
42ebfa31 3114 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3115 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3116 XVECTOR (Vsyntax_code_object)->contents[i]
3117 = Fcons (make_number (i), Qnil);
3118
5ebaddf5
RS
3119 /* Now we are ready to set up this property, so we can
3120 create syntax tables. */
3121 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3122
93da5fff 3123 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3124
5ebaddf5 3125 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3126
aa7b08b4
RS
3127 /* Control characters should not be whitespace. */
3128 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3129 for (i = 0; i <= ' ' - 1; i++)
3130 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3131 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3132
3133 /* Except that a few really are whitespace. */
3134 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3135 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3136 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3137 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3138 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3139 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3140
93da5fff 3141 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3142 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3143 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3144 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3145 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3146 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3147 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3148
3149 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3150 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3151
3152 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3153 Fcons (make_number (Sopen), make_number (')')));
3154 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3155 Fcons (make_number (Sclose), make_number ('(')));
3156 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3157 Fcons (make_number (Sopen), make_number (']')));
3158 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3159 Fcons (make_number (Sclose), make_number ('[')));
3160 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3161 Fcons (make_number (Sopen), make_number ('}')));
3162 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3163 Fcons (make_number (Sclose), make_number ('{')));
3164 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3165 Fcons (make_number ((int) Sstring), Qnil));
3166 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3167 Fcons (make_number ((int) Sescape), Qnil));
3168
93da5fff 3169 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3170 for (i = 0; i < 10; i++)
78f9a1f7
KH
3171 {
3172 c = "_-+*/&|<>="[i];
3173 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3174 }
8489eb67 3175
93da5fff 3176 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3177 for (i = 0; i < 12; i++)
78f9a1f7
KH
3178 {
3179 c = ".,;:?!#@~^'`"[i];
3180 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3181 }
bd25db08
KH
3182
3183 /* All multibyte characters have syntax `word' by default. */
3184 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3185 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3186 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3187}
3188
dfcf069d 3189void
8489eb67
RS
3190syms_of_syntax ()
3191{
3192 Qsyntax_table_p = intern ("syntax-table-p");
3193 staticpro (&Qsyntax_table_p);
3194
93da5fff
KH
3195 staticpro (&Vsyntax_code_object);
3196
a2994c46
KS
3197 staticpro (&gl_state.object);
3198 staticpro (&gl_state.global_code);
3199 staticpro (&gl_state.current_syntax_table);
3200 staticpro (&gl_state.old_prop);
3201
3202 /* Defined in regex.c */
3203 staticpro (&re_match_object);
3204
7bf5e9e4
RS
3205 Qscan_error = intern ("scan-error");
3206 staticpro (&Qscan_error);
3207 Fput (Qscan_error, Qerror_conditions,
f3be100f 3208 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3209 Fput (Qscan_error, Qerror_message,
3210 build_string ("Scan error"));
3211
8489eb67 3212 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3213 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3214
195d1361 3215 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3216 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3217Otherwise, that text property is simply ignored.
3218See the info node `(elisp)Syntax Properties' for a description of the
3219`syntax-table' property. */);
195d1361 3220
8489eb67
RS
3221 words_include_escapes = 0;
3222 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3223 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3224
bd25db08 3225 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3226 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3227 multibyte_syntax_as_symbol = 0;
3228
f4ed767f
GM
3229 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3230 &open_paren_in_column_0_is_defun_start,
b222e415 3231 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3232 open_paren_in_column_0_is_defun_start = 1;
3233
8489eb67
RS
3234 defsubr (&Ssyntax_table_p);
3235 defsubr (&Ssyntax_table);
3236 defsubr (&Sstandard_syntax_table);
3237 defsubr (&Scopy_syntax_table);
3238 defsubr (&Sset_syntax_table);
3239 defsubr (&Schar_syntax);
beefa22e 3240 defsubr (&Smatching_paren);
c65adb44 3241 defsubr (&Sstring_to_syntax);
8489eb67 3242 defsubr (&Smodify_syntax_entry);
62abe9cb 3243 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3244
3245 defsubr (&Sforward_word);
3246
195d1361
RS
3247 defsubr (&Sskip_chars_forward);
3248 defsubr (&Sskip_chars_backward);
3249 defsubr (&Sskip_syntax_forward);
3250 defsubr (&Sskip_syntax_backward);
3251
b3cfe0c8 3252 defsubr (&Sforward_comment);
8489eb67
RS
3253 defsubr (&Sscan_lists);
3254 defsubr (&Sscan_sexps);
3255 defsubr (&Sbackward_prefix_chars);
3256 defsubr (&Sparse_partial_sexp);
3257}
ab5796a9
MB
3258
3259/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3260 (do not change this comment) */