Update years in copyright notice; nfc.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
0b5538bd 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2002,
aaef169d 3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
a4275ad1 9the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
8489eb67
RS
21
22
18160b98 23#include <config.h>
8489eb67
RS
24#include <ctype.h>
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
93da5fff 28#include "charset.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
195d1361
RS
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 71 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int thislevelstart; /* Char number of most recent start-of-expression at current level */
75 int prevlevelstart; /* Char number of start of containing expression */
195d1361 76 int location; /* Char number at which parsing stopped. */
e5d4f4dc 77 int mindepth; /* Minimum depth seen while scanning. */
195d1361 78 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
79 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
80 of levels (starting from outermost). */
e5d4f4dc
RS
81 };
82\f
37bef230
RS
83/* These variables are a cache for finding the start of a defun.
84 find_start_pos is the place for which the defun start was found.
85 find_start_value is the defun start position found for it.
6a140a74 86 find_start_value_byte is the corresponding byte position.
37bef230
RS
87 find_start_buffer is the buffer it was found in.
88 find_start_begv is the BEGV value when it was found.
89 find_start_modiff is the value of MODIFF when it was found. */
90
91static int find_start_pos;
92static int find_start_value;
6a140a74 93static int find_start_value_byte;
37bef230
RS
94static struct buffer *find_start_buffer;
95static int find_start_begv;
96static int find_start_modiff;
6a140a74
RS
97
98
99static int find_defun_start P_ ((int, int));
95ff8dfc 100static int back_comment P_ ((int, int, int, int, int, int *, int *));
6a140a74 101static int char_quoted P_ ((int, int));
a1bc88d4 102static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
6a140a74
RS
103static Lisp_Object scan_lists P_ ((int, int, int, int));
104static void scan_sexps_forward P_ ((struct lisp_parse_state *,
105 int, int, int, int,
106 int, Lisp_Object, int));
a1bc88d4 107static int in_classes P_ ((int, Lisp_Object));
195d1361
RS
108\f
109
110struct gl_state_s gl_state; /* Global state of syntax parser. */
111
112INTERVAL interval_of ();
113#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
115
6a140a74
RS
116/* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 118 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 119 valid at start, the rest is filled basing on OBJECT.
195d1361 120
6a140a74 121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
6a140a74 124 update the interval to start further than CHARPOS - or be
195d1361 125 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 126 charpos to call this subroutine again - or be before/after the
195d1361
RS
127 start/end of OBJECT. */
128
129void
6a140a74
RS
130update_syntax_table (charpos, count, init, object)
131 int charpos, count, init;
195d1361
RS
132 Lisp_Object object;
133{
134 Lisp_Object tmp_table;
4ffe723b 135 int cnt = 0, invalidate = 1;
5a774522 136 INTERVAL i;
195d1361
RS
137
138 if (init)
139 {
bb0de084 140 gl_state.old_prop = Qnil;
195d1361
RS
141 gl_state.start = gl_state.b_property;
142 gl_state.stop = gl_state.e_property;
bb0de084
SM
143 i = interval_of (charpos, object);
144 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
145 invalidate = 0;
146 if (NULL_INTERVAL_P (i))
147 return;
f902a008 148 /* interval_of updates only ->position of the return value, so
d80f4cc7 149 update the parents manually to speed up update_interval. */
bb0de084 150 while (!NULL_PARENT (i))
d80f4cc7
RS
151 {
152 if (AM_RIGHT_CHILD (i))
439d5cb4 153 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 154 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
155 - TOTAL_LENGTH (INTERVAL_PARENT (i))
156 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 157 else
439d5cb4 158 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 159 + TOTAL_LENGTH (i);
439d5cb4 160 i = INTERVAL_PARENT (i);
d80f4cc7
RS
161 }
162 i = gl_state.forward_i;
bb0de084 163 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 164 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
165 goto update;
166 }
5a774522 167 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 168
423e705d 169 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 170 or further off. */
195d1361
RS
171 if (NULL_INTERVAL_P (i))
172 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 173 else if (charpos < i->position) /* Move left. */
195d1361
RS
174 {
175 if (count > 0)
f902a008 176 error ("Error in syntax_table logic for intervals <-");
195d1361 177 /* Update the interval. */
6a140a74 178 i = update_interval (i, charpos);
bb0de084 179 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
180 {
181 invalidate = 0;
195d1361 182 gl_state.forward_i = i;
ee0cdb48 183 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 184 }
423e705d 185 }
6a140a74 186 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
187 {
188 if (count < 0)
f902a008 189 error ("Error in syntax_table logic for intervals ->");
195d1361 190 /* Update the interval. */
6a140a74 191 i = update_interval (i, charpos);
bb0de084 192 if (i->position != gl_state.e_property)
195d1361
RS
193 {
194 invalidate = 0;
195d1361 195 gl_state.backward_i = i;
bb0de084 196 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
197 }
198 }
195d1361
RS
199
200 update:
201 tmp_table = textget (i->plist, Qsyntax_table);
202
203 if (invalidate)
204 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 205
423e705d
SM
206 if (invalidate) /* Did not get to adjacent interval. */
207 { /* with the same table => */
208 /* invalidate the old range. */
195d1361
RS
209 if (count > 0)
210 {
211 gl_state.backward_i = i;
bb0de084
SM
212 gl_state.b_property = i->position - gl_state.offset;
213 }
214 else
195d1361 215 {
bb0de084 216 gl_state.forward_i = i;
ee0cdb48 217 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
218 }
219 }
37bef230 220
bb0de084 221 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 222 {
bb0de084
SM
223 gl_state.current_syntax_table = tmp_table;
224 gl_state.old_prop = tmp_table;
225 if (EQ (Fsyntax_table_p (tmp_table), Qt))
226 {
227 gl_state.use_global = 0;
7d0393cf 228 }
bb0de084
SM
229 else if (CONSP (tmp_table))
230 {
231 gl_state.use_global = 1;
232 gl_state.global_code = tmp_table;
233 }
7d0393cf 234 else
bb0de084
SM
235 {
236 gl_state.use_global = 0;
237 gl_state.current_syntax_table = current_buffer->syntax_table;
238 }
195d1361
RS
239 }
240
241 while (!NULL_INTERVAL_P (i))
242 {
243 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
244 {
245 if (count > 0)
bb0de084
SM
246 {
247 gl_state.e_property = i->position - gl_state.offset;
248 gl_state.forward_i = i;
249 }
250 else
251 {
5a774522
SM
252 gl_state.b_property
253 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
254 gl_state.backward_i = i;
255 }
256 return;
195d1361 257 }
7d0393cf 258 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
259 {
260 if (count > 0)
bb0de084 261 {
5a774522
SM
262 gl_state.e_property
263 = i->position + LENGTH (i) - gl_state.offset
264 /* e_property at EOB is not set to ZV but to ZV+1, so that
265 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
266 having to check eob between the two. */
267 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
268 gl_state.forward_i = i;
269 }
270 else
271 {
272 gl_state.b_property = i->position - gl_state.offset;
273 gl_state.backward_i = i;
274 }
275 return;
195d1361
RS
276 }
277 cnt++;
278 i = count > 0 ? next_interval (i) : previous_interval (i);
279 }
bb0de084
SM
280 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
281 if (count > 0)
282 gl_state.e_property = gl_state.stop;
283 else
284 gl_state.b_property = gl_state.start;
195d1361
RS
285}
286\f
6a140a74
RS
287/* Returns TRUE if char at CHARPOS is quoted.
288 Global syntax-table data should be set up already to be good at CHARPOS
289 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
290
291static int
6a140a74
RS
292char_quoted (charpos, bytepos)
293 register int charpos, bytepos;
195d1361
RS
294{
295 register enum syntaxcode code;
296 register int beg = BEGV;
297 register int quoted = 0;
6a140a74
RS
298 int orig = charpos;
299
300 DEC_BOTH (charpos, bytepos);
195d1361 301
709a47ce 302 while (charpos >= beg)
195d1361 303 {
ab229fdd
AS
304 int c;
305
6a140a74 306 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
ab229fdd
AS
307 c = FETCH_CHAR (bytepos);
308 code = SYNTAX (c);
6a140a74
RS
309 if (! (code == Scharquote || code == Sescape))
310 break;
311
312 DEC_BOTH (charpos, bytepos);
313 quoted = !quoted;
195d1361 314 }
6a140a74
RS
315
316 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
317 return quoted;
318}
6a140a74
RS
319
320/* Return the bytepos one character after BYTEPOS.
321 We assume that BYTEPOS is not at the end of the buffer. */
322
323INLINE int
324inc_bytepos (bytepos)
325 int bytepos;
326{
ef316cf0
RS
327 if (NILP (current_buffer->enable_multibyte_characters))
328 return bytepos + 1;
329
6a140a74
RS
330 INC_POS (bytepos);
331 return bytepos;
332}
333
334/* Return the bytepos one character before BYTEPOS.
335 We assume that BYTEPOS is not at the start of the buffer. */
336
337INLINE int
338dec_bytepos (bytepos)
339 int bytepos;
340{
ef316cf0
RS
341 if (NILP (current_buffer->enable_multibyte_characters))
342 return bytepos - 1;
343
6a140a74
RS
344 DEC_POS (bytepos);
345 return bytepos;
346}
195d1361 347\f
7cc80f0a
RS
348/* Return a defun-start position before before POS and not too far before.
349 It should be the last one before POS, or nearly the last.
350
351 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 352 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
353
354 We record the information about where the scan started
355 and what its result was, so that another call in the same area
356 can return the same value very quickly.
195d1361
RS
357
358 There is no promise at which position the global syntax data is
359 valid on return from the subroutine, so the caller should explicitly
360 update the global data. */
37bef230
RS
361
362static int
6a140a74
RS
363find_defun_start (pos, pos_byte)
364 int pos, pos_byte;
37bef230 365{
6a140a74 366 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 367
1fd1cc2f
RS
368 if (!open_paren_in_column_0_is_defun_start)
369 {
370 find_start_value_byte = BEGV_BYTE;
371 return BEGV;
372 }
373
37bef230
RS
374 /* Use previous finding, if it's valid and applies to this inquiry. */
375 if (current_buffer == find_start_buffer
376 /* Reuse the defun-start even if POS is a little farther on.
377 POS might be in the next defun, but that's ok.
378 Our value may not be the best possible, but will still be usable. */
379 && pos <= find_start_pos + 1000
380 && pos >= find_start_value
381 && BEGV == find_start_begv
382 && MODIFF == find_start_modiff)
383 return find_start_value;
384
385 /* Back up to start of line. */
6a140a74 386 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 387
195d1361
RS
388 /* We optimize syntax-table lookup for rare updates. Thus we accept
389 only those `^\s(' which are good in global _and_ text-property
390 syntax-tables. */
391 gl_state.current_syntax_table = current_buffer->syntax_table;
392 gl_state.use_global = 0;
1fd1cc2f 393 while (PT > BEGV)
37bef230 394 {
ab229fdd
AS
395 int c;
396
1fd1cc2f
RS
397 /* Open-paren at start of line means we may have found our
398 defun-start. */
ab229fdd
AS
399 c = FETCH_CHAR (PT_BYTE);
400 if (SYNTAX (c) == Sopen)
195d1361 401 {
1fd1cc2f 402 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
ab229fdd
AS
403 c = FETCH_CHAR (PT_BYTE);
404 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
405 break;
406 /* Now fallback to the default value. */
407 gl_state.current_syntax_table = current_buffer->syntax_table;
408 gl_state.use_global = 0;
195d1361 409 }
1fd1cc2f
RS
410 /* Move to beg of previous line. */
411 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
412 }
413
414 /* Record what we found, for the next try. */
6a140a74
RS
415 find_start_value = PT;
416 find_start_value_byte = PT_BYTE;
37bef230
RS
417 find_start_buffer = current_buffer;
418 find_start_modiff = MODIFF;
419 find_start_begv = BEGV;
420 find_start_pos = pos;
421
6a140a74
RS
422 TEMP_SET_PT_BOTH (opoint, opoint_byte);
423
37bef230
RS
424 return find_start_value;
425}
426\f
f902a008
RS
427/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
428
429static int
430prev_char_comend_first (pos, pos_byte)
431 int pos, pos_byte;
432{
433 int c, val;
434
435 DEC_BOTH (pos, pos_byte);
436 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
437 c = FETCH_CHAR (pos_byte);
438 val = SYNTAX_COMEND_FIRST (c);
439 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
440 return val;
441}
442
443/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
444
3f679f55
SM
445/* static int
446 * prev_char_comstart_first (pos, pos_byte)
447 * int pos, pos_byte;
448 * {
449 * int c, val;
7d0393cf 450 *
3f679f55
SM
451 * DEC_BOTH (pos, pos_byte);
452 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
453 * c = FETCH_CHAR (pos_byte);
454 * val = SYNTAX_COMSTART_FIRST (c);
455 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
456 * return val;
457 * } */
f902a008 458
6a140a74
RS
459/* Checks whether charpos FROM is at the end of a comment.
460 FROM_BYTE is the bytepos corresponding to FROM.
461 Do not move back before STOP.
462
463 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
464 return -1 otherwise.
465
466 If successful, store the charpos of the comment's beginning
467 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
468
469 Global syntax data remains valid for backward search starting at
470 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
471
472static int
95ff8dfc 473back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
6a140a74 474 int from, from_byte, stop;
95ff8dfc 475 int comnested, comstyle;
6a140a74 476 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
477{
478 /* Look back, counting the parity of string-quotes,
479 and recording the comment-starters seen.
480 When we reach a safe place, assume that's not in a string;
481 then step the main scan to the earliest comment-starter seen
482 an even number of string quotes away from the safe place.
483
484 OFROM[I] is position of the earliest comment-starter seen
485 which is I+2X quotes from the comment-end.
486 PARITY is current parity of quotes from the comment end. */
3ee5041c 487 int string_style = -1; /* Presumed outside of any string. */
195d1361 488 int string_lossage = 0;
f7c436c1 489 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 490 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
491 comment-starter we might see later could be a false positive (hidden
492 inside another comment).
493 Test case: { a (* b } c (* d *) */
494 int comment_lossage = 0;
195d1361 495 int comment_end = from;
6a140a74 496 int comment_end_byte = from_byte;
195d1361 497 int comstart_pos = 0;
6a140a74 498 int comstart_byte;
eb35b628
RS
499 /* Place where the containing defun starts,
500 or 0 if we didn't come across it yet. */
501 int defun_start = 0;
502 int defun_start_byte = 0;
195d1361 503 register enum syntaxcode code;
95ff8dfc 504 int nesting = 1; /* current comment nesting */
ea315ed6 505 int c;
3f679f55
SM
506 int syntax = 0;
507
508 /* FIXME: A }} comment-ender style leads to incorrect behavior
509 in the case of {{ c }}} because we ignore the last two chars which are
510 assumed to be comment-enders although they aren't. */
195d1361
RS
511
512 /* At beginning of range to scan, we're outside of strings;
513 that determines quote parity to the comment-end. */
514 while (from != stop)
515 {
3f679f55
SM
516 int temp_byte, prev_syntax;
517 int com2start, com2end;
6a140a74 518
195d1361 519 /* Move back and examine a character. */
6a140a74 520 DEC_BOTH (from, from_byte);
195d1361
RS
521 UPDATE_SYNTAX_TABLE_BACKWARD (from);
522
3f679f55 523 prev_syntax = syntax;
6a140a74 524 c = FETCH_CHAR (from_byte);
3f679f55 525 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
526 code = SYNTAX (c);
527
3f679f55
SM
528 /* Check for 2-char comment markers. */
529 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
530 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
531 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
532 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
533 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
534 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
535 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
536
537 /* Nasty cases with overlapping 2-char comment markers:
538 - snmp-mode: -- c -- foo -- c --
539 --- c --
540 ------ c --
541 - c-mode: *||*
542 |* *|* *|
543 |*| |* |*|
544 /// */
545
546 /* If a 2-char comment sequence partly overlaps with another,
547 we don't try to be clever. */
548 if (from > stop && (com2end || com2start))
195d1361 549 {
3f679f55
SM
550 int next = from, next_byte = from_byte, next_c, next_syntax;
551 DEC_BOTH (next, next_byte);
552 UPDATE_SYNTAX_TABLE_BACKWARD (next);
553 next_c = FETCH_CHAR (next_byte);
554 next_syntax = SYNTAX_WITH_FLAGS (next_c);
555 if (((com2start || comnested)
556 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
557 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
558 || ((com2end || comnested)
559 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
560 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
561 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
562 goto lossage;
563 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 564 }
3f679f55
SM
565
566 if (com2start && comstart_pos == 0)
567 /* We're looking at a comment starter. But it might be a comment
568 ender as well (see snmp-mode). The first time we see one, we
569 need to consider it as a comment starter,
570 and the subsequent times as a comment ender. */
571 com2end = 0;
572
573 /* Turn a 2-char comment sequences into the appropriate syntax. */
574 if (com2end)
575 code = Sendcomment;
576 else if (com2start)
577 code = Scomment;
578 /* Ignore comment starters of a different style. */
579 else if (code == Scomment
580 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
581 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 582 continue;
195d1361 583
9828a477
KH
584 /* Ignore escaped characters, except comment-enders. */
585 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
586 continue;
587
3ee5041c 588 switch (code)
195d1361 589 {
3ee5041c
SM
590 case Sstring_fence:
591 case Scomment_fence:
592 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
593 case Sstring:
594 /* Track parity of quotes. */
595 if (string_style == -1)
596 /* Entering a string. */
597 string_style = c;
598 else if (string_style == c)
599 /* Leaving the string. */
600 string_style = -1;
601 else
602 /* If we have two kinds of string delimiters.
603 There's no way to grok this scanning backwards. */
195d1361 604 string_lossage = 1;
3ee5041c 605 break;
7d0393cf 606
3ee5041c
SM
607 case Scomment:
608 /* We've already checked that it is the relevant comstyle. */
f7c436c1 609 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
610 /* There are odd string quotes involved, so let's be careful.
611 Test case in Pascal: " { " a { " } */
612 goto lossage;
613
f7c436c1
SM
614 if (!comnested)
615 {
616 /* Record best comment-starter so far. */
617 comstart_pos = from;
618 comstart_byte = from_byte;
619 }
620 else if (--nesting <= 0)
95ff8dfc
RS
621 /* nested comments have to be balanced, so we don't need to
622 keep looking for earlier ones. We use here the same (slightly
623 incorrect) reasoning as below: since it is followed by uniform
624 paired string quotes, this comment-start has to be outside of
625 strings, else the comment-end itself would be inside a string. */
626 goto done;
3ee5041c
SM
627 break;
628
02010917 629 case Sendcomment:
3f679f55 630 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 631 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 632 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
633 /* This is the same style of comment ender as ours. */
634 {
635 if (comnested)
636 nesting++;
637 else
638 /* Anything before that can't count because it would match
639 this comment-ender rather than ours. */
640 from = stop; /* Break out of the loop. */
641 }
f7c436c1
SM
642 else if (comstart_pos != 0 || c != '\n')
643 /* We're mixing comment styles here, so we'd better be careful.
644 The (comstart_pos != 0 || c != '\n') check is not quite correct
645 (we should just always set comment_lossage), but removing it
646 would imply that any multiline comment in C would go through
647 lossage, which seems overkill.
648 The failure should only happen in the rare cases such as
649 { (* } *) */
650 comment_lossage = 1;
02010917 651 break;
195d1361 652
02010917
SM
653 case Sopen:
654 /* Assume a defun-start point is outside of strings. */
655 if (open_paren_in_column_0_is_defun_start
656 && (from == stop
657 || (temp_byte = dec_bytepos (from_byte),
658 FETCH_CHAR (temp_byte) == '\n')))
659 {
660 defun_start = from;
661 defun_start_byte = from_byte;
662 from = stop; /* Break out of the loop. */
663 }
eb35b628 664 break;
02010917
SM
665
666 default:
f7c436c1 667 break;
eb35b628 668 }
195d1361
RS
669 }
670
671 if (comstart_pos == 0)
672 {
673 from = comment_end;
6a140a74 674 from_byte = comment_end_byte;
195d1361
RS
675 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
676 }
f7c436c1
SM
677 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
678 or `done'), then we've found the beginning of the non-nested comment. */
679 else if (1) /* !comnested */
195d1361
RS
680 {
681 from = comstart_pos;
6a140a74 682 from_byte = comstart_byte;
789f3320 683 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
684 }
685 else
686 {
3ee5041c
SM
687 struct lisp_parse_state state;
688 lossage:
195d1361
RS
689 /* We had two kinds of string delimiters mixed up
690 together. Decode this going forwards.
02010917 691 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
692 to the one in question; this records where we
693 last passed a comment starter. */
eb35b628
RS
694 /* If we did not already find the defun start, find it now. */
695 if (defun_start == 0)
696 {
697 defun_start = find_defun_start (comment_end, comment_end_byte);
698 defun_start_byte = find_start_value_byte;
699 }
02010917 700 do
195d1361 701 {
02010917
SM
702 scan_sexps_forward (&state,
703 defun_start, defun_start_byte,
704 comment_end, -10000, 0, Qnil, 0);
705 defun_start = comment_end;
706 if (state.incomment == (comnested ? 1 : -1)
707 && state.comstyle == comstyle)
708 from = state.comstr_start;
709 else
710 {
711 from = comment_end;
712 if (state.incomment)
713 /* If comment_end is inside some other comment, maybe ours
714 is nested, so we need to try again from within the
715 surrounding comment. Example: { a (* " *) */
716 {
717 /* FIXME: We should advance by one or two chars. */
718 defun_start = state.comstr_start + 2;
719 defun_start_byte = CHAR_TO_BYTE (defun_start);
720 }
721 }
722 } while (defun_start < comment_end);
723
6a140a74 724 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
725 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
726 }
7d0393cf 727
95ff8dfc 728 done:
6a140a74
RS
729 *charpos_ptr = from;
730 *bytepos_ptr = from_byte;
731
1aa963c8 732 return (from == comment_end) ? -1 : from;
195d1361
RS
733}
734\f
8489eb67 735DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
736 doc: /* Return t if OBJECT is a syntax table.
737Currently, any char-table counts as a syntax table. */)
738 (object)
2203e1e8 739 Lisp_Object object;
8489eb67 740{
2203e1e8 741 if (CHAR_TABLE_P (object)
e704cb4b 742 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
743 return Qt;
744 return Qnil;
745}
746
8ea151b2 747static void
8489eb67
RS
748check_syntax_table (obj)
749 Lisp_Object obj;
750{
d1be9f0f 751 if (!(CHAR_TABLE_P (obj)
e704cb4b 752 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 753 wrong_type_argument (Qsyntax_table_p, obj);
7d0393cf 754}
8489eb67 755
8489eb67 756DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
757 doc: /* Return the current syntax table.
758This is the one specified by the current buffer. */)
759 ()
8489eb67
RS
760{
761 return current_buffer->syntax_table;
762}
763
764DEFUN ("standard-syntax-table", Fstandard_syntax_table,
765 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
766 doc: /* Return the standard syntax table.
767This is the one used for new buffers. */)
768 ()
8489eb67
RS
769{
770 return Vstandard_syntax_table;
771}
772
773DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
774 doc: /* Construct a new syntax table and return it.
775It is a copy of the TABLE, which defaults to the standard syntax table. */)
776 (table)
8489eb67
RS
777 Lisp_Object table;
778{
8ea151b2
RS
779 Lisp_Object copy;
780
265a9e55 781 if (!NILP (table))
8ea151b2
RS
782 check_syntax_table (table);
783 else
784 table = Vstandard_syntax_table;
785
786 copy = Fcopy_sequence (table);
0f867324
RS
787
788 /* Only the standard syntax table should have a default element.
789 Other syntax tables should inherit from parents instead. */
790 XCHAR_TABLE (copy)->defalt = Qnil;
791
792 /* Copied syntax tables should all have parents.
793 If we copied one with no parent, such as the standard syntax table,
794 use the standard syntax table as the copy's parent. */
795 if (NILP (XCHAR_TABLE (copy)->parent))
796 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 797 return copy;
8489eb67
RS
798}
799
800DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
801 doc: /* Select a new syntax table for the current buffer.
802One argument, a syntax table. */)
803 (table)
8489eb67
RS
804 Lisp_Object table;
805{
4e4dfa78 806 int idx;
8ea151b2 807 check_syntax_table (table);
8489eb67
RS
808 current_buffer->syntax_table = table;
809 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
810 idx = PER_BUFFER_VAR_IDX (syntax_table);
811 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
812 return table;
813}
814\f
815/* Convert a letter which signifies a syntax code
816 into the code it signifies.
195d1361 817 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
818
819unsigned char syntax_spec_code[0400] =
820 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 824 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
825 (char) Smath, 0377, 0377, (char) Squote,
826 (char) Sopen, (char) Sclose, 0377, 0377,
827 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
828 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
829 0377, 0377, 0377, 0377,
830 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 831 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
832 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
833 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
834 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
835 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
836 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
837 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 838 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
839 };
840
195d1361 841/* Indexed by syntax code, give the letter that describes it. */
8489eb67 842
195d1361 843char syntax_code_spec[16] =
8489eb67 844 {
195d1361
RS
845 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
846 '!', '|'
8489eb67 847 };
93da5fff
KH
848
849/* Indexed by syntax code, give the object (cons of syntax code and
850 nil) to be stored in syntax table. Since these objects can be
851 shared among syntax tables, we generate them in advance. By
852 sharing objects, the function `describe-syntax' can give a more
853 compact listing. */
854static Lisp_Object Vsyntax_code_object;
855
8489eb67 856\f
8ea151b2
RS
857/* Look up the value for CHARACTER in syntax table TABLE's parent
858 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
859 for CHARACTER. It's actually used only when not compiled with GCC. */
860
861Lisp_Object
862syntax_parent_lookup (table, character)
863 Lisp_Object table;
864 int character;
865{
866 Lisp_Object value;
867
868 while (1)
869 {
870 table = XCHAR_TABLE (table)->parent;
871 if (NILP (table))
872 return Qnil;
873
874 value = XCHAR_TABLE (table)->contents[character];
875 if (!NILP (value))
876 return value;
877 }
878}
879
8489eb67 880DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
881 doc: /* Return the syntax code of CHARACTER, described by a character.
882For example, if CHARACTER is a word constituent,
883the character `w' is returned.
884The characters that correspond to various syntax codes
885are listed in the documentation of `modify-syntax-entry'. */)
886 (character)
2203e1e8 887 Lisp_Object character;
8489eb67 888{
8ea151b2 889 int char_int;
195d1361
RS
890 gl_state.current_syntax_table = current_buffer->syntax_table;
891
892 gl_state.use_global = 0;
b7826503 893 CHECK_NUMBER (character);
2203e1e8 894 char_int = XINT (character);
8ea151b2 895 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
896}
897
898DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
899 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
900 (character)
2203e1e8 901 Lisp_Object character;
beefa22e 902{
8ea151b2 903 int char_int, code;
195d1361
RS
904 gl_state.current_syntax_table = current_buffer->syntax_table;
905 gl_state.use_global = 0;
b7826503 906 CHECK_NUMBER (character);
2203e1e8 907 char_int = XINT (character);
8ea151b2 908 code = SYNTAX (char_int);
a8bd7cd8 909 if (code == Sopen || code == Sclose)
2e34157c 910 return SYNTAX_MATCH (char_int);
beefa22e 911 return Qnil;
8489eb67
RS
912}
913
c65adb44 914DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
915 doc: /* Convert a syntax specification STRING into syntax cell form.
916STRING should be a string as it is allowed as argument of
917`modify-syntax-entry'. Value is the equivalent cons cell
918(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
919text property. */)
920 (string)
c65adb44 921 Lisp_Object string;
8489eb67 922{
2e567bd3 923 register const unsigned char *p;
8489eb67 924 register enum syntaxcode code;
247e20a8 925 int val;
8ea151b2 926 Lisp_Object match;
8489eb67 927
b7826503 928 CHECK_STRING (string);
8489eb67 929
d5db4077 930 p = SDATA (string);
8489eb67
RS
931 code = (enum syntaxcode) syntax_spec_code[*p++];
932 if (((int) code & 0377) == 0377)
b764a653 933 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 934
8ea151b2 935 if (code == Sinherit)
c65adb44 936 return Qnil;
8ea151b2
RS
937
938 if (*p)
d1be9f0f 939 {
93da5fff 940 int len;
7c19e8e1 941 int character = (STRING_CHAR_AND_LENGTH
d5db4077 942 (p, SBYTES (string) - 1, len));
93da5fff 943 XSETINT (match, character);
d1be9f0f
RS
944 if (XFASTINT (match) == ' ')
945 match = Qnil;
93da5fff 946 p += len;
d1be9f0f
RS
947 }
948 else
8ea151b2 949 match = Qnil;
8489eb67 950
8ea151b2 951 val = (int) code;
8489eb67
RS
952 while (*p)
953 switch (*p++)
954 {
955 case '1':
247e20a8 956 val |= 1 << 16;
8489eb67
RS
957 break;
958
959 case '2':
247e20a8 960 val |= 1 << 17;
8489eb67
RS
961 break;
962
963 case '3':
247e20a8 964 val |= 1 << 18;
8489eb67
RS
965 break;
966
967 case '4':
247e20a8 968 val |= 1 << 19;
8489eb67
RS
969 break;
970
971 case 'p':
247e20a8 972 val |= 1 << 20;
8489eb67 973 break;
e5d4f4dc
RS
974
975 case 'b':
247e20a8 976 val |= 1 << 21;
e5d4f4dc 977 break;
95ff8dfc
RS
978
979 case 'n':
980 val |= 1 << 22;
981 break;
8489eb67 982 }
7d0393cf 983
93da5fff 984 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 985 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
986 else
987 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
988 return Fcons (make_number (val), match);
989}
990
fdb82f93 991/* I really don't know why this is interactive
2c9e1900 992 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 993DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 994 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 995 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 996The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93
PJ
997 the current buffer's syntax table.
998The first character of NEWENTRY should be one of the following:
999 Space or - whitespace syntax. w word constituent.
1000 _ symbol constituent. . punctuation.
1001 ( open-parenthesis. ) close-parenthesis.
1002 " string quote. \\ escape.
1003 $ paired delimiter. ' expression quote or prefix operator.
1004 < comment starter. > comment ender.
1005 / character-quote. @ inherit from `standard-syntax-table'.
1006 | generic string fence. ! generic comment fence.
1007
1008Only single-character comment start and end sequences are represented thus.
1009Two-character sequences are represented as described below.
1010The second character of NEWENTRY is the matching parenthesis,
1011 used only if the first character is `(' or `)'.
1012Any additional characters are flags.
1013Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1014 1 means CHAR is the start of a two-char comment start sequence.
1015 2 means CHAR is the second character of such a sequence.
1016 3 means CHAR is the start of a two-char comment end sequence.
1017 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1018
1019There can be up to two orthogonal comment sequences. This is to support
1020language modes such as C++. By default, all comment sequences are of style
1021a, but you can set the comment sequence style to b (on the second character
1022of a comment-start, or the first character of a comment-end sequence) using
1023this flag:
a50a10a0
PJ
1024 b means CHAR is part of comment sequence b.
1025 n means CHAR is part of a nestable comment sequence.
fdb82f93 1026
a50a10a0 1027 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1028 such characters are treated as whitespace when they occur
a50a10a0
PJ
1029 between expressions.
1030usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1031 (c, newentry, syntax_table)
c65adb44
SM
1032 Lisp_Object c, newentry, syntax_table;
1033{
b7826503 1034 CHECK_NUMBER (c);
c65adb44
SM
1035
1036 if (NILP (syntax_table))
1037 syntax_table = current_buffer->syntax_table;
1038 else
1039 check_syntax_table (syntax_table);
8489eb67 1040
c65adb44 1041 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
8489eb67
RS
1042 return Qnil;
1043}
1044\f
1045/* Dump syntax table to buffer in human-readable format */
1046
62abe9cb
SM
1047DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1048 Sinternal_describe_syntax_value, 1, 1, 0,
1049 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1050 (syntax)
1051 Lisp_Object syntax;
8489eb67
RS
1052{
1053 register enum syntaxcode code;
e6365855 1054 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1055 char str[2];
62abe9cb 1056 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1057
8ea151b2
RS
1058 if (NILP (value))
1059 {
62abe9cb
SM
1060 insert_string ("default");
1061 return syntax;
8ea151b2
RS
1062 }
1063
908b7fea
KH
1064 if (CHAR_TABLE_P (value))
1065 {
62abe9cb
SM
1066 insert_string ("deeper char-table ...");
1067 return syntax;
908b7fea
KH
1068 }
1069
8ea151b2
RS
1070 if (!CONSP (value))
1071 {
62abe9cb
SM
1072 insert_string ("invalid");
1073 return syntax;
8ea151b2
RS
1074 }
1075
c1d497be
KR
1076 first = XCAR (value);
1077 match_lisp = XCDR (value);
8ea151b2
RS
1078
1079 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1080 {
62abe9cb
SM
1081 insert_string ("invalid");
1082 return syntax;
8489eb67
RS
1083 }
1084
e704cb4b 1085 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1086 start1 = (XINT (first) >> 16) & 1;
1087 start2 = (XINT (first) >> 17) & 1;
1088 end1 = (XINT (first) >> 18) & 1;
1089 end2 = (XINT (first) >> 19) & 1;
1090 prefix = (XINT (first) >> 20) & 1;
1091 comstyle = (XINT (first) >> 21) & 1;
e6365855 1092 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1093
1094 if ((int) code < 0 || (int) code >= (int) Smax)
1095 {
1096 insert_string ("invalid");
62abe9cb 1097 return syntax;
8489eb67
RS
1098 }
1099 desc = syntax_code_spec[(int) code];
1100
1101 str[0] = desc, str[1] = 0;
1102 insert (str, 1);
1103
93da5fff
KH
1104 if (NILP (match_lisp))
1105 insert (" ", 1);
1106 else
1107 insert_char (XINT (match_lisp));
8489eb67 1108
8489eb67
RS
1109 if (start1)
1110 insert ("1", 1);
1111 if (start2)
1112 insert ("2", 1);
1113
1114 if (end1)
1115 insert ("3", 1);
1116 if (end2)
1117 insert ("4", 1);
1118
1119 if (prefix)
1120 insert ("p", 1);
e5d4f4dc
RS
1121 if (comstyle)
1122 insert ("b", 1);
e6365855
SM
1123 if (comnested)
1124 insert ("n", 1);
8489eb67
RS
1125
1126 insert_string ("\twhich means: ");
1127
0220c518 1128 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1129 {
1130 case Swhitespace:
1131 insert_string ("whitespace"); break;
1132 case Spunct:
1133 insert_string ("punctuation"); break;
1134 case Sword:
1135 insert_string ("word"); break;
1136 case Ssymbol:
1137 insert_string ("symbol"); break;
1138 case Sopen:
1139 insert_string ("open"); break;
1140 case Sclose:
1141 insert_string ("close"); break;
1142 case Squote:
d671382e 1143 insert_string ("prefix"); break;
8489eb67
RS
1144 case Sstring:
1145 insert_string ("string"); break;
1146 case Smath:
1147 insert_string ("math"); break;
1148 case Sescape:
1149 insert_string ("escape"); break;
1150 case Scharquote:
1151 insert_string ("charquote"); break;
1152 case Scomment:
1153 insert_string ("comment"); break;
1154 case Sendcomment:
1155 insert_string ("endcomment"); break;
d671382e
SM
1156 case Sinherit:
1157 insert_string ("inherit"); break;
1158 case Scomment_fence:
1159 insert_string ("comment fence"); break;
1160 case Sstring_fence:
1161 insert_string ("string fence"); break;
8489eb67
RS
1162 default:
1163 insert_string ("invalid");
62abe9cb 1164 return syntax;
8489eb67
RS
1165 }
1166
8ea151b2 1167 if (!NILP (match_lisp))
8489eb67
RS
1168 {
1169 insert_string (", matches ");
8ea151b2 1170 insert_char (XINT (match_lisp));
8489eb67
RS
1171 }
1172
1173 if (start1)
1174 insert_string (",\n\t is the first character of a comment-start sequence");
1175 if (start2)
1176 insert_string (",\n\t is the second character of a comment-start sequence");
1177
1178 if (end1)
1179 insert_string (",\n\t is the first character of a comment-end sequence");
1180 if (end2)
1181 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1182 if (comstyle)
1183 insert_string (" (comment style b)");
e6365855
SM
1184 if (comnested)
1185 insert_string (" (nestable)");
e5d4f4dc 1186
8489eb67
RS
1187 if (prefix)
1188 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1189
62abe9cb
SM
1190 return syntax;
1191}
8489eb67 1192\f
195d1361
RS
1193int parse_sexp_ignore_comments;
1194
8489eb67
RS
1195/* Return the position across COUNT words from FROM.
1196 If that many words cannot be found before the end of the buffer, return 0.
1197 COUNT negative means scan backward and stop at word beginning. */
1198
dfcf069d 1199int
8489eb67
RS
1200scan_words (from, count)
1201 register int from, count;
1202{
1203 register int beg = BEGV;
1204 register int end = ZV;
6a140a74 1205 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1206 register enum syntaxcode code;
1207 int ch0, ch1;
8489eb67
RS
1208
1209 immediate_quit = 1;
1210 QUIT;
1211
195d1361
RS
1212 SETUP_SYNTAX_TABLE (from, count);
1213
8489eb67
RS
1214 while (count > 0)
1215 {
1216 while (1)
1217 {
1218 if (from == end)
1219 {
1220 immediate_quit = 0;
1221 return 0;
1222 }
195d1361 1223 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1224 ch0 = FETCH_CHAR (from_byte);
93da5fff 1225 code = SYNTAX (ch0);
6a140a74 1226 INC_BOTH (from, from_byte);
8489eb67
RS
1227 if (words_include_escapes
1228 && (code == Sescape || code == Scharquote))
1229 break;
1230 if (code == Sword)
1231 break;
8489eb67 1232 }
93da5fff
KH
1233 /* Now CH0 is a character which begins a word and FROM is the
1234 position of the next character. */
8489eb67
RS
1235 while (1)
1236 {
1237 if (from == end) break;
195d1361 1238 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1239 ch1 = FETCH_CHAR (from_byte);
93da5fff 1240 code = SYNTAX (ch1);
8489eb67
RS
1241 if (!(words_include_escapes
1242 && (code == Sescape || code == Scharquote)))
93da5fff 1243 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1244 break;
6a140a74 1245 INC_BOTH (from, from_byte);
93da5fff 1246 ch0 = ch1;
8489eb67
RS
1247 }
1248 count--;
1249 }
1250 while (count < 0)
1251 {
1252 while (1)
1253 {
1254 if (from == beg)
1255 {
1256 immediate_quit = 0;
1257 return 0;
1258 }
6a140a74 1259 DEC_BOTH (from, from_byte);
195d1361 1260 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1261 ch1 = FETCH_CHAR (from_byte);
93da5fff 1262 code = SYNTAX (ch1);
8489eb67
RS
1263 if (words_include_escapes
1264 && (code == Sescape || code == Scharquote))
1265 break;
1266 if (code == Sword)
1267 break;
8489eb67 1268 }
93da5fff
KH
1269 /* Now CH1 is a character which ends a word and FROM is the
1270 position of it. */
8489eb67
RS
1271 while (1)
1272 {
6a140a74
RS
1273 int temp_byte;
1274
1275 if (from == beg)
1276 break;
1277 temp_byte = dec_bytepos (from_byte);
195d1361 1278 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1279 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1280 code = SYNTAX (ch0);
8489eb67
RS
1281 if (!(words_include_escapes
1282 && (code == Sescape || code == Scharquote)))
93da5fff 1283 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1284 break;
6a140a74 1285 DEC_BOTH (from, from_byte);
93da5fff 1286 ch1 = ch0;
8489eb67
RS
1287 }
1288 count++;
1289 }
1290
1291 immediate_quit = 0;
1292
1293 return from;
1294}
1295
29e16385 1296DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
fdb82f93
PJ
1297 doc: /* Move point forward ARG words (backward if ARG is negative).
1298Normally returns t.
1299If an edge of the buffer or a field boundary is reached, point is left there
1300and the function returns nil. Field boundaries are not noticed if
1301`inhibit-field-text-motion' is non-nil. */)
29e16385
RS
1302 (arg)
1303 Lisp_Object arg;
8489eb67 1304{
c096ae4d 1305 Lisp_Object tmp;
2c6ea900 1306 int orig_val, val;
8489eb67 1307
29e16385
RS
1308 if (NILP (arg))
1309 XSETFASTINT (arg, 1);
1310 else
1311 CHECK_NUMBER (arg);
1312
1313 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1314 if (! orig_val)
29e16385 1315 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1316
b8855607 1317 /* Avoid jumping out of an input field. */
c096ae4d
SM
1318 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1319 Qt, Qnil, Qnil);
1320 val = XFASTINT (tmp);
7d0393cf 1321
8489eb67 1322 SET_PT (val);
e6d8341f 1323 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1324}
1325\f
195d1361
RS
1326Lisp_Object skip_chars ();
1327
1328DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1329 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1330STRING is like the inside of a `[...]' in a regular expression
1331except that `]' is never special and `\\' quotes `^', `-' or `\\'
1332 (but not as the end of a range; quoting is never needed there).
1333Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1334With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1335Char classes, e.g. `[:alpha:]', are supported.
1336
1337Returns the distance traveled, either zero or positive. */)
fdb82f93 1338 (string, lim)
195d1361
RS
1339 Lisp_Object string, lim;
1340{
a1bc88d4 1341 return skip_chars (1, 0, string, lim, 1);
195d1361
RS
1342}
1343
1344DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1345 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1346See `skip-chars-forward' for details.
1347Returns the distance traveled, either zero or negative. */)
1348 (string, lim)
195d1361
RS
1349 Lisp_Object string, lim;
1350{
a1bc88d4 1351 return skip_chars (0, 0, string, lim, 1);
195d1361
RS
1352}
1353
1354DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1355 doc: /* Move point forward across chars in specified syntax classes.
1356SYNTAX is a string of syntax code characters.
1357Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1358If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1359This function returns the distance traveled, either zero or positive. */)
1360 (syntax, lim)
195d1361
RS
1361 Lisp_Object syntax, lim;
1362{
a1bc88d4 1363 return skip_chars (1, 1, syntax, lim, 0);
195d1361
RS
1364}
1365
1366DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1367 doc: /* Move point backward across chars in specified syntax classes.
1368SYNTAX is a string of syntax code characters.
1369Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1370If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1371This function returns the distance traveled, either zero or negative. */)
1372 (syntax, lim)
195d1361
RS
1373 Lisp_Object syntax, lim;
1374{
a1bc88d4 1375 return skip_chars (0, 1, syntax, lim, 0);
195d1361
RS
1376}
1377
6a140a74 1378static Lisp_Object
a1bc88d4 1379skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
195d1361
RS
1380 int forwardp, syntaxp;
1381 Lisp_Object string, lim;
a1bc88d4 1382 int handle_iso_classes;
195d1361 1383{
195d1361
RS
1384 register unsigned int c;
1385 unsigned char fastmap[0400];
7e68b0ea 1386 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1387 of which codes don't fit in FASTMAP. In that case, set the
1388 ranges of characters in CHAR_RANGES. */
5df4982e 1389 int *char_ranges;
7e68b0ea 1390 int n_char_ranges = 0;
195d1361 1391 int negate = 0;
4101e6fe 1392 register int i, i_byte;
7e68b0ea 1393 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1394 int string_multibyte;
1395 int size_byte;
2e567bd3 1396 const unsigned char *str;
82d497fc 1397 int len;
a1bc88d4 1398 Lisp_Object iso_classes;
195d1361 1399
b7826503 1400 CHECK_STRING (string);
d5db4077 1401 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1674d9a2 1402 string_multibyte = STRING_MULTIBYTE (string);
d5db4077
KR
1403 str = SDATA (string);
1404 size_byte = SBYTES (string);
a1bc88d4 1405 iso_classes = Qnil;
195d1361 1406
82d497fc
KH
1407 /* Adjust the multibyteness of the string to that of the buffer. */
1408 if (multibyte != string_multibyte)
1409 {
1410 int nbytes;
1411
1412 if (multibyte)
d5db4077
KR
1413 nbytes = count_size_as_multibyte (SDATA (string),
1414 SCHARS (string));
82d497fc 1415 else
d5db4077 1416 nbytes = SCHARS (string);
82d497fc
KH
1417 if (nbytes != size_byte)
1418 {
2e567bd3
KR
1419 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1420 copy_text (SDATA (string), tmp, size_byte,
82d497fc
KH
1421 string_multibyte, multibyte);
1422 size_byte = nbytes;
2e567bd3 1423 str = tmp;
82d497fc
KH
1424 }
1425 }
1426
195d1361
RS
1427 if (NILP (lim))
1428 XSETINT (lim, forwardp ? ZV : BEGV);
1429 else
b7826503 1430 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1431
1432 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1433 if (XINT (lim) > ZV)
1434 XSETFASTINT (lim, ZV);
1435 if (XINT (lim) < BEGV)
1436 XSETFASTINT (lim, BEGV);
1437
195d1361
RS
1438 bzero (fastmap, sizeof fastmap);
1439
82d497fc 1440 i_byte = 0;
4101e6fe 1441
13090112 1442 if (i_byte < size_byte
d5db4077 1443 && SREF (string, 0) == '^')
195d1361 1444 {
82d497fc 1445 negate = 1; i_byte++;
195d1361
RS
1446 }
1447
1448 /* Find the characters specified and set their elements of fastmap.
1449 If syntaxp, each character counts as itself.
1450 Otherwise, handle backslashes and ranges specially. */
1451
13090112 1452 while (i_byte < size_byte)
195d1361 1453 {
82d497fc
KH
1454 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1455 i_byte += len;
4101e6fe 1456
195d1361 1457 if (syntaxp)
4101e6fe 1458 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1459 else
1460 {
a1bc88d4
RS
1461 if (handle_iso_classes && c == '['
1462 && i_byte < size_byte
1463 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1464 {
1465 const unsigned char *class_beg = str + i_byte + 1;
1466 const unsigned char *class_end = class_beg;
b3bda4fd 1467 const unsigned char *class_limit = str + size_byte - 2;
a1bc88d4
RS
1468 /* Leave room for the null. */
1469 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1470 re_wctype_t cc;
1471
1472 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1473 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1474
b3bda4fd
KS
1475 while (class_end < class_limit
1476 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1477 class_end++;
1478
b3bda4fd
KS
1479 if (class_end == class_beg
1480 || *class_end != ':' || class_end[1] != ']')
1481 goto not_a_class_name;
a1bc88d4
RS
1482
1483 bcopy (class_beg, class_name, class_end - class_beg);
1484 class_name[class_end - class_beg] = 0;
1485
1486 cc = re_wctype (class_name);
1487 if (cc == 0)
1488 error ("Invalid ISO C character class");
1489
1490 iso_classes = Fcons (make_number (cc), iso_classes);
1491
1492 i_byte = class_end + 2 - str;
1493 continue;
1494 }
1495
b3bda4fd 1496 not_a_class_name:
195d1361
RS
1497 if (c == '\\')
1498 {
13090112 1499 if (i_byte == size_byte)
4101e6fe
RS
1500 break;
1501
57951c2e
AS
1502 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1503 size_byte - i_byte, len);
82d497fc 1504 i_byte += len;
195d1361 1505 }
57951c2e
AS
1506 /* Treat `-' as range character only if another character
1507 follows. */
1508 if (i_byte + 1 < size_byte
82d497fc 1509 && str[i_byte] == '-')
195d1361 1510 {
9690d026 1511 unsigned int c2;
4101e6fe
RS
1512
1513 /* Skip over the dash. */
82d497fc 1514 i_byte++;
4101e6fe 1515
4101e6fe 1516 /* Get the end of the range. */
57951c2e
AS
1517 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1518 size_byte - i_byte, len);
82d497fc 1519 i_byte += len;
7e68b0ea 1520
4101e6fe 1521 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1522 {
1523 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1524 {
7ae339a7
KH
1525 /* Handle a range starting with a character of
1526 less than 256, and ending with a character of
1527 not less than 256. Split that into two
1528 ranges, the low one ending at 0377, and the
1529 high one starting at the smallest character
1530 in the charset of C2 and ending at C2. */
e39091b6
KH
1531 int charset = CHAR_CHARSET (c2);
1532 int c1 = MAKE_CHAR (charset, 0, 0);
1533
e39091b6
KH
1534 char_ranges[n_char_ranges++] = c1;
1535 char_ranges[n_char_ranges++] = c2;
465cf84c 1536 c2 = 0377;
e39091b6 1537 }
06274af5
KH
1538 while (c <= c2)
1539 {
1540 fastmap[c] = 1;
1541 c++;
1542 }
1543 }
9690d026 1544 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1545 {
9690d026
KH
1546 char_ranges[n_char_ranges++] = c;
1547 char_ranges[n_char_ranges++] = c2;
195d1361 1548 }
195d1361
RS
1549 }
1550 else
7e68b0ea 1551 {
e39091b6
KH
1552 if (SINGLE_BYTE_CHAR_P (c))
1553 fastmap[c] = 1;
1554 else
7e68b0ea 1555 {
4101e6fe
RS
1556 char_ranges[n_char_ranges++] = c;
1557 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1558 }
1559 }
195d1361
RS
1560 }
1561 }
1562
9690d026 1563 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1564 if (negate)
1565 for (i = 0; i < sizeof fastmap; i++)
9690d026 1566 fastmap[i] ^= 1;
195d1361
RS
1567
1568 {
1569 int start_point = PT;
1570 int pos = PT;
6a140a74 1571 int pos_byte = PT_BYTE;
9af7511a
KH
1572 unsigned char *p = PT_ADDR, *endp, *stop;
1573
1574 if (forwardp)
1575 {
1576 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1577 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1578 }
1579 else
1580 {
1581 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1582 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1583 }
195d1361
RS
1584
1585 immediate_quit = 1;
1586 if (syntaxp)
1587 {
1588 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1589 if (forwardp)
1590 {
7e68b0ea 1591 if (multibyte)
9af7511a
KH
1592 while (1)
1593 {
1594 int nbytes;
1595
1596 if (p >= stop)
ee0cdb48 1597 {
9af7511a
KH
1598 if (p >= endp)
1599 break;
1600 p = GAP_END_ADDR;
1601 stop = endp;
ee0cdb48 1602 }
9af7511a
KH
1603 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1604 if (! fastmap[(int) SYNTAX (c)])
1605 break;
1606 p += nbytes, pos++, pos_byte += nbytes;
1607 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1608 }
7e68b0ea 1609 else
9af7511a
KH
1610 while (1)
1611 {
1612 if (p >= stop)
1613 {
1614 if (p >= endp)
1615 break;
1616 p = GAP_END_ADDR;
1617 stop = endp;
1618 }
1619 if (! fastmap[(int) SYNTAX (*p)])
1620 break;
1621 p++, pos++;
1622 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1623 }
195d1361
RS
1624 }
1625 else
1626 {
7e68b0ea 1627 if (multibyte)
9af7511a
KH
1628 while (1)
1629 {
1630 unsigned char *prev_p;
1631 int nbytes;
1632
1633 if (p <= stop)
1634 {
1635 if (p <= endp)
7e68b0ea 1636 break;
9af7511a
KH
1637 p = GPT_ADDR;
1638 stop = endp;
1639 }
1640 prev_p = p;
1641 while (--p >= stop && ! CHAR_HEAD_P (*p));
1642 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1643 if (prev_p - p > nbytes)
1644 p = prev_p - 1, c = *p, nbytes = 1;
1645 else
1646 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1647 pos--, pos_byte -= nbytes;
1648 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1649 if (! fastmap[(int) SYNTAX (c)])
1650 {
1651 pos++;
1652 pos_byte += nbytes;
1653 break;
1654 }
1655 }
7e68b0ea 1656 else
9af7511a
KH
1657 while (1)
1658 {
1659 if (p <= stop)
ee0cdb48 1660 {
9af7511a 1661 if (p <= endp)
4c8535eb 1662 break;
9af7511a
KH
1663 p = GPT_ADDR;
1664 stop = endp;
ee0cdb48 1665 }
9af7511a
KH
1666 if (! fastmap[(int) SYNTAX (p[-1])])
1667 break;
1668 p--, pos--;
1669 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1670 }
195d1361
RS
1671 }
1672 }
1673 else
1674 {
1675 if (forwardp)
1676 {
7e68b0ea 1677 if (multibyte)
9af7511a 1678 while (1)
7e68b0ea 1679 {
9af7511a
KH
1680 int nbytes;
1681
1682 if (p >= stop)
1683 {
1684 if (p >= endp)
1685 break;
1686 p = GAP_END_ADDR;
1687 stop = endp;
1688 }
1689 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
a1bc88d4
RS
1690
1691 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1692 {
1693 if (negate)
1694 break;
1695 else
1696 goto fwd_ok;
1697 }
1698
9690d026 1699 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1700 {
9690d026
KH
1701 if (!fastmap[c])
1702 break;
1703 }
1704 else
1705 {
1706 /* If we are looking at a multibyte character,
1707 we must look up the character in the table
1708 CHAR_RANGES. If there's no data in the
1709 table, that character is not what we want to
1710 skip. */
1711
e39091b6
KH
1712 /* The following code do the right thing even if
1713 n_char_ranges is zero (i.e. no data in
1714 CHAR_RANGES). */
7e68b0ea 1715 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1716 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1717 break;
1718 if (!(negate ^ (i < n_char_ranges)))
1719 break;
7e68b0ea 1720 }
a1bc88d4 1721 fwd_ok:
9af7511a 1722 p += nbytes, pos++, pos_byte += nbytes;
7e68b0ea
RS
1723 }
1724 else
9af7511a
KH
1725 while (1)
1726 {
1727 if (p >= stop)
1728 {
1729 if (p >= endp)
1730 break;
1731 p = GAP_END_ADDR;
1732 stop = endp;
1733 }
a1bc88d4
RS
1734
1735 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1736 {
1737 if (negate)
1738 break;
1739 else
1ad6e4db 1740 goto fwd_unibyte_ok;
a1bc88d4
RS
1741 }
1742
4e837cd0 1743 if (!fastmap[*p])
9af7511a 1744 break;
1ad6e4db 1745 fwd_unibyte_ok:
9af7511a
KH
1746 p++, pos++;
1747 }
195d1361
RS
1748 }
1749 else
1750 {
7e68b0ea 1751 if (multibyte)
9af7511a 1752 while (1)
7e68b0ea 1753 {
9af7511a
KH
1754 unsigned char *prev_p;
1755 int nbytes;
e39091b6 1756
9af7511a
KH
1757 if (p <= stop)
1758 {
1759 if (p <= endp)
1760 break;
1761 p = GPT_ADDR;
1762 stop = endp;
1763 }
1764 prev_p = p;
1765 while (--p >= stop && ! CHAR_HEAD_P (*p));
1766 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1767 if (prev_p - p > nbytes)
1768 p = prev_p - 1, c = *p, nbytes = 1;
1769 else
1770 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
a1bc88d4
RS
1771
1772 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1773 {
1774 if (negate)
1775 break;
1776 else
1777 goto back_ok;
1778 }
1779
9690d026
KH
1780 if (SINGLE_BYTE_CHAR_P (c))
1781 {
1782 if (!fastmap[c])
1783 break;
1784 }
1785 else
7e68b0ea 1786 {
9690d026 1787 /* See the comment in the previous similar code. */
e39091b6
KH
1788 for (i = 0; i < n_char_ranges; i += 2)
1789 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1790 break;
1791 if (!(negate ^ (i < n_char_ranges)))
1792 break;
7e68b0ea 1793 }
a1bc88d4 1794 back_ok:
9af7511a 1795 pos--, pos_byte -= nbytes;
7e68b0ea
RS
1796 }
1797 else
9af7511a
KH
1798 while (1)
1799 {
1800 if (p <= stop)
1801 {
1802 if (p <= endp)
1803 break;
1804 p = GPT_ADDR;
1805 stop = endp;
1806 }
a1bc88d4
RS
1807
1808 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1809 {
1810 if (negate)
1811 break;
1812 else
1ad6e4db 1813 goto back_unibyte_ok;
a1bc88d4
RS
1814 }
1815
4e837cd0 1816 if (!fastmap[p[-1]])
9af7511a 1817 break;
1ad6e4db 1818 back_unibyte_ok:
9af7511a
KH
1819 p--, pos--;
1820 }
195d1361
RS
1821 }
1822 }
7e68b0ea 1823
6a140a74
RS
1824#if 0 /* Not needed now that a position in mid-character
1825 cannot be specified in Lisp. */
7e68b0ea
RS
1826 if (multibyte
1827 /* INC_POS or DEC_POS might have moved POS over LIM. */
1828 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1829 pos = XINT (lim);
6a140a74 1830#endif
7e68b0ea 1831
6a140a74
RS
1832 if (! multibyte)
1833 pos_byte = pos;
1834
1835 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1836 immediate_quit = 0;
1837
1838 return make_number (PT - start_point);
1839 }
1840}
a1bc88d4
RS
1841
1842/* Return 1 if character C belongs to one of the ISO classes
1843 in the list ISO_CLASSES. Each class is represented by an
1844 integer which is its type according to re_wctype. */
1845
1846static int
1847in_classes (c, iso_classes)
1848 int c;
1849 Lisp_Object iso_classes;
1850{
1851 int fits_class = 0;
1852
1853 while (! NILP (iso_classes))
1854 {
1855 Lisp_Object elt;
1856 elt = XCAR (iso_classes);
1857 iso_classes = XCDR (iso_classes);
1858
1859 if (re_iswctype (c, XFASTINT (elt)))
1860 fits_class = 1;
1861 }
1862
1863 return fits_class;
1864}
195d1361 1865\f
95ff8dfc
RS
1866/* Jump over a comment, assuming we are at the beginning of one.
1867 FROM is the current position.
1868 FROM_BYTE is the bytepos corresponding to FROM.
1869 Do not move past STOP (a charpos).
1870 The comment over which we have to jump is of style STYLE
1871 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1872 NESTING should be positive to indicate the nesting at the beginning
1873 for nested comments and should be zero or negative else.
1874 ST_COMMENT_STYLE cannot be nested.
1875 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1876 (or 0 If the search cannot start in the middle of a two-character).
1877
1878 If successful, return 1 and store the charpos of the comment's end
1879 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1880 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1881 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1882 (as defined for state.incomment) in *INCOMMENT_PTR.
1883
1884 The comment end is the last character of the comment rather than the
1885 character just after the comment.
1886
1887 Global syntax data is assumed to initially be valid for FROM and
1888 remains valid for forward search starting at the returned position. */
1889
1890static int
1891forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1892 charpos_ptr, bytepos_ptr, incomment_ptr)
1893 int from, from_byte, stop;
1894 int nesting, style, prev_syntax;
1895 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1896{
1897 register int c, c1;
1898 register enum syntaxcode code;
1899 register int syntax;
1900
1901 if (nesting <= 0) nesting = -1;
1902
1903 /* Enter the loop in the middle so that we find
1904 a 2-char comment ender if we start in the middle of it. */
1905 syntax = prev_syntax;
1906 if (syntax != 0) goto forw_incomment;
1907
1908 while (1)
1909 {
1910 if (from == stop)
1911 {
1912 *incomment_ptr = nesting;
1913 *charpos_ptr = from;
1914 *bytepos_ptr = from_byte;
1915 return 0;
1916 }
1917 c = FETCH_CHAR (from_byte);
1918 syntax = SYNTAX_WITH_FLAGS (c);
1919 code = syntax & 0xff;
1920 if (code == Sendcomment
1921 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1922 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1923 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1924 /* we have encountered a comment end of the same style
1925 as the comment sequence which began this comment
1926 section */
1927 break;
1928 if (code == Scomment_fence
1929 && style == ST_COMMENT_STYLE)
1930 /* we have encountered a comment end of the same style
1931 as the comment sequence which began this comment
1932 section. */
1933 break;
1934 if (nesting > 0
1935 && code == Scomment
abf8a9ff 1936 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1937 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1938 /* we have encountered a nested comment of the same style
1939 as the comment sequence which began this comment section */
1940 nesting++;
1941 INC_BOTH (from, from_byte);
1942 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 1943
95ff8dfc
RS
1944 forw_incomment:
1945 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1946 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1947 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1948 SYNTAX_COMEND_SECOND (c1))
1949 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1950 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1951 {
1952 if (--nesting <= 0)
1953 /* we have encountered a comment end of the same style
1954 as the comment sequence which began this comment
1955 section */
1956 break;
1957 else
1958 {
1959 INC_BOTH (from, from_byte);
1960 UPDATE_SYNTAX_TABLE_FORWARD (from);
1961 }
1962 }
95ff8dfc
RS
1963 if (nesting > 0
1964 && from < stop
1965 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1966 && (c1 = FETCH_CHAR (from_byte),
1967 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1968 && SYNTAX_COMSTART_SECOND (c1))
1969 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1970 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1971 /* we have encountered a nested comment of the same style
1972 as the comment sequence which began this comment
1973 section */
1974 {
1975 INC_BOTH (from, from_byte);
1976 UPDATE_SYNTAX_TABLE_FORWARD (from);
1977 nesting++;
1978 }
1979 }
1980 *charpos_ptr = from;
1981 *bytepos_ptr = from_byte;
1982 return 1;
1983}
1984
b3cfe0c8 1985DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 1986 doc: /*
ee648c11 1987Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
1988Stop scanning if we find something other than a comment or whitespace.
1989Set point to where scanning stops.
ee648c11 1990If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
1991between them, return t; otherwise return nil. */)
1992 (count)
840f481c 1993 Lisp_Object count;
b3cfe0c8
RS
1994{
1995 register int from;
6a140a74 1996 int from_byte;
b3cfe0c8 1997 register int stop;
8ea151b2 1998 register int c, c1;
b3cfe0c8
RS
1999 register enum syntaxcode code;
2000 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2001 int comnested = 0; /* whether the comment is nestable or not */
be720845 2002 int found;
840f481c 2003 int count1;
6a140a74 2004 int out_charpos, out_bytepos;
95ff8dfc 2005 int dummy;
840f481c 2006
b7826503 2007 CHECK_NUMBER (count);
840f481c 2008 count1 = XINT (count);
195d1361 2009 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2010
2011 immediate_quit = 1;
2012 QUIT;
2013
2014 from = PT;
6a140a74 2015 from_byte = PT_BYTE;
b3cfe0c8 2016
195d1361 2017 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2018 while (count1 > 0)
b3cfe0c8 2019 {
04882296 2020 do
b3cfe0c8 2021 {
f902a008
RS
2022 int comstart_first;
2023
04882296
KH
2024 if (from == stop)
2025 {
ef316cf0 2026 SET_PT_BOTH (from, from_byte);
b7e6e612 2027 immediate_quit = 0;
04882296
KH
2028 return Qnil;
2029 }
6a140a74 2030 c = FETCH_CHAR (from_byte);
b3cfe0c8 2031 code = SYNTAX (c);
f902a008 2032 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2033 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2034 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2035 INC_BOTH (from, from_byte);
f902a008 2036 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2037 if (from < stop && comstart_first
6a140a74 2038 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 2039 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2040 {
7d0393cf 2041 /* We have encountered a comment start sequence and we
7fc8191e 2042 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2043 the comment style this sequence begins so that later,
2044 only a comment end of the same style actually ends
7fc8191e 2045 the comment section. */
b3cfe0c8 2046 code = Scomment;
8ea151b2 2047 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2048 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2049 INC_BOTH (from, from_byte);
95ff8dfc 2050 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2051 }
04882296 2052 }
abf8a9ff 2053 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2054
da71ab91
KH
2055 if (code == Scomment_fence)
2056 comstyle = ST_COMMENT_STYLE;
2057 else if (code != Scomment)
04882296
KH
2058 {
2059 immediate_quit = 0;
6a140a74 2060 DEC_BOTH (from, from_byte);
ef316cf0 2061 SET_PT_BOTH (from, from_byte);
04882296
KH
2062 return Qnil;
2063 }
2064 /* We're at the start of a comment. */
95ff8dfc
RS
2065 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2066 &out_charpos, &out_bytepos, &dummy);
2067 from = out_charpos; from_byte = out_bytepos;
2068 if (!found)
04882296 2069 {
95ff8dfc
RS
2070 immediate_quit = 0;
2071 SET_PT_BOTH (from, from_byte);
2072 return Qnil;
b3cfe0c8 2073 }
95ff8dfc
RS
2074 INC_BOTH (from, from_byte);
2075 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2076 /* We have skipped one comment. */
840f481c 2077 count1--;
b3cfe0c8
RS
2078 }
2079
840f481c 2080 while (count1 < 0)
b3cfe0c8 2081 {
b9145dbb 2082 while (1)
b3cfe0c8 2083 {
c65adb44 2084 int quoted;
f902a008 2085
b9145dbb
RS
2086 if (from <= stop)
2087 {
6a140a74 2088 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2089 immediate_quit = 0;
2090 return Qnil;
2091 }
b3cfe0c8 2092
6a140a74 2093 DEC_BOTH (from, from_byte);
f902a008 2094 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2095 quoted = char_quoted (from, from_byte);
6a140a74 2096 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
2097 code = SYNTAX (c);
2098 comstyle = 0;
95ff8dfc 2099 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2100 if (code == Sendcomment)
2101 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2102 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2103 && prev_char_comend_first (from, from_byte)
89c6809a 2104 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2105 {
7fc8191e 2106 /* We must record the comment style encountered so that
b3cfe0c8 2107 later, we can match only the proper comment begin
7fc8191e 2108 sequence of the same style. */
6a140a74 2109 DEC_BOTH (from, from_byte);
f902a008
RS
2110 code = Sendcomment;
2111 /* Calling char_quoted, above, set up global syntax position
2112 at the new value of FROM. */
95ff8dfc
RS
2113 c1 = FETCH_CHAR (from_byte);
2114 comstyle = SYNTAX_COMMENT_STYLE (c1);
2115 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2116 }
2117
195d1361
RS
2118 if (code == Scomment_fence)
2119 {
2120 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2121 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2122
6a140a74 2123 while (1)
195d1361 2124 {
6a140a74
RS
2125 DEC_BOTH (from, from_byte);
2126 if (from == stop)
2127 break;
195d1361 2128 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2129 c = FETCH_CHAR (from_byte);
2130 if (SYNTAX (c) == Scomment_fence
7d0393cf 2131 && !char_quoted (from, from_byte))
195d1361 2132 {
7d0393cf 2133 found = 1;
195d1361
RS
2134 break;
2135 }
2136 }
2137 if (found == 0)
2138 {
2139 from = ini; /* Set point to ini + 1. */
6a140a74 2140 from_byte = ini_byte;
195d1361
RS
2141 goto leave;
2142 }
2143 }
2144 else if (code == Sendcomment)
b3cfe0c8 2145 {
95ff8dfc 2146 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2147 &out_charpos, &out_bytepos);
1aa963c8
SM
2148 if (found == -1)
2149 {
abf8a9ff
SM
2150 if (c == '\n')
2151 /* This end-of-line is not an end-of-comment.
2152 Treat it like a whitespace.
2153 CC-mode (and maybe others) relies on this behavior. */
2154 ;
2155 else
2156 {
2157 /* Failure: we should go back to the end of this
2158 not-quite-endcomment. */
2159 if (SYNTAX(c) != code)
2160 /* It was a two-char Sendcomment. */
2161 INC_BOTH (from, from_byte);
2162 goto leave;
2163 }
1aa963c8 2164 }
2b927d02 2165 else
abf8a9ff
SM
2166 {
2167 /* We have skipped one comment. */
2168 from = out_charpos, from_byte = out_bytepos;
2169 break;
2170 }
b3cfe0c8 2171 }
bb0de084 2172 else if (code != Swhitespace || quoted)
b3cfe0c8 2173 {
195d1361 2174 leave:
b3cfe0c8 2175 immediate_quit = 0;
6a140a74 2176 INC_BOTH (from, from_byte);
ef316cf0 2177 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2178 return Qnil;
2179 }
2180 }
2181
840f481c 2182 count1++;
b3cfe0c8
RS
2183 }
2184
ef316cf0 2185 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2186 immediate_quit = 0;
2187 return Qt;
2188}
2189\f
bd25db08 2190/* Return syntax code of character C if C is a single byte character
db9f48c3 2191 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2192
2193#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2194 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2195 ? SYNTAX (c) : Ssymbol)
2196
6a140a74 2197static Lisp_Object
8489eb67
RS
2198scan_lists (from, count, depth, sexpflag)
2199 register int from;
2200 int count, depth, sexpflag;
2201{
2202 Lisp_Object val;
195d1361 2203 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2204 register int c, c1;
2205 int stringterm;
8489eb67
RS
2206 int quoted;
2207 int mathexit = 0;
93da5fff 2208 register enum syntaxcode code, temp_code;
195d1361 2209 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2210 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2211 int comnested = 0; /* whether the comment is nestable or not */
93da5fff 2212 int temp_pos;
7bf5e9e4 2213 int last_good = from;
195d1361 2214 int found;
c3907a7e 2215 int from_byte;
6a140a74 2216 int out_bytepos, out_charpos;
95ff8dfc 2217 int temp, dummy;
bd25db08 2218 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2219
2220 if (depth > 0) min_depth = 0;
2221
c3907a7e
RS
2222 if (from > ZV) from = ZV;
2223 if (from < BEGV) from = BEGV;
2224
2225 from_byte = CHAR_TO_BYTE (from);
2226
8489eb67
RS
2227 immediate_quit = 1;
2228 QUIT;
2229
195d1361 2230 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2231 while (count > 0)
2232 {
8489eb67
RS
2233 while (from < stop)
2234 {
f902a008 2235 int comstart_first, prefix;
195d1361 2236 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2237 c = FETCH_CHAR (from_byte);
bd25db08 2238 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2239 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2240 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2241 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2242 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2243 if (depth == min_depth)
2244 last_good = from;
6a140a74 2245 INC_BOTH (from, from_byte);
195d1361 2246 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2247 if (from < stop && comstart_first
ab229fdd 2248 && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
8489eb67 2249 && parse_sexp_ignore_comments)
e5d4f4dc 2250 {
7d0393cf 2251 /* we have encountered a comment start sequence and we
195d1361 2252 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2253 the comment style this sequence begins so that later,
2254 only a comment end of the same style actually ends
2255 the comment section */
2256 code = Scomment;
95ff8dfc
RS
2257 c1 = FETCH_CHAR (from_byte);
2258 comstyle = SYNTAX_COMMENT_STYLE (c1);
2259 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2260 INC_BOTH (from, from_byte);
f902a008 2261 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2262 }
7d0393cf 2263
f902a008 2264 if (prefix)
8489eb67
RS
2265 continue;
2266
0220c518 2267 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2268 {
2269 case Sescape:
2270 case Scharquote:
2271 if (from == stop) goto lose;
6a140a74 2272 INC_BOTH (from, from_byte);
8489eb67
RS
2273 /* treat following character as a word constituent */
2274 case Sword:
2275 case Ssymbol:
2276 if (depth || !sexpflag) break;
195d1361 2277 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2278 while (from < stop)
2279 {
195d1361 2280 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2281
2282 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2283 c = FETCH_CHAR (from_byte);
2284 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2285 switch (temp)
8489eb67
RS
2286 {
2287 case Scharquote:
2288 case Sescape:
6a140a74 2289 INC_BOTH (from, from_byte);
8489eb67
RS
2290 if (from == stop) goto lose;
2291 break;
2292 case Sword:
2293 case Ssymbol:
2294 case Squote:
2295 break;
2296 default:
2297 goto done;
2298 }
6a140a74 2299 INC_BOTH (from, from_byte);
8489eb67
RS
2300 }
2301 goto done;
2302
195d1361 2303 case Scomment_fence:
95ff8dfc
RS
2304 comstyle = ST_COMMENT_STYLE;
2305 /* FALLTHROUGH */
2306 case Scomment:
8489eb67 2307 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2308 UPDATE_SYNTAX_TABLE_FORWARD (from);
2309 found = forw_comment (from, from_byte, stop,
2310 comnested, comstyle, 0,
2311 &out_charpos, &out_bytepos, &dummy);
2312 from = out_charpos, from_byte = out_bytepos;
2313 if (!found)
8489eb67 2314 {
95ff8dfc
RS
2315 if (depth == 0)
2316 goto done;
2317 goto lose;
8489eb67 2318 }
95ff8dfc
RS
2319 INC_BOTH (from, from_byte);
2320 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2321 break;
2322
2323 case Smath:
2324 if (!sexpflag)
2325 break;
6a140a74
RS
2326 if (from != stop && c == FETCH_CHAR (from_byte))
2327 {
2328 INC_BOTH (from, from_byte);
2329 }
8489eb67
RS
2330 if (mathexit)
2331 {
2332 mathexit = 0;
2333 goto close1;
2334 }
2335 mathexit = 1;
2336
2337 case Sopen:
2338 if (!++depth) goto done;
2339 break;
2340
2341 case Sclose:
2342 close1:
2343 if (!--depth) goto done;
2344 if (depth < min_depth)
7bf5e9e4
RS
2345 Fsignal (Qscan_error,
2346 Fcons (build_string ("Containing expression ends prematurely"),
2347 Fcons (make_number (last_good),
2348 Fcons (make_number (from), Qnil))));
8489eb67
RS
2349 break;
2350
2351 case Sstring:
195d1361 2352 case Sstring_fence:
ef316cf0 2353 temp_pos = dec_bytepos (from_byte);
93da5fff 2354 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2355 while (1)
2356 {
2357 if (from >= stop) goto lose;
195d1361 2358 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2359 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2360 if (code == Sstring
2361 ? (c == stringterm
2362 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2363 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2364 break;
7c7ca3d2
RS
2365
2366 /* Some compilers can't handle this inside the switch. */
bd25db08 2367 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2368 switch (temp)
8489eb67
RS
2369 {
2370 case Scharquote:
2371 case Sescape:
6a140a74 2372 INC_BOTH (from, from_byte);
8489eb67 2373 }
6a140a74 2374 INC_BOTH (from, from_byte);
8489eb67 2375 }
6a140a74 2376 INC_BOTH (from, from_byte);
8489eb67
RS
2377 if (!depth && sexpflag) goto done;
2378 break;
240c43e8
SM
2379 default:
2380 /* Ignore whitespace, punctuation, quote, endcomment. */
2381 break;
8489eb67
RS
2382 }
2383 }
2384
2385 /* Reached end of buffer. Error if within object, return nil if between */
2386 if (depth) goto lose;
2387
2388 immediate_quit = 0;
2389 return Qnil;
2390
2391 /* End of object reached */
2392 done:
2393 count--;
2394 }
2395
2396 while (count < 0)
2397 {
8489eb67
RS
2398 while (from > stop)
2399 {
6a140a74 2400 DEC_BOTH (from, from_byte);
195d1361 2401 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2402 c = FETCH_CHAR (from_byte);
bd25db08 2403 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2404 if (depth == min_depth)
2405 last_good = from;
7fc8191e 2406 comstyle = 0;
95ff8dfc 2407 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2408 if (code == Sendcomment)
2409 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2410 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2411 && prev_char_comend_first (from, from_byte)
8489eb67 2412 && parse_sexp_ignore_comments)
e5d4f4dc 2413 {
f902a008 2414 /* We must record the comment style encountered so that
e5d4f4dc 2415 later, we can match only the proper comment begin
f902a008 2416 sequence of the same style. */
6a140a74 2417 DEC_BOTH (from, from_byte);
f902a008
RS
2418 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2419 code = Sendcomment;
95ff8dfc
RS
2420 c1 = FETCH_CHAR (from_byte);
2421 comstyle = SYNTAX_COMMENT_STYLE (c1);
2422 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2423 }
7d0393cf 2424
9828a477 2425 /* Quoting turns anything except a comment-ender
e6365855 2426 into a word character. Note that this cannot be true
f902a008 2427 if we decremented FROM in the if-statement above. */
9828a477 2428 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2429 {
2430 DEC_BOTH (from, from_byte);
2431 code = Sword;
2432 }
9828a477 2433 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2434 continue;
2435
9828a477 2436 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2437 {
2438 case Sword:
2439 case Ssymbol:
9828a477
KH
2440 case Sescape:
2441 case Scharquote:
8489eb67 2442 if (depth || !sexpflag) break;
195d1361
RS
2443 /* This word counts as a sexp; count object finished
2444 after passing it. */
8489eb67
RS
2445 while (from > stop)
2446 {
6a140a74 2447 temp_pos = from_byte;
ef316cf0
RS
2448 if (! NILP (current_buffer->enable_multibyte_characters))
2449 DEC_POS (temp_pos);
2450 else
2451 temp_pos--;
6a140a74 2452 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2453 c1 = FETCH_CHAR (temp_pos);
bd25db08 2454 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2455 /* Don't allow comment-end to be quoted. */
2456 if (temp_code == Sendcomment)
2457 goto done2;
6a140a74 2458 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2459 if (quoted)
93da5fff 2460 {
6a140a74 2461 DEC_BOTH (from, from_byte);
ef316cf0 2462 temp_pos = dec_bytepos (temp_pos);
6a140a74 2463 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2464 }
2465 c1 = FETCH_CHAR (temp_pos);
bd25db08 2466 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2467 if (! (quoted || temp_code == Sword
2468 || temp_code == Ssymbol
2469 || temp_code == Squote))
8489eb67 2470 goto done2;
6a140a74 2471 DEC_BOTH (from, from_byte);
8489eb67
RS
2472 }
2473 goto done2;
2474
2475 case Smath:
2476 if (!sexpflag)
2477 break;
ef316cf0 2478 temp_pos = dec_bytepos (from_byte);
6a140a74 2479 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2480 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2481 DEC_BOTH (from, from_byte);
8489eb67
RS
2482 if (mathexit)
2483 {
2484 mathexit = 0;
2485 goto open2;
2486 }
2487 mathexit = 1;
2488
2489 case Sclose:
2490 if (!++depth) goto done2;
2491 break;
2492
2493 case Sopen:
2494 open2:
2495 if (!--depth) goto done2;
2496 if (depth < min_depth)
7bf5e9e4
RS
2497 Fsignal (Qscan_error,
2498 Fcons (build_string ("Containing expression ends prematurely"),
2499 Fcons (make_number (last_good),
2500 Fcons (make_number (from), Qnil))));
8489eb67
RS
2501 break;
2502
2503 case Sendcomment:
2504 if (!parse_sexp_ignore_comments)
2505 break;
95ff8dfc 2506 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2507 &out_charpos, &out_bytepos);
1aa963c8
SM
2508 /* FIXME: if found == -1, then it really wasn't a comment-end.
2509 For single-char Sendcomment, we can't do much about it apart
2510 from skipping the char.
2511 For 2-char endcomments, we could try again, taking both
2512 chars as separate entities, but it's a lot of trouble
2513 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2514 if (found != -1)
2515 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2516 break;
2517
195d1361
RS
2518 case Scomment_fence:
2519 case Sstring_fence:
2520 while (1)
2521 {
195d1361 2522 if (from == stop) goto lose;
d0abdf7e 2523 DEC_BOTH (from, from_byte);
195d1361 2524 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2525 if (!char_quoted (from, from_byte)
bd25db08
KH
2526 && (c = FETCH_CHAR (from_byte),
2527 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2528 break;
2529 }
2530 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2531 break;
7d0393cf 2532
8489eb67 2533 case Sstring:
6a140a74 2534 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2535 while (1)
2536 {
2537 if (from == stop) goto lose;
d0abdf7e
SM
2538 DEC_BOTH (from, from_byte);
2539 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2540 if (!char_quoted (from, from_byte)
2541 && stringterm == (c = FETCH_CHAR (from_byte))
42ebfa31 2542 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2543 break;
8489eb67 2544 }
8489eb67
RS
2545 if (!depth && sexpflag) goto done2;
2546 break;
240c43e8
SM
2547 default:
2548 /* Ignore whitespace, punctuation, quote, endcomment. */
2549 break;
8489eb67
RS
2550 }
2551 }
2552
2553 /* Reached start of buffer. Error if within object, return nil if between */
2554 if (depth) goto lose;
2555
2556 immediate_quit = 0;
2557 return Qnil;
2558
2559 done2:
2560 count++;
2561 }
2562
2563
2564 immediate_quit = 0;
1e142fb7 2565 XSETFASTINT (val, from);
8489eb67
RS
2566 return val;
2567
2568 lose:
7bf5e9e4
RS
2569 Fsignal (Qscan_error,
2570 Fcons (build_string ("Unbalanced parentheses"),
2571 Fcons (make_number (last_good),
2572 Fcons (make_number (from), Qnil))));
ab229fdd 2573 abort ();
8489eb67
RS
2574 /* NOTREACHED */
2575}
2576
8489eb67 2577DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2578 doc: /* Scan from character number FROM by COUNT lists.
2579Returns the character number of the position thus found.
2580
2581If DEPTH is nonzero, paren depth begins counting from that value,
2582only places where the depth in parentheses becomes zero
2583are candidates for stopping; COUNT such places are counted.
2584Thus, a positive value for DEPTH means go out levels.
2585
2586Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2587
2588If the beginning or end of (the accessible part of) the buffer is reached
2589and the depth is wrong, an error is signaled.
2590If the depth is right but the count is not used up, nil is returned. */)
2591 (from, count, depth)
8489eb67
RS
2592 Lisp_Object from, count, depth;
2593{
b7826503
PJ
2594 CHECK_NUMBER (from);
2595 CHECK_NUMBER (count);
2596 CHECK_NUMBER (depth);
8489eb67
RS
2597
2598 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2599}
2600
2601DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2602 doc: /* Scan from character number FROM by COUNT balanced expressions.
2603If COUNT is negative, scan backwards.
2604Returns the character number of the position thus found.
2605
2606Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2607
2608If the beginning or end of (the accessible part of) the buffer is reached
2609in the middle of a parenthetical grouping, an error is signaled.
2610If the beginning or end is reached between groupings
2611but before count is used up, nil is returned. */)
2612 (from, count)
8489eb67
RS
2613 Lisp_Object from, count;
2614{
b7826503
PJ
2615 CHECK_NUMBER (from);
2616 CHECK_NUMBER (count);
8489eb67
RS
2617
2618 return scan_lists (XINT (from), XINT (count), 0, 1);
2619}
2620
2621DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2622 0, 0, 0,
2623 doc: /* Move point backward over any number of chars with prefix syntax.
2624This includes chars with "quote" or "prefix" syntax (' or p). */)
2625 ()
8489eb67
RS
2626{
2627 int beg = BEGV;
6a140a74
RS
2628 int opoint = PT;
2629 int opoint_byte = PT_BYTE;
6ec8bbd2 2630 int pos = PT;
6a140a74 2631 int pos_byte = PT_BYTE;
93da5fff 2632 int c;
93da5fff 2633
7d0393cf 2634 if (pos <= beg)
195d1361 2635 {
f902a008
RS
2636 SET_PT_BOTH (opoint, opoint_byte);
2637
2638 return Qnil;
195d1361 2639 }
8489eb67 2640
f902a008
RS
2641 SETUP_SYNTAX_TABLE (pos, -1);
2642
6a140a74
RS
2643 DEC_BOTH (pos, pos_byte);
2644
1fd3172d 2645 while (!char_quoted (pos, pos_byte)
195d1361 2646 /* Previous statement updates syntax table. */
6a140a74 2647 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2648 || SYNTAX_PREFIX (c)))
2649 {
1fd3172d
RS
2650 opoint = pos;
2651 opoint_byte = pos_byte;
2652
2653 if (pos + 1 > beg)
2654 DEC_BOTH (pos, pos_byte);
93da5fff 2655 }
8489eb67 2656
6a140a74 2657 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2658
2659 return Qnil;
2660}
2661\f
6a140a74 2662/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2663 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2664 and return a description of the state of the parse at END.
c81a3712 2665 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2666 If COMMENTSTOP is 1, stop at the start of a comment.
2667 If COMMENTSTOP is -1, stop at the start or end of a comment,
2668 after the beginning of a string, or after the end of a string. */
8489eb67 2669
340f92b5 2670static void
6a140a74 2671scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2672 stopbefore, oldstate, commentstop)
e5d4f4dc 2673 struct lisp_parse_state *stateptr;
8489eb67 2674 register int from;
ddc447fc 2675 int end, targetdepth, stopbefore, from_byte;
8489eb67 2676 Lisp_Object oldstate;
c81a3712 2677 int commentstop;
8489eb67
RS
2678{
2679 struct lisp_parse_state state;
2680
2681 register enum syntaxcode code;
95ff8dfc
RS
2682 int c1;
2683 int comnested;
8489eb67
RS
2684 struct level { int last, prev; };
2685 struct level levelstart[100];
2686 register struct level *curlevel = levelstart;
2687 struct level *endlevel = levelstart + 100;
8489eb67
RS
2688 register int depth; /* Paren depth of current scanning location.
2689 level - levelstart equals this except
2690 when the depth becomes negative. */
2691 int mindepth; /* Lowest DEPTH value seen. */
2692 int start_quoted = 0; /* Nonzero means starting after a char quote */
2693 Lisp_Object tem;
93da5fff 2694 int prev_from; /* Keep one character before FROM. */
6a140a74 2695 int prev_from_byte;
1fd3172d 2696 int prev_from_syntax;
195d1361
RS
2697 int boundary_stop = commentstop == -1;
2698 int nofence;
95ff8dfc
RS
2699 int found;
2700 int out_bytepos, out_charpos;
7c7ca3d2 2701 int temp;
93da5fff
KH
2702
2703 prev_from = from;
6a140a74
RS
2704 prev_from_byte = from_byte;
2705 if (from != BEGV)
2706 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2707
2708 /* Use this macro instead of `from++'. */
6a140a74
RS
2709#define INC_FROM \
2710do { prev_from = from; \
2711 prev_from_byte = from_byte; \
ab229fdd
AS
2712 temp = FETCH_CHAR (prev_from_byte); \
2713 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2714 INC_BOTH (from, from_byte); \
3ceb4629
SM
2715 if (from < end) \
2716 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2717 } while (0)
8489eb67
RS
2718
2719 immediate_quit = 1;
2720 QUIT;
2721
265a9e55 2722 if (NILP (oldstate))
8489eb67
RS
2723 {
2724 depth = 0;
2725 state.instring = -1;
2726 state.incomment = 0;
195d1361
RS
2727 state.comstyle = 0; /* comment style a by default. */
2728 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2729 }
2730 else
2731 {
2732 tem = Fcar (oldstate);
265a9e55 2733 if (!NILP (tem))
8489eb67
RS
2734 depth = XINT (tem);
2735 else
2736 depth = 0;
2737
2738 oldstate = Fcdr (oldstate);
2739 oldstate = Fcdr (oldstate);
2740 oldstate = Fcdr (oldstate);
2741 tem = Fcar (oldstate);
195d1361 2742 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2743 state.instring = (!NILP (tem)
2744 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2745 : -1);
8489eb67
RS
2746
2747 oldstate = Fcdr (oldstate);
2748 tem = Fcar (oldstate);
e76f1c44
GM
2749 state.incomment = (!NILP (tem)
2750 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2751 : 0);
8489eb67
RS
2752
2753 oldstate = Fcdr (oldstate);
2754 tem = Fcar (oldstate);
265a9e55 2755 start_quoted = !NILP (tem);
e5d4f4dc 2756
95ff8dfc 2757 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2758 style a. If it is non-nil, we are in comment style b */
2759 oldstate = Fcdr (oldstate);
e5d4f4dc 2760 oldstate = Fcdr (oldstate);
195d1361 2761 tem = Fcar (oldstate);
7d0393cf 2762 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2763 ? ST_COMMENT_STYLE : 1);
195d1361 2764
e5d4f4dc 2765 oldstate = Fcdr (oldstate);
e5d4f4dc 2766 tem = Fcar (oldstate);
195d1361 2767 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2768 oldstate = Fcdr (oldstate);
2769 tem = Fcar (oldstate);
2770 while (!NILP (tem)) /* >= second enclosing sexps. */
2771 {
2772 /* curlevel++->last ran into compiler bug on Apollo */
2773 curlevel->last = XINT (Fcar (tem));
2774 if (++curlevel == endlevel)
02010917 2775 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2776 curlevel->prev = -1;
2777 curlevel->last = -1;
2778 tem = Fcdr (tem);
2779 }
8489eb67
RS
2780 }
2781 state.quoted = 0;
2782 mindepth = depth;
2783
2784 curlevel->prev = -1;
2785 curlevel->last = -1;
2786
326b283b 2787 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
2788 temp = FETCH_CHAR (prev_from_byte);
2789 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 2790 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2791
195d1361 2792 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2793
326b283b
RS
2794 if (state.incomment)
2795 goto startincomment;
8489eb67
RS
2796 if (state.instring >= 0)
2797 {
195d1361 2798 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2799 if (start_quoted)
2800 goto startquotedinstring;
8489eb67
RS
2801 goto startinstring;
2802 }
326b283b
RS
2803 else if (start_quoted)
2804 goto startquoted;
1fd3172d 2805
8489eb67
RS
2806 while (from < end)
2807 {
93da5fff 2808 INC_FROM;
1fd3172d 2809 code = prev_from_syntax & 0xff;
4c920633 2810
c5d4b96f
SM
2811 if (from < end
2812 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
2813 && (c1 = FETCH_CHAR (from_byte),
2814 SYNTAX_COMSTART_SECOND (c1)))
2815 /* Duplicate code to avoid a complex if-expression
2816 which causes trouble for the SGI compiler. */
95ff8dfc 2817 {
c5d4b96f
SM
2818 /* Record the comment style we have entered so that only
2819 the comment-end sequence of the same style actually
2820 terminates the comment section. */
2821 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2822 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2823 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2824 state.incomment = comnested ? 1 : -1;
95ff8dfc 2825 state.comstr_start = prev_from;
c5d4b96f
SM
2826 INC_FROM;
2827 code = Scomment;
95ff8dfc 2828 }
4c920633 2829 else if (code == Scomment_fence)
e5d4f4dc
RS
2830 {
2831 /* Record the comment style we have entered so that only
2832 the comment-end sequence of the same style actually
2833 terminates the comment section. */
95ff8dfc
RS
2834 state.comstyle = ST_COMMENT_STYLE;
2835 state.incomment = -1;
195d1361 2836 state.comstr_start = prev_from;
e5d4f4dc 2837 code = Scomment;
e5d4f4dc 2838 }
c5d4b96f
SM
2839 else if (code == Scomment)
2840 {
2841 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2842 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2843 1 : -1);
2844 state.comstr_start = prev_from;
2845 }
e5d4f4dc 2846
1fd3172d 2847 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2848 continue;
0220c518 2849 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2850 {
2851 case Sescape:
2852 case Scharquote:
2853 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2854 curlevel->last = prev_from;
8489eb67
RS
2855 startquoted:
2856 if (from == end) goto endquoted;
93da5fff 2857 INC_FROM;
8489eb67
RS
2858 goto symstarted;
2859 /* treat following character as a word constituent */
2860 case Sword:
2861 case Ssymbol:
2862 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2863 curlevel->last = prev_from;
8489eb67
RS
2864 symstarted:
2865 while (from < end)
2866 {
7c7ca3d2 2867 /* Some compilers can't handle this inside the switch. */
ab229fdd
AS
2868 temp = FETCH_CHAR (from_byte);
2869 temp = SYNTAX (temp);
7c7ca3d2 2870 switch (temp)
8489eb67
RS
2871 {
2872 case Scharquote:
2873 case Sescape:
93da5fff 2874 INC_FROM;
8489eb67
RS
2875 if (from == end) goto endquoted;
2876 break;
2877 case Sword:
2878 case Ssymbol:
2879 case Squote:
2880 break;
2881 default:
2882 goto symdone;
2883 }
93da5fff 2884 INC_FROM;
8489eb67
RS
2885 }
2886 symdone:
2887 curlevel->prev = curlevel->last;
2888 break;
2889
240c43e8 2890 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 2891 case Scomment:
195d1361 2892 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2893 startincomment:
2894 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2895 that we find a 2-char comment ender even if we start in the
e6365855 2896 middle of it. We don't want to do that if we're just at the
d355bd8a 2897 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2898 found = forw_comment (from, from_byte, end,
2899 state.incomment, state.comstyle,
e6365855
SM
2900 (from == BEGV || from < state.comstr_start + 3)
2901 ? 0 : prev_from_syntax,
95ff8dfc
RS
2902 &out_charpos, &out_bytepos, &state.incomment);
2903 from = out_charpos; from_byte = out_bytepos;
2904 /* Beware! prev_from and friends are invalid now.
2905 Luckily, the `done' doesn't use them and the INC_FROM
2906 sets them to a sane value without looking at them. */
2907 if (!found) goto done;
d355bd8a 2908 INC_FROM;
8489eb67 2909 state.incomment = 0;
e5d4f4dc 2910 state.comstyle = 0; /* reset the comment style */
195d1361 2911 if (boundary_stop) goto done;
8489eb67
RS
2912 break;
2913
2914 case Sopen:
2915 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2916 depth++;
2917 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2918 curlevel->last = prev_from;
8489eb67 2919 if (++curlevel == endlevel)
02010917 2920 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2921 curlevel->prev = -1;
2922 curlevel->last = -1;
30844415 2923 if (targetdepth == depth) goto done;
8489eb67
RS
2924 break;
2925
2926 case Sclose:
2927 depth--;
2928 if (depth < mindepth)
2929 mindepth = depth;
2930 if (curlevel != levelstart)
2931 curlevel--;
2932 curlevel->prev = curlevel->last;
30844415 2933 if (targetdepth == depth) goto done;
8489eb67
RS
2934 break;
2935
2936 case Sstring:
195d1361
RS
2937 case Sstring_fence:
2938 state.comstr_start = from - 1;
8489eb67 2939 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2940 curlevel->last = prev_from;
7d0393cf 2941 state.instring = (code == Sstring
6a140a74 2942 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2943 : ST_STRING_STYLE);
2944 if (boundary_stop) goto done;
8489eb67 2945 startinstring:
195d1361 2946 {
644ea4df 2947 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 2948
644ea4df
RS
2949 while (1)
2950 {
2951 int c;
195d1361 2952
644ea4df 2953 if (from >= end) goto done;
6a140a74 2954 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2955 /* Some compilers can't handle this inside the switch. */
2956 temp = SYNTAX (c);
ccf89641
KH
2957
2958 /* Check TEMP here so that if the char has
2959 a syntax-table property which says it is NOT
2960 a string character, it does not end the string. */
2961 if (nofence && c == state.instring && temp == Sstring)
2962 break;
2963
7c7ca3d2 2964 switch (temp)
644ea4df
RS
2965 {
2966 case Sstring_fence:
2967 if (!nofence) goto string_end;
2968 break;
2969 case Scharquote:
2970 case Sescape:
2971 INC_FROM;
2972 startquotedinstring:
2973 if (from >= end) goto endquoted;
195d1361 2974 }
644ea4df
RS
2975 INC_FROM;
2976 }
195d1361
RS
2977 }
2978 string_end:
8489eb67
RS
2979 state.instring = -1;
2980 curlevel->prev = curlevel->last;
93da5fff 2981 INC_FROM;
195d1361 2982 if (boundary_stop) goto done;
8489eb67
RS
2983 break;
2984
2985 case Smath:
240c43e8
SM
2986 /* FIXME: We should do something with it. */
2987 break;
2988 default:
2989 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
2990 break;
2991 }
2992 }
2993 goto done;
2994
2995 stop: /* Here if stopping before start of sexp. */
93da5fff 2996 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2997 goto done; /* but return the position before it. */
2998
2999 endquoted:
3000 state.quoted = 1;
3001 done:
3002 state.depth = depth;
3003 state.mindepth = mindepth;
3004 state.thislevelstart = curlevel->prev;
3005 state.prevlevelstart
3006 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3007 state.location = from;
1a102a58
RS
3008 state.levelstarts = Qnil;
3009 while (--curlevel >= levelstart)
3010 state.levelstarts = Fcons (make_number (curlevel->last),
3011 state.levelstarts);
8489eb67
RS
3012 immediate_quit = 0;
3013
e5d4f4dc 3014 *stateptr = state;
8489eb67
RS
3015}
3016
c81a3712 3017DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3018 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3019Parsing stops at TO or when certain criteria are met;
3020 point is set to where parsing stops.
3021If fifth arg OLDSTATE is omitted or nil,
3022 parsing assumes that FROM is the beginning of a function.
0e6228bc 3023Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3024 0. depth in parens.
3025 1. character address of start of innermost containing list; nil if none.
3026 2. character address of start of last complete sexp terminated.
3027 3. non-nil if inside a string.
3028 (it is the character that will terminate the string,
3029 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3030 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3031 else an integer (the current comment nesting).
3032 5. t if following a quote character.
3033 6. the minimum paren-depth encountered during this scan.
3034 7. t if in a comment of style b; symbol `syntax-table' if the comment
3035 should be terminated by a generic comment delimiter.
3036 8. character address of start of comment or string; nil if not in one.
3037 9. Intermediate data for continuation of parsing (subject to change).
3038If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3039in parentheses becomes equal to TARGETDEPTH.
3040Fourth arg STOPBEFORE non-nil means stop when come to
3041 any character that starts a sexp.
0e6228bc 3042Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93
PJ
3043 It is used to initialize the state of the parse. Elements number 1, 2, 6
3044 and 8 are ignored; you can leave off element 8 (the last) entirely.
3045Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3046 If it is symbol `syntax-table', stop after the start of a comment or a
3047 string, or after end of a comment or a string. */)
3048 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3049 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3050{
3051 struct lisp_parse_state state;
3052 int target;
3053
265a9e55 3054 if (!NILP (targetdepth))
8489eb67 3055 {
b7826503 3056 CHECK_NUMBER (targetdepth);
8489eb67
RS
3057 target = XINT (targetdepth);
3058 }
3059 else
3060 target = -100000; /* We won't reach this depth */
3061
3062 validate_region (&from, &to);
6a140a74
RS
3063 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3064 XINT (to),
c81a3712 3065 target, !NILP (stopbefore), oldstate,
7d0393cf 3066 (NILP (commentstop)
195d1361 3067 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3068
3069 SET_PT (state.location);
7d0393cf 3070
8489eb67
RS
3071 return Fcons (make_number (state.depth),
3072 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3073 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3074 Fcons (state.instring >= 0
3075 ? (state.instring == ST_STRING_STYLE
195d1361 3076 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3077 Fcons (state.incomment < 0 ? Qt :
3078 (state.incomment == 0 ? Qnil :
3079 make_number (state.incomment)),
8489eb67 3080 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3081 Fcons (make_number (state.mindepth),
7d0393cf 3082 Fcons ((state.comstyle
bff37d2a
RS
3083 ? (state.comstyle == ST_COMMENT_STYLE
3084 ? Qsyntax_table : Qt) :
3085 Qnil),
0beaf54f
DL
3086 Fcons (((state.incomment
3087 || (state.instring >= 0))
bff37d2a
RS
3088 ? make_number (state.comstr_start)
3089 : Qnil),
1a102a58 3090 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3091}
3092\f
dfcf069d 3093void
8489eb67
RS
3094init_syntax_once ()
3095{
78f9a1f7 3096 register int i, c;
8ea151b2 3097 Lisp_Object temp;
8489eb67 3098
5ebaddf5
RS
3099 /* This has to be done here, before we call Fmake_char_table. */
3100 Qsyntax_table = intern ("syntax-table");
3101 staticpro (&Qsyntax_table);
3102
3103 /* Intern this now in case it isn't already done.
3104 Setting this variable twice is harmless.
3105 But don't staticpro it here--that is done in alloc.c. */
3106 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3107
93da5fff 3108 /* Create objects which can be shared among syntax tables. */
42ebfa31 3109 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3110 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3111 XVECTOR (Vsyntax_code_object)->contents[i]
3112 = Fcons (make_number (i), Qnil);
3113
5ebaddf5
RS
3114 /* Now we are ready to set up this property, so we can
3115 create syntax tables. */
3116 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3117
93da5fff 3118 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3119
5ebaddf5 3120 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3121
93da5fff 3122 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3123 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3124 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3125 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3126 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3127 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3128 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3129
3130 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3131 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3132
3133 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3134 Fcons (make_number (Sopen), make_number (')')));
3135 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3136 Fcons (make_number (Sclose), make_number ('(')));
3137 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3138 Fcons (make_number (Sopen), make_number (']')));
3139 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3140 Fcons (make_number (Sclose), make_number ('[')));
3141 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3142 Fcons (make_number (Sopen), make_number ('}')));
3143 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3144 Fcons (make_number (Sclose), make_number ('{')));
3145 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3146 Fcons (make_number ((int) Sstring), Qnil));
3147 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3148 Fcons (make_number ((int) Sescape), Qnil));
3149
93da5fff 3150 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3151 for (i = 0; i < 10; i++)
78f9a1f7
KH
3152 {
3153 c = "_-+*/&|<>="[i];
3154 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3155 }
8489eb67 3156
93da5fff 3157 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3158 for (i = 0; i < 12; i++)
78f9a1f7
KH
3159 {
3160 c = ".,;:?!#@~^'`"[i];
3161 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3162 }
bd25db08
KH
3163
3164 /* All multibyte characters have syntax `word' by default. */
3165 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3166 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3167 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3168}
3169
dfcf069d 3170void
8489eb67
RS
3171syms_of_syntax ()
3172{
3173 Qsyntax_table_p = intern ("syntax-table-p");
3174 staticpro (&Qsyntax_table_p);
3175
93da5fff
KH
3176 staticpro (&Vsyntax_code_object);
3177
a2994c46
KS
3178 staticpro (&gl_state.object);
3179 staticpro (&gl_state.global_code);
3180 staticpro (&gl_state.current_syntax_table);
3181 staticpro (&gl_state.old_prop);
3182
3183 /* Defined in regex.c */
3184 staticpro (&re_match_object);
3185
7bf5e9e4
RS
3186 Qscan_error = intern ("scan-error");
3187 staticpro (&Qscan_error);
3188 Fput (Qscan_error, Qerror_conditions,
f3be100f 3189 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3190 Fput (Qscan_error, Qerror_message,
3191 build_string ("Scan error"));
3192
8489eb67 3193 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3194 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3195
195d1361 3196 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3197 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3198Otherwise, that text property is simply ignored.
3199See the info node `(elisp)Syntax Properties' for a description of the
3200`syntax-table' property. */);
195d1361 3201
8489eb67
RS
3202 words_include_escapes = 0;
3203 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3204 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3205
bd25db08 3206 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3207 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3208 multibyte_syntax_as_symbol = 0;
3209
f4ed767f
GM
3210 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3211 &open_paren_in_column_0_is_defun_start,
b222e415 3212 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3213 open_paren_in_column_0_is_defun_start = 1;
3214
8489eb67
RS
3215 defsubr (&Ssyntax_table_p);
3216 defsubr (&Ssyntax_table);
3217 defsubr (&Sstandard_syntax_table);
3218 defsubr (&Scopy_syntax_table);
3219 defsubr (&Sset_syntax_table);
3220 defsubr (&Schar_syntax);
beefa22e 3221 defsubr (&Smatching_paren);
c65adb44 3222 defsubr (&Sstring_to_syntax);
8489eb67 3223 defsubr (&Smodify_syntax_entry);
62abe9cb 3224 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3225
3226 defsubr (&Sforward_word);
3227
195d1361
RS
3228 defsubr (&Sskip_chars_forward);
3229 defsubr (&Sskip_chars_backward);
3230 defsubr (&Sskip_syntax_forward);
3231 defsubr (&Sskip_syntax_backward);
3232
b3cfe0c8 3233 defsubr (&Sforward_comment);
8489eb67
RS
3234 defsubr (&Sscan_lists);
3235 defsubr (&Sscan_sexps);
3236 defsubr (&Sbackward_prefix_chars);
3237 defsubr (&Sparse_partial_sexp);
3238}
ab5796a9
MB
3239
3240/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3241 (do not change this comment) */