*** empty log message ***
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
0b5538bd 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2002,
aaef169d 3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
8489eb67
RS
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
a4275ad1 9the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
8489eb67
RS
21
22
18160b98 23#include <config.h>
8489eb67
RS
24#include <ctype.h>
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
93da5fff 28#include "charset.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
195d1361
RS
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 71 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int thislevelstart; /* Char number of most recent start-of-expression at current level */
75 int prevlevelstart; /* Char number of start of containing expression */
195d1361 76 int location; /* Char number at which parsing stopped. */
e5d4f4dc 77 int mindepth; /* Minimum depth seen while scanning. */
195d1361 78 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
79 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
80 of levels (starting from outermost). */
e5d4f4dc
RS
81 };
82\f
37bef230
RS
83/* These variables are a cache for finding the start of a defun.
84 find_start_pos is the place for which the defun start was found.
85 find_start_value is the defun start position found for it.
6a140a74 86 find_start_value_byte is the corresponding byte position.
37bef230
RS
87 find_start_buffer is the buffer it was found in.
88 find_start_begv is the BEGV value when it was found.
89 find_start_modiff is the value of MODIFF when it was found. */
90
91static int find_start_pos;
92static int find_start_value;
6a140a74 93static int find_start_value_byte;
37bef230
RS
94static struct buffer *find_start_buffer;
95static int find_start_begv;
96static int find_start_modiff;
6a140a74
RS
97
98
99static int find_defun_start P_ ((int, int));
95ff8dfc 100static int back_comment P_ ((int, int, int, int, int, int *, int *));
6a140a74 101static int char_quoted P_ ((int, int));
a1bc88d4 102static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
6a140a74
RS
103static Lisp_Object scan_lists P_ ((int, int, int, int));
104static void scan_sexps_forward P_ ((struct lisp_parse_state *,
105 int, int, int, int,
106 int, Lisp_Object, int));
a1bc88d4 107static int in_classes P_ ((int, Lisp_Object));
195d1361
RS
108\f
109
110struct gl_state_s gl_state; /* Global state of syntax parser. */
111
112INTERVAL interval_of ();
113#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
115
6a140a74
RS
116/* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 118 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 119 valid at start, the rest is filled basing on OBJECT.
195d1361 120
6a140a74 121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
6a140a74 124 update the interval to start further than CHARPOS - or be
195d1361 125 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 126 charpos to call this subroutine again - or be before/after the
195d1361
RS
127 start/end of OBJECT. */
128
129void
6a140a74
RS
130update_syntax_table (charpos, count, init, object)
131 int charpos, count, init;
195d1361
RS
132 Lisp_Object object;
133{
134 Lisp_Object tmp_table;
4ffe723b 135 int cnt = 0, invalidate = 1;
5a774522 136 INTERVAL i;
195d1361
RS
137
138 if (init)
139 {
bb0de084 140 gl_state.old_prop = Qnil;
195d1361
RS
141 gl_state.start = gl_state.b_property;
142 gl_state.stop = gl_state.e_property;
bb0de084
SM
143 i = interval_of (charpos, object);
144 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
145 invalidate = 0;
146 if (NULL_INTERVAL_P (i))
147 return;
f902a008 148 /* interval_of updates only ->position of the return value, so
d80f4cc7 149 update the parents manually to speed up update_interval. */
bb0de084 150 while (!NULL_PARENT (i))
d80f4cc7
RS
151 {
152 if (AM_RIGHT_CHILD (i))
439d5cb4 153 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 154 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
155 - TOTAL_LENGTH (INTERVAL_PARENT (i))
156 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 157 else
439d5cb4 158 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 159 + TOTAL_LENGTH (i);
439d5cb4 160 i = INTERVAL_PARENT (i);
d80f4cc7
RS
161 }
162 i = gl_state.forward_i;
bb0de084 163 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 164 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
165 goto update;
166 }
5a774522 167 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 168
423e705d 169 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 170 or further off. */
195d1361
RS
171 if (NULL_INTERVAL_P (i))
172 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 173 else if (charpos < i->position) /* Move left. */
195d1361
RS
174 {
175 if (count > 0)
f902a008 176 error ("Error in syntax_table logic for intervals <-");
195d1361 177 /* Update the interval. */
6a140a74 178 i = update_interval (i, charpos);
bb0de084 179 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
180 {
181 invalidate = 0;
195d1361 182 gl_state.forward_i = i;
ee0cdb48 183 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 184 }
423e705d 185 }
6a140a74 186 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
187 {
188 if (count < 0)
f902a008 189 error ("Error in syntax_table logic for intervals ->");
195d1361 190 /* Update the interval. */
6a140a74 191 i = update_interval (i, charpos);
bb0de084 192 if (i->position != gl_state.e_property)
195d1361
RS
193 {
194 invalidate = 0;
195d1361 195 gl_state.backward_i = i;
bb0de084 196 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
197 }
198 }
195d1361
RS
199
200 update:
201 tmp_table = textget (i->plist, Qsyntax_table);
202
203 if (invalidate)
204 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 205
423e705d
SM
206 if (invalidate) /* Did not get to adjacent interval. */
207 { /* with the same table => */
208 /* invalidate the old range. */
195d1361
RS
209 if (count > 0)
210 {
211 gl_state.backward_i = i;
bb0de084
SM
212 gl_state.b_property = i->position - gl_state.offset;
213 }
214 else
195d1361 215 {
bb0de084 216 gl_state.forward_i = i;
ee0cdb48 217 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
218 }
219 }
37bef230 220
bb0de084 221 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 222 {
bb0de084
SM
223 gl_state.current_syntax_table = tmp_table;
224 gl_state.old_prop = tmp_table;
225 if (EQ (Fsyntax_table_p (tmp_table), Qt))
226 {
227 gl_state.use_global = 0;
7d0393cf 228 }
bb0de084
SM
229 else if (CONSP (tmp_table))
230 {
231 gl_state.use_global = 1;
232 gl_state.global_code = tmp_table;
233 }
7d0393cf 234 else
bb0de084
SM
235 {
236 gl_state.use_global = 0;
237 gl_state.current_syntax_table = current_buffer->syntax_table;
238 }
195d1361
RS
239 }
240
241 while (!NULL_INTERVAL_P (i))
242 {
243 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
244 {
245 if (count > 0)
bb0de084
SM
246 {
247 gl_state.e_property = i->position - gl_state.offset;
248 gl_state.forward_i = i;
249 }
250 else
251 {
5a774522
SM
252 gl_state.b_property
253 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
254 gl_state.backward_i = i;
255 }
256 return;
195d1361 257 }
7d0393cf 258 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
259 {
260 if (count > 0)
bb0de084 261 {
5a774522
SM
262 gl_state.e_property
263 = i->position + LENGTH (i) - gl_state.offset
264 /* e_property at EOB is not set to ZV but to ZV+1, so that
265 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
266 having to check eob between the two. */
267 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
268 gl_state.forward_i = i;
269 }
270 else
271 {
272 gl_state.b_property = i->position - gl_state.offset;
273 gl_state.backward_i = i;
274 }
275 return;
195d1361
RS
276 }
277 cnt++;
278 i = count > 0 ? next_interval (i) : previous_interval (i);
279 }
bb0de084
SM
280 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
281 if (count > 0)
282 gl_state.e_property = gl_state.stop;
283 else
284 gl_state.b_property = gl_state.start;
195d1361
RS
285}
286\f
6a140a74
RS
287/* Returns TRUE if char at CHARPOS is quoted.
288 Global syntax-table data should be set up already to be good at CHARPOS
289 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
290
291static int
6a140a74
RS
292char_quoted (charpos, bytepos)
293 register int charpos, bytepos;
195d1361
RS
294{
295 register enum syntaxcode code;
296 register int beg = BEGV;
297 register int quoted = 0;
6a140a74
RS
298 int orig = charpos;
299
300 DEC_BOTH (charpos, bytepos);
195d1361 301
709a47ce 302 while (charpos >= beg)
195d1361 303 {
ab229fdd
AS
304 int c;
305
6a140a74 306 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
ab229fdd
AS
307 c = FETCH_CHAR (bytepos);
308 code = SYNTAX (c);
6a140a74
RS
309 if (! (code == Scharquote || code == Sescape))
310 break;
311
312 DEC_BOTH (charpos, bytepos);
313 quoted = !quoted;
195d1361 314 }
6a140a74
RS
315
316 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
317 return quoted;
318}
6a140a74
RS
319
320/* Return the bytepos one character after BYTEPOS.
321 We assume that BYTEPOS is not at the end of the buffer. */
322
323INLINE int
324inc_bytepos (bytepos)
325 int bytepos;
326{
ef316cf0
RS
327 if (NILP (current_buffer->enable_multibyte_characters))
328 return bytepos + 1;
329
6a140a74
RS
330 INC_POS (bytepos);
331 return bytepos;
332}
333
334/* Return the bytepos one character before BYTEPOS.
335 We assume that BYTEPOS is not at the start of the buffer. */
336
337INLINE int
338dec_bytepos (bytepos)
339 int bytepos;
340{
ef316cf0
RS
341 if (NILP (current_buffer->enable_multibyte_characters))
342 return bytepos - 1;
343
6a140a74
RS
344 DEC_POS (bytepos);
345 return bytepos;
346}
195d1361 347\f
7cc80f0a
RS
348/* Return a defun-start position before before POS and not too far before.
349 It should be the last one before POS, or nearly the last.
350
351 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 352 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
353
354 We record the information about where the scan started
355 and what its result was, so that another call in the same area
356 can return the same value very quickly.
195d1361
RS
357
358 There is no promise at which position the global syntax data is
359 valid on return from the subroutine, so the caller should explicitly
360 update the global data. */
37bef230
RS
361
362static int
6a140a74
RS
363find_defun_start (pos, pos_byte)
364 int pos, pos_byte;
37bef230 365{
6a140a74 366 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 367
1fd1cc2f
RS
368 if (!open_paren_in_column_0_is_defun_start)
369 {
370 find_start_value_byte = BEGV_BYTE;
371 return BEGV;
372 }
373
37bef230
RS
374 /* Use previous finding, if it's valid and applies to this inquiry. */
375 if (current_buffer == find_start_buffer
376 /* Reuse the defun-start even if POS is a little farther on.
377 POS might be in the next defun, but that's ok.
378 Our value may not be the best possible, but will still be usable. */
379 && pos <= find_start_pos + 1000
380 && pos >= find_start_value
381 && BEGV == find_start_begv
382 && MODIFF == find_start_modiff)
383 return find_start_value;
384
385 /* Back up to start of line. */
6a140a74 386 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 387
195d1361
RS
388 /* We optimize syntax-table lookup for rare updates. Thus we accept
389 only those `^\s(' which are good in global _and_ text-property
390 syntax-tables. */
391 gl_state.current_syntax_table = current_buffer->syntax_table;
392 gl_state.use_global = 0;
1fd1cc2f 393 while (PT > BEGV)
37bef230 394 {
ab229fdd
AS
395 int c;
396
1fd1cc2f
RS
397 /* Open-paren at start of line means we may have found our
398 defun-start. */
ab229fdd
AS
399 c = FETCH_CHAR (PT_BYTE);
400 if (SYNTAX (c) == Sopen)
195d1361 401 {
1fd1cc2f 402 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
ab229fdd
AS
403 c = FETCH_CHAR (PT_BYTE);
404 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
405 break;
406 /* Now fallback to the default value. */
407 gl_state.current_syntax_table = current_buffer->syntax_table;
408 gl_state.use_global = 0;
195d1361 409 }
1fd1cc2f
RS
410 /* Move to beg of previous line. */
411 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
412 }
413
414 /* Record what we found, for the next try. */
6a140a74
RS
415 find_start_value = PT;
416 find_start_value_byte = PT_BYTE;
37bef230
RS
417 find_start_buffer = current_buffer;
418 find_start_modiff = MODIFF;
419 find_start_begv = BEGV;
420 find_start_pos = pos;
421
6a140a74
RS
422 TEMP_SET_PT_BOTH (opoint, opoint_byte);
423
37bef230
RS
424 return find_start_value;
425}
426\f
f902a008
RS
427/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
428
429static int
430prev_char_comend_first (pos, pos_byte)
431 int pos, pos_byte;
432{
433 int c, val;
434
435 DEC_BOTH (pos, pos_byte);
436 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
437 c = FETCH_CHAR (pos_byte);
438 val = SYNTAX_COMEND_FIRST (c);
439 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
440 return val;
441}
442
443/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
444
3f679f55
SM
445/* static int
446 * prev_char_comstart_first (pos, pos_byte)
447 * int pos, pos_byte;
448 * {
449 * int c, val;
7d0393cf 450 *
3f679f55
SM
451 * DEC_BOTH (pos, pos_byte);
452 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
453 * c = FETCH_CHAR (pos_byte);
454 * val = SYNTAX_COMSTART_FIRST (c);
455 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
456 * return val;
457 * } */
f902a008 458
6a140a74
RS
459/* Checks whether charpos FROM is at the end of a comment.
460 FROM_BYTE is the bytepos corresponding to FROM.
461 Do not move back before STOP.
462
463 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
464 return -1 otherwise.
465
466 If successful, store the charpos of the comment's beginning
467 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
468
469 Global syntax data remains valid for backward search starting at
470 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
471
472static int
95ff8dfc 473back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
6a140a74 474 int from, from_byte, stop;
95ff8dfc 475 int comnested, comstyle;
6a140a74 476 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
477{
478 /* Look back, counting the parity of string-quotes,
479 and recording the comment-starters seen.
480 When we reach a safe place, assume that's not in a string;
481 then step the main scan to the earliest comment-starter seen
482 an even number of string quotes away from the safe place.
483
484 OFROM[I] is position of the earliest comment-starter seen
485 which is I+2X quotes from the comment-end.
486 PARITY is current parity of quotes from the comment end. */
3ee5041c 487 int string_style = -1; /* Presumed outside of any string. */
195d1361 488 int string_lossage = 0;
f7c436c1 489 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 490 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
491 comment-starter we might see later could be a false positive (hidden
492 inside another comment).
493 Test case: { a (* b } c (* d *) */
494 int comment_lossage = 0;
195d1361 495 int comment_end = from;
6a140a74 496 int comment_end_byte = from_byte;
195d1361 497 int comstart_pos = 0;
6a140a74 498 int comstart_byte;
eb35b628
RS
499 /* Place where the containing defun starts,
500 or 0 if we didn't come across it yet. */
501 int defun_start = 0;
502 int defun_start_byte = 0;
195d1361 503 register enum syntaxcode code;
95ff8dfc 504 int nesting = 1; /* current comment nesting */
ea315ed6 505 int c;
3f679f55
SM
506 int syntax = 0;
507
508 /* FIXME: A }} comment-ender style leads to incorrect behavior
509 in the case of {{ c }}} because we ignore the last two chars which are
510 assumed to be comment-enders although they aren't. */
195d1361
RS
511
512 /* At beginning of range to scan, we're outside of strings;
513 that determines quote parity to the comment-end. */
514 while (from != stop)
515 {
3f679f55
SM
516 int temp_byte, prev_syntax;
517 int com2start, com2end;
6a140a74 518
195d1361 519 /* Move back and examine a character. */
6a140a74 520 DEC_BOTH (from, from_byte);
195d1361
RS
521 UPDATE_SYNTAX_TABLE_BACKWARD (from);
522
3f679f55 523 prev_syntax = syntax;
6a140a74 524 c = FETCH_CHAR (from_byte);
3f679f55 525 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
526 code = SYNTAX (c);
527
3f679f55
SM
528 /* Check for 2-char comment markers. */
529 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
530 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
531 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
532 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
533 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
534 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
535 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
536
537 /* Nasty cases with overlapping 2-char comment markers:
538 - snmp-mode: -- c -- foo -- c --
539 --- c --
540 ------ c --
541 - c-mode: *||*
542 |* *|* *|
543 |*| |* |*|
544 /// */
545
546 /* If a 2-char comment sequence partly overlaps with another,
547 we don't try to be clever. */
548 if (from > stop && (com2end || com2start))
195d1361 549 {
3f679f55
SM
550 int next = from, next_byte = from_byte, next_c, next_syntax;
551 DEC_BOTH (next, next_byte);
552 UPDATE_SYNTAX_TABLE_BACKWARD (next);
553 next_c = FETCH_CHAR (next_byte);
554 next_syntax = SYNTAX_WITH_FLAGS (next_c);
555 if (((com2start || comnested)
556 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
557 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
558 || ((com2end || comnested)
559 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
560 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
561 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
562 goto lossage;
563 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 564 }
3f679f55
SM
565
566 if (com2start && comstart_pos == 0)
567 /* We're looking at a comment starter. But it might be a comment
568 ender as well (see snmp-mode). The first time we see one, we
569 need to consider it as a comment starter,
570 and the subsequent times as a comment ender. */
571 com2end = 0;
572
573 /* Turn a 2-char comment sequences into the appropriate syntax. */
574 if (com2end)
575 code = Sendcomment;
576 else if (com2start)
577 code = Scomment;
578 /* Ignore comment starters of a different style. */
579 else if (code == Scomment
580 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
581 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 582 continue;
195d1361 583
9828a477
KH
584 /* Ignore escaped characters, except comment-enders. */
585 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
586 continue;
587
3ee5041c 588 switch (code)
195d1361 589 {
3ee5041c
SM
590 case Sstring_fence:
591 case Scomment_fence:
592 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
593 case Sstring:
594 /* Track parity of quotes. */
595 if (string_style == -1)
596 /* Entering a string. */
597 string_style = c;
598 else if (string_style == c)
599 /* Leaving the string. */
600 string_style = -1;
601 else
602 /* If we have two kinds of string delimiters.
603 There's no way to grok this scanning backwards. */
195d1361 604 string_lossage = 1;
3ee5041c 605 break;
7d0393cf 606
3ee5041c
SM
607 case Scomment:
608 /* We've already checked that it is the relevant comstyle. */
f7c436c1 609 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
610 /* There are odd string quotes involved, so let's be careful.
611 Test case in Pascal: " { " a { " } */
612 goto lossage;
613
f7c436c1
SM
614 if (!comnested)
615 {
616 /* Record best comment-starter so far. */
617 comstart_pos = from;
618 comstart_byte = from_byte;
619 }
620 else if (--nesting <= 0)
95ff8dfc
RS
621 /* nested comments have to be balanced, so we don't need to
622 keep looking for earlier ones. We use here the same (slightly
623 incorrect) reasoning as below: since it is followed by uniform
624 paired string quotes, this comment-start has to be outside of
625 strings, else the comment-end itself would be inside a string. */
626 goto done;
3ee5041c
SM
627 break;
628
02010917 629 case Sendcomment:
3f679f55 630 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 631 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 632 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
633 /* This is the same style of comment ender as ours. */
634 {
635 if (comnested)
636 nesting++;
637 else
638 /* Anything before that can't count because it would match
639 this comment-ender rather than ours. */
640 from = stop; /* Break out of the loop. */
641 }
f7c436c1
SM
642 else if (comstart_pos != 0 || c != '\n')
643 /* We're mixing comment styles here, so we'd better be careful.
644 The (comstart_pos != 0 || c != '\n') check is not quite correct
645 (we should just always set comment_lossage), but removing it
646 would imply that any multiline comment in C would go through
647 lossage, which seems overkill.
648 The failure should only happen in the rare cases such as
649 { (* } *) */
650 comment_lossage = 1;
02010917 651 break;
195d1361 652
02010917
SM
653 case Sopen:
654 /* Assume a defun-start point is outside of strings. */
655 if (open_paren_in_column_0_is_defun_start
656 && (from == stop
657 || (temp_byte = dec_bytepos (from_byte),
658 FETCH_CHAR (temp_byte) == '\n')))
659 {
660 defun_start = from;
661 defun_start_byte = from_byte;
662 from = stop; /* Break out of the loop. */
663 }
eb35b628 664 break;
02010917
SM
665
666 default:
f7c436c1 667 break;
eb35b628 668 }
195d1361
RS
669 }
670
671 if (comstart_pos == 0)
672 {
673 from = comment_end;
6a140a74 674 from_byte = comment_end_byte;
195d1361
RS
675 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
676 }
f7c436c1
SM
677 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
678 or `done'), then we've found the beginning of the non-nested comment. */
679 else if (1) /* !comnested */
195d1361
RS
680 {
681 from = comstart_pos;
6a140a74 682 from_byte = comstart_byte;
789f3320 683 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
684 }
685 else
686 {
3ee5041c
SM
687 struct lisp_parse_state state;
688 lossage:
195d1361
RS
689 /* We had two kinds of string delimiters mixed up
690 together. Decode this going forwards.
02010917 691 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
692 to the one in question; this records where we
693 last passed a comment starter. */
eb35b628
RS
694 /* If we did not already find the defun start, find it now. */
695 if (defun_start == 0)
696 {
697 defun_start = find_defun_start (comment_end, comment_end_byte);
698 defun_start_byte = find_start_value_byte;
699 }
02010917 700 do
195d1361 701 {
02010917
SM
702 scan_sexps_forward (&state,
703 defun_start, defun_start_byte,
704 comment_end, -10000, 0, Qnil, 0);
705 defun_start = comment_end;
706 if (state.incomment == (comnested ? 1 : -1)
707 && state.comstyle == comstyle)
708 from = state.comstr_start;
709 else
710 {
711 from = comment_end;
712 if (state.incomment)
713 /* If comment_end is inside some other comment, maybe ours
714 is nested, so we need to try again from within the
715 surrounding comment. Example: { a (* " *) */
716 {
717 /* FIXME: We should advance by one or two chars. */
718 defun_start = state.comstr_start + 2;
719 defun_start_byte = CHAR_TO_BYTE (defun_start);
720 }
721 }
722 } while (defun_start < comment_end);
723
6a140a74 724 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
725 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
726 }
7d0393cf 727
95ff8dfc 728 done:
6a140a74
RS
729 *charpos_ptr = from;
730 *bytepos_ptr = from_byte;
731
1aa963c8 732 return (from == comment_end) ? -1 : from;
195d1361
RS
733}
734\f
8489eb67 735DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
736 doc: /* Return t if OBJECT is a syntax table.
737Currently, any char-table counts as a syntax table. */)
738 (object)
2203e1e8 739 Lisp_Object object;
8489eb67 740{
2203e1e8 741 if (CHAR_TABLE_P (object)
e704cb4b 742 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
743 return Qt;
744 return Qnil;
745}
746
8ea151b2 747static void
8489eb67
RS
748check_syntax_table (obj)
749 Lisp_Object obj;
750{
d1be9f0f 751 if (!(CHAR_TABLE_P (obj)
e704cb4b 752 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 753 wrong_type_argument (Qsyntax_table_p, obj);
7d0393cf 754}
8489eb67 755
8489eb67 756DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
757 doc: /* Return the current syntax table.
758This is the one specified by the current buffer. */)
759 ()
8489eb67
RS
760{
761 return current_buffer->syntax_table;
762}
763
764DEFUN ("standard-syntax-table", Fstandard_syntax_table,
765 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
766 doc: /* Return the standard syntax table.
767This is the one used for new buffers. */)
768 ()
8489eb67
RS
769{
770 return Vstandard_syntax_table;
771}
772
773DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
774 doc: /* Construct a new syntax table and return it.
775It is a copy of the TABLE, which defaults to the standard syntax table. */)
776 (table)
8489eb67
RS
777 Lisp_Object table;
778{
8ea151b2
RS
779 Lisp_Object copy;
780
265a9e55 781 if (!NILP (table))
8ea151b2
RS
782 check_syntax_table (table);
783 else
784 table = Vstandard_syntax_table;
785
786 copy = Fcopy_sequence (table);
0f867324
RS
787
788 /* Only the standard syntax table should have a default element.
789 Other syntax tables should inherit from parents instead. */
790 XCHAR_TABLE (copy)->defalt = Qnil;
791
792 /* Copied syntax tables should all have parents.
793 If we copied one with no parent, such as the standard syntax table,
794 use the standard syntax table as the copy's parent. */
795 if (NILP (XCHAR_TABLE (copy)->parent))
796 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 797 return copy;
8489eb67
RS
798}
799
800DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
801 doc: /* Select a new syntax table for the current buffer.
802One argument, a syntax table. */)
803 (table)
8489eb67
RS
804 Lisp_Object table;
805{
4e4dfa78 806 int idx;
8ea151b2 807 check_syntax_table (table);
8489eb67
RS
808 current_buffer->syntax_table = table;
809 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
810 idx = PER_BUFFER_VAR_IDX (syntax_table);
811 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
812 return table;
813}
814\f
815/* Convert a letter which signifies a syntax code
816 into the code it signifies.
195d1361 817 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
818
819unsigned char syntax_spec_code[0400] =
820 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 824 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
825 (char) Smath, 0377, 0377, (char) Squote,
826 (char) Sopen, (char) Sclose, 0377, 0377,
827 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
828 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
829 0377, 0377, 0377, 0377,
830 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 831 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
832 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
833 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
834 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
835 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
836 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
837 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 838 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
839 };
840
195d1361 841/* Indexed by syntax code, give the letter that describes it. */
8489eb67 842
195d1361 843char syntax_code_spec[16] =
8489eb67 844 {
195d1361
RS
845 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
846 '!', '|'
8489eb67 847 };
93da5fff
KH
848
849/* Indexed by syntax code, give the object (cons of syntax code and
850 nil) to be stored in syntax table. Since these objects can be
851 shared among syntax tables, we generate them in advance. By
852 sharing objects, the function `describe-syntax' can give a more
853 compact listing. */
854static Lisp_Object Vsyntax_code_object;
855
8489eb67 856\f
8ea151b2
RS
857/* Look up the value for CHARACTER in syntax table TABLE's parent
858 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
859 for CHARACTER. It's actually used only when not compiled with GCC. */
860
861Lisp_Object
862syntax_parent_lookup (table, character)
863 Lisp_Object table;
864 int character;
865{
866 Lisp_Object value;
867
868 while (1)
869 {
870 table = XCHAR_TABLE (table)->parent;
871 if (NILP (table))
872 return Qnil;
873
874 value = XCHAR_TABLE (table)->contents[character];
875 if (!NILP (value))
876 return value;
877 }
878}
879
8489eb67 880DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
881 doc: /* Return the syntax code of CHARACTER, described by a character.
882For example, if CHARACTER is a word constituent,
883the character `w' is returned.
884The characters that correspond to various syntax codes
885are listed in the documentation of `modify-syntax-entry'. */)
886 (character)
2203e1e8 887 Lisp_Object character;
8489eb67 888{
8ea151b2 889 int char_int;
195d1361
RS
890 gl_state.current_syntax_table = current_buffer->syntax_table;
891
892 gl_state.use_global = 0;
b7826503 893 CHECK_NUMBER (character);
2203e1e8 894 char_int = XINT (character);
8ea151b2 895 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
896}
897
898DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
899 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
900 (character)
2203e1e8 901 Lisp_Object character;
beefa22e 902{
8ea151b2 903 int char_int, code;
195d1361
RS
904 gl_state.current_syntax_table = current_buffer->syntax_table;
905 gl_state.use_global = 0;
b7826503 906 CHECK_NUMBER (character);
2203e1e8 907 char_int = XINT (character);
8ea151b2 908 code = SYNTAX (char_int);
a8bd7cd8 909 if (code == Sopen || code == Sclose)
2e34157c 910 return SYNTAX_MATCH (char_int);
beefa22e 911 return Qnil;
8489eb67
RS
912}
913
c65adb44 914DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
915 doc: /* Convert a syntax specification STRING into syntax cell form.
916STRING should be a string as it is allowed as argument of
917`modify-syntax-entry'. Value is the equivalent cons cell
918(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
919text property. */)
920 (string)
c65adb44 921 Lisp_Object string;
8489eb67 922{
2e567bd3 923 register const unsigned char *p;
8489eb67 924 register enum syntaxcode code;
247e20a8 925 int val;
8ea151b2 926 Lisp_Object match;
8489eb67 927
b7826503 928 CHECK_STRING (string);
8489eb67 929
d5db4077 930 p = SDATA (string);
8489eb67
RS
931 code = (enum syntaxcode) syntax_spec_code[*p++];
932 if (((int) code & 0377) == 0377)
b764a653 933 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 934
8ea151b2 935 if (code == Sinherit)
c65adb44 936 return Qnil;
8ea151b2
RS
937
938 if (*p)
d1be9f0f 939 {
93da5fff 940 int len;
7c19e8e1 941 int character = (STRING_CHAR_AND_LENGTH
d5db4077 942 (p, SBYTES (string) - 1, len));
93da5fff 943 XSETINT (match, character);
d1be9f0f
RS
944 if (XFASTINT (match) == ' ')
945 match = Qnil;
93da5fff 946 p += len;
d1be9f0f
RS
947 }
948 else
8ea151b2 949 match = Qnil;
8489eb67 950
8ea151b2 951 val = (int) code;
8489eb67
RS
952 while (*p)
953 switch (*p++)
954 {
955 case '1':
247e20a8 956 val |= 1 << 16;
8489eb67
RS
957 break;
958
959 case '2':
247e20a8 960 val |= 1 << 17;
8489eb67
RS
961 break;
962
963 case '3':
247e20a8 964 val |= 1 << 18;
8489eb67
RS
965 break;
966
967 case '4':
247e20a8 968 val |= 1 << 19;
8489eb67
RS
969 break;
970
971 case 'p':
247e20a8 972 val |= 1 << 20;
8489eb67 973 break;
e5d4f4dc
RS
974
975 case 'b':
247e20a8 976 val |= 1 << 21;
e5d4f4dc 977 break;
95ff8dfc
RS
978
979 case 'n':
980 val |= 1 << 22;
981 break;
8489eb67 982 }
7d0393cf 983
93da5fff 984 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 985 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
986 else
987 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
988 return Fcons (make_number (val), match);
989}
990
fdb82f93 991/* I really don't know why this is interactive
2c9e1900 992 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 993DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 994 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 995 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 996The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93
PJ
997 the current buffer's syntax table.
998The first character of NEWENTRY should be one of the following:
999 Space or - whitespace syntax. w word constituent.
1000 _ symbol constituent. . punctuation.
1001 ( open-parenthesis. ) close-parenthesis.
1002 " string quote. \\ escape.
1003 $ paired delimiter. ' expression quote or prefix operator.
1004 < comment starter. > comment ender.
1005 / character-quote. @ inherit from `standard-syntax-table'.
1006 | generic string fence. ! generic comment fence.
1007
1008Only single-character comment start and end sequences are represented thus.
1009Two-character sequences are represented as described below.
1010The second character of NEWENTRY is the matching parenthesis,
1011 used only if the first character is `(' or `)'.
1012Any additional characters are flags.
1013Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1014 1 means CHAR is the start of a two-char comment start sequence.
1015 2 means CHAR is the second character of such a sequence.
1016 3 means CHAR is the start of a two-char comment end sequence.
1017 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1018
1019There can be up to two orthogonal comment sequences. This is to support
1020language modes such as C++. By default, all comment sequences are of style
1021a, but you can set the comment sequence style to b (on the second character
1022of a comment-start, or the first character of a comment-end sequence) using
1023this flag:
a50a10a0
PJ
1024 b means CHAR is part of comment sequence b.
1025 n means CHAR is part of a nestable comment sequence.
fdb82f93 1026
a50a10a0 1027 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1028 such characters are treated as whitespace when they occur
a50a10a0
PJ
1029 between expressions.
1030usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1031 (c, newentry, syntax_table)
c65adb44
SM
1032 Lisp_Object c, newentry, syntax_table;
1033{
b7826503 1034 CHECK_NUMBER (c);
c65adb44
SM
1035
1036 if (NILP (syntax_table))
1037 syntax_table = current_buffer->syntax_table;
1038 else
1039 check_syntax_table (syntax_table);
8489eb67 1040
c65adb44 1041 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
8489eb67
RS
1042 return Qnil;
1043}
1044\f
1045/* Dump syntax table to buffer in human-readable format */
1046
62abe9cb
SM
1047DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1048 Sinternal_describe_syntax_value, 1, 1, 0,
1049 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1050 (syntax)
1051 Lisp_Object syntax;
8489eb67
RS
1052{
1053 register enum syntaxcode code;
e6365855 1054 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1055 char str[2];
62abe9cb 1056 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1057
8ea151b2
RS
1058 if (NILP (value))
1059 {
62abe9cb
SM
1060 insert_string ("default");
1061 return syntax;
8ea151b2
RS
1062 }
1063
908b7fea
KH
1064 if (CHAR_TABLE_P (value))
1065 {
62abe9cb
SM
1066 insert_string ("deeper char-table ...");
1067 return syntax;
908b7fea
KH
1068 }
1069
8ea151b2
RS
1070 if (!CONSP (value))
1071 {
62abe9cb
SM
1072 insert_string ("invalid");
1073 return syntax;
8ea151b2
RS
1074 }
1075
c1d497be
KR
1076 first = XCAR (value);
1077 match_lisp = XCDR (value);
8ea151b2
RS
1078
1079 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1080 {
62abe9cb
SM
1081 insert_string ("invalid");
1082 return syntax;
8489eb67
RS
1083 }
1084
e704cb4b 1085 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1086 start1 = (XINT (first) >> 16) & 1;
1087 start2 = (XINT (first) >> 17) & 1;
1088 end1 = (XINT (first) >> 18) & 1;
1089 end2 = (XINT (first) >> 19) & 1;
1090 prefix = (XINT (first) >> 20) & 1;
1091 comstyle = (XINT (first) >> 21) & 1;
e6365855 1092 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1093
1094 if ((int) code < 0 || (int) code >= (int) Smax)
1095 {
1096 insert_string ("invalid");
62abe9cb 1097 return syntax;
8489eb67
RS
1098 }
1099 desc = syntax_code_spec[(int) code];
1100
1101 str[0] = desc, str[1] = 0;
1102 insert (str, 1);
1103
93da5fff
KH
1104 if (NILP (match_lisp))
1105 insert (" ", 1);
1106 else
1107 insert_char (XINT (match_lisp));
8489eb67 1108
8489eb67
RS
1109 if (start1)
1110 insert ("1", 1);
1111 if (start2)
1112 insert ("2", 1);
1113
1114 if (end1)
1115 insert ("3", 1);
1116 if (end2)
1117 insert ("4", 1);
1118
1119 if (prefix)
1120 insert ("p", 1);
e5d4f4dc
RS
1121 if (comstyle)
1122 insert ("b", 1);
e6365855
SM
1123 if (comnested)
1124 insert ("n", 1);
8489eb67
RS
1125
1126 insert_string ("\twhich means: ");
1127
0220c518 1128 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1129 {
1130 case Swhitespace:
1131 insert_string ("whitespace"); break;
1132 case Spunct:
1133 insert_string ("punctuation"); break;
1134 case Sword:
1135 insert_string ("word"); break;
1136 case Ssymbol:
1137 insert_string ("symbol"); break;
1138 case Sopen:
1139 insert_string ("open"); break;
1140 case Sclose:
1141 insert_string ("close"); break;
1142 case Squote:
d671382e 1143 insert_string ("prefix"); break;
8489eb67
RS
1144 case Sstring:
1145 insert_string ("string"); break;
1146 case Smath:
1147 insert_string ("math"); break;
1148 case Sescape:
1149 insert_string ("escape"); break;
1150 case Scharquote:
1151 insert_string ("charquote"); break;
1152 case Scomment:
1153 insert_string ("comment"); break;
1154 case Sendcomment:
1155 insert_string ("endcomment"); break;
d671382e
SM
1156 case Sinherit:
1157 insert_string ("inherit"); break;
1158 case Scomment_fence:
1159 insert_string ("comment fence"); break;
1160 case Sstring_fence:
1161 insert_string ("string fence"); break;
8489eb67
RS
1162 default:
1163 insert_string ("invalid");
62abe9cb 1164 return syntax;
8489eb67
RS
1165 }
1166
8ea151b2 1167 if (!NILP (match_lisp))
8489eb67
RS
1168 {
1169 insert_string (", matches ");
8ea151b2 1170 insert_char (XINT (match_lisp));
8489eb67
RS
1171 }
1172
1173 if (start1)
1174 insert_string (",\n\t is the first character of a comment-start sequence");
1175 if (start2)
1176 insert_string (",\n\t is the second character of a comment-start sequence");
1177
1178 if (end1)
1179 insert_string (",\n\t is the first character of a comment-end sequence");
1180 if (end2)
1181 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1182 if (comstyle)
1183 insert_string (" (comment style b)");
e6365855
SM
1184 if (comnested)
1185 insert_string (" (nestable)");
e5d4f4dc 1186
8489eb67
RS
1187 if (prefix)
1188 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1189
62abe9cb
SM
1190 return syntax;
1191}
8489eb67 1192\f
195d1361
RS
1193int parse_sexp_ignore_comments;
1194
8489eb67
RS
1195/* Return the position across COUNT words from FROM.
1196 If that many words cannot be found before the end of the buffer, return 0.
1197 COUNT negative means scan backward and stop at word beginning. */
1198
dfcf069d 1199int
8489eb67
RS
1200scan_words (from, count)
1201 register int from, count;
1202{
1203 register int beg = BEGV;
1204 register int end = ZV;
6a140a74 1205 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1206 register enum syntaxcode code;
1207 int ch0, ch1;
8489eb67
RS
1208
1209 immediate_quit = 1;
1210 QUIT;
1211
195d1361
RS
1212 SETUP_SYNTAX_TABLE (from, count);
1213
8489eb67
RS
1214 while (count > 0)
1215 {
1216 while (1)
1217 {
1218 if (from == end)
1219 {
1220 immediate_quit = 0;
1221 return 0;
1222 }
195d1361 1223 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1224 ch0 = FETCH_CHAR (from_byte);
93da5fff 1225 code = SYNTAX (ch0);
6a140a74 1226 INC_BOTH (from, from_byte);
8489eb67
RS
1227 if (words_include_escapes
1228 && (code == Sescape || code == Scharquote))
1229 break;
1230 if (code == Sword)
1231 break;
8489eb67 1232 }
93da5fff
KH
1233 /* Now CH0 is a character which begins a word and FROM is the
1234 position of the next character. */
8489eb67
RS
1235 while (1)
1236 {
1237 if (from == end) break;
195d1361 1238 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1239 ch1 = FETCH_CHAR (from_byte);
93da5fff 1240 code = SYNTAX (ch1);
8489eb67
RS
1241 if (!(words_include_escapes
1242 && (code == Sescape || code == Scharquote)))
93da5fff 1243 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1244 break;
6a140a74 1245 INC_BOTH (from, from_byte);
93da5fff 1246 ch0 = ch1;
8489eb67
RS
1247 }
1248 count--;
1249 }
1250 while (count < 0)
1251 {
1252 while (1)
1253 {
1254 if (from == beg)
1255 {
1256 immediate_quit = 0;
1257 return 0;
1258 }
6a140a74 1259 DEC_BOTH (from, from_byte);
195d1361 1260 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1261 ch1 = FETCH_CHAR (from_byte);
93da5fff 1262 code = SYNTAX (ch1);
8489eb67
RS
1263 if (words_include_escapes
1264 && (code == Sescape || code == Scharquote))
1265 break;
1266 if (code == Sword)
1267 break;
8489eb67 1268 }
93da5fff
KH
1269 /* Now CH1 is a character which ends a word and FROM is the
1270 position of it. */
8489eb67
RS
1271 while (1)
1272 {
6a140a74
RS
1273 int temp_byte;
1274
1275 if (from == beg)
1276 break;
1277 temp_byte = dec_bytepos (from_byte);
195d1361 1278 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1279 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1280 code = SYNTAX (ch0);
8489eb67
RS
1281 if (!(words_include_escapes
1282 && (code == Sescape || code == Scharquote)))
93da5fff 1283 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1284 break;
6a140a74 1285 DEC_BOTH (from, from_byte);
93da5fff 1286 ch1 = ch0;
8489eb67
RS
1287 }
1288 count++;
1289 }
1290
1291 immediate_quit = 0;
1292
1293 return from;
1294}
1295
29e16385 1296DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
fdb82f93
PJ
1297 doc: /* Move point forward ARG words (backward if ARG is negative).
1298Normally returns t.
1299If an edge of the buffer or a field boundary is reached, point is left there
1300and the function returns nil. Field boundaries are not noticed if
1301`inhibit-field-text-motion' is non-nil. */)
29e16385
RS
1302 (arg)
1303 Lisp_Object arg;
8489eb67 1304{
c096ae4d 1305 Lisp_Object tmp;
2c6ea900 1306 int orig_val, val;
8489eb67 1307
29e16385
RS
1308 if (NILP (arg))
1309 XSETFASTINT (arg, 1);
1310 else
1311 CHECK_NUMBER (arg);
1312
1313 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1314 if (! orig_val)
29e16385 1315 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1316
b8855607 1317 /* Avoid jumping out of an input field. */
c096ae4d
SM
1318 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1319 Qt, Qnil, Qnil);
1320 val = XFASTINT (tmp);
7d0393cf 1321
8489eb67 1322 SET_PT (val);
e6d8341f 1323 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1324}
1325\f
195d1361
RS
1326Lisp_Object skip_chars ();
1327
1328DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1329 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1330STRING is like the inside of a `[...]' in a regular expression
1331except that `]' is never special and `\\' quotes `^', `-' or `\\'
1332 (but not as the end of a range; quoting is never needed there).
1333Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1334With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1335Char classes, e.g. `[:alpha:]', are supported.
1336
1337Returns the distance traveled, either zero or positive. */)
fdb82f93 1338 (string, lim)
195d1361
RS
1339 Lisp_Object string, lim;
1340{
a1bc88d4 1341 return skip_chars (1, 0, string, lim, 1);
195d1361
RS
1342}
1343
1344DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1345 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1346See `skip-chars-forward' for details.
1347Returns the distance traveled, either zero or negative. */)
1348 (string, lim)
195d1361
RS
1349 Lisp_Object string, lim;
1350{
a1bc88d4 1351 return skip_chars (0, 0, string, lim, 1);
195d1361
RS
1352}
1353
1354DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1355 doc: /* Move point forward across chars in specified syntax classes.
1356SYNTAX is a string of syntax code characters.
1357Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1358If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1359This function returns the distance traveled, either zero or positive. */)
1360 (syntax, lim)
195d1361
RS
1361 Lisp_Object syntax, lim;
1362{
a1bc88d4 1363 return skip_chars (1, 1, syntax, lim, 0);
195d1361
RS
1364}
1365
1366DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1367 doc: /* Move point backward across chars in specified syntax classes.
1368SYNTAX is a string of syntax code characters.
1369Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1370If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1371This function returns the distance traveled, either zero or negative. */)
1372 (syntax, lim)
195d1361
RS
1373 Lisp_Object syntax, lim;
1374{
a1bc88d4 1375 return skip_chars (0, 1, syntax, lim, 0);
195d1361
RS
1376}
1377
6a140a74 1378static Lisp_Object
a1bc88d4 1379skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
195d1361
RS
1380 int forwardp, syntaxp;
1381 Lisp_Object string, lim;
a1bc88d4 1382 int handle_iso_classes;
195d1361 1383{
195d1361
RS
1384 register unsigned int c;
1385 unsigned char fastmap[0400];
7e68b0ea 1386 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1387 of which codes don't fit in FASTMAP. In that case, set the
1388 ranges of characters in CHAR_RANGES. */
5df4982e 1389 int *char_ranges;
7e68b0ea 1390 int n_char_ranges = 0;
195d1361 1391 int negate = 0;
4101e6fe 1392 register int i, i_byte;
7e68b0ea 1393 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1394 int string_multibyte;
1395 int size_byte;
2e567bd3 1396 const unsigned char *str;
82d497fc 1397 int len;
a1bc88d4 1398 Lisp_Object iso_classes;
195d1361 1399
b7826503 1400 CHECK_STRING (string);
d5db4077 1401 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1674d9a2 1402 string_multibyte = STRING_MULTIBYTE (string);
d5db4077
KR
1403 str = SDATA (string);
1404 size_byte = SBYTES (string);
a1bc88d4 1405 iso_classes = Qnil;
195d1361 1406
82d497fc
KH
1407 /* Adjust the multibyteness of the string to that of the buffer. */
1408 if (multibyte != string_multibyte)
1409 {
1410 int nbytes;
1411
1412 if (multibyte)
d5db4077
KR
1413 nbytes = count_size_as_multibyte (SDATA (string),
1414 SCHARS (string));
82d497fc 1415 else
d5db4077 1416 nbytes = SCHARS (string);
82d497fc
KH
1417 if (nbytes != size_byte)
1418 {
2e567bd3
KR
1419 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1420 copy_text (SDATA (string), tmp, size_byte,
82d497fc
KH
1421 string_multibyte, multibyte);
1422 size_byte = nbytes;
2e567bd3 1423 str = tmp;
82d497fc
KH
1424 }
1425 }
1426
195d1361
RS
1427 if (NILP (lim))
1428 XSETINT (lim, forwardp ? ZV : BEGV);
1429 else
b7826503 1430 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1431
1432 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1433 if (XINT (lim) > ZV)
1434 XSETFASTINT (lim, ZV);
1435 if (XINT (lim) < BEGV)
1436 XSETFASTINT (lim, BEGV);
1437
195d1361
RS
1438 bzero (fastmap, sizeof fastmap);
1439
82d497fc 1440 i_byte = 0;
4101e6fe 1441
13090112 1442 if (i_byte < size_byte
d5db4077 1443 && SREF (string, 0) == '^')
195d1361 1444 {
82d497fc 1445 negate = 1; i_byte++;
195d1361
RS
1446 }
1447
1448 /* Find the characters specified and set their elements of fastmap.
1449 If syntaxp, each character counts as itself.
1450 Otherwise, handle backslashes and ranges specially. */
1451
13090112 1452 while (i_byte < size_byte)
195d1361 1453 {
82d497fc
KH
1454 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1455 i_byte += len;
4101e6fe 1456
195d1361 1457 if (syntaxp)
4101e6fe 1458 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1459 else
1460 {
a1bc88d4
RS
1461 if (handle_iso_classes && c == '['
1462 && i_byte < size_byte
1463 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1464 {
1465 const unsigned char *class_beg = str + i_byte + 1;
1466 const unsigned char *class_end = class_beg;
b3bda4fd 1467 const unsigned char *class_limit = str + size_byte - 2;
a1bc88d4
RS
1468 /* Leave room for the null. */
1469 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1470 re_wctype_t cc;
1471
1472 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1473 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1474
b3bda4fd
KS
1475 while (class_end < class_limit
1476 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1477 class_end++;
1478
b3bda4fd
KS
1479 if (class_end == class_beg
1480 || *class_end != ':' || class_end[1] != ']')
1481 goto not_a_class_name;
a1bc88d4
RS
1482
1483 bcopy (class_beg, class_name, class_end - class_beg);
1484 class_name[class_end - class_beg] = 0;
1485
1486 cc = re_wctype (class_name);
1487 if (cc == 0)
1488 error ("Invalid ISO C character class");
1489
1490 iso_classes = Fcons (make_number (cc), iso_classes);
1491
1492 i_byte = class_end + 2 - str;
1493 continue;
1494 }
1495
b3bda4fd 1496 not_a_class_name:
195d1361
RS
1497 if (c == '\\')
1498 {
13090112 1499 if (i_byte == size_byte)
4101e6fe
RS
1500 break;
1501
57951c2e
AS
1502 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1503 size_byte - i_byte, len);
82d497fc 1504 i_byte += len;
195d1361 1505 }
57951c2e
AS
1506 /* Treat `-' as range character only if another character
1507 follows. */
1508 if (i_byte + 1 < size_byte
82d497fc 1509 && str[i_byte] == '-')
195d1361 1510 {
9690d026 1511 unsigned int c2;
4101e6fe
RS
1512
1513 /* Skip over the dash. */
82d497fc 1514 i_byte++;
4101e6fe 1515
4101e6fe 1516 /* Get the end of the range. */
57951c2e
AS
1517 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1518 size_byte - i_byte, len);
82d497fc 1519 i_byte += len;
7e68b0ea 1520
4101e6fe 1521 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1522 {
1523 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1524 {
7ae339a7
KH
1525 /* Handle a range starting with a character of
1526 less than 256, and ending with a character of
1527 not less than 256. Split that into two
1528 ranges, the low one ending at 0377, and the
1529 high one starting at the smallest character
1530 in the charset of C2 and ending at C2. */
e39091b6
KH
1531 int charset = CHAR_CHARSET (c2);
1532 int c1 = MAKE_CHAR (charset, 0, 0);
1533
e39091b6
KH
1534 char_ranges[n_char_ranges++] = c1;
1535 char_ranges[n_char_ranges++] = c2;
465cf84c 1536 c2 = 0377;
e39091b6 1537 }
06274af5
KH
1538 while (c <= c2)
1539 {
1540 fastmap[c] = 1;
1541 c++;
1542 }
1543 }
9690d026 1544 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1545 {
9690d026
KH
1546 char_ranges[n_char_ranges++] = c;
1547 char_ranges[n_char_ranges++] = c2;
195d1361 1548 }
195d1361
RS
1549 }
1550 else
7e68b0ea 1551 {
e39091b6
KH
1552 if (SINGLE_BYTE_CHAR_P (c))
1553 fastmap[c] = 1;
1554 else
7e68b0ea 1555 {
4101e6fe
RS
1556 char_ranges[n_char_ranges++] = c;
1557 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1558 }
1559 }
195d1361
RS
1560 }
1561 }
1562
9690d026 1563 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1564 if (negate)
1565 for (i = 0; i < sizeof fastmap; i++)
9690d026 1566 fastmap[i] ^= 1;
195d1361
RS
1567
1568 {
1569 int start_point = PT;
1570 int pos = PT;
6a140a74 1571 int pos_byte = PT_BYTE;
9af7511a
KH
1572 unsigned char *p = PT_ADDR, *endp, *stop;
1573
1574 if (forwardp)
1575 {
1576 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1577 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1578 }
1579 else
1580 {
1581 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1582 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1583 }
195d1361
RS
1584
1585 immediate_quit = 1;
1586 if (syntaxp)
1587 {
1588 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1589 if (forwardp)
1590 {
7e68b0ea 1591 if (multibyte)
9af7511a
KH
1592 while (1)
1593 {
1594 int nbytes;
1595
1596 if (p >= stop)
ee0cdb48 1597 {
9af7511a
KH
1598 if (p >= endp)
1599 break;
1600 p = GAP_END_ADDR;
1601 stop = endp;
ee0cdb48 1602 }
9af7511a
KH
1603 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1604 if (! fastmap[(int) SYNTAX (c)])
1605 break;
1606 p += nbytes, pos++, pos_byte += nbytes;
1607 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1608 }
7e68b0ea 1609 else
9af7511a
KH
1610 while (1)
1611 {
1612 if (p >= stop)
1613 {
1614 if (p >= endp)
1615 break;
1616 p = GAP_END_ADDR;
1617 stop = endp;
1618 }
1619 if (! fastmap[(int) SYNTAX (*p)])
1620 break;
1621 p++, pos++;
1622 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1623 }
195d1361
RS
1624 }
1625 else
1626 {
7e68b0ea 1627 if (multibyte)
9af7511a
KH
1628 while (1)
1629 {
1630 unsigned char *prev_p;
1631 int nbytes;
1632
1633 if (p <= stop)
1634 {
1635 if (p <= endp)
7e68b0ea 1636 break;
9af7511a
KH
1637 p = GPT_ADDR;
1638 stop = endp;
1639 }
1640 prev_p = p;
1641 while (--p >= stop && ! CHAR_HEAD_P (*p));
1642 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1643 if (prev_p - p > nbytes)
1644 p = prev_p - 1, c = *p, nbytes = 1;
1645 else
1646 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1647 pos--, pos_byte -= nbytes;
1648 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1649 if (! fastmap[(int) SYNTAX (c)])
1650 {
1651 pos++;
1652 pos_byte += nbytes;
1653 break;
1654 }
1655 }
7e68b0ea 1656 else
9af7511a
KH
1657 while (1)
1658 {
1659 if (p <= stop)
ee0cdb48 1660 {
9af7511a 1661 if (p <= endp)
4c8535eb 1662 break;
9af7511a
KH
1663 p = GPT_ADDR;
1664 stop = endp;
ee0cdb48 1665 }
9af7511a
KH
1666 if (! fastmap[(int) SYNTAX (p[-1])])
1667 break;
1668 p--, pos--;
1669 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1670 }
195d1361
RS
1671 }
1672 }
1673 else
1674 {
1675 if (forwardp)
1676 {
7e68b0ea 1677 if (multibyte)
9af7511a 1678 while (1)
7e68b0ea 1679 {
9af7511a
KH
1680 int nbytes;
1681
1682 if (p >= stop)
1683 {
1684 if (p >= endp)
1685 break;
1686 p = GAP_END_ADDR;
1687 stop = endp;
1688 }
1689 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
a1bc88d4
RS
1690
1691 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1692 {
1693 if (negate)
1694 break;
1695 else
1696 goto fwd_ok;
1697 }
1698
9690d026 1699 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1700 {
9690d026
KH
1701 if (!fastmap[c])
1702 break;
1703 }
1704 else
1705 {
1706 /* If we are looking at a multibyte character,
1707 we must look up the character in the table
1708 CHAR_RANGES. If there's no data in the
1709 table, that character is not what we want to
1710 skip. */
1711
e39091b6
KH
1712 /* The following code do the right thing even if
1713 n_char_ranges is zero (i.e. no data in
1714 CHAR_RANGES). */
7e68b0ea 1715 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1716 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1717 break;
1718 if (!(negate ^ (i < n_char_ranges)))
1719 break;
7e68b0ea 1720 }
a1bc88d4 1721 fwd_ok:
9af7511a 1722 p += nbytes, pos++, pos_byte += nbytes;
7e68b0ea
RS
1723 }
1724 else
9af7511a
KH
1725 while (1)
1726 {
1727 if (p >= stop)
1728 {
1729 if (p >= endp)
1730 break;
1731 p = GAP_END_ADDR;
1732 stop = endp;
1733 }
a1bc88d4
RS
1734
1735 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1736 {
1737 if (negate)
1738 break;
1739 else
1ad6e4db 1740 goto fwd_unibyte_ok;
a1bc88d4
RS
1741 }
1742
4e837cd0 1743 if (!fastmap[*p])
9af7511a 1744 break;
1ad6e4db 1745 fwd_unibyte_ok:
9af7511a
KH
1746 p++, pos++;
1747 }
195d1361
RS
1748 }
1749 else
1750 {
7e68b0ea 1751 if (multibyte)
9af7511a 1752 while (1)
7e68b0ea 1753 {
9af7511a
KH
1754 unsigned char *prev_p;
1755 int nbytes;
e39091b6 1756
9af7511a
KH
1757 if (p <= stop)
1758 {
1759 if (p <= endp)
1760 break;
1761 p = GPT_ADDR;
1762 stop = endp;
1763 }
1764 prev_p = p;
1765 while (--p >= stop && ! CHAR_HEAD_P (*p));
1766 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1767 if (prev_p - p > nbytes)
1768 p = prev_p - 1, c = *p, nbytes = 1;
1769 else
1770 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
a1bc88d4
RS
1771
1772 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1773 {
1774 if (negate)
1775 break;
1776 else
1777 goto back_ok;
1778 }
1779
9690d026
KH
1780 if (SINGLE_BYTE_CHAR_P (c))
1781 {
1782 if (!fastmap[c])
1783 break;
1784 }
1785 else
7e68b0ea 1786 {
9690d026 1787 /* See the comment in the previous similar code. */
e39091b6
KH
1788 for (i = 0; i < n_char_ranges; i += 2)
1789 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1790 break;
1791 if (!(negate ^ (i < n_char_ranges)))
1792 break;
7e68b0ea 1793 }
a1bc88d4 1794 back_ok:
9af7511a 1795 pos--, pos_byte -= nbytes;
7e68b0ea
RS
1796 }
1797 else
9af7511a
KH
1798 while (1)
1799 {
1800 if (p <= stop)
1801 {
1802 if (p <= endp)
1803 break;
1804 p = GPT_ADDR;
1805 stop = endp;
1806 }
a1bc88d4
RS
1807
1808 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1809 {
1810 if (negate)
1811 break;
1812 else
1ad6e4db 1813 goto back_unibyte_ok;
a1bc88d4
RS
1814 }
1815
4e837cd0 1816 if (!fastmap[p[-1]])
9af7511a 1817 break;
1ad6e4db 1818 back_unibyte_ok:
9af7511a
KH
1819 p--, pos--;
1820 }
195d1361
RS
1821 }
1822 }
7e68b0ea 1823
6a140a74
RS
1824#if 0 /* Not needed now that a position in mid-character
1825 cannot be specified in Lisp. */
7e68b0ea
RS
1826 if (multibyte
1827 /* INC_POS or DEC_POS might have moved POS over LIM. */
1828 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1829 pos = XINT (lim);
6a140a74 1830#endif
7e68b0ea 1831
6a140a74
RS
1832 if (! multibyte)
1833 pos_byte = pos;
1834
1835 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1836 immediate_quit = 0;
1837
1838 return make_number (PT - start_point);
1839 }
1840}
a1bc88d4
RS
1841
1842/* Return 1 if character C belongs to one of the ISO classes
1843 in the list ISO_CLASSES. Each class is represented by an
1844 integer which is its type according to re_wctype. */
1845
1846static int
1847in_classes (c, iso_classes)
1848 int c;
1849 Lisp_Object iso_classes;
1850{
1851 int fits_class = 0;
1852
1853 while (! NILP (iso_classes))
1854 {
1855 Lisp_Object elt;
1856 elt = XCAR (iso_classes);
1857 iso_classes = XCDR (iso_classes);
1858
1859 if (re_iswctype (c, XFASTINT (elt)))
1860 fits_class = 1;
1861 }
1862
1863 return fits_class;
1864}
195d1361 1865\f
95ff8dfc
RS
1866/* Jump over a comment, assuming we are at the beginning of one.
1867 FROM is the current position.
1868 FROM_BYTE is the bytepos corresponding to FROM.
1869 Do not move past STOP (a charpos).
1870 The comment over which we have to jump is of style STYLE
1871 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1872 NESTING should be positive to indicate the nesting at the beginning
1873 for nested comments and should be zero or negative else.
1874 ST_COMMENT_STYLE cannot be nested.
1875 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1876 (or 0 If the search cannot start in the middle of a two-character).
1877
1878 If successful, return 1 and store the charpos of the comment's end
1879 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1880 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1881 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1882 (as defined for state.incomment) in *INCOMMENT_PTR.
1883
1884 The comment end is the last character of the comment rather than the
1885 character just after the comment.
1886
1887 Global syntax data is assumed to initially be valid for FROM and
1888 remains valid for forward search starting at the returned position. */
1889
1890static int
1891forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1892 charpos_ptr, bytepos_ptr, incomment_ptr)
1893 int from, from_byte, stop;
1894 int nesting, style, prev_syntax;
1895 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1896{
1897 register int c, c1;
1898 register enum syntaxcode code;
1899 register int syntax;
1900
1901 if (nesting <= 0) nesting = -1;
1902
1903 /* Enter the loop in the middle so that we find
1904 a 2-char comment ender if we start in the middle of it. */
1905 syntax = prev_syntax;
1906 if (syntax != 0) goto forw_incomment;
1907
1908 while (1)
1909 {
1910 if (from == stop)
1911 {
1912 *incomment_ptr = nesting;
1913 *charpos_ptr = from;
1914 *bytepos_ptr = from_byte;
1915 return 0;
1916 }
1917 c = FETCH_CHAR (from_byte);
1918 syntax = SYNTAX_WITH_FLAGS (c);
1919 code = syntax & 0xff;
1920 if (code == Sendcomment
1921 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1922 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1923 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1924 /* we have encountered a comment end of the same style
1925 as the comment sequence which began this comment
1926 section */
1927 break;
1928 if (code == Scomment_fence
1929 && style == ST_COMMENT_STYLE)
1930 /* we have encountered a comment end of the same style
1931 as the comment sequence which began this comment
1932 section. */
1933 break;
1934 if (nesting > 0
1935 && code == Scomment
abf8a9ff 1936 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1937 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1938 /* we have encountered a nested comment of the same style
1939 as the comment sequence which began this comment section */
1940 nesting++;
1941 INC_BOTH (from, from_byte);
1942 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 1943
95ff8dfc
RS
1944 forw_incomment:
1945 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1946 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1947 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1948 SYNTAX_COMEND_SECOND (c1))
1949 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1950 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1951 {
1952 if (--nesting <= 0)
1953 /* we have encountered a comment end of the same style
1954 as the comment sequence which began this comment
1955 section */
1956 break;
1957 else
1958 {
1959 INC_BOTH (from, from_byte);
1960 UPDATE_SYNTAX_TABLE_FORWARD (from);
1961 }
1962 }
95ff8dfc
RS
1963 if (nesting > 0
1964 && from < stop
1965 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1966 && (c1 = FETCH_CHAR (from_byte),
1967 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1968 && SYNTAX_COMSTART_SECOND (c1))
1969 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1970 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1971 /* we have encountered a nested comment of the same style
1972 as the comment sequence which began this comment
1973 section */
1974 {
1975 INC_BOTH (from, from_byte);
1976 UPDATE_SYNTAX_TABLE_FORWARD (from);
1977 nesting++;
1978 }
1979 }
1980 *charpos_ptr = from;
1981 *bytepos_ptr = from_byte;
1982 return 1;
1983}
1984
b3cfe0c8 1985DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 1986 doc: /*
ee648c11 1987Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
1988Stop scanning if we find something other than a comment or whitespace.
1989Set point to where scanning stops.
ee648c11 1990If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
1991between them, return t; otherwise return nil. */)
1992 (count)
840f481c 1993 Lisp_Object count;
b3cfe0c8
RS
1994{
1995 register int from;
6a140a74 1996 int from_byte;
b3cfe0c8 1997 register int stop;
8ea151b2 1998 register int c, c1;
b3cfe0c8
RS
1999 register enum syntaxcode code;
2000 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2001 int comnested = 0; /* whether the comment is nestable or not */
be720845 2002 int found;
840f481c 2003 int count1;
6a140a74 2004 int out_charpos, out_bytepos;
95ff8dfc 2005 int dummy;
840f481c 2006
b7826503 2007 CHECK_NUMBER (count);
840f481c 2008 count1 = XINT (count);
195d1361 2009 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2010
2011 immediate_quit = 1;
2012 QUIT;
2013
2014 from = PT;
6a140a74 2015 from_byte = PT_BYTE;
b3cfe0c8 2016
195d1361 2017 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2018 while (count1 > 0)
b3cfe0c8 2019 {
04882296 2020 do
b3cfe0c8 2021 {
f902a008
RS
2022 int comstart_first;
2023
04882296
KH
2024 if (from == stop)
2025 {
ef316cf0 2026 SET_PT_BOTH (from, from_byte);
b7e6e612 2027 immediate_quit = 0;
04882296
KH
2028 return Qnil;
2029 }
6a140a74 2030 c = FETCH_CHAR (from_byte);
b3cfe0c8 2031 code = SYNTAX (c);
f902a008 2032 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2033 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2034 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2035 INC_BOTH (from, from_byte);
f902a008 2036 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2037 if (from < stop && comstart_first
6a140a74 2038 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 2039 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2040 {
7d0393cf 2041 /* We have encountered a comment start sequence and we
7fc8191e 2042 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2043 the comment style this sequence begins so that later,
2044 only a comment end of the same style actually ends
7fc8191e 2045 the comment section. */
b3cfe0c8 2046 code = Scomment;
8ea151b2 2047 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2048 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2049 INC_BOTH (from, from_byte);
95ff8dfc 2050 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2051 }
04882296 2052 }
abf8a9ff 2053 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2054
da71ab91
KH
2055 if (code == Scomment_fence)
2056 comstyle = ST_COMMENT_STYLE;
2057 else if (code != Scomment)
04882296
KH
2058 {
2059 immediate_quit = 0;
6a140a74 2060 DEC_BOTH (from, from_byte);
ef316cf0 2061 SET_PT_BOTH (from, from_byte);
04882296
KH
2062 return Qnil;
2063 }
2064 /* We're at the start of a comment. */
95ff8dfc
RS
2065 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2066 &out_charpos, &out_bytepos, &dummy);
2067 from = out_charpos; from_byte = out_bytepos;
2068 if (!found)
04882296 2069 {
95ff8dfc
RS
2070 immediate_quit = 0;
2071 SET_PT_BOTH (from, from_byte);
2072 return Qnil;
b3cfe0c8 2073 }
95ff8dfc
RS
2074 INC_BOTH (from, from_byte);
2075 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2076 /* We have skipped one comment. */
840f481c 2077 count1--;
b3cfe0c8
RS
2078 }
2079
840f481c 2080 while (count1 < 0)
b3cfe0c8 2081 {
b9145dbb 2082 while (1)
b3cfe0c8 2083 {
c65adb44 2084 int quoted;
f902a008 2085
b9145dbb
RS
2086 if (from <= stop)
2087 {
6a140a74 2088 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2089 immediate_quit = 0;
2090 return Qnil;
2091 }
b3cfe0c8 2092
6a140a74 2093 DEC_BOTH (from, from_byte);
f902a008 2094 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2095 quoted = char_quoted (from, from_byte);
6a140a74 2096 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
2097 code = SYNTAX (c);
2098 comstyle = 0;
95ff8dfc 2099 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2100 if (code == Sendcomment)
2101 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2102 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2103 && prev_char_comend_first (from, from_byte)
89c6809a 2104 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2105 {
7fc8191e 2106 /* We must record the comment style encountered so that
b3cfe0c8 2107 later, we can match only the proper comment begin
7fc8191e 2108 sequence of the same style. */
6a140a74 2109 DEC_BOTH (from, from_byte);
f902a008
RS
2110 code = Sendcomment;
2111 /* Calling char_quoted, above, set up global syntax position
2112 at the new value of FROM. */
95ff8dfc
RS
2113 c1 = FETCH_CHAR (from_byte);
2114 comstyle = SYNTAX_COMMENT_STYLE (c1);
2115 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2116 }
2117
195d1361
RS
2118 if (code == Scomment_fence)
2119 {
2120 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2121 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2122
6a140a74 2123 while (1)
195d1361 2124 {
6a140a74
RS
2125 DEC_BOTH (from, from_byte);
2126 if (from == stop)
2127 break;
195d1361 2128 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2129 c = FETCH_CHAR (from_byte);
2130 if (SYNTAX (c) == Scomment_fence
7d0393cf 2131 && !char_quoted (from, from_byte))
195d1361 2132 {
7d0393cf 2133 found = 1;
195d1361
RS
2134 break;
2135 }
2136 }
2137 if (found == 0)
2138 {
2139 from = ini; /* Set point to ini + 1. */
6a140a74 2140 from_byte = ini_byte;
195d1361
RS
2141 goto leave;
2142 }
39c41ad4
SM
2143 else
2144 /* We have skipped one comment. */
2145 break;
195d1361
RS
2146 }
2147 else if (code == Sendcomment)
b3cfe0c8 2148 {
95ff8dfc 2149 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2150 &out_charpos, &out_bytepos);
1aa963c8
SM
2151 if (found == -1)
2152 {
abf8a9ff
SM
2153 if (c == '\n')
2154 /* This end-of-line is not an end-of-comment.
2155 Treat it like a whitespace.
2156 CC-mode (and maybe others) relies on this behavior. */
2157 ;
2158 else
2159 {
2160 /* Failure: we should go back to the end of this
2161 not-quite-endcomment. */
2162 if (SYNTAX(c) != code)
2163 /* It was a two-char Sendcomment. */
2164 INC_BOTH (from, from_byte);
2165 goto leave;
2166 }
1aa963c8 2167 }
2b927d02 2168 else
abf8a9ff
SM
2169 {
2170 /* We have skipped one comment. */
2171 from = out_charpos, from_byte = out_bytepos;
2172 break;
2173 }
b3cfe0c8 2174 }
bb0de084 2175 else if (code != Swhitespace || quoted)
b3cfe0c8 2176 {
195d1361 2177 leave:
b3cfe0c8 2178 immediate_quit = 0;
6a140a74 2179 INC_BOTH (from, from_byte);
ef316cf0 2180 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2181 return Qnil;
2182 }
2183 }
2184
840f481c 2185 count1++;
b3cfe0c8
RS
2186 }
2187
ef316cf0 2188 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2189 immediate_quit = 0;
2190 return Qt;
2191}
2192\f
bd25db08 2193/* Return syntax code of character C if C is a single byte character
db9f48c3 2194 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2195
2196#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2197 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2198 ? SYNTAX (c) : Ssymbol)
2199
6a140a74 2200static Lisp_Object
8489eb67
RS
2201scan_lists (from, count, depth, sexpflag)
2202 register int from;
2203 int count, depth, sexpflag;
2204{
2205 Lisp_Object val;
195d1361 2206 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2207 register int c, c1;
2208 int stringterm;
8489eb67
RS
2209 int quoted;
2210 int mathexit = 0;
93da5fff 2211 register enum syntaxcode code, temp_code;
195d1361 2212 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2213 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2214 int comnested = 0; /* whether the comment is nestable or not */
93da5fff 2215 int temp_pos;
7bf5e9e4 2216 int last_good = from;
195d1361 2217 int found;
c3907a7e 2218 int from_byte;
6a140a74 2219 int out_bytepos, out_charpos;
95ff8dfc 2220 int temp, dummy;
bd25db08 2221 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2222
2223 if (depth > 0) min_depth = 0;
2224
c3907a7e
RS
2225 if (from > ZV) from = ZV;
2226 if (from < BEGV) from = BEGV;
2227
2228 from_byte = CHAR_TO_BYTE (from);
2229
8489eb67
RS
2230 immediate_quit = 1;
2231 QUIT;
2232
195d1361 2233 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2234 while (count > 0)
2235 {
8489eb67
RS
2236 while (from < stop)
2237 {
f902a008 2238 int comstart_first, prefix;
195d1361 2239 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2240 c = FETCH_CHAR (from_byte);
bd25db08 2241 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2242 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2243 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2244 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2245 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2246 if (depth == min_depth)
2247 last_good = from;
6a140a74 2248 INC_BOTH (from, from_byte);
195d1361 2249 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2250 if (from < stop && comstart_first
ab229fdd 2251 && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
8489eb67 2252 && parse_sexp_ignore_comments)
e5d4f4dc 2253 {
7d0393cf 2254 /* we have encountered a comment start sequence and we
195d1361 2255 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2256 the comment style this sequence begins so that later,
2257 only a comment end of the same style actually ends
2258 the comment section */
2259 code = Scomment;
95ff8dfc
RS
2260 c1 = FETCH_CHAR (from_byte);
2261 comstyle = SYNTAX_COMMENT_STYLE (c1);
2262 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2263 INC_BOTH (from, from_byte);
f902a008 2264 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2265 }
7d0393cf 2266
f902a008 2267 if (prefix)
8489eb67
RS
2268 continue;
2269
0220c518 2270 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2271 {
2272 case Sescape:
2273 case Scharquote:
2274 if (from == stop) goto lose;
6a140a74 2275 INC_BOTH (from, from_byte);
8489eb67
RS
2276 /* treat following character as a word constituent */
2277 case Sword:
2278 case Ssymbol:
2279 if (depth || !sexpflag) break;
195d1361 2280 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2281 while (from < stop)
2282 {
195d1361 2283 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2284
2285 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2286 c = FETCH_CHAR (from_byte);
2287 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2288 switch (temp)
8489eb67
RS
2289 {
2290 case Scharquote:
2291 case Sescape:
6a140a74 2292 INC_BOTH (from, from_byte);
8489eb67
RS
2293 if (from == stop) goto lose;
2294 break;
2295 case Sword:
2296 case Ssymbol:
2297 case Squote:
2298 break;
2299 default:
2300 goto done;
2301 }
6a140a74 2302 INC_BOTH (from, from_byte);
8489eb67
RS
2303 }
2304 goto done;
2305
195d1361 2306 case Scomment_fence:
95ff8dfc
RS
2307 comstyle = ST_COMMENT_STYLE;
2308 /* FALLTHROUGH */
2309 case Scomment:
8489eb67 2310 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2311 UPDATE_SYNTAX_TABLE_FORWARD (from);
2312 found = forw_comment (from, from_byte, stop,
2313 comnested, comstyle, 0,
2314 &out_charpos, &out_bytepos, &dummy);
2315 from = out_charpos, from_byte = out_bytepos;
2316 if (!found)
8489eb67 2317 {
95ff8dfc
RS
2318 if (depth == 0)
2319 goto done;
2320 goto lose;
8489eb67 2321 }
95ff8dfc
RS
2322 INC_BOTH (from, from_byte);
2323 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2324 break;
2325
2326 case Smath:
2327 if (!sexpflag)
2328 break;
6a140a74
RS
2329 if (from != stop && c == FETCH_CHAR (from_byte))
2330 {
2331 INC_BOTH (from, from_byte);
2332 }
8489eb67
RS
2333 if (mathexit)
2334 {
2335 mathexit = 0;
2336 goto close1;
2337 }
2338 mathexit = 1;
2339
2340 case Sopen:
2341 if (!++depth) goto done;
2342 break;
2343
2344 case Sclose:
2345 close1:
2346 if (!--depth) goto done;
2347 if (depth < min_depth)
7bf5e9e4
RS
2348 Fsignal (Qscan_error,
2349 Fcons (build_string ("Containing expression ends prematurely"),
2350 Fcons (make_number (last_good),
2351 Fcons (make_number (from), Qnil))));
8489eb67
RS
2352 break;
2353
2354 case Sstring:
195d1361 2355 case Sstring_fence:
ef316cf0 2356 temp_pos = dec_bytepos (from_byte);
93da5fff 2357 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2358 while (1)
2359 {
2360 if (from >= stop) goto lose;
195d1361 2361 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2362 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2363 if (code == Sstring
2364 ? (c == stringterm
2365 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2366 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2367 break;
7c7ca3d2
RS
2368
2369 /* Some compilers can't handle this inside the switch. */
bd25db08 2370 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2371 switch (temp)
8489eb67
RS
2372 {
2373 case Scharquote:
2374 case Sescape:
6a140a74 2375 INC_BOTH (from, from_byte);
8489eb67 2376 }
6a140a74 2377 INC_BOTH (from, from_byte);
8489eb67 2378 }
6a140a74 2379 INC_BOTH (from, from_byte);
8489eb67
RS
2380 if (!depth && sexpflag) goto done;
2381 break;
240c43e8
SM
2382 default:
2383 /* Ignore whitespace, punctuation, quote, endcomment. */
2384 break;
8489eb67
RS
2385 }
2386 }
2387
2388 /* Reached end of buffer. Error if within object, return nil if between */
2389 if (depth) goto lose;
2390
2391 immediate_quit = 0;
2392 return Qnil;
2393
2394 /* End of object reached */
2395 done:
2396 count--;
2397 }
2398
2399 while (count < 0)
2400 {
8489eb67
RS
2401 while (from > stop)
2402 {
6a140a74 2403 DEC_BOTH (from, from_byte);
195d1361 2404 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2405 c = FETCH_CHAR (from_byte);
bd25db08 2406 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2407 if (depth == min_depth)
2408 last_good = from;
7fc8191e 2409 comstyle = 0;
95ff8dfc 2410 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2411 if (code == Sendcomment)
2412 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2413 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2414 && prev_char_comend_first (from, from_byte)
8489eb67 2415 && parse_sexp_ignore_comments)
e5d4f4dc 2416 {
f902a008 2417 /* We must record the comment style encountered so that
e5d4f4dc 2418 later, we can match only the proper comment begin
f902a008 2419 sequence of the same style. */
6a140a74 2420 DEC_BOTH (from, from_byte);
f902a008
RS
2421 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2422 code = Sendcomment;
95ff8dfc
RS
2423 c1 = FETCH_CHAR (from_byte);
2424 comstyle = SYNTAX_COMMENT_STYLE (c1);
2425 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2426 }
7d0393cf 2427
9828a477 2428 /* Quoting turns anything except a comment-ender
e6365855 2429 into a word character. Note that this cannot be true
f902a008 2430 if we decremented FROM in the if-statement above. */
9828a477 2431 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2432 {
2433 DEC_BOTH (from, from_byte);
2434 code = Sword;
2435 }
9828a477 2436 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2437 continue;
2438
9828a477 2439 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2440 {
2441 case Sword:
2442 case Ssymbol:
9828a477
KH
2443 case Sescape:
2444 case Scharquote:
8489eb67 2445 if (depth || !sexpflag) break;
195d1361
RS
2446 /* This word counts as a sexp; count object finished
2447 after passing it. */
8489eb67
RS
2448 while (from > stop)
2449 {
6a140a74 2450 temp_pos = from_byte;
ef316cf0
RS
2451 if (! NILP (current_buffer->enable_multibyte_characters))
2452 DEC_POS (temp_pos);
2453 else
2454 temp_pos--;
6a140a74 2455 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2456 c1 = FETCH_CHAR (temp_pos);
bd25db08 2457 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2458 /* Don't allow comment-end to be quoted. */
2459 if (temp_code == Sendcomment)
2460 goto done2;
6a140a74 2461 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2462 if (quoted)
93da5fff 2463 {
6a140a74 2464 DEC_BOTH (from, from_byte);
ef316cf0 2465 temp_pos = dec_bytepos (temp_pos);
6a140a74 2466 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2467 }
2468 c1 = FETCH_CHAR (temp_pos);
bd25db08 2469 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2470 if (! (quoted || temp_code == Sword
2471 || temp_code == Ssymbol
2472 || temp_code == Squote))
8489eb67 2473 goto done2;
6a140a74 2474 DEC_BOTH (from, from_byte);
8489eb67
RS
2475 }
2476 goto done2;
2477
2478 case Smath:
2479 if (!sexpflag)
2480 break;
ef316cf0 2481 temp_pos = dec_bytepos (from_byte);
6a140a74 2482 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2483 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2484 DEC_BOTH (from, from_byte);
8489eb67
RS
2485 if (mathexit)
2486 {
2487 mathexit = 0;
2488 goto open2;
2489 }
2490 mathexit = 1;
2491
2492 case Sclose:
2493 if (!++depth) goto done2;
2494 break;
2495
2496 case Sopen:
2497 open2:
2498 if (!--depth) goto done2;
2499 if (depth < min_depth)
7bf5e9e4
RS
2500 Fsignal (Qscan_error,
2501 Fcons (build_string ("Containing expression ends prematurely"),
2502 Fcons (make_number (last_good),
2503 Fcons (make_number (from), Qnil))));
8489eb67
RS
2504 break;
2505
2506 case Sendcomment:
2507 if (!parse_sexp_ignore_comments)
2508 break;
95ff8dfc 2509 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2510 &out_charpos, &out_bytepos);
1aa963c8
SM
2511 /* FIXME: if found == -1, then it really wasn't a comment-end.
2512 For single-char Sendcomment, we can't do much about it apart
2513 from skipping the char.
2514 For 2-char endcomments, we could try again, taking both
2515 chars as separate entities, but it's a lot of trouble
2516 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2517 if (found != -1)
2518 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2519 break;
2520
195d1361
RS
2521 case Scomment_fence:
2522 case Sstring_fence:
2523 while (1)
2524 {
195d1361 2525 if (from == stop) goto lose;
d0abdf7e 2526 DEC_BOTH (from, from_byte);
195d1361 2527 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2528 if (!char_quoted (from, from_byte)
bd25db08
KH
2529 && (c = FETCH_CHAR (from_byte),
2530 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2531 break;
2532 }
2533 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2534 break;
7d0393cf 2535
8489eb67 2536 case Sstring:
6a140a74 2537 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2538 while (1)
2539 {
2540 if (from == stop) goto lose;
d0abdf7e
SM
2541 DEC_BOTH (from, from_byte);
2542 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2543 if (!char_quoted (from, from_byte)
2544 && stringterm == (c = FETCH_CHAR (from_byte))
42ebfa31 2545 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2546 break;
8489eb67 2547 }
8489eb67
RS
2548 if (!depth && sexpflag) goto done2;
2549 break;
240c43e8
SM
2550 default:
2551 /* Ignore whitespace, punctuation, quote, endcomment. */
2552 break;
8489eb67
RS
2553 }
2554 }
2555
2556 /* Reached start of buffer. Error if within object, return nil if between */
2557 if (depth) goto lose;
2558
2559 immediate_quit = 0;
2560 return Qnil;
2561
2562 done2:
2563 count++;
2564 }
2565
2566
2567 immediate_quit = 0;
1e142fb7 2568 XSETFASTINT (val, from);
8489eb67
RS
2569 return val;
2570
2571 lose:
7bf5e9e4
RS
2572 Fsignal (Qscan_error,
2573 Fcons (build_string ("Unbalanced parentheses"),
2574 Fcons (make_number (last_good),
2575 Fcons (make_number (from), Qnil))));
ab229fdd 2576 abort ();
8489eb67
RS
2577 /* NOTREACHED */
2578}
2579
8489eb67 2580DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2581 doc: /* Scan from character number FROM by COUNT lists.
2582Returns the character number of the position thus found.
2583
2584If DEPTH is nonzero, paren depth begins counting from that value,
2585only places where the depth in parentheses becomes zero
2586are candidates for stopping; COUNT such places are counted.
2587Thus, a positive value for DEPTH means go out levels.
2588
2589Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2590
2591If the beginning or end of (the accessible part of) the buffer is reached
2592and the depth is wrong, an error is signaled.
2593If the depth is right but the count is not used up, nil is returned. */)
2594 (from, count, depth)
8489eb67
RS
2595 Lisp_Object from, count, depth;
2596{
b7826503
PJ
2597 CHECK_NUMBER (from);
2598 CHECK_NUMBER (count);
2599 CHECK_NUMBER (depth);
8489eb67
RS
2600
2601 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2602}
2603
2604DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2605 doc: /* Scan from character number FROM by COUNT balanced expressions.
2606If COUNT is negative, scan backwards.
2607Returns the character number of the position thus found.
2608
2609Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2610
2611If the beginning or end of (the accessible part of) the buffer is reached
2612in the middle of a parenthetical grouping, an error is signaled.
2613If the beginning or end is reached between groupings
2614but before count is used up, nil is returned. */)
2615 (from, count)
8489eb67
RS
2616 Lisp_Object from, count;
2617{
b7826503
PJ
2618 CHECK_NUMBER (from);
2619 CHECK_NUMBER (count);
8489eb67
RS
2620
2621 return scan_lists (XINT (from), XINT (count), 0, 1);
2622}
2623
2624DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2625 0, 0, 0,
2626 doc: /* Move point backward over any number of chars with prefix syntax.
2627This includes chars with "quote" or "prefix" syntax (' or p). */)
2628 ()
8489eb67
RS
2629{
2630 int beg = BEGV;
6a140a74
RS
2631 int opoint = PT;
2632 int opoint_byte = PT_BYTE;
6ec8bbd2 2633 int pos = PT;
6a140a74 2634 int pos_byte = PT_BYTE;
93da5fff 2635 int c;
93da5fff 2636
7d0393cf 2637 if (pos <= beg)
195d1361 2638 {
f902a008
RS
2639 SET_PT_BOTH (opoint, opoint_byte);
2640
2641 return Qnil;
195d1361 2642 }
8489eb67 2643
f902a008
RS
2644 SETUP_SYNTAX_TABLE (pos, -1);
2645
6a140a74
RS
2646 DEC_BOTH (pos, pos_byte);
2647
1fd3172d 2648 while (!char_quoted (pos, pos_byte)
195d1361 2649 /* Previous statement updates syntax table. */
6a140a74 2650 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2651 || SYNTAX_PREFIX (c)))
2652 {
1fd3172d
RS
2653 opoint = pos;
2654 opoint_byte = pos_byte;
2655
2656 if (pos + 1 > beg)
2657 DEC_BOTH (pos, pos_byte);
93da5fff 2658 }
8489eb67 2659
6a140a74 2660 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2661
2662 return Qnil;
2663}
2664\f
6a140a74 2665/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2666 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2667 and return a description of the state of the parse at END.
c81a3712 2668 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2669 If COMMENTSTOP is 1, stop at the start of a comment.
2670 If COMMENTSTOP is -1, stop at the start or end of a comment,
2671 after the beginning of a string, or after the end of a string. */
8489eb67 2672
340f92b5 2673static void
6a140a74 2674scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2675 stopbefore, oldstate, commentstop)
e5d4f4dc 2676 struct lisp_parse_state *stateptr;
8489eb67 2677 register int from;
ddc447fc 2678 int end, targetdepth, stopbefore, from_byte;
8489eb67 2679 Lisp_Object oldstate;
c81a3712 2680 int commentstop;
8489eb67
RS
2681{
2682 struct lisp_parse_state state;
2683
2684 register enum syntaxcode code;
95ff8dfc
RS
2685 int c1;
2686 int comnested;
8489eb67
RS
2687 struct level { int last, prev; };
2688 struct level levelstart[100];
2689 register struct level *curlevel = levelstart;
2690 struct level *endlevel = levelstart + 100;
8489eb67
RS
2691 register int depth; /* Paren depth of current scanning location.
2692 level - levelstart equals this except
2693 when the depth becomes negative. */
2694 int mindepth; /* Lowest DEPTH value seen. */
2695 int start_quoted = 0; /* Nonzero means starting after a char quote */
2696 Lisp_Object tem;
93da5fff 2697 int prev_from; /* Keep one character before FROM. */
6a140a74 2698 int prev_from_byte;
1fd3172d 2699 int prev_from_syntax;
195d1361
RS
2700 int boundary_stop = commentstop == -1;
2701 int nofence;
95ff8dfc
RS
2702 int found;
2703 int out_bytepos, out_charpos;
7c7ca3d2 2704 int temp;
93da5fff
KH
2705
2706 prev_from = from;
6a140a74
RS
2707 prev_from_byte = from_byte;
2708 if (from != BEGV)
2709 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2710
2711 /* Use this macro instead of `from++'. */
6a140a74
RS
2712#define INC_FROM \
2713do { prev_from = from; \
2714 prev_from_byte = from_byte; \
ab229fdd
AS
2715 temp = FETCH_CHAR (prev_from_byte); \
2716 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2717 INC_BOTH (from, from_byte); \
3ceb4629
SM
2718 if (from < end) \
2719 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2720 } while (0)
8489eb67
RS
2721
2722 immediate_quit = 1;
2723 QUIT;
2724
265a9e55 2725 if (NILP (oldstate))
8489eb67
RS
2726 {
2727 depth = 0;
2728 state.instring = -1;
2729 state.incomment = 0;
195d1361
RS
2730 state.comstyle = 0; /* comment style a by default. */
2731 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2732 }
2733 else
2734 {
2735 tem = Fcar (oldstate);
265a9e55 2736 if (!NILP (tem))
8489eb67
RS
2737 depth = XINT (tem);
2738 else
2739 depth = 0;
2740
2741 oldstate = Fcdr (oldstate);
2742 oldstate = Fcdr (oldstate);
2743 oldstate = Fcdr (oldstate);
2744 tem = Fcar (oldstate);
195d1361 2745 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2746 state.instring = (!NILP (tem)
2747 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2748 : -1);
8489eb67
RS
2749
2750 oldstate = Fcdr (oldstate);
2751 tem = Fcar (oldstate);
e76f1c44
GM
2752 state.incomment = (!NILP (tem)
2753 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2754 : 0);
8489eb67
RS
2755
2756 oldstate = Fcdr (oldstate);
2757 tem = Fcar (oldstate);
265a9e55 2758 start_quoted = !NILP (tem);
e5d4f4dc 2759
95ff8dfc 2760 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2761 style a. If it is non-nil, we are in comment style b */
2762 oldstate = Fcdr (oldstate);
e5d4f4dc 2763 oldstate = Fcdr (oldstate);
195d1361 2764 tem = Fcar (oldstate);
7d0393cf 2765 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2766 ? ST_COMMENT_STYLE : 1);
195d1361 2767
e5d4f4dc 2768 oldstate = Fcdr (oldstate);
e5d4f4dc 2769 tem = Fcar (oldstate);
195d1361 2770 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2771 oldstate = Fcdr (oldstate);
2772 tem = Fcar (oldstate);
2773 while (!NILP (tem)) /* >= second enclosing sexps. */
2774 {
2775 /* curlevel++->last ran into compiler bug on Apollo */
2776 curlevel->last = XINT (Fcar (tem));
2777 if (++curlevel == endlevel)
02010917 2778 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2779 curlevel->prev = -1;
2780 curlevel->last = -1;
2781 tem = Fcdr (tem);
2782 }
8489eb67
RS
2783 }
2784 state.quoted = 0;
2785 mindepth = depth;
2786
2787 curlevel->prev = -1;
2788 curlevel->last = -1;
2789
326b283b 2790 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
2791 temp = FETCH_CHAR (prev_from_byte);
2792 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 2793 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2794
195d1361 2795 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2796
326b283b
RS
2797 if (state.incomment)
2798 goto startincomment;
8489eb67
RS
2799 if (state.instring >= 0)
2800 {
195d1361 2801 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2802 if (start_quoted)
2803 goto startquotedinstring;
8489eb67
RS
2804 goto startinstring;
2805 }
326b283b
RS
2806 else if (start_quoted)
2807 goto startquoted;
1fd3172d 2808
8489eb67
RS
2809 while (from < end)
2810 {
93da5fff 2811 INC_FROM;
1fd3172d 2812 code = prev_from_syntax & 0xff;
4c920633 2813
c5d4b96f
SM
2814 if (from < end
2815 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
2816 && (c1 = FETCH_CHAR (from_byte),
2817 SYNTAX_COMSTART_SECOND (c1)))
2818 /* Duplicate code to avoid a complex if-expression
2819 which causes trouble for the SGI compiler. */
95ff8dfc 2820 {
c5d4b96f
SM
2821 /* Record the comment style we have entered so that only
2822 the comment-end sequence of the same style actually
2823 terminates the comment section. */
2824 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2825 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2826 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2827 state.incomment = comnested ? 1 : -1;
95ff8dfc 2828 state.comstr_start = prev_from;
c5d4b96f
SM
2829 INC_FROM;
2830 code = Scomment;
95ff8dfc 2831 }
4c920633 2832 else if (code == Scomment_fence)
e5d4f4dc
RS
2833 {
2834 /* Record the comment style we have entered so that only
2835 the comment-end sequence of the same style actually
2836 terminates the comment section. */
95ff8dfc
RS
2837 state.comstyle = ST_COMMENT_STYLE;
2838 state.incomment = -1;
195d1361 2839 state.comstr_start = prev_from;
e5d4f4dc 2840 code = Scomment;
e5d4f4dc 2841 }
c5d4b96f
SM
2842 else if (code == Scomment)
2843 {
2844 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2845 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2846 1 : -1);
2847 state.comstr_start = prev_from;
2848 }
e5d4f4dc 2849
1fd3172d 2850 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2851 continue;
0220c518 2852 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2853 {
2854 case Sescape:
2855 case Scharquote:
2856 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2857 curlevel->last = prev_from;
8489eb67
RS
2858 startquoted:
2859 if (from == end) goto endquoted;
93da5fff 2860 INC_FROM;
8489eb67
RS
2861 goto symstarted;
2862 /* treat following character as a word constituent */
2863 case Sword:
2864 case Ssymbol:
2865 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2866 curlevel->last = prev_from;
8489eb67
RS
2867 symstarted:
2868 while (from < end)
2869 {
7c7ca3d2 2870 /* Some compilers can't handle this inside the switch. */
ab229fdd
AS
2871 temp = FETCH_CHAR (from_byte);
2872 temp = SYNTAX (temp);
7c7ca3d2 2873 switch (temp)
8489eb67
RS
2874 {
2875 case Scharquote:
2876 case Sescape:
93da5fff 2877 INC_FROM;
8489eb67
RS
2878 if (from == end) goto endquoted;
2879 break;
2880 case Sword:
2881 case Ssymbol:
2882 case Squote:
2883 break;
2884 default:
2885 goto symdone;
2886 }
93da5fff 2887 INC_FROM;
8489eb67
RS
2888 }
2889 symdone:
2890 curlevel->prev = curlevel->last;
2891 break;
2892
240c43e8 2893 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 2894 case Scomment:
195d1361 2895 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2896 startincomment:
2897 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2898 that we find a 2-char comment ender even if we start in the
e6365855 2899 middle of it. We don't want to do that if we're just at the
d355bd8a 2900 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2901 found = forw_comment (from, from_byte, end,
2902 state.incomment, state.comstyle,
e6365855
SM
2903 (from == BEGV || from < state.comstr_start + 3)
2904 ? 0 : prev_from_syntax,
95ff8dfc
RS
2905 &out_charpos, &out_bytepos, &state.incomment);
2906 from = out_charpos; from_byte = out_bytepos;
2907 /* Beware! prev_from and friends are invalid now.
2908 Luckily, the `done' doesn't use them and the INC_FROM
2909 sets them to a sane value without looking at them. */
2910 if (!found) goto done;
d355bd8a 2911 INC_FROM;
8489eb67 2912 state.incomment = 0;
e5d4f4dc 2913 state.comstyle = 0; /* reset the comment style */
195d1361 2914 if (boundary_stop) goto done;
8489eb67
RS
2915 break;
2916
2917 case Sopen:
2918 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2919 depth++;
2920 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2921 curlevel->last = prev_from;
8489eb67 2922 if (++curlevel == endlevel)
02010917 2923 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2924 curlevel->prev = -1;
2925 curlevel->last = -1;
30844415 2926 if (targetdepth == depth) goto done;
8489eb67
RS
2927 break;
2928
2929 case Sclose:
2930 depth--;
2931 if (depth < mindepth)
2932 mindepth = depth;
2933 if (curlevel != levelstart)
2934 curlevel--;
2935 curlevel->prev = curlevel->last;
30844415 2936 if (targetdepth == depth) goto done;
8489eb67
RS
2937 break;
2938
2939 case Sstring:
195d1361
RS
2940 case Sstring_fence:
2941 state.comstr_start = from - 1;
8489eb67 2942 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2943 curlevel->last = prev_from;
7d0393cf 2944 state.instring = (code == Sstring
6a140a74 2945 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2946 : ST_STRING_STYLE);
2947 if (boundary_stop) goto done;
8489eb67 2948 startinstring:
195d1361 2949 {
644ea4df 2950 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 2951
644ea4df
RS
2952 while (1)
2953 {
2954 int c;
195d1361 2955
644ea4df 2956 if (from >= end) goto done;
6a140a74 2957 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2958 /* Some compilers can't handle this inside the switch. */
2959 temp = SYNTAX (c);
ccf89641
KH
2960
2961 /* Check TEMP here so that if the char has
2962 a syntax-table property which says it is NOT
2963 a string character, it does not end the string. */
2964 if (nofence && c == state.instring && temp == Sstring)
2965 break;
2966
7c7ca3d2 2967 switch (temp)
644ea4df
RS
2968 {
2969 case Sstring_fence:
2970 if (!nofence) goto string_end;
2971 break;
2972 case Scharquote:
2973 case Sescape:
2974 INC_FROM;
2975 startquotedinstring:
2976 if (from >= end) goto endquoted;
195d1361 2977 }
644ea4df
RS
2978 INC_FROM;
2979 }
195d1361
RS
2980 }
2981 string_end:
8489eb67
RS
2982 state.instring = -1;
2983 curlevel->prev = curlevel->last;
93da5fff 2984 INC_FROM;
195d1361 2985 if (boundary_stop) goto done;
8489eb67
RS
2986 break;
2987
2988 case Smath:
240c43e8
SM
2989 /* FIXME: We should do something with it. */
2990 break;
2991 default:
2992 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
2993 break;
2994 }
2995 }
2996 goto done;
2997
2998 stop: /* Here if stopping before start of sexp. */
93da5fff 2999 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3000 goto done; /* but return the position before it. */
3001
3002 endquoted:
3003 state.quoted = 1;
3004 done:
3005 state.depth = depth;
3006 state.mindepth = mindepth;
3007 state.thislevelstart = curlevel->prev;
3008 state.prevlevelstart
3009 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3010 state.location = from;
1a102a58
RS
3011 state.levelstarts = Qnil;
3012 while (--curlevel >= levelstart)
3013 state.levelstarts = Fcons (make_number (curlevel->last),
3014 state.levelstarts);
8489eb67
RS
3015 immediate_quit = 0;
3016
e5d4f4dc 3017 *stateptr = state;
8489eb67
RS
3018}
3019
c81a3712 3020DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3021 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3022Parsing stops at TO or when certain criteria are met;
3023 point is set to where parsing stops.
3024If fifth arg OLDSTATE is omitted or nil,
3025 parsing assumes that FROM is the beginning of a function.
0e6228bc 3026Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3027 0. depth in parens.
3028 1. character address of start of innermost containing list; nil if none.
3029 2. character address of start of last complete sexp terminated.
3030 3. non-nil if inside a string.
3031 (it is the character that will terminate the string,
3032 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3033 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3034 else an integer (the current comment nesting).
3035 5. t if following a quote character.
3036 6. the minimum paren-depth encountered during this scan.
3037 7. t if in a comment of style b; symbol `syntax-table' if the comment
3038 should be terminated by a generic comment delimiter.
3039 8. character address of start of comment or string; nil if not in one.
3040 9. Intermediate data for continuation of parsing (subject to change).
3041If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3042in parentheses becomes equal to TARGETDEPTH.
3043Fourth arg STOPBEFORE non-nil means stop when come to
3044 any character that starts a sexp.
0e6228bc 3045Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93
PJ
3046 It is used to initialize the state of the parse. Elements number 1, 2, 6
3047 and 8 are ignored; you can leave off element 8 (the last) entirely.
3048Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3049 If it is symbol `syntax-table', stop after the start of a comment or a
3050 string, or after end of a comment or a string. */)
3051 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3052 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3053{
3054 struct lisp_parse_state state;
3055 int target;
3056
265a9e55 3057 if (!NILP (targetdepth))
8489eb67 3058 {
b7826503 3059 CHECK_NUMBER (targetdepth);
8489eb67
RS
3060 target = XINT (targetdepth);
3061 }
3062 else
3063 target = -100000; /* We won't reach this depth */
3064
3065 validate_region (&from, &to);
6a140a74
RS
3066 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3067 XINT (to),
c81a3712 3068 target, !NILP (stopbefore), oldstate,
7d0393cf 3069 (NILP (commentstop)
195d1361 3070 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3071
3072 SET_PT (state.location);
7d0393cf 3073
8489eb67
RS
3074 return Fcons (make_number (state.depth),
3075 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3076 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3077 Fcons (state.instring >= 0
3078 ? (state.instring == ST_STRING_STYLE
195d1361 3079 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3080 Fcons (state.incomment < 0 ? Qt :
3081 (state.incomment == 0 ? Qnil :
3082 make_number (state.incomment)),
8489eb67 3083 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3084 Fcons (make_number (state.mindepth),
7d0393cf 3085 Fcons ((state.comstyle
bff37d2a
RS
3086 ? (state.comstyle == ST_COMMENT_STYLE
3087 ? Qsyntax_table : Qt) :
3088 Qnil),
0beaf54f
DL
3089 Fcons (((state.incomment
3090 || (state.instring >= 0))
bff37d2a
RS
3091 ? make_number (state.comstr_start)
3092 : Qnil),
1a102a58 3093 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3094}
3095\f
dfcf069d 3096void
8489eb67
RS
3097init_syntax_once ()
3098{
78f9a1f7 3099 register int i, c;
8ea151b2 3100 Lisp_Object temp;
8489eb67 3101
5ebaddf5
RS
3102 /* This has to be done here, before we call Fmake_char_table. */
3103 Qsyntax_table = intern ("syntax-table");
3104 staticpro (&Qsyntax_table);
3105
3106 /* Intern this now in case it isn't already done.
3107 Setting this variable twice is harmless.
3108 But don't staticpro it here--that is done in alloc.c. */
3109 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3110
93da5fff 3111 /* Create objects which can be shared among syntax tables. */
42ebfa31 3112 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3113 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3114 XVECTOR (Vsyntax_code_object)->contents[i]
3115 = Fcons (make_number (i), Qnil);
3116
5ebaddf5
RS
3117 /* Now we are ready to set up this property, so we can
3118 create syntax tables. */
3119 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3120
93da5fff 3121 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3122
5ebaddf5 3123 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3124
93da5fff 3125 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3126 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3127 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3128 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3129 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3130 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3131 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3132
3133 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3134 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3135
3136 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3137 Fcons (make_number (Sopen), make_number (')')));
3138 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3139 Fcons (make_number (Sclose), make_number ('(')));
3140 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3141 Fcons (make_number (Sopen), make_number (']')));
3142 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3143 Fcons (make_number (Sclose), make_number ('[')));
3144 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3145 Fcons (make_number (Sopen), make_number ('}')));
3146 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3147 Fcons (make_number (Sclose), make_number ('{')));
3148 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3149 Fcons (make_number ((int) Sstring), Qnil));
3150 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3151 Fcons (make_number ((int) Sescape), Qnil));
3152
93da5fff 3153 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3154 for (i = 0; i < 10; i++)
78f9a1f7
KH
3155 {
3156 c = "_-+*/&|<>="[i];
3157 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3158 }
8489eb67 3159
93da5fff 3160 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3161 for (i = 0; i < 12; i++)
78f9a1f7
KH
3162 {
3163 c = ".,;:?!#@~^'`"[i];
3164 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3165 }
bd25db08
KH
3166
3167 /* All multibyte characters have syntax `word' by default. */
3168 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3169 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3170 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3171}
3172
dfcf069d 3173void
8489eb67
RS
3174syms_of_syntax ()
3175{
3176 Qsyntax_table_p = intern ("syntax-table-p");
3177 staticpro (&Qsyntax_table_p);
3178
93da5fff
KH
3179 staticpro (&Vsyntax_code_object);
3180
a2994c46
KS
3181 staticpro (&gl_state.object);
3182 staticpro (&gl_state.global_code);
3183 staticpro (&gl_state.current_syntax_table);
3184 staticpro (&gl_state.old_prop);
3185
3186 /* Defined in regex.c */
3187 staticpro (&re_match_object);
3188
7bf5e9e4
RS
3189 Qscan_error = intern ("scan-error");
3190 staticpro (&Qscan_error);
3191 Fput (Qscan_error, Qerror_conditions,
f3be100f 3192 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3193 Fput (Qscan_error, Qerror_message,
3194 build_string ("Scan error"));
3195
8489eb67 3196 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3197 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3198
195d1361 3199 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3200 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3201Otherwise, that text property is simply ignored.
3202See the info node `(elisp)Syntax Properties' for a description of the
3203`syntax-table' property. */);
195d1361 3204
8489eb67
RS
3205 words_include_escapes = 0;
3206 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3207 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3208
bd25db08 3209 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3210 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3211 multibyte_syntax_as_symbol = 0;
3212
f4ed767f
GM
3213 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3214 &open_paren_in_column_0_is_defun_start,
b222e415 3215 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3216 open_paren_in_column_0_is_defun_start = 1;
3217
8489eb67
RS
3218 defsubr (&Ssyntax_table_p);
3219 defsubr (&Ssyntax_table);
3220 defsubr (&Sstandard_syntax_table);
3221 defsubr (&Scopy_syntax_table);
3222 defsubr (&Sset_syntax_table);
3223 defsubr (&Schar_syntax);
beefa22e 3224 defsubr (&Smatching_paren);
c65adb44 3225 defsubr (&Sstring_to_syntax);
8489eb67 3226 defsubr (&Smodify_syntax_entry);
62abe9cb 3227 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3228
3229 defsubr (&Sforward_word);
3230
195d1361
RS
3231 defsubr (&Sskip_chars_forward);
3232 defsubr (&Sskip_chars_backward);
3233 defsubr (&Sskip_syntax_forward);
3234 defsubr (&Sskip_syntax_backward);
3235
b3cfe0c8 3236 defsubr (&Sforward_comment);
8489eb67
RS
3237 defsubr (&Sscan_lists);
3238 defsubr (&Sscan_sexps);
3239 defsubr (&Sbackward_prefix_chars);
3240 defsubr (&Sparse_partial_sexp);
3241}
ab5796a9
MB
3242
3243/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3244 (do not change this comment) */