New directory
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b8855607 2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
e35f6ff7 28#include "keymap.h"
195d1361
RS
29
30/* Make syntax table lookup grant data in gl_state. */
31#define SYNTAX_ENTRY_VIA_PROPERTY
32
8489eb67 33#include "syntax.h"
195d1361
RS
34#include "intervals.h"
35
36/* We use these constants in place for comment-style and
37 string-ender-char to distinguish comments/strings started by
38 comment_fence and string_fence codes. */
39
40#define ST_COMMENT_STYLE (256 + 1)
41#define ST_STRING_STYLE (256 + 2)
93da5fff 42#include "category.h"
8489eb67 43
7bf5e9e4 44Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
45
46int words_include_escapes;
195d1361 47int parse_sexp_lookup_properties;
8489eb67 48
bd25db08
KH
49/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
50int multibyte_syntax_as_symbol;
51
8ea151b2
RS
52/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
53 if not compiled with GCC. No need to mark it, since it is used
54 only very temporarily. */
55Lisp_Object syntax_temp;
56
f4ed767f
GM
57/* Non-zero means an open parenthesis in column 0 is always considered
58 to be the start of a defun. Zero means an open parenthesis in
59 column 0 has no special meaning. */
60
61int open_paren_in_column_0_is_defun_start;
62
e5d4f4dc
RS
63/* This is the internal form of the parse state used in parse-partial-sexp. */
64
65struct lisp_parse_state
66 {
195d1361
RS
67 int depth; /* Depth at end of parsing. */
68 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 69 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 70 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
71 int quoted; /* Nonzero if just after an escape char at end of parsing */
72 int thislevelstart; /* Char number of most recent start-of-expression at current level */
73 int prevlevelstart; /* Char number of start of containing expression */
195d1361 74 int location; /* Char number at which parsing stopped. */
e5d4f4dc 75 int mindepth; /* Minimum depth seen while scanning. */
195d1361 76 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
77 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
78 of levels (starting from outermost). */
e5d4f4dc
RS
79 };
80\f
37bef230
RS
81/* These variables are a cache for finding the start of a defun.
82 find_start_pos is the place for which the defun start was found.
83 find_start_value is the defun start position found for it.
6a140a74 84 find_start_value_byte is the corresponding byte position.
37bef230
RS
85 find_start_buffer is the buffer it was found in.
86 find_start_begv is the BEGV value when it was found.
87 find_start_modiff is the value of MODIFF when it was found. */
88
89static int find_start_pos;
90static int find_start_value;
6a140a74 91static int find_start_value_byte;
37bef230
RS
92static struct buffer *find_start_buffer;
93static int find_start_begv;
94static int find_start_modiff;
6a140a74
RS
95
96
97static int find_defun_start P_ ((int, int));
95ff8dfc 98static int back_comment P_ ((int, int, int, int, int, int *, int *));
6a140a74
RS
99static int char_quoted P_ ((int, int));
100static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
101static Lisp_Object scan_lists P_ ((int, int, int, int));
102static void scan_sexps_forward P_ ((struct lisp_parse_state *,
103 int, int, int, int,
104 int, Lisp_Object, int));
195d1361
RS
105\f
106
107struct gl_state_s gl_state; /* Global state of syntax parser. */
108
109INTERVAL interval_of ();
110#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
111 to scan to property-change. */
112
6a140a74
RS
113/* Update gl_state to an appropriate interval which contains CHARPOS. The
114 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 115 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 116 valid at start, the rest is filled basing on OBJECT.
195d1361 117
6a140a74 118 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
119 direction than the intervals - or in an interval. We update the
120 current syntax-table basing on the property of this interval, and
6a140a74 121 update the interval to start further than CHARPOS - or be
195d1361 122 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 123 charpos to call this subroutine again - or be before/after the
195d1361
RS
124 start/end of OBJECT. */
125
126void
6a140a74
RS
127update_syntax_table (charpos, count, init, object)
128 int charpos, count, init;
195d1361
RS
129 Lisp_Object object;
130{
131 Lisp_Object tmp_table;
4ffe723b 132 int cnt = 0, invalidate = 1;
195d1361
RS
133 INTERVAL i, oldi;
134
135 if (init)
136 {
bb0de084 137 gl_state.old_prop = Qnil;
195d1361
RS
138 gl_state.start = gl_state.b_property;
139 gl_state.stop = gl_state.e_property;
bb0de084
SM
140 i = interval_of (charpos, object);
141 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
142 invalidate = 0;
143 if (NULL_INTERVAL_P (i))
144 return;
f902a008 145 /* interval_of updates only ->position of the return value, so
d80f4cc7 146 update the parents manually to speed up update_interval. */
bb0de084 147 while (!NULL_PARENT (i))
d80f4cc7
RS
148 {
149 if (AM_RIGHT_CHILD (i))
439d5cb4 150 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 151 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
152 - TOTAL_LENGTH (INTERVAL_PARENT (i))
153 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 154 else
439d5cb4 155 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 156 + TOTAL_LENGTH (i);
439d5cb4 157 i = INTERVAL_PARENT (i);
d80f4cc7
RS
158 }
159 i = gl_state.forward_i;
bb0de084 160 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 161 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
162 goto update;
163 }
164 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
165
423e705d 166 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 167 or further off. */
195d1361
RS
168 if (NULL_INTERVAL_P (i))
169 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 170 else if (charpos < i->position) /* Move left. */
195d1361
RS
171 {
172 if (count > 0)
f902a008 173 error ("Error in syntax_table logic for intervals <-");
195d1361 174 /* Update the interval. */
6a140a74 175 i = update_interval (i, charpos);
bb0de084 176 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
177 {
178 invalidate = 0;
195d1361 179 gl_state.forward_i = i;
ee0cdb48 180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 181 }
423e705d 182 }
6a140a74 183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
184 {
185 if (count < 0)
f902a008 186 error ("Error in syntax_table logic for intervals ->");
195d1361 187 /* Update the interval. */
6a140a74 188 i = update_interval (i, charpos);
bb0de084 189 if (i->position != gl_state.e_property)
195d1361
RS
190 {
191 invalidate = 0;
195d1361 192 gl_state.backward_i = i;
bb0de084 193 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
194 }
195 }
195d1361
RS
196
197 update:
198 tmp_table = textget (i->plist, Qsyntax_table);
199
200 if (invalidate)
201 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 202
423e705d
SM
203 if (invalidate) /* Did not get to adjacent interval. */
204 { /* with the same table => */
205 /* invalidate the old range. */
195d1361
RS
206 if (count > 0)
207 {
208 gl_state.backward_i = i;
bb0de084
SM
209 gl_state.b_property = i->position - gl_state.offset;
210 }
211 else
195d1361 212 {
bb0de084 213 gl_state.forward_i = i;
ee0cdb48 214 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
215 }
216 }
37bef230 217
bb0de084 218 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 219 {
bb0de084
SM
220 gl_state.current_syntax_table = tmp_table;
221 gl_state.old_prop = tmp_table;
222 if (EQ (Fsyntax_table_p (tmp_table), Qt))
223 {
224 gl_state.use_global = 0;
7d0393cf 225 }
bb0de084
SM
226 else if (CONSP (tmp_table))
227 {
228 gl_state.use_global = 1;
229 gl_state.global_code = tmp_table;
230 }
7d0393cf 231 else
bb0de084
SM
232 {
233 gl_state.use_global = 0;
234 gl_state.current_syntax_table = current_buffer->syntax_table;
235 }
195d1361
RS
236 }
237
238 while (!NULL_INTERVAL_P (i))
239 {
240 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
241 {
242 if (count > 0)
bb0de084
SM
243 {
244 gl_state.e_property = i->position - gl_state.offset;
245 gl_state.forward_i = i;
246 }
247 else
248 {
249 gl_state.b_property = i->position + LENGTH (i) - gl_state.offset;
250 gl_state.backward_i = i;
251 }
252 return;
195d1361 253 }
7d0393cf 254 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
255 {
256 if (count > 0)
bb0de084
SM
257 {
258 gl_state.e_property = i->position + LENGTH (i) - gl_state.offset;
259 gl_state.forward_i = i;
260 }
261 else
262 {
263 gl_state.b_property = i->position - gl_state.offset;
264 gl_state.backward_i = i;
265 }
266 return;
195d1361
RS
267 }
268 cnt++;
269 i = count > 0 ? next_interval (i) : previous_interval (i);
270 }
bb0de084
SM
271 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
272 if (count > 0)
273 gl_state.e_property = gl_state.stop;
274 else
275 gl_state.b_property = gl_state.start;
195d1361
RS
276}
277\f
6a140a74
RS
278/* Returns TRUE if char at CHARPOS is quoted.
279 Global syntax-table data should be set up already to be good at CHARPOS
280 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
281
282static int
6a140a74
RS
283char_quoted (charpos, bytepos)
284 register int charpos, bytepos;
195d1361
RS
285{
286 register enum syntaxcode code;
287 register int beg = BEGV;
288 register int quoted = 0;
6a140a74
RS
289 int orig = charpos;
290
291 DEC_BOTH (charpos, bytepos);
195d1361 292
6a140a74 293 while (bytepos >= beg)
195d1361 294 {
6a140a74
RS
295 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
296 code = SYNTAX (FETCH_CHAR (bytepos));
297 if (! (code == Scharquote || code == Sescape))
298 break;
299
300 DEC_BOTH (charpos, bytepos);
301 quoted = !quoted;
195d1361 302 }
6a140a74
RS
303
304 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
305 return quoted;
306}
6a140a74
RS
307
308/* Return the bytepos one character after BYTEPOS.
309 We assume that BYTEPOS is not at the end of the buffer. */
310
311INLINE int
312inc_bytepos (bytepos)
313 int bytepos;
314{
ef316cf0
RS
315 if (NILP (current_buffer->enable_multibyte_characters))
316 return bytepos + 1;
317
6a140a74
RS
318 INC_POS (bytepos);
319 return bytepos;
320}
321
322/* Return the bytepos one character before BYTEPOS.
323 We assume that BYTEPOS is not at the start of the buffer. */
324
325INLINE int
326dec_bytepos (bytepos)
327 int bytepos;
328{
ef316cf0
RS
329 if (NILP (current_buffer->enable_multibyte_characters))
330 return bytepos - 1;
331
6a140a74
RS
332 DEC_POS (bytepos);
333 return bytepos;
334}
195d1361 335\f
7cc80f0a
RS
336/* Return a defun-start position before before POS and not too far before.
337 It should be the last one before POS, or nearly the last.
338
339 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 340 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
341
342 We record the information about where the scan started
343 and what its result was, so that another call in the same area
344 can return the same value very quickly.
195d1361
RS
345
346 There is no promise at which position the global syntax data is
347 valid on return from the subroutine, so the caller should explicitly
348 update the global data. */
37bef230
RS
349
350static int
6a140a74
RS
351find_defun_start (pos, pos_byte)
352 int pos, pos_byte;
37bef230 353{
6a140a74 354 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 355
1fd1cc2f
RS
356 if (!open_paren_in_column_0_is_defun_start)
357 {
358 find_start_value_byte = BEGV_BYTE;
359 return BEGV;
360 }
361
37bef230
RS
362 /* Use previous finding, if it's valid and applies to this inquiry. */
363 if (current_buffer == find_start_buffer
364 /* Reuse the defun-start even if POS is a little farther on.
365 POS might be in the next defun, but that's ok.
366 Our value may not be the best possible, but will still be usable. */
367 && pos <= find_start_pos + 1000
368 && pos >= find_start_value
369 && BEGV == find_start_begv
370 && MODIFF == find_start_modiff)
371 return find_start_value;
372
373 /* Back up to start of line. */
6a140a74 374 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 375
195d1361
RS
376 /* We optimize syntax-table lookup for rare updates. Thus we accept
377 only those `^\s(' which are good in global _and_ text-property
378 syntax-tables. */
379 gl_state.current_syntax_table = current_buffer->syntax_table;
380 gl_state.use_global = 0;
1fd1cc2f 381 while (PT > BEGV)
37bef230 382 {
1fd1cc2f
RS
383 /* Open-paren at start of line means we may have found our
384 defun-start. */
385 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
195d1361 386 {
1fd1cc2f 387 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
4467eb94 388 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
1fd1cc2f
RS
389 break;
390 /* Now fallback to the default value. */
391 gl_state.current_syntax_table = current_buffer->syntax_table;
392 gl_state.use_global = 0;
195d1361 393 }
1fd1cc2f
RS
394 /* Move to beg of previous line. */
395 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
396 }
397
398 /* Record what we found, for the next try. */
6a140a74
RS
399 find_start_value = PT;
400 find_start_value_byte = PT_BYTE;
37bef230
RS
401 find_start_buffer = current_buffer;
402 find_start_modiff = MODIFF;
403 find_start_begv = BEGV;
404 find_start_pos = pos;
405
6a140a74
RS
406 TEMP_SET_PT_BOTH (opoint, opoint_byte);
407
37bef230
RS
408 return find_start_value;
409}
410\f
f902a008
RS
411/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
412
413static int
414prev_char_comend_first (pos, pos_byte)
415 int pos, pos_byte;
416{
417 int c, val;
418
419 DEC_BOTH (pos, pos_byte);
420 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
421 c = FETCH_CHAR (pos_byte);
422 val = SYNTAX_COMEND_FIRST (c);
423 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
424 return val;
425}
426
427/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
428
3f679f55
SM
429/* static int
430 * prev_char_comstart_first (pos, pos_byte)
431 * int pos, pos_byte;
432 * {
433 * int c, val;
7d0393cf 434 *
3f679f55
SM
435 * DEC_BOTH (pos, pos_byte);
436 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
437 * c = FETCH_CHAR (pos_byte);
438 * val = SYNTAX_COMSTART_FIRST (c);
439 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
440 * return val;
441 * } */
f902a008 442
6a140a74
RS
443/* Checks whether charpos FROM is at the end of a comment.
444 FROM_BYTE is the bytepos corresponding to FROM.
445 Do not move back before STOP.
446
447 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
448 return -1 otherwise.
449
450 If successful, store the charpos of the comment's beginning
451 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
452
453 Global syntax data remains valid for backward search starting at
454 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
455
456static int
95ff8dfc 457back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
6a140a74 458 int from, from_byte, stop;
95ff8dfc 459 int comnested, comstyle;
6a140a74 460 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
461{
462 /* Look back, counting the parity of string-quotes,
463 and recording the comment-starters seen.
464 When we reach a safe place, assume that's not in a string;
465 then step the main scan to the earliest comment-starter seen
466 an even number of string quotes away from the safe place.
467
468 OFROM[I] is position of the earliest comment-starter seen
469 which is I+2X quotes from the comment-end.
470 PARITY is current parity of quotes from the comment end. */
3ee5041c 471 int string_style = -1; /* Presumed outside of any string. */
195d1361 472 int string_lossage = 0;
f7c436c1 473 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 474 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
475 comment-starter we might see later could be a false positive (hidden
476 inside another comment).
477 Test case: { a (* b } c (* d *) */
478 int comment_lossage = 0;
195d1361 479 int comment_end = from;
6a140a74 480 int comment_end_byte = from_byte;
195d1361 481 int comstart_pos = 0;
6a140a74 482 int comstart_byte;
eb35b628
RS
483 /* Place where the containing defun starts,
484 or 0 if we didn't come across it yet. */
485 int defun_start = 0;
486 int defun_start_byte = 0;
195d1361 487 register enum syntaxcode code;
95ff8dfc 488 int nesting = 1; /* current comment nesting */
ea315ed6 489 int c;
3f679f55
SM
490 int syntax = 0;
491
492 /* FIXME: A }} comment-ender style leads to incorrect behavior
493 in the case of {{ c }}} because we ignore the last two chars which are
494 assumed to be comment-enders although they aren't. */
195d1361
RS
495
496 /* At beginning of range to scan, we're outside of strings;
497 that determines quote parity to the comment-end. */
498 while (from != stop)
499 {
3f679f55
SM
500 int temp_byte, prev_syntax;
501 int com2start, com2end;
6a140a74 502
195d1361 503 /* Move back and examine a character. */
6a140a74 504 DEC_BOTH (from, from_byte);
195d1361
RS
505 UPDATE_SYNTAX_TABLE_BACKWARD (from);
506
3f679f55 507 prev_syntax = syntax;
6a140a74 508 c = FETCH_CHAR (from_byte);
3f679f55 509 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
510 code = SYNTAX (c);
511
3f679f55
SM
512 /* Check for 2-char comment markers. */
513 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
514 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
515 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
516 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
517 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
518 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
519 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
520
521 /* Nasty cases with overlapping 2-char comment markers:
522 - snmp-mode: -- c -- foo -- c --
523 --- c --
524 ------ c --
525 - c-mode: *||*
526 |* *|* *|
527 |*| |* |*|
528 /// */
529
530 /* If a 2-char comment sequence partly overlaps with another,
531 we don't try to be clever. */
532 if (from > stop && (com2end || com2start))
195d1361 533 {
3f679f55
SM
534 int next = from, next_byte = from_byte, next_c, next_syntax;
535 DEC_BOTH (next, next_byte);
536 UPDATE_SYNTAX_TABLE_BACKWARD (next);
537 next_c = FETCH_CHAR (next_byte);
538 next_syntax = SYNTAX_WITH_FLAGS (next_c);
539 if (((com2start || comnested)
540 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
541 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
542 || ((com2end || comnested)
543 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
544 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
545 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
546 goto lossage;
547 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 548 }
3f679f55
SM
549
550 if (com2start && comstart_pos == 0)
551 /* We're looking at a comment starter. But it might be a comment
552 ender as well (see snmp-mode). The first time we see one, we
553 need to consider it as a comment starter,
554 and the subsequent times as a comment ender. */
555 com2end = 0;
556
557 /* Turn a 2-char comment sequences into the appropriate syntax. */
558 if (com2end)
559 code = Sendcomment;
560 else if (com2start)
561 code = Scomment;
562 /* Ignore comment starters of a different style. */
563 else if (code == Scomment
564 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
565 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 566 continue;
195d1361 567
9828a477
KH
568 /* Ignore escaped characters, except comment-enders. */
569 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
570 continue;
571
3ee5041c 572 switch (code)
195d1361 573 {
3ee5041c
SM
574 case Sstring_fence:
575 case Scomment_fence:
576 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
577 case Sstring:
578 /* Track parity of quotes. */
579 if (string_style == -1)
580 /* Entering a string. */
581 string_style = c;
582 else if (string_style == c)
583 /* Leaving the string. */
584 string_style = -1;
585 else
586 /* If we have two kinds of string delimiters.
587 There's no way to grok this scanning backwards. */
195d1361 588 string_lossage = 1;
3ee5041c 589 break;
7d0393cf 590
3ee5041c
SM
591 case Scomment:
592 /* We've already checked that it is the relevant comstyle. */
f7c436c1 593 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
594 /* There are odd string quotes involved, so let's be careful.
595 Test case in Pascal: " { " a { " } */
596 goto lossage;
597
f7c436c1
SM
598 if (!comnested)
599 {
600 /* Record best comment-starter so far. */
601 comstart_pos = from;
602 comstart_byte = from_byte;
603 }
604 else if (--nesting <= 0)
95ff8dfc
RS
605 /* nested comments have to be balanced, so we don't need to
606 keep looking for earlier ones. We use here the same (slightly
607 incorrect) reasoning as below: since it is followed by uniform
608 paired string quotes, this comment-start has to be outside of
609 strings, else the comment-end itself would be inside a string. */
610 goto done;
3ee5041c
SM
611 break;
612
02010917 613 case Sendcomment:
3f679f55 614 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 615 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 616 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
617 /* This is the same style of comment ender as ours. */
618 {
619 if (comnested)
620 nesting++;
621 else
622 /* Anything before that can't count because it would match
623 this comment-ender rather than ours. */
624 from = stop; /* Break out of the loop. */
625 }
f7c436c1
SM
626 else if (comstart_pos != 0 || c != '\n')
627 /* We're mixing comment styles here, so we'd better be careful.
628 The (comstart_pos != 0 || c != '\n') check is not quite correct
629 (we should just always set comment_lossage), but removing it
630 would imply that any multiline comment in C would go through
631 lossage, which seems overkill.
632 The failure should only happen in the rare cases such as
633 { (* } *) */
634 comment_lossage = 1;
02010917 635 break;
195d1361 636
02010917
SM
637 case Sopen:
638 /* Assume a defun-start point is outside of strings. */
639 if (open_paren_in_column_0_is_defun_start
640 && (from == stop
641 || (temp_byte = dec_bytepos (from_byte),
642 FETCH_CHAR (temp_byte) == '\n')))
643 {
644 defun_start = from;
645 defun_start_byte = from_byte;
646 from = stop; /* Break out of the loop. */
647 }
eb35b628 648 break;
02010917
SM
649
650 default:
f7c436c1 651 break;
eb35b628 652 }
195d1361
RS
653 }
654
655 if (comstart_pos == 0)
656 {
657 from = comment_end;
6a140a74 658 from_byte = comment_end_byte;
195d1361
RS
659 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
660 }
f7c436c1
SM
661 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
662 or `done'), then we've found the beginning of the non-nested comment. */
663 else if (1) /* !comnested */
195d1361
RS
664 {
665 from = comstart_pos;
6a140a74 666 from_byte = comstart_byte;
195d1361
RS
667 /* Globals are correct now. */
668 }
669 else
670 {
3ee5041c
SM
671 struct lisp_parse_state state;
672 lossage:
195d1361
RS
673 /* We had two kinds of string delimiters mixed up
674 together. Decode this going forwards.
02010917 675 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
676 to the one in question; this records where we
677 last passed a comment starter. */
eb35b628
RS
678 /* If we did not already find the defun start, find it now. */
679 if (defun_start == 0)
680 {
681 defun_start = find_defun_start (comment_end, comment_end_byte);
682 defun_start_byte = find_start_value_byte;
683 }
02010917 684 do
195d1361 685 {
02010917
SM
686 scan_sexps_forward (&state,
687 defun_start, defun_start_byte,
688 comment_end, -10000, 0, Qnil, 0);
689 defun_start = comment_end;
690 if (state.incomment == (comnested ? 1 : -1)
691 && state.comstyle == comstyle)
692 from = state.comstr_start;
693 else
694 {
695 from = comment_end;
696 if (state.incomment)
697 /* If comment_end is inside some other comment, maybe ours
698 is nested, so we need to try again from within the
699 surrounding comment. Example: { a (* " *) */
700 {
701 /* FIXME: We should advance by one or two chars. */
702 defun_start = state.comstr_start + 2;
703 defun_start_byte = CHAR_TO_BYTE (defun_start);
704 }
705 }
706 } while (defun_start < comment_end);
707
6a140a74 708 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
709 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
710 }
7d0393cf 711
95ff8dfc 712 done:
6a140a74
RS
713 *charpos_ptr = from;
714 *bytepos_ptr = from_byte;
715
1aa963c8 716 return (from == comment_end) ? -1 : from;
195d1361
RS
717}
718\f
8489eb67 719DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
720 doc: /* Return t if OBJECT is a syntax table.
721Currently, any char-table counts as a syntax table. */)
722 (object)
2203e1e8 723 Lisp_Object object;
8489eb67 724{
2203e1e8 725 if (CHAR_TABLE_P (object)
e704cb4b 726 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
727 return Qt;
728 return Qnil;
729}
730
8ea151b2 731static void
8489eb67
RS
732check_syntax_table (obj)
733 Lisp_Object obj;
734{
d1be9f0f 735 if (!(CHAR_TABLE_P (obj)
e704cb4b 736 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 737 wrong_type_argument (Qsyntax_table_p, obj);
7d0393cf 738}
8489eb67 739
8489eb67 740DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
741 doc: /* Return the current syntax table.
742This is the one specified by the current buffer. */)
743 ()
8489eb67
RS
744{
745 return current_buffer->syntax_table;
746}
747
748DEFUN ("standard-syntax-table", Fstandard_syntax_table,
749 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
750 doc: /* Return the standard syntax table.
751This is the one used for new buffers. */)
752 ()
8489eb67
RS
753{
754 return Vstandard_syntax_table;
755}
756
757DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
758 doc: /* Construct a new syntax table and return it.
759It is a copy of the TABLE, which defaults to the standard syntax table. */)
760 (table)
8489eb67
RS
761 Lisp_Object table;
762{
8ea151b2
RS
763 Lisp_Object copy;
764
265a9e55 765 if (!NILP (table))
8ea151b2
RS
766 check_syntax_table (table);
767 else
768 table = Vstandard_syntax_table;
769
770 copy = Fcopy_sequence (table);
0f867324
RS
771
772 /* Only the standard syntax table should have a default element.
773 Other syntax tables should inherit from parents instead. */
774 XCHAR_TABLE (copy)->defalt = Qnil;
775
776 /* Copied syntax tables should all have parents.
777 If we copied one with no parent, such as the standard syntax table,
778 use the standard syntax table as the copy's parent. */
779 if (NILP (XCHAR_TABLE (copy)->parent))
780 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 781 return copy;
8489eb67
RS
782}
783
784DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
785 doc: /* Select a new syntax table for the current buffer.
786One argument, a syntax table. */)
787 (table)
8489eb67
RS
788 Lisp_Object table;
789{
4e4dfa78 790 int idx;
8ea151b2 791 check_syntax_table (table);
8489eb67
RS
792 current_buffer->syntax_table = table;
793 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
794 idx = PER_BUFFER_VAR_IDX (syntax_table);
795 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
796 return table;
797}
798\f
799/* Convert a letter which signifies a syntax code
800 into the code it signifies.
195d1361 801 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
802
803unsigned char syntax_spec_code[0400] =
804 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
805 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
806 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
807 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 808 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
809 (char) Smath, 0377, 0377, (char) Squote,
810 (char) Sopen, (char) Sclose, 0377, 0377,
811 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
812 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
813 0377, 0377, 0377, 0377,
814 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 815 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
818 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
819 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
820 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 822 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
823 };
824
195d1361 825/* Indexed by syntax code, give the letter that describes it. */
8489eb67 826
195d1361 827char syntax_code_spec[16] =
8489eb67 828 {
195d1361
RS
829 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
830 '!', '|'
8489eb67 831 };
93da5fff
KH
832
833/* Indexed by syntax code, give the object (cons of syntax code and
834 nil) to be stored in syntax table. Since these objects can be
835 shared among syntax tables, we generate them in advance. By
836 sharing objects, the function `describe-syntax' can give a more
837 compact listing. */
838static Lisp_Object Vsyntax_code_object;
839
8489eb67 840\f
8ea151b2
RS
841/* Look up the value for CHARACTER in syntax table TABLE's parent
842 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
843 for CHARACTER. It's actually used only when not compiled with GCC. */
844
845Lisp_Object
846syntax_parent_lookup (table, character)
847 Lisp_Object table;
848 int character;
849{
850 Lisp_Object value;
851
852 while (1)
853 {
854 table = XCHAR_TABLE (table)->parent;
855 if (NILP (table))
856 return Qnil;
857
858 value = XCHAR_TABLE (table)->contents[character];
859 if (!NILP (value))
860 return value;
861 }
862}
863
8489eb67 864DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
865 doc: /* Return the syntax code of CHARACTER, described by a character.
866For example, if CHARACTER is a word constituent,
867the character `w' is returned.
868The characters that correspond to various syntax codes
869are listed in the documentation of `modify-syntax-entry'. */)
870 (character)
2203e1e8 871 Lisp_Object character;
8489eb67 872{
8ea151b2 873 int char_int;
195d1361
RS
874 gl_state.current_syntax_table = current_buffer->syntax_table;
875
876 gl_state.use_global = 0;
b7826503 877 CHECK_NUMBER (character);
2203e1e8 878 char_int = XINT (character);
8ea151b2 879 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
880}
881
882DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
883 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
884 (character)
2203e1e8 885 Lisp_Object character;
beefa22e 886{
8ea151b2 887 int char_int, code;
195d1361
RS
888 gl_state.current_syntax_table = current_buffer->syntax_table;
889 gl_state.use_global = 0;
b7826503 890 CHECK_NUMBER (character);
2203e1e8 891 char_int = XINT (character);
8ea151b2 892 code = SYNTAX (char_int);
a8bd7cd8 893 if (code == Sopen || code == Sclose)
2e34157c 894 return SYNTAX_MATCH (char_int);
beefa22e 895 return Qnil;
8489eb67
RS
896}
897
c65adb44 898DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
899 doc: /* Convert a syntax specification STRING into syntax cell form.
900STRING should be a string as it is allowed as argument of
901`modify-syntax-entry'. Value is the equivalent cons cell
902(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
903text property. */)
904 (string)
c65adb44 905 Lisp_Object string;
8489eb67 906{
2e567bd3 907 register const unsigned char *p;
8489eb67 908 register enum syntaxcode code;
247e20a8 909 int val;
8ea151b2 910 Lisp_Object match;
8489eb67 911
b7826503 912 CHECK_STRING (string);
8489eb67 913
d5db4077 914 p = SDATA (string);
8489eb67
RS
915 code = (enum syntaxcode) syntax_spec_code[*p++];
916 if (((int) code & 0377) == 0377)
2e34157c 917 error ("invalid syntax description letter: %c", p[-1]);
8489eb67 918
8ea151b2 919 if (code == Sinherit)
c65adb44 920 return Qnil;
8ea151b2
RS
921
922 if (*p)
d1be9f0f 923 {
93da5fff 924 int len;
7c19e8e1 925 int character = (STRING_CHAR_AND_LENGTH
d5db4077 926 (p, SBYTES (string) - 1, len));
93da5fff 927 XSETINT (match, character);
d1be9f0f
RS
928 if (XFASTINT (match) == ' ')
929 match = Qnil;
93da5fff 930 p += len;
d1be9f0f
RS
931 }
932 else
8ea151b2 933 match = Qnil;
8489eb67 934
8ea151b2 935 val = (int) code;
8489eb67
RS
936 while (*p)
937 switch (*p++)
938 {
939 case '1':
247e20a8 940 val |= 1 << 16;
8489eb67
RS
941 break;
942
943 case '2':
247e20a8 944 val |= 1 << 17;
8489eb67
RS
945 break;
946
947 case '3':
247e20a8 948 val |= 1 << 18;
8489eb67
RS
949 break;
950
951 case '4':
247e20a8 952 val |= 1 << 19;
8489eb67
RS
953 break;
954
955 case 'p':
247e20a8 956 val |= 1 << 20;
8489eb67 957 break;
e5d4f4dc
RS
958
959 case 'b':
247e20a8 960 val |= 1 << 21;
e5d4f4dc 961 break;
95ff8dfc
RS
962
963 case 'n':
964 val |= 1 << 22;
965 break;
8489eb67 966 }
7d0393cf 967
93da5fff 968 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 969 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
970 else
971 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
972 return Fcons (make_number (val), match);
973}
974
fdb82f93 975/* I really don't know why this is interactive
2c9e1900 976 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 977DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 978 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 979 doc: /* Set syntax for character CHAR according to string NEWENTRY.
fdb82f93
PJ
980The syntax is changed only for table SYNTAX_TABLE, which defaults to
981 the current buffer's syntax table.
982The first character of NEWENTRY should be one of the following:
983 Space or - whitespace syntax. w word constituent.
984 _ symbol constituent. . punctuation.
985 ( open-parenthesis. ) close-parenthesis.
986 " string quote. \\ escape.
987 $ paired delimiter. ' expression quote or prefix operator.
988 < comment starter. > comment ender.
989 / character-quote. @ inherit from `standard-syntax-table'.
990 | generic string fence. ! generic comment fence.
991
992Only single-character comment start and end sequences are represented thus.
993Two-character sequences are represented as described below.
994The second character of NEWENTRY is the matching parenthesis,
995 used only if the first character is `(' or `)'.
996Any additional characters are flags.
997Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
998 1 means CHAR is the start of a two-char comment start sequence.
999 2 means CHAR is the second character of such a sequence.
1000 3 means CHAR is the start of a two-char comment end sequence.
1001 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1002
1003There can be up to two orthogonal comment sequences. This is to support
1004language modes such as C++. By default, all comment sequences are of style
1005a, but you can set the comment sequence style to b (on the second character
1006of a comment-start, or the first character of a comment-end sequence) using
1007this flag:
a50a10a0
PJ
1008 b means CHAR is part of comment sequence b.
1009 n means CHAR is part of a nestable comment sequence.
fdb82f93 1010
a50a10a0 1011 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1012 such characters are treated as whitespace when they occur
a50a10a0
PJ
1013 between expressions.
1014usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1015 (c, newentry, syntax_table)
c65adb44
SM
1016 Lisp_Object c, newentry, syntax_table;
1017{
b7826503 1018 CHECK_NUMBER (c);
c65adb44
SM
1019
1020 if (NILP (syntax_table))
1021 syntax_table = current_buffer->syntax_table;
1022 else
1023 check_syntax_table (syntax_table);
8489eb67 1024
c65adb44 1025 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
8489eb67
RS
1026 return Qnil;
1027}
1028\f
1029/* Dump syntax table to buffer in human-readable format */
1030
62abe9cb
SM
1031DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1032 Sinternal_describe_syntax_value, 1, 1, 0,
1033 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1034 (syntax)
1035 Lisp_Object syntax;
8489eb67
RS
1036{
1037 register enum syntaxcode code;
e6365855 1038 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1039 char str[2];
62abe9cb 1040 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1041
8ea151b2
RS
1042 if (NILP (value))
1043 {
62abe9cb
SM
1044 insert_string ("default");
1045 return syntax;
8ea151b2
RS
1046 }
1047
908b7fea
KH
1048 if (CHAR_TABLE_P (value))
1049 {
62abe9cb
SM
1050 insert_string ("deeper char-table ...");
1051 return syntax;
908b7fea
KH
1052 }
1053
8ea151b2
RS
1054 if (!CONSP (value))
1055 {
62abe9cb
SM
1056 insert_string ("invalid");
1057 return syntax;
8ea151b2
RS
1058 }
1059
c1d497be
KR
1060 first = XCAR (value);
1061 match_lisp = XCDR (value);
8ea151b2
RS
1062
1063 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1064 {
62abe9cb
SM
1065 insert_string ("invalid");
1066 return syntax;
8489eb67
RS
1067 }
1068
e704cb4b 1069 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1070 start1 = (XINT (first) >> 16) & 1;
1071 start2 = (XINT (first) >> 17) & 1;
1072 end1 = (XINT (first) >> 18) & 1;
1073 end2 = (XINT (first) >> 19) & 1;
1074 prefix = (XINT (first) >> 20) & 1;
1075 comstyle = (XINT (first) >> 21) & 1;
e6365855 1076 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1077
1078 if ((int) code < 0 || (int) code >= (int) Smax)
1079 {
1080 insert_string ("invalid");
62abe9cb 1081 return syntax;
8489eb67
RS
1082 }
1083 desc = syntax_code_spec[(int) code];
1084
1085 str[0] = desc, str[1] = 0;
1086 insert (str, 1);
1087
93da5fff
KH
1088 if (NILP (match_lisp))
1089 insert (" ", 1);
1090 else
1091 insert_char (XINT (match_lisp));
8489eb67 1092
8489eb67
RS
1093 if (start1)
1094 insert ("1", 1);
1095 if (start2)
1096 insert ("2", 1);
1097
1098 if (end1)
1099 insert ("3", 1);
1100 if (end2)
1101 insert ("4", 1);
1102
1103 if (prefix)
1104 insert ("p", 1);
e5d4f4dc
RS
1105 if (comstyle)
1106 insert ("b", 1);
e6365855
SM
1107 if (comnested)
1108 insert ("n", 1);
8489eb67
RS
1109
1110 insert_string ("\twhich means: ");
1111
0220c518 1112 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1113 {
1114 case Swhitespace:
1115 insert_string ("whitespace"); break;
1116 case Spunct:
1117 insert_string ("punctuation"); break;
1118 case Sword:
1119 insert_string ("word"); break;
1120 case Ssymbol:
1121 insert_string ("symbol"); break;
1122 case Sopen:
1123 insert_string ("open"); break;
1124 case Sclose:
1125 insert_string ("close"); break;
1126 case Squote:
d671382e 1127 insert_string ("prefix"); break;
8489eb67
RS
1128 case Sstring:
1129 insert_string ("string"); break;
1130 case Smath:
1131 insert_string ("math"); break;
1132 case Sescape:
1133 insert_string ("escape"); break;
1134 case Scharquote:
1135 insert_string ("charquote"); break;
1136 case Scomment:
1137 insert_string ("comment"); break;
1138 case Sendcomment:
1139 insert_string ("endcomment"); break;
d671382e
SM
1140 case Sinherit:
1141 insert_string ("inherit"); break;
1142 case Scomment_fence:
1143 insert_string ("comment fence"); break;
1144 case Sstring_fence:
1145 insert_string ("string fence"); break;
8489eb67
RS
1146 default:
1147 insert_string ("invalid");
62abe9cb 1148 return syntax;
8489eb67
RS
1149 }
1150
8ea151b2 1151 if (!NILP (match_lisp))
8489eb67
RS
1152 {
1153 insert_string (", matches ");
8ea151b2 1154 insert_char (XINT (match_lisp));
8489eb67
RS
1155 }
1156
1157 if (start1)
1158 insert_string (",\n\t is the first character of a comment-start sequence");
1159 if (start2)
1160 insert_string (",\n\t is the second character of a comment-start sequence");
1161
1162 if (end1)
1163 insert_string (",\n\t is the first character of a comment-end sequence");
1164 if (end2)
1165 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1166 if (comstyle)
1167 insert_string (" (comment style b)");
e6365855
SM
1168 if (comnested)
1169 insert_string (" (nestable)");
e5d4f4dc 1170
8489eb67
RS
1171 if (prefix)
1172 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1173
62abe9cb
SM
1174 return syntax;
1175}
8489eb67 1176\f
195d1361
RS
1177int parse_sexp_ignore_comments;
1178
8489eb67
RS
1179/* Return the position across COUNT words from FROM.
1180 If that many words cannot be found before the end of the buffer, return 0.
1181 COUNT negative means scan backward and stop at word beginning. */
1182
dfcf069d 1183int
8489eb67
RS
1184scan_words (from, count)
1185 register int from, count;
1186{
1187 register int beg = BEGV;
1188 register int end = ZV;
6a140a74 1189 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1190 register enum syntaxcode code;
1191 int ch0, ch1;
8489eb67
RS
1192
1193 immediate_quit = 1;
1194 QUIT;
1195
195d1361
RS
1196 SETUP_SYNTAX_TABLE (from, count);
1197
8489eb67
RS
1198 while (count > 0)
1199 {
1200 while (1)
1201 {
1202 if (from == end)
1203 {
1204 immediate_quit = 0;
1205 return 0;
1206 }
195d1361 1207 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1208 ch0 = FETCH_CHAR (from_byte);
93da5fff 1209 code = SYNTAX (ch0);
6a140a74 1210 INC_BOTH (from, from_byte);
8489eb67
RS
1211 if (words_include_escapes
1212 && (code == Sescape || code == Scharquote))
1213 break;
1214 if (code == Sword)
1215 break;
8489eb67 1216 }
93da5fff
KH
1217 /* Now CH0 is a character which begins a word and FROM is the
1218 position of the next character. */
8489eb67
RS
1219 while (1)
1220 {
1221 if (from == end) break;
195d1361 1222 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1223 ch1 = FETCH_CHAR (from_byte);
93da5fff 1224 code = SYNTAX (ch1);
8489eb67
RS
1225 if (!(words_include_escapes
1226 && (code == Sescape || code == Scharquote)))
93da5fff 1227 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1228 break;
6a140a74 1229 INC_BOTH (from, from_byte);
93da5fff 1230 ch0 = ch1;
8489eb67
RS
1231 }
1232 count--;
1233 }
1234 while (count < 0)
1235 {
1236 while (1)
1237 {
1238 if (from == beg)
1239 {
1240 immediate_quit = 0;
1241 return 0;
1242 }
6a140a74 1243 DEC_BOTH (from, from_byte);
195d1361 1244 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1245 ch1 = FETCH_CHAR (from_byte);
93da5fff 1246 code = SYNTAX (ch1);
8489eb67
RS
1247 if (words_include_escapes
1248 && (code == Sescape || code == Scharquote))
1249 break;
1250 if (code == Sword)
1251 break;
8489eb67 1252 }
93da5fff
KH
1253 /* Now CH1 is a character which ends a word and FROM is the
1254 position of it. */
8489eb67
RS
1255 while (1)
1256 {
6a140a74
RS
1257 int temp_byte;
1258
1259 if (from == beg)
1260 break;
1261 temp_byte = dec_bytepos (from_byte);
195d1361 1262 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1263 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1264 code = SYNTAX (ch0);
8489eb67
RS
1265 if (!(words_include_escapes
1266 && (code == Sescape || code == Scharquote)))
93da5fff 1267 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1268 break;
6a140a74 1269 DEC_BOTH (from, from_byte);
93da5fff 1270 ch1 = ch0;
8489eb67
RS
1271 }
1272 count++;
1273 }
1274
1275 immediate_quit = 0;
1276
1277 return from;
1278}
1279
463f5630 1280DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
fdb82f93
PJ
1281 doc: /* Move point forward ARG words (backward if ARG is negative).
1282Normally returns t.
1283If an edge of the buffer or a field boundary is reached, point is left there
1284and the function returns nil. Field boundaries are not noticed if
1285`inhibit-field-text-motion' is non-nil. */)
463f5630
KH
1286 (count)
1287 Lisp_Object count;
8489eb67 1288{
2c6ea900 1289 int orig_val, val;
463f5630 1290 CHECK_NUMBER (count);
8489eb67 1291
463f5630 1292 val = orig_val = scan_words (PT, XINT (count));
2c6ea900 1293 if (! orig_val)
463f5630 1294 val = XINT (count) > 0 ? ZV : BEGV;
5878ee6f 1295
b8855607 1296 /* Avoid jumping out of an input field. */
64aa4eb1 1297 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
070fe99c 1298 Qt, Qnil, Qnil));
7d0393cf 1299
8489eb67 1300 SET_PT (val);
e6d8341f 1301 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1302}
1303\f
195d1361
RS
1304Lisp_Object skip_chars ();
1305
1306DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1307 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1308STRING is like the inside of a `[...]' in a regular expression
1309except that `]' is never special and `\\' quotes `^', `-' or `\\'
1310 (but not as the end of a range; quoting is never needed there).
1311Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1312With arg "^a-zA-Z", skips nonletters stopping before first letter.
b222e415
DL
1313Returns the distance traveled, either zero or positive.
1314Note that char classes, e.g. `[:alpha:]', are not currently supported;
1315they will be treated as literals. */)
fdb82f93 1316 (string, lim)
195d1361
RS
1317 Lisp_Object string, lim;
1318{
1319 return skip_chars (1, 0, string, lim);
1320}
1321
1322DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1323 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1324See `skip-chars-forward' for details.
1325Returns the distance traveled, either zero or negative. */)
1326 (string, lim)
195d1361
RS
1327 Lisp_Object string, lim;
1328{
1329 return skip_chars (0, 0, string, lim);
1330}
1331
1332DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1333 doc: /* Move point forward across chars in specified syntax classes.
1334SYNTAX is a string of syntax code characters.
1335Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1336If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1337This function returns the distance traveled, either zero or positive. */)
1338 (syntax, lim)
195d1361
RS
1339 Lisp_Object syntax, lim;
1340{
1341 return skip_chars (1, 1, syntax, lim);
1342}
1343
1344DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1345 doc: /* Move point backward across chars in specified syntax classes.
1346SYNTAX is a string of syntax code characters.
1347Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1348If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1349This function returns the distance traveled, either zero or negative. */)
1350 (syntax, lim)
195d1361
RS
1351 Lisp_Object syntax, lim;
1352{
1353 return skip_chars (0, 1, syntax, lim);
1354}
1355
6a140a74 1356static Lisp_Object
195d1361
RS
1357skip_chars (forwardp, syntaxp, string, lim)
1358 int forwardp, syntaxp;
1359 Lisp_Object string, lim;
1360{
195d1361
RS
1361 register unsigned int c;
1362 unsigned char fastmap[0400];
7e68b0ea 1363 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1364 of which codes don't fit in FASTMAP. In that case, set the
1365 ranges of characters in CHAR_RANGES. */
5df4982e 1366 int *char_ranges;
7e68b0ea 1367 int n_char_ranges = 0;
195d1361 1368 int negate = 0;
4101e6fe 1369 register int i, i_byte;
7e68b0ea 1370 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1371 int string_multibyte;
1372 int size_byte;
2e567bd3 1373 const unsigned char *str;
82d497fc 1374 int len;
195d1361 1375
b7826503 1376 CHECK_STRING (string);
d5db4077 1377 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1674d9a2 1378 string_multibyte = STRING_MULTIBYTE (string);
d5db4077
KR
1379 str = SDATA (string);
1380 size_byte = SBYTES (string);
195d1361 1381
82d497fc
KH
1382 /* Adjust the multibyteness of the string to that of the buffer. */
1383 if (multibyte != string_multibyte)
1384 {
1385 int nbytes;
1386
1387 if (multibyte)
d5db4077
KR
1388 nbytes = count_size_as_multibyte (SDATA (string),
1389 SCHARS (string));
82d497fc 1390 else
d5db4077 1391 nbytes = SCHARS (string);
82d497fc
KH
1392 if (nbytes != size_byte)
1393 {
2e567bd3
KR
1394 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1395 copy_text (SDATA (string), tmp, size_byte,
82d497fc
KH
1396 string_multibyte, multibyte);
1397 size_byte = nbytes;
2e567bd3 1398 str = tmp;
82d497fc
KH
1399 }
1400 }
1401
195d1361
RS
1402 if (NILP (lim))
1403 XSETINT (lim, forwardp ? ZV : BEGV);
1404 else
b7826503 1405 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1406
1407 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1408 if (XINT (lim) > ZV)
1409 XSETFASTINT (lim, ZV);
1410 if (XINT (lim) < BEGV)
1411 XSETFASTINT (lim, BEGV);
1412
195d1361
RS
1413 bzero (fastmap, sizeof fastmap);
1414
82d497fc 1415 i_byte = 0;
4101e6fe 1416
13090112 1417 if (i_byte < size_byte
d5db4077 1418 && SREF (string, 0) == '^')
195d1361 1419 {
82d497fc 1420 negate = 1; i_byte++;
195d1361
RS
1421 }
1422
1423 /* Find the characters specified and set their elements of fastmap.
1424 If syntaxp, each character counts as itself.
1425 Otherwise, handle backslashes and ranges specially. */
1426
13090112 1427 while (i_byte < size_byte)
195d1361 1428 {
82d497fc
KH
1429 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1430 i_byte += len;
4101e6fe 1431
195d1361 1432 if (syntaxp)
4101e6fe 1433 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1434 else
1435 {
1436 if (c == '\\')
1437 {
13090112 1438 if (i_byte == size_byte)
4101e6fe
RS
1439 break;
1440
82d497fc
KH
1441 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1442 i_byte += len;
195d1361 1443 }
13090112 1444 if (i_byte < size_byte
82d497fc 1445 && str[i_byte] == '-')
195d1361 1446 {
9690d026 1447 unsigned int c2;
4101e6fe
RS
1448
1449 /* Skip over the dash. */
82d497fc 1450 i_byte++;
4101e6fe 1451
13090112 1452 if (i_byte == size_byte)
4101e6fe
RS
1453 break;
1454
1455 /* Get the end of the range. */
82d497fc
KH
1456 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1457 i_byte += len;
7e68b0ea 1458
4101e6fe 1459 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1460 {
1461 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1462 {
7ae339a7
KH
1463 /* Handle a range starting with a character of
1464 less than 256, and ending with a character of
1465 not less than 256. Split that into two
1466 ranges, the low one ending at 0377, and the
1467 high one starting at the smallest character
1468 in the charset of C2 and ending at C2. */
e39091b6
KH
1469 int charset = CHAR_CHARSET (c2);
1470 int c1 = MAKE_CHAR (charset, 0, 0);
1471
e39091b6
KH
1472 char_ranges[n_char_ranges++] = c1;
1473 char_ranges[n_char_ranges++] = c2;
465cf84c 1474 c2 = 0377;
e39091b6 1475 }
06274af5
KH
1476 while (c <= c2)
1477 {
1478 fastmap[c] = 1;
1479 c++;
1480 }
1481 }
9690d026 1482 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1483 {
9690d026
KH
1484 char_ranges[n_char_ranges++] = c;
1485 char_ranges[n_char_ranges++] = c2;
195d1361 1486 }
195d1361
RS
1487 }
1488 else
7e68b0ea 1489 {
e39091b6
KH
1490 if (SINGLE_BYTE_CHAR_P (c))
1491 fastmap[c] = 1;
1492 else
7e68b0ea 1493 {
4101e6fe
RS
1494 char_ranges[n_char_ranges++] = c;
1495 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1496 }
1497 }
195d1361
RS
1498 }
1499 }
1500
9690d026 1501 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1502 if (negate)
1503 for (i = 0; i < sizeof fastmap; i++)
9690d026 1504 fastmap[i] ^= 1;
195d1361
RS
1505
1506 {
1507 int start_point = PT;
1508 int pos = PT;
6a140a74 1509 int pos_byte = PT_BYTE;
9af7511a
KH
1510 unsigned char *p = PT_ADDR, *endp, *stop;
1511
1512 if (forwardp)
1513 {
1514 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1515 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1516 }
1517 else
1518 {
1519 endp = CHAR_POS_ADDR (XINT (lim));
1520 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1521 }
195d1361
RS
1522
1523 immediate_quit = 1;
1524 if (syntaxp)
1525 {
1526 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1527 if (forwardp)
1528 {
7e68b0ea 1529 if (multibyte)
9af7511a
KH
1530 while (1)
1531 {
1532 int nbytes;
1533
1534 if (p >= stop)
ee0cdb48 1535 {
9af7511a
KH
1536 if (p >= endp)
1537 break;
1538 p = GAP_END_ADDR;
1539 stop = endp;
ee0cdb48 1540 }
9af7511a
KH
1541 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1542 if (! fastmap[(int) SYNTAX (c)])
1543 break;
1544 p += nbytes, pos++, pos_byte += nbytes;
1545 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1546 }
7e68b0ea 1547 else
9af7511a
KH
1548 while (1)
1549 {
1550 if (p >= stop)
1551 {
1552 if (p >= endp)
1553 break;
1554 p = GAP_END_ADDR;
1555 stop = endp;
1556 }
1557 if (! fastmap[(int) SYNTAX (*p)])
1558 break;
1559 p++, pos++;
1560 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1561 }
195d1361
RS
1562 }
1563 else
1564 {
7e68b0ea 1565 if (multibyte)
9af7511a
KH
1566 while (1)
1567 {
1568 unsigned char *prev_p;
1569 int nbytes;
1570
1571 if (p <= stop)
1572 {
1573 if (p <= endp)
7e68b0ea 1574 break;
9af7511a
KH
1575 p = GPT_ADDR;
1576 stop = endp;
1577 }
1578 prev_p = p;
1579 while (--p >= stop && ! CHAR_HEAD_P (*p));
1580 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1581 if (prev_p - p > nbytes)
1582 p = prev_p - 1, c = *p, nbytes = 1;
1583 else
1584 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1585 pos--, pos_byte -= nbytes;
1586 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1587 if (! fastmap[(int) SYNTAX (c)])
1588 {
1589 pos++;
1590 pos_byte += nbytes;
1591 break;
1592 }
1593 }
7e68b0ea 1594 else
9af7511a
KH
1595 while (1)
1596 {
1597 if (p <= stop)
ee0cdb48 1598 {
9af7511a 1599 if (p <= endp)
4c8535eb 1600 break;
9af7511a
KH
1601 p = GPT_ADDR;
1602 stop = endp;
ee0cdb48 1603 }
9af7511a
KH
1604 if (! fastmap[(int) SYNTAX (p[-1])])
1605 break;
1606 p--, pos--;
1607 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1608 }
195d1361
RS
1609 }
1610 }
1611 else
1612 {
1613 if (forwardp)
1614 {
7e68b0ea 1615 if (multibyte)
9af7511a 1616 while (1)
7e68b0ea 1617 {
9af7511a
KH
1618 int nbytes;
1619
1620 if (p >= stop)
1621 {
1622 if (p >= endp)
1623 break;
1624 p = GAP_END_ADDR;
1625 stop = endp;
1626 }
1627 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
9690d026 1628 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1629 {
9690d026
KH
1630 if (!fastmap[c])
1631 break;
1632 }
1633 else
1634 {
1635 /* If we are looking at a multibyte character,
1636 we must look up the character in the table
1637 CHAR_RANGES. If there's no data in the
1638 table, that character is not what we want to
1639 skip. */
1640
e39091b6
KH
1641 /* The following code do the right thing even if
1642 n_char_ranges is zero (i.e. no data in
1643 CHAR_RANGES). */
7e68b0ea 1644 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1645 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1646 break;
1647 if (!(negate ^ (i < n_char_ranges)))
1648 break;
7e68b0ea 1649 }
9af7511a 1650 p += nbytes, pos++, pos_byte += nbytes;
7e68b0ea
RS
1651 }
1652 else
9af7511a
KH
1653 while (1)
1654 {
1655 if (p >= stop)
1656 {
1657 if (p >= endp)
1658 break;
1659 p = GAP_END_ADDR;
1660 stop = endp;
1661 }
4e837cd0 1662 if (!fastmap[*p])
9af7511a
KH
1663 break;
1664 p++, pos++;
1665 }
195d1361
RS
1666 }
1667 else
1668 {
7e68b0ea 1669 if (multibyte)
9af7511a 1670 while (1)
7e68b0ea 1671 {
9af7511a
KH
1672 unsigned char *prev_p;
1673 int nbytes;
e39091b6 1674
9af7511a
KH
1675 if (p <= stop)
1676 {
1677 if (p <= endp)
1678 break;
1679 p = GPT_ADDR;
1680 stop = endp;
1681 }
1682 prev_p = p;
1683 while (--p >= stop && ! CHAR_HEAD_P (*p));
1684 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1685 if (prev_p - p > nbytes)
1686 p = prev_p - 1, c = *p, nbytes = 1;
1687 else
1688 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
9690d026
KH
1689 if (SINGLE_BYTE_CHAR_P (c))
1690 {
1691 if (!fastmap[c])
1692 break;
1693 }
1694 else
7e68b0ea 1695 {
9690d026 1696 /* See the comment in the previous similar code. */
e39091b6
KH
1697 for (i = 0; i < n_char_ranges; i += 2)
1698 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1699 break;
1700 if (!(negate ^ (i < n_char_ranges)))
1701 break;
7e68b0ea 1702 }
9af7511a 1703 pos--, pos_byte -= nbytes;
7e68b0ea
RS
1704 }
1705 else
9af7511a
KH
1706 while (1)
1707 {
1708 if (p <= stop)
1709 {
1710 if (p <= endp)
1711 break;
1712 p = GPT_ADDR;
1713 stop = endp;
1714 }
4e837cd0 1715 if (!fastmap[p[-1]])
9af7511a
KH
1716 break;
1717 p--, pos--;
1718 }
195d1361
RS
1719 }
1720 }
7e68b0ea 1721
6a140a74
RS
1722#if 0 /* Not needed now that a position in mid-character
1723 cannot be specified in Lisp. */
7e68b0ea
RS
1724 if (multibyte
1725 /* INC_POS or DEC_POS might have moved POS over LIM. */
1726 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1727 pos = XINT (lim);
6a140a74 1728#endif
7e68b0ea 1729
6a140a74
RS
1730 if (! multibyte)
1731 pos_byte = pos;
1732
1733 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1734 immediate_quit = 0;
1735
1736 return make_number (PT - start_point);
1737 }
1738}
1739\f
95ff8dfc
RS
1740/* Jump over a comment, assuming we are at the beginning of one.
1741 FROM is the current position.
1742 FROM_BYTE is the bytepos corresponding to FROM.
1743 Do not move past STOP (a charpos).
1744 The comment over which we have to jump is of style STYLE
1745 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1746 NESTING should be positive to indicate the nesting at the beginning
1747 for nested comments and should be zero or negative else.
1748 ST_COMMENT_STYLE cannot be nested.
1749 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1750 (or 0 If the search cannot start in the middle of a two-character).
1751
1752 If successful, return 1 and store the charpos of the comment's end
1753 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1754 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1755 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1756 (as defined for state.incomment) in *INCOMMENT_PTR.
1757
1758 The comment end is the last character of the comment rather than the
1759 character just after the comment.
1760
1761 Global syntax data is assumed to initially be valid for FROM and
1762 remains valid for forward search starting at the returned position. */
1763
1764static int
1765forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1766 charpos_ptr, bytepos_ptr, incomment_ptr)
1767 int from, from_byte, stop;
1768 int nesting, style, prev_syntax;
1769 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1770{
1771 register int c, c1;
1772 register enum syntaxcode code;
1773 register int syntax;
1774
1775 if (nesting <= 0) nesting = -1;
1776
1777 /* Enter the loop in the middle so that we find
1778 a 2-char comment ender if we start in the middle of it. */
1779 syntax = prev_syntax;
1780 if (syntax != 0) goto forw_incomment;
1781
1782 while (1)
1783 {
1784 if (from == stop)
1785 {
1786 *incomment_ptr = nesting;
1787 *charpos_ptr = from;
1788 *bytepos_ptr = from_byte;
1789 return 0;
1790 }
1791 c = FETCH_CHAR (from_byte);
1792 syntax = SYNTAX_WITH_FLAGS (c);
1793 code = syntax & 0xff;
1794 if (code == Sendcomment
1795 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1796 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1797 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1798 /* we have encountered a comment end of the same style
1799 as the comment sequence which began this comment
1800 section */
1801 break;
1802 if (code == Scomment_fence
1803 && style == ST_COMMENT_STYLE)
1804 /* we have encountered a comment end of the same style
1805 as the comment sequence which began this comment
1806 section. */
1807 break;
1808 if (nesting > 0
1809 && code == Scomment
abf8a9ff 1810 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1811 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1812 /* we have encountered a nested comment of the same style
1813 as the comment sequence which began this comment section */
1814 nesting++;
1815 INC_BOTH (from, from_byte);
1816 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 1817
95ff8dfc
RS
1818 forw_incomment:
1819 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1820 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1821 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1822 SYNTAX_COMEND_SECOND (c1))
1823 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1824 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1825 {
1826 if (--nesting <= 0)
1827 /* we have encountered a comment end of the same style
1828 as the comment sequence which began this comment
1829 section */
1830 break;
1831 else
1832 {
1833 INC_BOTH (from, from_byte);
1834 UPDATE_SYNTAX_TABLE_FORWARD (from);
1835 }
1836 }
95ff8dfc
RS
1837 if (nesting > 0
1838 && from < stop
1839 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1840 && (c1 = FETCH_CHAR (from_byte),
1841 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1842 && SYNTAX_COMSTART_SECOND (c1))
1843 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1844 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1845 /* we have encountered a nested comment of the same style
1846 as the comment sequence which began this comment
1847 section */
1848 {
1849 INC_BOTH (from, from_byte);
1850 UPDATE_SYNTAX_TABLE_FORWARD (from);
1851 nesting++;
1852 }
1853 }
1854 *charpos_ptr = from;
1855 *bytepos_ptr = from_byte;
1856 return 1;
1857}
1858
b3cfe0c8 1859DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 1860 doc: /*
ee648c11 1861Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
1862Stop scanning if we find something other than a comment or whitespace.
1863Set point to where scanning stops.
ee648c11 1864If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
1865between them, return t; otherwise return nil. */)
1866 (count)
840f481c 1867 Lisp_Object count;
b3cfe0c8
RS
1868{
1869 register int from;
6a140a74 1870 int from_byte;
b3cfe0c8 1871 register int stop;
8ea151b2 1872 register int c, c1;
b3cfe0c8
RS
1873 register enum syntaxcode code;
1874 int comstyle = 0; /* style of comment encountered */
95ff8dfc 1875 int comnested = 0; /* whether the comment is nestable or not */
be720845 1876 int found;
840f481c 1877 int count1;
6a140a74 1878 int out_charpos, out_bytepos;
95ff8dfc 1879 int dummy;
840f481c 1880
b7826503 1881 CHECK_NUMBER (count);
840f481c 1882 count1 = XINT (count);
195d1361 1883 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1884
1885 immediate_quit = 1;
1886 QUIT;
1887
1888 from = PT;
6a140a74 1889 from_byte = PT_BYTE;
b3cfe0c8 1890
195d1361 1891 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1892 while (count1 > 0)
b3cfe0c8 1893 {
04882296 1894 do
b3cfe0c8 1895 {
f902a008
RS
1896 int comstart_first;
1897
04882296
KH
1898 if (from == stop)
1899 {
ef316cf0 1900 SET_PT_BOTH (from, from_byte);
b7e6e612 1901 immediate_quit = 0;
04882296
KH
1902 return Qnil;
1903 }
6a140a74 1904 c = FETCH_CHAR (from_byte);
b3cfe0c8 1905 code = SYNTAX (c);
f902a008 1906 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 1907 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 1908 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 1909 INC_BOTH (from, from_byte);
f902a008 1910 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 1911 if (from < stop && comstart_first
6a140a74 1912 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 1913 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1914 {
7d0393cf 1915 /* We have encountered a comment start sequence and we
7fc8191e 1916 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1917 the comment style this sequence begins so that later,
1918 only a comment end of the same style actually ends
7fc8191e 1919 the comment section. */
b3cfe0c8 1920 code = Scomment;
8ea151b2 1921 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 1922 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 1923 INC_BOTH (from, from_byte);
95ff8dfc 1924 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 1925 }
04882296 1926 }
abf8a9ff 1927 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 1928
da71ab91
KH
1929 if (code == Scomment_fence)
1930 comstyle = ST_COMMENT_STYLE;
1931 else if (code != Scomment)
04882296
KH
1932 {
1933 immediate_quit = 0;
6a140a74 1934 DEC_BOTH (from, from_byte);
ef316cf0 1935 SET_PT_BOTH (from, from_byte);
04882296
KH
1936 return Qnil;
1937 }
1938 /* We're at the start of a comment. */
95ff8dfc
RS
1939 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1940 &out_charpos, &out_bytepos, &dummy);
1941 from = out_charpos; from_byte = out_bytepos;
1942 if (!found)
04882296 1943 {
95ff8dfc
RS
1944 immediate_quit = 0;
1945 SET_PT_BOTH (from, from_byte);
1946 return Qnil;
b3cfe0c8 1947 }
95ff8dfc
RS
1948 INC_BOTH (from, from_byte);
1949 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 1950 /* We have skipped one comment. */
840f481c 1951 count1--;
b3cfe0c8
RS
1952 }
1953
840f481c 1954 while (count1 < 0)
b3cfe0c8 1955 {
b9145dbb 1956 while (1)
b3cfe0c8 1957 {
c65adb44 1958 int quoted;
f902a008 1959
b9145dbb
RS
1960 if (from <= stop)
1961 {
6a140a74 1962 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
1963 immediate_quit = 0;
1964 return Qnil;
1965 }
b3cfe0c8 1966
6a140a74 1967 DEC_BOTH (from, from_byte);
f902a008 1968 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 1969 quoted = char_quoted (from, from_byte);
6a140a74 1970 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
1971 code = SYNTAX (c);
1972 comstyle = 0;
95ff8dfc 1973 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
1974 if (code == Sendcomment)
1975 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 1976 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 1977 && prev_char_comend_first (from, from_byte)
89c6809a 1978 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 1979 {
7fc8191e 1980 /* We must record the comment style encountered so that
b3cfe0c8 1981 later, we can match only the proper comment begin
7fc8191e 1982 sequence of the same style. */
6a140a74 1983 DEC_BOTH (from, from_byte);
f902a008
RS
1984 code = Sendcomment;
1985 /* Calling char_quoted, above, set up global syntax position
1986 at the new value of FROM. */
95ff8dfc
RS
1987 c1 = FETCH_CHAR (from_byte);
1988 comstyle = SYNTAX_COMMENT_STYLE (c1);
1989 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
1990 }
1991
195d1361
RS
1992 if (code == Scomment_fence)
1993 {
1994 /* Skip until first preceding unquoted comment_fence. */
6a140a74 1995 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 1996
6a140a74 1997 while (1)
195d1361 1998 {
6a140a74
RS
1999 DEC_BOTH (from, from_byte);
2000 if (from == stop)
2001 break;
195d1361 2002 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2003 c = FETCH_CHAR (from_byte);
2004 if (SYNTAX (c) == Scomment_fence
7d0393cf 2005 && !char_quoted (from, from_byte))
195d1361 2006 {
7d0393cf 2007 found = 1;
195d1361
RS
2008 break;
2009 }
2010 }
2011 if (found == 0)
2012 {
2013 from = ini; /* Set point to ini + 1. */
6a140a74 2014 from_byte = ini_byte;
195d1361
RS
2015 goto leave;
2016 }
2017 }
2018 else if (code == Sendcomment)
b3cfe0c8 2019 {
95ff8dfc 2020 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2021 &out_charpos, &out_bytepos);
1aa963c8
SM
2022 if (found == -1)
2023 {
abf8a9ff
SM
2024 if (c == '\n')
2025 /* This end-of-line is not an end-of-comment.
2026 Treat it like a whitespace.
2027 CC-mode (and maybe others) relies on this behavior. */
2028 ;
2029 else
2030 {
2031 /* Failure: we should go back to the end of this
2032 not-quite-endcomment. */
2033 if (SYNTAX(c) != code)
2034 /* It was a two-char Sendcomment. */
2035 INC_BOTH (from, from_byte);
2036 goto leave;
2037 }
1aa963c8 2038 }
2b927d02 2039 else
abf8a9ff
SM
2040 {
2041 /* We have skipped one comment. */
2042 from = out_charpos, from_byte = out_bytepos;
2043 break;
2044 }
b3cfe0c8 2045 }
bb0de084 2046 else if (code != Swhitespace || quoted)
b3cfe0c8 2047 {
195d1361 2048 leave:
b3cfe0c8 2049 immediate_quit = 0;
6a140a74 2050 INC_BOTH (from, from_byte);
ef316cf0 2051 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2052 return Qnil;
2053 }
2054 }
2055
840f481c 2056 count1++;
b3cfe0c8
RS
2057 }
2058
ef316cf0 2059 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2060 immediate_quit = 0;
2061 return Qt;
2062}
2063\f
bd25db08 2064/* Return syntax code of character C if C is a single byte character
db9f48c3 2065 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2066
2067#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2068 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2069 ? SYNTAX (c) : Ssymbol)
2070
6a140a74 2071static Lisp_Object
8489eb67
RS
2072scan_lists (from, count, depth, sexpflag)
2073 register int from;
2074 int count, depth, sexpflag;
2075{
2076 Lisp_Object val;
195d1361 2077 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2078 register int c, c1;
2079 int stringterm;
8489eb67
RS
2080 int quoted;
2081 int mathexit = 0;
93da5fff 2082 register enum syntaxcode code, temp_code;
195d1361 2083 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2084 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2085 int comnested = 0; /* whether the comment is nestable or not */
93da5fff 2086 int temp_pos;
7bf5e9e4 2087 int last_good = from;
195d1361 2088 int found;
c3907a7e 2089 int from_byte;
6a140a74 2090 int out_bytepos, out_charpos;
95ff8dfc 2091 int temp, dummy;
bd25db08 2092 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2093
2094 if (depth > 0) min_depth = 0;
2095
c3907a7e
RS
2096 if (from > ZV) from = ZV;
2097 if (from < BEGV) from = BEGV;
2098
2099 from_byte = CHAR_TO_BYTE (from);
2100
8489eb67
RS
2101 immediate_quit = 1;
2102 QUIT;
2103
195d1361 2104 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2105 while (count > 0)
2106 {
8489eb67
RS
2107 while (from < stop)
2108 {
f902a008 2109 int comstart_first, prefix;
195d1361 2110 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2111 c = FETCH_CHAR (from_byte);
bd25db08 2112 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2113 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2114 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2115 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2116 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2117 if (depth == min_depth)
2118 last_good = from;
6a140a74 2119 INC_BOTH (from, from_byte);
195d1361 2120 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2121 if (from < stop && comstart_first
6a140a74 2122 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
8489eb67 2123 && parse_sexp_ignore_comments)
e5d4f4dc 2124 {
7d0393cf 2125 /* we have encountered a comment start sequence and we
195d1361 2126 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2127 the comment style this sequence begins so that later,
2128 only a comment end of the same style actually ends
2129 the comment section */
2130 code = Scomment;
95ff8dfc
RS
2131 c1 = FETCH_CHAR (from_byte);
2132 comstyle = SYNTAX_COMMENT_STYLE (c1);
2133 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2134 INC_BOTH (from, from_byte);
f902a008 2135 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2136 }
7d0393cf 2137
f902a008 2138 if (prefix)
8489eb67
RS
2139 continue;
2140
0220c518 2141 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2142 {
2143 case Sescape:
2144 case Scharquote:
2145 if (from == stop) goto lose;
6a140a74 2146 INC_BOTH (from, from_byte);
8489eb67
RS
2147 /* treat following character as a word constituent */
2148 case Sword:
2149 case Ssymbol:
2150 if (depth || !sexpflag) break;
195d1361 2151 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2152 while (from < stop)
2153 {
195d1361 2154 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2155
2156 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2157 c = FETCH_CHAR (from_byte);
2158 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2159 switch (temp)
8489eb67
RS
2160 {
2161 case Scharquote:
2162 case Sescape:
6a140a74 2163 INC_BOTH (from, from_byte);
8489eb67
RS
2164 if (from == stop) goto lose;
2165 break;
2166 case Sword:
2167 case Ssymbol:
2168 case Squote:
2169 break;
2170 default:
2171 goto done;
2172 }
6a140a74 2173 INC_BOTH (from, from_byte);
8489eb67
RS
2174 }
2175 goto done;
2176
195d1361 2177 case Scomment_fence:
95ff8dfc
RS
2178 comstyle = ST_COMMENT_STYLE;
2179 /* FALLTHROUGH */
2180 case Scomment:
8489eb67 2181 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2182 UPDATE_SYNTAX_TABLE_FORWARD (from);
2183 found = forw_comment (from, from_byte, stop,
2184 comnested, comstyle, 0,
2185 &out_charpos, &out_bytepos, &dummy);
2186 from = out_charpos, from_byte = out_bytepos;
2187 if (!found)
8489eb67 2188 {
95ff8dfc
RS
2189 if (depth == 0)
2190 goto done;
2191 goto lose;
8489eb67 2192 }
95ff8dfc
RS
2193 INC_BOTH (from, from_byte);
2194 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2195 break;
2196
2197 case Smath:
2198 if (!sexpflag)
2199 break;
6a140a74
RS
2200 if (from != stop && c == FETCH_CHAR (from_byte))
2201 {
2202 INC_BOTH (from, from_byte);
2203 }
8489eb67
RS
2204 if (mathexit)
2205 {
2206 mathexit = 0;
2207 goto close1;
2208 }
2209 mathexit = 1;
2210
2211 case Sopen:
2212 if (!++depth) goto done;
2213 break;
2214
2215 case Sclose:
2216 close1:
2217 if (!--depth) goto done;
2218 if (depth < min_depth)
7bf5e9e4
RS
2219 Fsignal (Qscan_error,
2220 Fcons (build_string ("Containing expression ends prematurely"),
2221 Fcons (make_number (last_good),
2222 Fcons (make_number (from), Qnil))));
8489eb67
RS
2223 break;
2224
2225 case Sstring:
195d1361 2226 case Sstring_fence:
ef316cf0 2227 temp_pos = dec_bytepos (from_byte);
93da5fff 2228 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2229 while (1)
2230 {
2231 if (from >= stop) goto lose;
195d1361 2232 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2233 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2234 if (code == Sstring
2235 ? (c == stringterm
2236 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2237 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2238 break;
7c7ca3d2
RS
2239
2240 /* Some compilers can't handle this inside the switch. */
bd25db08 2241 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2242 switch (temp)
8489eb67
RS
2243 {
2244 case Scharquote:
2245 case Sescape:
6a140a74 2246 INC_BOTH (from, from_byte);
8489eb67 2247 }
6a140a74 2248 INC_BOTH (from, from_byte);
8489eb67 2249 }
6a140a74 2250 INC_BOTH (from, from_byte);
8489eb67
RS
2251 if (!depth && sexpflag) goto done;
2252 break;
240c43e8
SM
2253 default:
2254 /* Ignore whitespace, punctuation, quote, endcomment. */
2255 break;
8489eb67
RS
2256 }
2257 }
2258
2259 /* Reached end of buffer. Error if within object, return nil if between */
2260 if (depth) goto lose;
2261
2262 immediate_quit = 0;
2263 return Qnil;
2264
2265 /* End of object reached */
2266 done:
2267 count--;
2268 }
2269
2270 while (count < 0)
2271 {
8489eb67
RS
2272 while (from > stop)
2273 {
6a140a74 2274 DEC_BOTH (from, from_byte);
195d1361 2275 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2276 c = FETCH_CHAR (from_byte);
bd25db08 2277 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2278 if (depth == min_depth)
2279 last_good = from;
7fc8191e 2280 comstyle = 0;
95ff8dfc 2281 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2282 if (code == Sendcomment)
2283 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2284 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2285 && prev_char_comend_first (from, from_byte)
8489eb67 2286 && parse_sexp_ignore_comments)
e5d4f4dc 2287 {
f902a008 2288 /* We must record the comment style encountered so that
e5d4f4dc 2289 later, we can match only the proper comment begin
f902a008 2290 sequence of the same style. */
6a140a74 2291 DEC_BOTH (from, from_byte);
f902a008
RS
2292 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2293 code = Sendcomment;
95ff8dfc
RS
2294 c1 = FETCH_CHAR (from_byte);
2295 comstyle = SYNTAX_COMMENT_STYLE (c1);
2296 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2297 }
7d0393cf 2298
9828a477 2299 /* Quoting turns anything except a comment-ender
e6365855 2300 into a word character. Note that this cannot be true
f902a008 2301 if we decremented FROM in the if-statement above. */
9828a477 2302 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2303 {
2304 DEC_BOTH (from, from_byte);
2305 code = Sword;
2306 }
9828a477 2307 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2308 continue;
2309
9828a477 2310 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2311 {
2312 case Sword:
2313 case Ssymbol:
9828a477
KH
2314 case Sescape:
2315 case Scharquote:
8489eb67 2316 if (depth || !sexpflag) break;
195d1361
RS
2317 /* This word counts as a sexp; count object finished
2318 after passing it. */
8489eb67
RS
2319 while (from > stop)
2320 {
6a140a74 2321 temp_pos = from_byte;
ef316cf0
RS
2322 if (! NILP (current_buffer->enable_multibyte_characters))
2323 DEC_POS (temp_pos);
2324 else
2325 temp_pos--;
6a140a74 2326 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2327 c1 = FETCH_CHAR (temp_pos);
bd25db08 2328 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2329 /* Don't allow comment-end to be quoted. */
2330 if (temp_code == Sendcomment)
2331 goto done2;
6a140a74 2332 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2333 if (quoted)
93da5fff 2334 {
6a140a74 2335 DEC_BOTH (from, from_byte);
ef316cf0 2336 temp_pos = dec_bytepos (temp_pos);
6a140a74 2337 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2338 }
2339 c1 = FETCH_CHAR (temp_pos);
bd25db08 2340 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2341 if (! (quoted || temp_code == Sword
2342 || temp_code == Ssymbol
2343 || temp_code == Squote))
8489eb67 2344 goto done2;
6a140a74 2345 DEC_BOTH (from, from_byte);
8489eb67
RS
2346 }
2347 goto done2;
2348
2349 case Smath:
2350 if (!sexpflag)
2351 break;
ef316cf0 2352 temp_pos = dec_bytepos (from_byte);
6a140a74 2353 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2354 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2355 DEC_BOTH (from, from_byte);
8489eb67
RS
2356 if (mathexit)
2357 {
2358 mathexit = 0;
2359 goto open2;
2360 }
2361 mathexit = 1;
2362
2363 case Sclose:
2364 if (!++depth) goto done2;
2365 break;
2366
2367 case Sopen:
2368 open2:
2369 if (!--depth) goto done2;
2370 if (depth < min_depth)
7bf5e9e4
RS
2371 Fsignal (Qscan_error,
2372 Fcons (build_string ("Containing expression ends prematurely"),
2373 Fcons (make_number (last_good),
2374 Fcons (make_number (from), Qnil))));
8489eb67
RS
2375 break;
2376
2377 case Sendcomment:
2378 if (!parse_sexp_ignore_comments)
2379 break;
95ff8dfc 2380 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2381 &out_charpos, &out_bytepos);
1aa963c8
SM
2382 /* FIXME: if found == -1, then it really wasn't a comment-end.
2383 For single-char Sendcomment, we can't do much about it apart
2384 from skipping the char.
2385 For 2-char endcomments, we could try again, taking both
2386 chars as separate entities, but it's a lot of trouble
2387 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2388 if (found != -1)
2389 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2390 break;
2391
195d1361
RS
2392 case Scomment_fence:
2393 case Sstring_fence:
2394 while (1)
2395 {
6a140a74 2396 DEC_BOTH (from, from_byte);
195d1361
RS
2397 if (from == stop) goto lose;
2398 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2399 if (!char_quoted (from, from_byte)
bd25db08
KH
2400 && (c = FETCH_CHAR (from_byte),
2401 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2402 break;
2403 }
2404 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2405 break;
7d0393cf 2406
8489eb67 2407 case Sstring:
6a140a74 2408 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2409 while (1)
2410 {
2411 if (from == stop) goto lose;
6a140a74 2412 temp_pos = from_byte;
ef316cf0
RS
2413 if (! NILP (current_buffer->enable_multibyte_characters))
2414 DEC_POS (temp_pos);
2415 else
2416 temp_pos--;
6a140a74
RS
2417 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2418 if (!char_quoted (from - 1, temp_pos)
42ebfa31
SM
2419 && stringterm == (c = FETCH_CHAR (temp_pos))
2420 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2421 break;
6a140a74 2422 DEC_BOTH (from, from_byte);
8489eb67 2423 }
6a140a74 2424 DEC_BOTH (from, from_byte);
8489eb67
RS
2425 if (!depth && sexpflag) goto done2;
2426 break;
240c43e8
SM
2427 default:
2428 /* Ignore whitespace, punctuation, quote, endcomment. */
2429 break;
8489eb67
RS
2430 }
2431 }
2432
2433 /* Reached start of buffer. Error if within object, return nil if between */
2434 if (depth) goto lose;
2435
2436 immediate_quit = 0;
2437 return Qnil;
2438
2439 done2:
2440 count++;
2441 }
2442
2443
2444 immediate_quit = 0;
1e142fb7 2445 XSETFASTINT (val, from);
8489eb67
RS
2446 return val;
2447
2448 lose:
7bf5e9e4
RS
2449 Fsignal (Qscan_error,
2450 Fcons (build_string ("Unbalanced parentheses"),
2451 Fcons (make_number (last_good),
2452 Fcons (make_number (from), Qnil))));
2453
8489eb67
RS
2454 /* NOTREACHED */
2455}
2456
8489eb67 2457DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2458 doc: /* Scan from character number FROM by COUNT lists.
2459Returns the character number of the position thus found.
2460
2461If DEPTH is nonzero, paren depth begins counting from that value,
2462only places where the depth in parentheses becomes zero
2463are candidates for stopping; COUNT such places are counted.
2464Thus, a positive value for DEPTH means go out levels.
2465
2466Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2467
2468If the beginning or end of (the accessible part of) the buffer is reached
2469and the depth is wrong, an error is signaled.
2470If the depth is right but the count is not used up, nil is returned. */)
2471 (from, count, depth)
8489eb67
RS
2472 Lisp_Object from, count, depth;
2473{
b7826503
PJ
2474 CHECK_NUMBER (from);
2475 CHECK_NUMBER (count);
2476 CHECK_NUMBER (depth);
8489eb67
RS
2477
2478 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2479}
2480
2481DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2482 doc: /* Scan from character number FROM by COUNT balanced expressions.
2483If COUNT is negative, scan backwards.
2484Returns the character number of the position thus found.
2485
2486Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2487
2488If the beginning or end of (the accessible part of) the buffer is reached
2489in the middle of a parenthetical grouping, an error is signaled.
2490If the beginning or end is reached between groupings
2491but before count is used up, nil is returned. */)
2492 (from, count)
8489eb67
RS
2493 Lisp_Object from, count;
2494{
b7826503
PJ
2495 CHECK_NUMBER (from);
2496 CHECK_NUMBER (count);
8489eb67
RS
2497
2498 return scan_lists (XINT (from), XINT (count), 0, 1);
2499}
2500
2501DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2502 0, 0, 0,
2503 doc: /* Move point backward over any number of chars with prefix syntax.
2504This includes chars with "quote" or "prefix" syntax (' or p). */)
2505 ()
8489eb67
RS
2506{
2507 int beg = BEGV;
6a140a74
RS
2508 int opoint = PT;
2509 int opoint_byte = PT_BYTE;
6ec8bbd2 2510 int pos = PT;
6a140a74 2511 int pos_byte = PT_BYTE;
93da5fff 2512 int c;
93da5fff 2513
7d0393cf 2514 if (pos <= beg)
195d1361 2515 {
f902a008
RS
2516 SET_PT_BOTH (opoint, opoint_byte);
2517
2518 return Qnil;
195d1361 2519 }
8489eb67 2520
f902a008
RS
2521 SETUP_SYNTAX_TABLE (pos, -1);
2522
6a140a74
RS
2523 DEC_BOTH (pos, pos_byte);
2524
1fd3172d 2525 while (!char_quoted (pos, pos_byte)
195d1361 2526 /* Previous statement updates syntax table. */
6a140a74 2527 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2528 || SYNTAX_PREFIX (c)))
2529 {
1fd3172d
RS
2530 opoint = pos;
2531 opoint_byte = pos_byte;
2532
2533 if (pos + 1 > beg)
2534 DEC_BOTH (pos, pos_byte);
93da5fff 2535 }
8489eb67 2536
6a140a74 2537 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2538
2539 return Qnil;
2540}
2541\f
6a140a74 2542/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2543 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2544 and return a description of the state of the parse at END.
c81a3712 2545 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2546 If COMMENTSTOP is 1, stop at the start of a comment.
2547 If COMMENTSTOP is -1, stop at the start or end of a comment,
2548 after the beginning of a string, or after the end of a string. */
8489eb67 2549
340f92b5 2550static void
6a140a74 2551scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2552 stopbefore, oldstate, commentstop)
e5d4f4dc 2553 struct lisp_parse_state *stateptr;
8489eb67 2554 register int from;
ddc447fc 2555 int end, targetdepth, stopbefore, from_byte;
8489eb67 2556 Lisp_Object oldstate;
c81a3712 2557 int commentstop;
8489eb67
RS
2558{
2559 struct lisp_parse_state state;
2560
2561 register enum syntaxcode code;
95ff8dfc
RS
2562 int c1;
2563 int comnested;
8489eb67
RS
2564 struct level { int last, prev; };
2565 struct level levelstart[100];
2566 register struct level *curlevel = levelstart;
2567 struct level *endlevel = levelstart + 100;
8489eb67
RS
2568 register int depth; /* Paren depth of current scanning location.
2569 level - levelstart equals this except
2570 when the depth becomes negative. */
2571 int mindepth; /* Lowest DEPTH value seen. */
2572 int start_quoted = 0; /* Nonzero means starting after a char quote */
2573 Lisp_Object tem;
93da5fff 2574 int prev_from; /* Keep one character before FROM. */
6a140a74 2575 int prev_from_byte;
1fd3172d 2576 int prev_from_syntax;
195d1361
RS
2577 int boundary_stop = commentstop == -1;
2578 int nofence;
95ff8dfc
RS
2579 int found;
2580 int out_bytepos, out_charpos;
7c7ca3d2 2581 int temp;
93da5fff
KH
2582
2583 prev_from = from;
6a140a74
RS
2584 prev_from_byte = from_byte;
2585 if (from != BEGV)
2586 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2587
2588 /* Use this macro instead of `from++'. */
6a140a74
RS
2589#define INC_FROM \
2590do { prev_from = from; \
2591 prev_from_byte = from_byte; \
1fd3172d
RS
2592 prev_from_syntax \
2593 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
ef316cf0 2594 INC_BOTH (from, from_byte); \
3ceb4629
SM
2595 if (from < end) \
2596 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2597 } while (0)
8489eb67
RS
2598
2599 immediate_quit = 1;
2600 QUIT;
2601
265a9e55 2602 if (NILP (oldstate))
8489eb67
RS
2603 {
2604 depth = 0;
2605 state.instring = -1;
2606 state.incomment = 0;
195d1361
RS
2607 state.comstyle = 0; /* comment style a by default. */
2608 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2609 }
2610 else
2611 {
2612 tem = Fcar (oldstate);
265a9e55 2613 if (!NILP (tem))
8489eb67
RS
2614 depth = XINT (tem);
2615 else
2616 depth = 0;
2617
2618 oldstate = Fcdr (oldstate);
2619 oldstate = Fcdr (oldstate);
2620 oldstate = Fcdr (oldstate);
2621 tem = Fcar (oldstate);
195d1361 2622 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2623 state.instring = (!NILP (tem)
2624 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2625 : -1);
8489eb67
RS
2626
2627 oldstate = Fcdr (oldstate);
2628 tem = Fcar (oldstate);
e76f1c44
GM
2629 state.incomment = (!NILP (tem)
2630 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2631 : 0);
8489eb67
RS
2632
2633 oldstate = Fcdr (oldstate);
2634 tem = Fcar (oldstate);
265a9e55 2635 start_quoted = !NILP (tem);
e5d4f4dc 2636
95ff8dfc 2637 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2638 style a. If it is non-nil, we are in comment style b */
2639 oldstate = Fcdr (oldstate);
e5d4f4dc 2640 oldstate = Fcdr (oldstate);
195d1361 2641 tem = Fcar (oldstate);
7d0393cf 2642 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2643 ? ST_COMMENT_STYLE : 1);
195d1361 2644
e5d4f4dc 2645 oldstate = Fcdr (oldstate);
e5d4f4dc 2646 tem = Fcar (oldstate);
195d1361 2647 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2648 oldstate = Fcdr (oldstate);
2649 tem = Fcar (oldstate);
2650 while (!NILP (tem)) /* >= second enclosing sexps. */
2651 {
2652 /* curlevel++->last ran into compiler bug on Apollo */
2653 curlevel->last = XINT (Fcar (tem));
2654 if (++curlevel == endlevel)
02010917 2655 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2656 curlevel->prev = -1;
2657 curlevel->last = -1;
2658 tem = Fcdr (tem);
2659 }
8489eb67
RS
2660 }
2661 state.quoted = 0;
2662 mindepth = depth;
2663
2664 curlevel->prev = -1;
2665 curlevel->last = -1;
2666
326b283b
RS
2667 SETUP_SYNTAX_TABLE (prev_from, 1);
2668 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
3ceb4629 2669 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2670
195d1361 2671 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2672
326b283b
RS
2673 if (state.incomment)
2674 goto startincomment;
8489eb67
RS
2675 if (state.instring >= 0)
2676 {
195d1361 2677 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2678 if (start_quoted)
2679 goto startquotedinstring;
8489eb67
RS
2680 goto startinstring;
2681 }
326b283b
RS
2682 else if (start_quoted)
2683 goto startquoted;
1fd3172d 2684
8489eb67
RS
2685 while (from < end)
2686 {
93da5fff 2687 INC_FROM;
1fd3172d 2688 code = prev_from_syntax & 0xff;
4c920633 2689
8f9dc2ed 2690 if (code == Scomment)
95ff8dfc 2691 {
d355bd8a 2692 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
95ff8dfc
RS
2693 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2694 1 : -1);
2695 state.comstr_start = prev_from;
2696 }
4c920633 2697 else if (code == Scomment_fence)
e5d4f4dc
RS
2698 {
2699 /* Record the comment style we have entered so that only
2700 the comment-end sequence of the same style actually
2701 terminates the comment section. */
95ff8dfc
RS
2702 state.comstyle = ST_COMMENT_STYLE;
2703 state.incomment = -1;
195d1361 2704 state.comstr_start = prev_from;
e5d4f4dc 2705 code = Scomment;
e5d4f4dc 2706 }
4c920633 2707 else if (from < end)
1fd3172d 2708 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
95ff8dfc
RS
2709 if (c1 = FETCH_CHAR (from_byte),
2710 SYNTAX_COMSTART_SECOND (c1))
1fd3172d 2711 /* Duplicate code to avoid a complex if-expression
4c920633
RS
2712 which causes trouble for the SGI compiler. */
2713 {
2714 /* Record the comment style we have entered so that only
2715 the comment-end sequence of the same style actually
2716 terminates the comment section. */
79bd6490 2717 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc
RS
2718 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2719 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2720 state.incomment = comnested ? 1 : -1;
4c920633 2721 state.comstr_start = prev_from;
95ff8dfc 2722 INC_FROM;
4c920633
RS
2723 code = Scomment;
2724 }
e5d4f4dc 2725
1fd3172d 2726 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2727 continue;
0220c518 2728 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2729 {
2730 case Sescape:
2731 case Scharquote:
2732 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2733 curlevel->last = prev_from;
8489eb67
RS
2734 startquoted:
2735 if (from == end) goto endquoted;
93da5fff 2736 INC_FROM;
8489eb67
RS
2737 goto symstarted;
2738 /* treat following character as a word constituent */
2739 case Sword:
2740 case Ssymbol:
2741 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2742 curlevel->last = prev_from;
8489eb67
RS
2743 symstarted:
2744 while (from < end)
2745 {
7c7ca3d2
RS
2746 /* Some compilers can't handle this inside the switch. */
2747 temp = SYNTAX (FETCH_CHAR (from_byte));
2748 switch (temp)
8489eb67
RS
2749 {
2750 case Scharquote:
2751 case Sescape:
93da5fff 2752 INC_FROM;
8489eb67
RS
2753 if (from == end) goto endquoted;
2754 break;
2755 case Sword:
2756 case Ssymbol:
2757 case Squote:
2758 break;
2759 default:
2760 goto symdone;
2761 }
93da5fff 2762 INC_FROM;
8489eb67
RS
2763 }
2764 symdone:
2765 curlevel->prev = curlevel->last;
2766 break;
2767
240c43e8 2768 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 2769 case Scomment:
195d1361 2770 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2771 startincomment:
2772 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2773 that we find a 2-char comment ender even if we start in the
e6365855 2774 middle of it. We don't want to do that if we're just at the
d355bd8a 2775 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2776 found = forw_comment (from, from_byte, end,
2777 state.incomment, state.comstyle,
e6365855
SM
2778 (from == BEGV || from < state.comstr_start + 3)
2779 ? 0 : prev_from_syntax,
95ff8dfc
RS
2780 &out_charpos, &out_bytepos, &state.incomment);
2781 from = out_charpos; from_byte = out_bytepos;
2782 /* Beware! prev_from and friends are invalid now.
2783 Luckily, the `done' doesn't use them and the INC_FROM
2784 sets them to a sane value without looking at them. */
2785 if (!found) goto done;
d355bd8a 2786 INC_FROM;
8489eb67 2787 state.incomment = 0;
e5d4f4dc 2788 state.comstyle = 0; /* reset the comment style */
195d1361 2789 if (boundary_stop) goto done;
8489eb67
RS
2790 break;
2791
2792 case Sopen:
2793 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2794 depth++;
2795 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2796 curlevel->last = prev_from;
8489eb67 2797 if (++curlevel == endlevel)
02010917 2798 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2799 curlevel->prev = -1;
2800 curlevel->last = -1;
30844415 2801 if (targetdepth == depth) goto done;
8489eb67
RS
2802 break;
2803
2804 case Sclose:
2805 depth--;
2806 if (depth < mindepth)
2807 mindepth = depth;
2808 if (curlevel != levelstart)
2809 curlevel--;
2810 curlevel->prev = curlevel->last;
30844415 2811 if (targetdepth == depth) goto done;
8489eb67
RS
2812 break;
2813
2814 case Sstring:
195d1361
RS
2815 case Sstring_fence:
2816 state.comstr_start = from - 1;
8489eb67 2817 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2818 curlevel->last = prev_from;
7d0393cf 2819 state.instring = (code == Sstring
6a140a74 2820 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2821 : ST_STRING_STYLE);
2822 if (boundary_stop) goto done;
8489eb67 2823 startinstring:
195d1361 2824 {
644ea4df 2825 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 2826
644ea4df
RS
2827 while (1)
2828 {
2829 int c;
195d1361 2830
644ea4df 2831 if (from >= end) goto done;
6a140a74 2832 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2833 /* Some compilers can't handle this inside the switch. */
2834 temp = SYNTAX (c);
ccf89641
KH
2835
2836 /* Check TEMP here so that if the char has
2837 a syntax-table property which says it is NOT
2838 a string character, it does not end the string. */
2839 if (nofence && c == state.instring && temp == Sstring)
2840 break;
2841
7c7ca3d2 2842 switch (temp)
644ea4df
RS
2843 {
2844 case Sstring_fence:
2845 if (!nofence) goto string_end;
2846 break;
2847 case Scharquote:
2848 case Sescape:
2849 INC_FROM;
2850 startquotedinstring:
2851 if (from >= end) goto endquoted;
195d1361 2852 }
644ea4df
RS
2853 INC_FROM;
2854 }
195d1361
RS
2855 }
2856 string_end:
8489eb67
RS
2857 state.instring = -1;
2858 curlevel->prev = curlevel->last;
93da5fff 2859 INC_FROM;
195d1361 2860 if (boundary_stop) goto done;
8489eb67
RS
2861 break;
2862
2863 case Smath:
240c43e8
SM
2864 /* FIXME: We should do something with it. */
2865 break;
2866 default:
2867 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
2868 break;
2869 }
2870 }
2871 goto done;
2872
2873 stop: /* Here if stopping before start of sexp. */
93da5fff 2874 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2875 goto done; /* but return the position before it. */
2876
2877 endquoted:
2878 state.quoted = 1;
2879 done:
2880 state.depth = depth;
2881 state.mindepth = mindepth;
2882 state.thislevelstart = curlevel->prev;
2883 state.prevlevelstart
2884 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2885 state.location = from;
1a102a58
RS
2886 state.levelstarts = Qnil;
2887 while (--curlevel >= levelstart)
2888 state.levelstarts = Fcons (make_number (curlevel->last),
2889 state.levelstarts);
8489eb67
RS
2890 immediate_quit = 0;
2891
e5d4f4dc 2892 *stateptr = state;
8489eb67
RS
2893}
2894
c81a3712 2895DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
2896 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2897Parsing stops at TO or when certain criteria are met;
2898 point is set to where parsing stops.
2899If fifth arg OLDSTATE is omitted or nil,
2900 parsing assumes that FROM is the beginning of a function.
2901Value is a list of ten elements describing final state of parsing:
2902 0. depth in parens.
2903 1. character address of start of innermost containing list; nil if none.
2904 2. character address of start of last complete sexp terminated.
2905 3. non-nil if inside a string.
2906 (it is the character that will terminate the string,
2907 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 2908 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
2909 else an integer (the current comment nesting).
2910 5. t if following a quote character.
2911 6. the minimum paren-depth encountered during this scan.
2912 7. t if in a comment of style b; symbol `syntax-table' if the comment
2913 should be terminated by a generic comment delimiter.
2914 8. character address of start of comment or string; nil if not in one.
2915 9. Intermediate data for continuation of parsing (subject to change).
2916If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2917in parentheses becomes equal to TARGETDEPTH.
2918Fourth arg STOPBEFORE non-nil means stop when come to
2919 any character that starts a sexp.
2920Fifth arg OLDSTATE is a nine-element list like what this function returns.
2921 It is used to initialize the state of the parse. Elements number 1, 2, 6
2922 and 8 are ignored; you can leave off element 8 (the last) entirely.
2923Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
2924 If it is symbol `syntax-table', stop after the start of a comment or a
2925 string, or after end of a comment or a string. */)
2926 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 2927 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2928{
2929 struct lisp_parse_state state;
2930 int target;
2931
265a9e55 2932 if (!NILP (targetdepth))
8489eb67 2933 {
b7826503 2934 CHECK_NUMBER (targetdepth);
8489eb67
RS
2935 target = XINT (targetdepth);
2936 }
2937 else
2938 target = -100000; /* We won't reach this depth */
2939
2940 validate_region (&from, &to);
6a140a74
RS
2941 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2942 XINT (to),
c81a3712 2943 target, !NILP (stopbefore), oldstate,
7d0393cf 2944 (NILP (commentstop)
195d1361 2945 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2946
2947 SET_PT (state.location);
7d0393cf 2948
8489eb67
RS
2949 return Fcons (make_number (state.depth),
2950 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2951 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
2952 Fcons (state.instring >= 0
2953 ? (state.instring == ST_STRING_STYLE
195d1361 2954 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
2955 Fcons (state.incomment < 0 ? Qt :
2956 (state.incomment == 0 ? Qnil :
2957 make_number (state.incomment)),
8489eb67 2958 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 2959 Fcons (make_number (state.mindepth),
7d0393cf 2960 Fcons ((state.comstyle
bff37d2a
RS
2961 ? (state.comstyle == ST_COMMENT_STYLE
2962 ? Qsyntax_table : Qt) :
2963 Qnil),
0beaf54f
DL
2964 Fcons (((state.incomment
2965 || (state.instring >= 0))
bff37d2a
RS
2966 ? make_number (state.comstr_start)
2967 : Qnil),
1a102a58 2968 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
2969}
2970\f
dfcf069d 2971void
8489eb67
RS
2972init_syntax_once ()
2973{
78f9a1f7 2974 register int i, c;
8ea151b2 2975 Lisp_Object temp;
8489eb67 2976
5ebaddf5
RS
2977 /* This has to be done here, before we call Fmake_char_table. */
2978 Qsyntax_table = intern ("syntax-table");
2979 staticpro (&Qsyntax_table);
2980
2981 /* Intern this now in case it isn't already done.
2982 Setting this variable twice is harmless.
2983 But don't staticpro it here--that is done in alloc.c. */
2984 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2985
93da5fff 2986 /* Create objects which can be shared among syntax tables. */
42ebfa31 2987 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
2988 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2989 XVECTOR (Vsyntax_code_object)->contents[i]
2990 = Fcons (make_number (i), Qnil);
2991
5ebaddf5
RS
2992 /* Now we are ready to set up this property, so we can
2993 create syntax tables. */
2994 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2995
93da5fff 2996 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2997
5ebaddf5 2998 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2999
93da5fff 3000 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3001 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3002 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3003 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3004 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3005 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3006 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3007
3008 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3009 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3010
3011 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3012 Fcons (make_number (Sopen), make_number (')')));
3013 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3014 Fcons (make_number (Sclose), make_number ('(')));
3015 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3016 Fcons (make_number (Sopen), make_number (']')));
3017 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3018 Fcons (make_number (Sclose), make_number ('[')));
3019 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3020 Fcons (make_number (Sopen), make_number ('}')));
3021 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3022 Fcons (make_number (Sclose), make_number ('{')));
3023 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3024 Fcons (make_number ((int) Sstring), Qnil));
3025 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3026 Fcons (make_number ((int) Sescape), Qnil));
3027
93da5fff 3028 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3029 for (i = 0; i < 10; i++)
78f9a1f7
KH
3030 {
3031 c = "_-+*/&|<>="[i];
3032 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3033 }
8489eb67 3034
93da5fff 3035 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3036 for (i = 0; i < 12; i++)
78f9a1f7
KH
3037 {
3038 c = ".,;:?!#@~^'`"[i];
3039 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3040 }
bd25db08
KH
3041
3042 /* All multibyte characters have syntax `word' by default. */
3043 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3044 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3045 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3046}
3047
dfcf069d 3048void
8489eb67
RS
3049syms_of_syntax ()
3050{
3051 Qsyntax_table_p = intern ("syntax-table-p");
3052 staticpro (&Qsyntax_table_p);
3053
93da5fff
KH
3054 staticpro (&Vsyntax_code_object);
3055
7bf5e9e4
RS
3056 Qscan_error = intern ("scan-error");
3057 staticpro (&Qscan_error);
3058 Fput (Qscan_error, Qerror_conditions,
f3be100f 3059 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3060 Fput (Qscan_error, Qerror_message,
3061 build_string ("Scan error"));
3062
8489eb67 3063 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3064 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3065
195d1361 3066 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3067 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3068Otherwise, that text property is simply ignored.
3069See the info node `(elisp)Syntax Properties' for a description of the
3070`syntax-table' property. */);
195d1361 3071
8489eb67
RS
3072 words_include_escapes = 0;
3073 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3074 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3075
bd25db08 3076 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3077 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3078 multibyte_syntax_as_symbol = 0;
3079
f4ed767f
GM
3080 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3081 &open_paren_in_column_0_is_defun_start,
b222e415 3082 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3083 open_paren_in_column_0_is_defun_start = 1;
3084
8489eb67
RS
3085 defsubr (&Ssyntax_table_p);
3086 defsubr (&Ssyntax_table);
3087 defsubr (&Sstandard_syntax_table);
3088 defsubr (&Scopy_syntax_table);
3089 defsubr (&Sset_syntax_table);
3090 defsubr (&Schar_syntax);
beefa22e 3091 defsubr (&Smatching_paren);
c65adb44 3092 defsubr (&Sstring_to_syntax);
8489eb67 3093 defsubr (&Smodify_syntax_entry);
62abe9cb 3094 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3095
3096 defsubr (&Sforward_word);
3097
195d1361
RS
3098 defsubr (&Sskip_chars_forward);
3099 defsubr (&Sskip_chars_backward);
3100 defsubr (&Sskip_syntax_forward);
3101 defsubr (&Sskip_syntax_backward);
3102
b3cfe0c8 3103 defsubr (&Sforward_comment);
8489eb67
RS
3104 defsubr (&Sscan_lists);
3105 defsubr (&Sscan_sexps);
3106 defsubr (&Sbackward_prefix_chars);
3107 defsubr (&Sparse_partial_sexp);
3108}