(Info-insert-dir): Don't call `Info-fontify-menu-headers'.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b8855607 2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
195d1361
RS
28
29/* Make syntax table lookup grant data in gl_state. */
30#define SYNTAX_ENTRY_VIA_PROPERTY
31
8489eb67 32#include "syntax.h"
195d1361
RS
33#include "intervals.h"
34
35/* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39#define ST_COMMENT_STYLE (256 + 1)
40#define ST_STRING_STYLE (256 + 2)
93da5fff 41#include "category.h"
8489eb67 42
7bf5e9e4 43Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
44
45int words_include_escapes;
195d1361 46int parse_sexp_lookup_properties;
8489eb67 47
bd25db08
KH
48/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49int multibyte_syntax_as_symbol;
50
8ea151b2
RS
51/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54Lisp_Object syntax_temp;
55
f4ed767f
GM
56/* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60int open_paren_in_column_0_is_defun_start;
61
e5d4f4dc
RS
62/* This is the internal form of the parse state used in parse-partial-sexp. */
63
64struct lisp_parse_state
65 {
195d1361
RS
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 68 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
195d1361 73 int location; /* Char number at which parsing stopped. */
e5d4f4dc 74 int mindepth; /* Minimum depth seen while scanning. */
195d1361 75 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
e5d4f4dc
RS
78 };
79\f
37bef230
RS
80/* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
6a140a74 83 find_start_value_byte is the corresponding byte position.
37bef230
RS
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88static int find_start_pos;
89static int find_start_value;
6a140a74 90static int find_start_value_byte;
37bef230
RS
91static struct buffer *find_start_buffer;
92static int find_start_begv;
93static int find_start_modiff;
6a140a74
RS
94
95
96static int find_defun_start P_ ((int, int));
95ff8dfc 97static int back_comment P_ ((int, int, int, int, int, int *, int *));
6a140a74
RS
98static int char_quoted P_ ((int, int));
99static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100static Lisp_Object scan_lists P_ ((int, int, int, int));
101static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
195d1361
RS
104\f
105
106struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108INTERVAL interval_of ();
109#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
6a140a74
RS
112/* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 114 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 115 valid at start, the rest is filled basing on OBJECT.
195d1361 116
6a140a74 117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
6a140a74 120 update the interval to start further than CHARPOS - or be
195d1361 121 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 122 charpos to call this subroutine again - or be before/after the
195d1361
RS
123 start/end of OBJECT. */
124
125void
6a140a74
RS
126update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
195d1361
RS
128 Lisp_Object object;
129{
130 Lisp_Object tmp_table;
4ffe723b 131 int cnt = 0, invalidate = 1;
195d1361
RS
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
6a140a74 138 gl_state.forward_i = interval_of (charpos, object);
195d1361
RS
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
f902a008 144 /* interval_of updates only ->position of the return value, so
d80f4cc7
RS
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
439d5cb4 149 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 153 else
439d5cb4 154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 155 + TOTAL_LENGTH (i);
439d5cb4 156 i = INTERVAL_PARENT (i);
d80f4cc7
RS
157 }
158 i = gl_state.forward_i;
ee0cdb48
KH
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
423e705d 165 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 166 or further off. */
195d1361
RS
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 169 else if (charpos < i->position) /* Move left. */
195d1361
RS
170 {
171 if (count > 0)
f902a008 172 error ("Error in syntax_table logic for intervals <-");
195d1361 173 /* Update the interval. */
6a140a74 174 i = update_interval (i, charpos);
423e705d 175 if (!gl_state.left_ok || oldi->position != INTERVAL_LAST_POS (i))
195d1361
RS
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
ee0cdb48 180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 181 }
423e705d 182 }
6a140a74 183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
184 {
185 if (count < 0)
f902a008 186 error ("Error in syntax_table logic for intervals ->");
195d1361 187 /* Update the interval. */
6a140a74 188 i = update_interval (i, charpos);
423e705d 189 if (!gl_state.right_ok || i->position != INTERVAL_LAST_POS (oldi))
195d1361
RS
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
ee0cdb48 194 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
423e705d
SM
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
195d1361
RS
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
ee0cdb48 216 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
ee0cdb48 222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
223 }
224 }
37bef230 225
195d1361
RS
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
2e34157c 228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
195d1361
RS
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
ee0cdb48 275 gl_state.e_property = i->position - gl_state.offset;
195d1361
RS
276 gl_state.forward_i = i;
277 }
278 else
279 {
ee0cdb48 280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
195d1361
RS
281 gl_state.backward_i = i;
282 }
283 }
284}
285\f
6a140a74
RS
286/* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
289
290static int
6a140a74
RS
291char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
195d1361
RS
293{
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
6a140a74
RS
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
195d1361 300
6a140a74 301 while (bytepos >= beg)
195d1361 302 {
6a140a74
RS
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
195d1361 310 }
6a140a74
RS
311
312 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
313 return quoted;
314}
6a140a74
RS
315
316/* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319INLINE int
320inc_bytepos (bytepos)
321 int bytepos;
322{
ef316cf0
RS
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
6a140a74
RS
326 INC_POS (bytepos);
327 return bytepos;
328}
329
330/* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333INLINE int
334dec_bytepos (bytepos)
335 int bytepos;
336{
ef316cf0
RS
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
6a140a74
RS
340 DEC_POS (bytepos);
341 return bytepos;
342}
195d1361 343\f
7cc80f0a
RS
344/* Return a defun-start position before before POS and not too far before.
345 It should be the last one before POS, or nearly the last.
346
347 When open_paren_in_column_0_is_defun_start is nonzero,
348 the beginning of every line is treated as a defun-start.
349
350 We record the information about where the scan started
351 and what its result was, so that another call in the same area
352 can return the same value very quickly.
195d1361
RS
353
354 There is no promise at which position the global syntax data is
355 valid on return from the subroutine, so the caller should explicitly
356 update the global data. */
37bef230
RS
357
358static int
6a140a74
RS
359find_defun_start (pos, pos_byte)
360 int pos, pos_byte;
37bef230 361{
6a140a74 362 int opoint = PT, opoint_byte = PT_BYTE;
37bef230
RS
363
364 /* Use previous finding, if it's valid and applies to this inquiry. */
365 if (current_buffer == find_start_buffer
366 /* Reuse the defun-start even if POS is a little farther on.
367 POS might be in the next defun, but that's ok.
368 Our value may not be the best possible, but will still be usable. */
369 && pos <= find_start_pos + 1000
370 && pos >= find_start_value
371 && BEGV == find_start_begv
372 && MODIFF == find_start_modiff)
373 return find_start_value;
374
375 /* Back up to start of line. */
6a140a74 376 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 377
195d1361
RS
378 /* We optimize syntax-table lookup for rare updates. Thus we accept
379 only those `^\s(' which are good in global _and_ text-property
380 syntax-tables. */
381 gl_state.current_syntax_table = current_buffer->syntax_table;
382 gl_state.use_global = 0;
4467eb94 383 if (open_paren_in_column_0_is_defun_start)
37bef230 384 {
4467eb94 385 while (PT > BEGV)
195d1361 386 {
4467eb94
GM
387 /* Open-paren at start of line means we may have found our
388 defun-start. */
389 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
390 {
391 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
392 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
393 break;
394 /* Now fallback to the default value. */
395 gl_state.current_syntax_table = current_buffer->syntax_table;
396 gl_state.use_global = 0;
397 }
398 /* Move to beg of previous line. */
399 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
195d1361 400 }
37bef230
RS
401 }
402
403 /* Record what we found, for the next try. */
6a140a74
RS
404 find_start_value = PT;
405 find_start_value_byte = PT_BYTE;
37bef230
RS
406 find_start_buffer = current_buffer;
407 find_start_modiff = MODIFF;
408 find_start_begv = BEGV;
409 find_start_pos = pos;
410
6a140a74
RS
411 TEMP_SET_PT_BOTH (opoint, opoint_byte);
412
37bef230
RS
413 return find_start_value;
414}
415\f
f902a008
RS
416/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
417
418static int
419prev_char_comend_first (pos, pos_byte)
420 int pos, pos_byte;
421{
422 int c, val;
423
424 DEC_BOTH (pos, pos_byte);
425 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
426 c = FETCH_CHAR (pos_byte);
427 val = SYNTAX_COMEND_FIRST (c);
428 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
429 return val;
430}
431
432/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
433
3f679f55
SM
434/* static int
435 * prev_char_comstart_first (pos, pos_byte)
436 * int pos, pos_byte;
437 * {
438 * int c, val;
439 *
440 * DEC_BOTH (pos, pos_byte);
441 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
442 * c = FETCH_CHAR (pos_byte);
443 * val = SYNTAX_COMSTART_FIRST (c);
444 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
445 * return val;
446 * } */
f902a008 447
6a140a74
RS
448/* Checks whether charpos FROM is at the end of a comment.
449 FROM_BYTE is the bytepos corresponding to FROM.
450 Do not move back before STOP.
451
452 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
453 return -1 otherwise.
454
455 If successful, store the charpos of the comment's beginning
456 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
457
458 Global syntax data remains valid for backward search starting at
459 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
460
461static int
95ff8dfc 462back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
6a140a74 463 int from, from_byte, stop;
95ff8dfc 464 int comnested, comstyle;
6a140a74 465 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
466{
467 /* Look back, counting the parity of string-quotes,
468 and recording the comment-starters seen.
469 When we reach a safe place, assume that's not in a string;
470 then step the main scan to the earliest comment-starter seen
471 an even number of string quotes away from the safe place.
472
473 OFROM[I] is position of the earliest comment-starter seen
474 which is I+2X quotes from the comment-end.
475 PARITY is current parity of quotes from the comment end. */
3ee5041c 476 int string_style = -1; /* Presumed outside of any string. */
195d1361 477 int string_lossage = 0;
f7c436c1
SM
478 /* Not a real lossage: indicates that we have passed a matching comment
479 starter plus an non-matching comment-ender, meaning that any matching
480 comment-starter we might see later could be a false positive (hidden
481 inside another comment).
482 Test case: { a (* b } c (* d *) */
483 int comment_lossage = 0;
195d1361 484 int comment_end = from;
6a140a74 485 int comment_end_byte = from_byte;
195d1361 486 int comstart_pos = 0;
6a140a74 487 int comstart_byte;
eb35b628
RS
488 /* Place where the containing defun starts,
489 or 0 if we didn't come across it yet. */
490 int defun_start = 0;
491 int defun_start_byte = 0;
195d1361 492 register enum syntaxcode code;
95ff8dfc 493 int nesting = 1; /* current comment nesting */
ea315ed6 494 int c;
3f679f55
SM
495 int syntax = 0;
496
497 /* FIXME: A }} comment-ender style leads to incorrect behavior
498 in the case of {{ c }}} because we ignore the last two chars which are
499 assumed to be comment-enders although they aren't. */
195d1361
RS
500
501 /* At beginning of range to scan, we're outside of strings;
502 that determines quote parity to the comment-end. */
503 while (from != stop)
504 {
3f679f55
SM
505 int temp_byte, prev_syntax;
506 int com2start, com2end;
6a140a74 507
195d1361 508 /* Move back and examine a character. */
6a140a74 509 DEC_BOTH (from, from_byte);
195d1361
RS
510 UPDATE_SYNTAX_TABLE_BACKWARD (from);
511
3f679f55 512 prev_syntax = syntax;
6a140a74 513 c = FETCH_CHAR (from_byte);
3f679f55 514 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
515 code = SYNTAX (c);
516
3f679f55
SM
517 /* Check for 2-char comment markers. */
518 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
519 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
520 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
521 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
522 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
523 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
524 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
525
526 /* Nasty cases with overlapping 2-char comment markers:
527 - snmp-mode: -- c -- foo -- c --
528 --- c --
529 ------ c --
530 - c-mode: *||*
531 |* *|* *|
532 |*| |* |*|
533 /// */
534
535 /* If a 2-char comment sequence partly overlaps with another,
536 we don't try to be clever. */
537 if (from > stop && (com2end || com2start))
195d1361 538 {
3f679f55
SM
539 int next = from, next_byte = from_byte, next_c, next_syntax;
540 DEC_BOTH (next, next_byte);
541 UPDATE_SYNTAX_TABLE_BACKWARD (next);
542 next_c = FETCH_CHAR (next_byte);
543 next_syntax = SYNTAX_WITH_FLAGS (next_c);
544 if (((com2start || comnested)
545 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
546 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
547 || ((com2end || comnested)
548 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
549 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
550 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
551 goto lossage;
552 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 553 }
3f679f55
SM
554
555 if (com2start && comstart_pos == 0)
556 /* We're looking at a comment starter. But it might be a comment
557 ender as well (see snmp-mode). The first time we see one, we
558 need to consider it as a comment starter,
559 and the subsequent times as a comment ender. */
560 com2end = 0;
561
562 /* Turn a 2-char comment sequences into the appropriate syntax. */
563 if (com2end)
564 code = Sendcomment;
565 else if (com2start)
566 code = Scomment;
567 /* Ignore comment starters of a different style. */
568 else if (code == Scomment
569 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
570 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 571 continue;
195d1361 572
9828a477
KH
573 /* Ignore escaped characters, except comment-enders. */
574 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
575 continue;
576
3ee5041c 577 switch (code)
195d1361 578 {
3ee5041c
SM
579 case Sstring_fence:
580 case Scomment_fence:
581 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
582 case Sstring:
583 /* Track parity of quotes. */
584 if (string_style == -1)
585 /* Entering a string. */
586 string_style = c;
587 else if (string_style == c)
588 /* Leaving the string. */
589 string_style = -1;
590 else
591 /* If we have two kinds of string delimiters.
592 There's no way to grok this scanning backwards. */
195d1361 593 string_lossage = 1;
3ee5041c
SM
594 break;
595
596 case Scomment:
597 /* We've already checked that it is the relevant comstyle. */
f7c436c1 598 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
599 /* There are odd string quotes involved, so let's be careful.
600 Test case in Pascal: " { " a { " } */
601 goto lossage;
602
f7c436c1
SM
603 if (!comnested)
604 {
605 /* Record best comment-starter so far. */
606 comstart_pos = from;
607 comstart_byte = from_byte;
608 }
609 else if (--nesting <= 0)
95ff8dfc
RS
610 /* nested comments have to be balanced, so we don't need to
611 keep looking for earlier ones. We use here the same (slightly
612 incorrect) reasoning as below: since it is followed by uniform
613 paired string quotes, this comment-start has to be outside of
614 strings, else the comment-end itself would be inside a string. */
615 goto done;
3ee5041c
SM
616 break;
617
02010917 618 case Sendcomment:
3f679f55
SM
619 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
620 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
621 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
622 /* This is the same style of comment ender as ours. */
623 {
624 if (comnested)
625 nesting++;
626 else
627 /* Anything before that can't count because it would match
628 this comment-ender rather than ours. */
629 from = stop; /* Break out of the loop. */
630 }
f7c436c1
SM
631 else if (comstart_pos != 0 || c != '\n')
632 /* We're mixing comment styles here, so we'd better be careful.
633 The (comstart_pos != 0 || c != '\n') check is not quite correct
634 (we should just always set comment_lossage), but removing it
635 would imply that any multiline comment in C would go through
636 lossage, which seems overkill.
637 The failure should only happen in the rare cases such as
638 { (* } *) */
639 comment_lossage = 1;
02010917 640 break;
195d1361 641
02010917
SM
642 case Sopen:
643 /* Assume a defun-start point is outside of strings. */
644 if (open_paren_in_column_0_is_defun_start
645 && (from == stop
646 || (temp_byte = dec_bytepos (from_byte),
647 FETCH_CHAR (temp_byte) == '\n')))
648 {
649 defun_start = from;
650 defun_start_byte = from_byte;
651 from = stop; /* Break out of the loop. */
652 }
eb35b628 653 break;
02010917
SM
654
655 default:
f7c436c1 656 break;
eb35b628 657 }
195d1361
RS
658 }
659
660 if (comstart_pos == 0)
661 {
662 from = comment_end;
6a140a74 663 from_byte = comment_end_byte;
195d1361
RS
664 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
665 }
f7c436c1
SM
666 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
667 or `done'), then we've found the beginning of the non-nested comment. */
668 else if (1) /* !comnested */
195d1361
RS
669 {
670 from = comstart_pos;
6a140a74 671 from_byte = comstart_byte;
195d1361
RS
672 /* Globals are correct now. */
673 }
674 else
675 {
3ee5041c
SM
676 struct lisp_parse_state state;
677 lossage:
195d1361
RS
678 /* We had two kinds of string delimiters mixed up
679 together. Decode this going forwards.
02010917 680 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
681 to the one in question; this records where we
682 last passed a comment starter. */
eb35b628
RS
683 /* If we did not already find the defun start, find it now. */
684 if (defun_start == 0)
685 {
686 defun_start = find_defun_start (comment_end, comment_end_byte);
687 defun_start_byte = find_start_value_byte;
688 }
02010917 689 do
195d1361 690 {
02010917
SM
691 scan_sexps_forward (&state,
692 defun_start, defun_start_byte,
693 comment_end, -10000, 0, Qnil, 0);
694 defun_start = comment_end;
695 if (state.incomment == (comnested ? 1 : -1)
696 && state.comstyle == comstyle)
697 from = state.comstr_start;
698 else
699 {
700 from = comment_end;
701 if (state.incomment)
702 /* If comment_end is inside some other comment, maybe ours
703 is nested, so we need to try again from within the
704 surrounding comment. Example: { a (* " *) */
705 {
706 /* FIXME: We should advance by one or two chars. */
707 defun_start = state.comstr_start + 2;
708 defun_start_byte = CHAR_TO_BYTE (defun_start);
709 }
710 }
711 } while (defun_start < comment_end);
712
6a140a74 713 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
714 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
715 }
716
95ff8dfc 717 done:
6a140a74
RS
718 *charpos_ptr = from;
719 *bytepos_ptr = from_byte;
720
1aa963c8 721 return (from == comment_end) ? -1 : from;
195d1361
RS
722}
723\f
8489eb67 724DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
2203e1e8 725 "Return t if OBJECT is a syntax table.\n\
8ea151b2 726Currently, any char-table counts as a syntax table.")
2203e1e8
EN
727 (object)
728 Lisp_Object object;
8489eb67 729{
2203e1e8 730 if (CHAR_TABLE_P (object)
e704cb4b 731 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
732 return Qt;
733 return Qnil;
734}
735
8ea151b2 736static void
8489eb67
RS
737check_syntax_table (obj)
738 Lisp_Object obj;
739{
d1be9f0f 740 if (!(CHAR_TABLE_P (obj)
e704cb4b 741 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 742 wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
743}
744
8489eb67
RS
745DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
746 "Return the current syntax table.\n\
747This is the one specified by the current buffer.")
748 ()
749{
750 return current_buffer->syntax_table;
751}
752
753DEFUN ("standard-syntax-table", Fstandard_syntax_table,
754 Sstandard_syntax_table, 0, 0, 0,
755 "Return the standard syntax table.\n\
756This is the one used for new buffers.")
757 ()
758{
759 return Vstandard_syntax_table;
760}
761
762DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
763 "Construct a new syntax table and return it.\n\
764It is a copy of the TABLE, which defaults to the standard syntax table.")
765 (table)
766 Lisp_Object table;
767{
8ea151b2
RS
768 Lisp_Object copy;
769
265a9e55 770 if (!NILP (table))
8ea151b2
RS
771 check_syntax_table (table);
772 else
773 table = Vstandard_syntax_table;
774
775 copy = Fcopy_sequence (table);
0f867324
RS
776
777 /* Only the standard syntax table should have a default element.
778 Other syntax tables should inherit from parents instead. */
779 XCHAR_TABLE (copy)->defalt = Qnil;
780
781 /* Copied syntax tables should all have parents.
782 If we copied one with no parent, such as the standard syntax table,
783 use the standard syntax table as the copy's parent. */
784 if (NILP (XCHAR_TABLE (copy)->parent))
785 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 786 return copy;
8489eb67
RS
787}
788
789DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
790 "Select a new syntax table for the current buffer.\n\
791One argument, a syntax table.")
792 (table)
793 Lisp_Object table;
794{
4e4dfa78 795 int idx;
8ea151b2 796 check_syntax_table (table);
8489eb67
RS
797 current_buffer->syntax_table = table;
798 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
799 idx = PER_BUFFER_VAR_IDX (syntax_table);
800 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
801 return table;
802}
803\f
804/* Convert a letter which signifies a syntax code
805 into the code it signifies.
195d1361 806 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
807
808unsigned char syntax_spec_code[0400] =
809 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
810 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
811 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
812 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 813 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
814 (char) Smath, 0377, 0377, (char) Squote,
815 (char) Sopen, (char) Sclose, 0377, 0377,
816 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377,
819 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 820 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
823 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
824 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
825 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
826 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 827 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
828 };
829
195d1361 830/* Indexed by syntax code, give the letter that describes it. */
8489eb67 831
195d1361 832char syntax_code_spec[16] =
8489eb67 833 {
195d1361
RS
834 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
835 '!', '|'
8489eb67 836 };
93da5fff
KH
837
838/* Indexed by syntax code, give the object (cons of syntax code and
839 nil) to be stored in syntax table. Since these objects can be
840 shared among syntax tables, we generate them in advance. By
841 sharing objects, the function `describe-syntax' can give a more
842 compact listing. */
843static Lisp_Object Vsyntax_code_object;
844
8489eb67 845\f
8ea151b2
RS
846/* Look up the value for CHARACTER in syntax table TABLE's parent
847 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
848 for CHARACTER. It's actually used only when not compiled with GCC. */
849
850Lisp_Object
851syntax_parent_lookup (table, character)
852 Lisp_Object table;
853 int character;
854{
855 Lisp_Object value;
856
857 while (1)
858 {
859 table = XCHAR_TABLE (table)->parent;
860 if (NILP (table))
861 return Qnil;
862
863 value = XCHAR_TABLE (table)->contents[character];
864 if (!NILP (value))
865 return value;
866 }
867}
868
8489eb67 869DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
2203e1e8
EN
870 "Return the syntax code of CHARACTER, described by a character.\n\
871For example, if CHARACTER is a word constituent,\n\
872the character `w' is returned.\n\
8489eb67
RS
873The characters that correspond to various syntax codes\n\
874are listed in the documentation of `modify-syntax-entry'.")
2203e1e8
EN
875 (character)
876 Lisp_Object character;
8489eb67 877{
8ea151b2 878 int char_int;
195d1361
RS
879 gl_state.current_syntax_table = current_buffer->syntax_table;
880
881 gl_state.use_global = 0;
2203e1e8
EN
882 CHECK_NUMBER (character, 0);
883 char_int = XINT (character);
8ea151b2 884 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
885}
886
887DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
2203e1e8
EN
888 "Return the matching parenthesis of CHARACTER, or nil if none.")
889 (character)
890 Lisp_Object character;
beefa22e 891{
8ea151b2 892 int char_int, code;
195d1361
RS
893 gl_state.current_syntax_table = current_buffer->syntax_table;
894 gl_state.use_global = 0;
2203e1e8
EN
895 CHECK_NUMBER (character, 0);
896 char_int = XINT (character);
8ea151b2 897 code = SYNTAX (char_int);
a8bd7cd8 898 if (code == Sopen || code == Sclose)
2e34157c 899 return SYNTAX_MATCH (char_int);
beefa22e 900 return Qnil;
8489eb67
RS
901}
902
c65adb44
SM
903DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
904 "Convert a syntax specification STRING into syntax cell form.\n\
905STRING should be a string as it is allowed as argument of\n\
906`modify-syntax-entry'. Value is the equivalent cons cell\n\
907\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'\n\
908text property.")
909 (string)
910 Lisp_Object string;
8489eb67 911{
8ea151b2 912 register unsigned char *p;
8489eb67 913 register enum syntaxcode code;
247e20a8 914 int val;
8ea151b2 915 Lisp_Object match;
8489eb67 916
c65adb44 917 CHECK_STRING (string, 0);
8489eb67 918
c65adb44 919 p = XSTRING (string)->data;
8489eb67
RS
920 code = (enum syntaxcode) syntax_spec_code[*p++];
921 if (((int) code & 0377) == 0377)
2e34157c 922 error ("invalid syntax description letter: %c", p[-1]);
8489eb67 923
8ea151b2 924 if (code == Sinherit)
c65adb44 925 return Qnil;
8ea151b2
RS
926
927 if (*p)
d1be9f0f 928 {
93da5fff 929 int len;
7c19e8e1 930 int character = (STRING_CHAR_AND_LENGTH
c65adb44 931 (p, STRING_BYTES (XSTRING (string)) - 1, len));
93da5fff 932 XSETINT (match, character);
d1be9f0f
RS
933 if (XFASTINT (match) == ' ')
934 match = Qnil;
93da5fff 935 p += len;
d1be9f0f
RS
936 }
937 else
8ea151b2 938 match = Qnil;
8489eb67 939
8ea151b2 940 val = (int) code;
8489eb67
RS
941 while (*p)
942 switch (*p++)
943 {
944 case '1':
247e20a8 945 val |= 1 << 16;
8489eb67
RS
946 break;
947
948 case '2':
247e20a8 949 val |= 1 << 17;
8489eb67
RS
950 break;
951
952 case '3':
247e20a8 953 val |= 1 << 18;
8489eb67
RS
954 break;
955
956 case '4':
247e20a8 957 val |= 1 << 19;
8489eb67
RS
958 break;
959
960 case 'p':
247e20a8 961 val |= 1 << 20;
8489eb67 962 break;
e5d4f4dc
RS
963
964 case 'b':
247e20a8 965 val |= 1 << 21;
e5d4f4dc 966 break;
95ff8dfc
RS
967
968 case 'n':
969 val |= 1 << 22;
970 break;
8489eb67
RS
971 }
972
93da5fff 973 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 974 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
975 else
976 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
977 return Fcons (make_number (val), match);
978}
979
980/* This comment supplies the doc string for modify-syntax-entry,
981 for make-docfile to see. We cannot put this in the real DEFUN
982 due to limits in the Unix cpp.
983
984DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
985 "Set syntax for character CHAR according to string S.\n\
986The syntax is changed only for table TABLE, which defaults to\n\
987 the current buffer's syntax table.\n\
988The first character of S should be one of the following:\n\
989 Space or - whitespace syntax. w word constituent.\n\
990 _ symbol constituent. . punctuation.\n\
991 ( open-parenthesis. ) close-parenthesis.\n\
992 \" string quote. \\ escape.\n\
993 $ paired delimiter. ' expression quote or prefix operator.\n\
994 < comment starter. > comment ender.\n\
995 / character-quote. @ inherit from `standard-syntax-table'.\n\
996 | generic string fence. ! generic comment fence.\n\
997\n\
998Only single-character comment start and end sequences are represented thus.\n\
999Two-character sequences are represented as described below.\n\
1000The second character of S is the matching parenthesis,\n\
1001 used only if the first character is `(' or `)'.\n\
1002Any additional characters are flags.\n\
1003Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
1004 1 means CHAR is the start of a two-char comment start sequence.\n\
1005 2 means CHAR is the second character of such a sequence.\n\
1006 3 means CHAR is the start of a two-char comment end sequence.\n\
1007 4 means CHAR is the second character of such a sequence.\n\
1008\n\
1009There can be up to two orthogonal comment sequences. This is to support\n\
1010language modes such as C++. By default, all comment sequences are of style\n\
1011a, but you can set the comment sequence style to b (on the second character\n\
1012of a comment-start, or the first character of a comment-end sequence) using\n\
1013this flag:\n\
1014 b means CHAR is part of comment sequence b.\n\
1015 n means CHAR is part of a nestable comment sequence.\n\
1016\n\
1017 p means CHAR is a prefix character for `backward-prefix-chars';\n\
1018 such characters are treated as whitespace when they occur\n\
1019 between expressions.")
1020 (char, s, table)
1021*/
1022
1023DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1024 /* I really don't know why this is interactive
1025 help-form should at least be made useful whilst reading the second arg
1026 */
1027 "cSet syntax for character: \nsSet syntax for %s to: ",
1028 0 /* See immediately above */)
1029 (c, newentry, syntax_table)
1030 Lisp_Object c, newentry, syntax_table;
1031{
1032 CHECK_NUMBER (c, 0);
1033
1034 if (NILP (syntax_table))
1035 syntax_table = current_buffer->syntax_table;
1036 else
1037 check_syntax_table (syntax_table);
8489eb67 1038
c65adb44 1039 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
8489eb67
RS
1040 return Qnil;
1041}
1042\f
1043/* Dump syntax table to buffer in human-readable format */
1044
340f92b5 1045static void
8489eb67
RS
1046describe_syntax (value)
1047 Lisp_Object value;
1048{
1049 register enum syntaxcode code;
e6365855 1050 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1051 char str[2];
8ea151b2 1052 Lisp_Object first, match_lisp;
8489eb67
RS
1053
1054 Findent_to (make_number (16), make_number (1));
1055
8ea151b2
RS
1056 if (NILP (value))
1057 {
93da5fff 1058 insert_string ("default\n");
8ea151b2
RS
1059 return;
1060 }
1061
908b7fea
KH
1062 if (CHAR_TABLE_P (value))
1063 {
1064 insert_string ("deeper char-table ...\n");
1065 return;
1066 }
1067
8ea151b2
RS
1068 if (!CONSP (value))
1069 {
93da5fff 1070 insert_string ("invalid\n");
8ea151b2
RS
1071 return;
1072 }
1073
c1d497be
KR
1074 first = XCAR (value);
1075 match_lisp = XCDR (value);
8ea151b2
RS
1076
1077 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1078 {
93da5fff 1079 insert_string ("invalid\n");
8489eb67
RS
1080 return;
1081 }
1082
e704cb4b 1083 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1084 start1 = (XINT (first) >> 16) & 1;
1085 start2 = (XINT (first) >> 17) & 1;
1086 end1 = (XINT (first) >> 18) & 1;
1087 end2 = (XINT (first) >> 19) & 1;
1088 prefix = (XINT (first) >> 20) & 1;
1089 comstyle = (XINT (first) >> 21) & 1;
e6365855 1090 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1091
1092 if ((int) code < 0 || (int) code >= (int) Smax)
1093 {
1094 insert_string ("invalid");
1095 return;
1096 }
1097 desc = syntax_code_spec[(int) code];
1098
1099 str[0] = desc, str[1] = 0;
1100 insert (str, 1);
1101
93da5fff
KH
1102 if (NILP (match_lisp))
1103 insert (" ", 1);
1104 else
1105 insert_char (XINT (match_lisp));
8489eb67 1106
8489eb67
RS
1107 if (start1)
1108 insert ("1", 1);
1109 if (start2)
1110 insert ("2", 1);
1111
1112 if (end1)
1113 insert ("3", 1);
1114 if (end2)
1115 insert ("4", 1);
1116
1117 if (prefix)
1118 insert ("p", 1);
e5d4f4dc
RS
1119 if (comstyle)
1120 insert ("b", 1);
e6365855
SM
1121 if (comnested)
1122 insert ("n", 1);
8489eb67
RS
1123
1124 insert_string ("\twhich means: ");
1125
0220c518 1126 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1127 {
1128 case Swhitespace:
1129 insert_string ("whitespace"); break;
1130 case Spunct:
1131 insert_string ("punctuation"); break;
1132 case Sword:
1133 insert_string ("word"); break;
1134 case Ssymbol:
1135 insert_string ("symbol"); break;
1136 case Sopen:
1137 insert_string ("open"); break;
1138 case Sclose:
1139 insert_string ("close"); break;
1140 case Squote:
d671382e 1141 insert_string ("prefix"); break;
8489eb67
RS
1142 case Sstring:
1143 insert_string ("string"); break;
1144 case Smath:
1145 insert_string ("math"); break;
1146 case Sescape:
1147 insert_string ("escape"); break;
1148 case Scharquote:
1149 insert_string ("charquote"); break;
1150 case Scomment:
1151 insert_string ("comment"); break;
1152 case Sendcomment:
1153 insert_string ("endcomment"); break;
d671382e
SM
1154 case Sinherit:
1155 insert_string ("inherit"); break;
1156 case Scomment_fence:
1157 insert_string ("comment fence"); break;
1158 case Sstring_fence:
1159 insert_string ("string fence"); break;
8489eb67
RS
1160 default:
1161 insert_string ("invalid");
1162 return;
1163 }
1164
8ea151b2 1165 if (!NILP (match_lisp))
8489eb67
RS
1166 {
1167 insert_string (", matches ");
8ea151b2 1168 insert_char (XINT (match_lisp));
8489eb67
RS
1169 }
1170
1171 if (start1)
1172 insert_string (",\n\t is the first character of a comment-start sequence");
1173 if (start2)
1174 insert_string (",\n\t is the second character of a comment-start sequence");
1175
1176 if (end1)
1177 insert_string (",\n\t is the first character of a comment-end sequence");
1178 if (end2)
1179 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1180 if (comstyle)
1181 insert_string (" (comment style b)");
e6365855
SM
1182 if (comnested)
1183 insert_string (" (nestable)");
e5d4f4dc 1184
8489eb67
RS
1185 if (prefix)
1186 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1187
1188 insert_string ("\n");
1189}
1190
340f92b5 1191static Lisp_Object
8489eb67
RS
1192describe_syntax_1 (vector)
1193 Lisp_Object vector;
1194{
1195 struct buffer *old = current_buffer;
1196 set_buffer_internal (XBUFFER (Vstandard_output));
e41819d4 1197 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
93da5fff
KH
1198 while (! NILP (XCHAR_TABLE (vector)->parent))
1199 {
1200 vector = XCHAR_TABLE (vector)->parent;
1201 insert_string ("\nThe parent syntax table is:");
e41819d4
RS
1202 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1203 (int *) 0, 0);
93da5fff
KH
1204 }
1205
36cd82fe 1206 call0 (intern ("help-mode"));
8489eb67
RS
1207 set_buffer_internal (old);
1208 return Qnil;
1209}
1210
1211DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1212 "Describe the syntax specifications in the syntax table.\n\
1213The descriptions are inserted in a buffer, which is then displayed.")
1214 ()
1215{
1216 internal_with_output_to_temp_buffer
1217 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1218
1219 return Qnil;
1220}
1221\f
195d1361
RS
1222int parse_sexp_ignore_comments;
1223
8489eb67
RS
1224/* Return the position across COUNT words from FROM.
1225 If that many words cannot be found before the end of the buffer, return 0.
1226 COUNT negative means scan backward and stop at word beginning. */
1227
dfcf069d 1228int
8489eb67
RS
1229scan_words (from, count)
1230 register int from, count;
1231{
1232 register int beg = BEGV;
1233 register int end = ZV;
6a140a74 1234 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1235 register enum syntaxcode code;
1236 int ch0, ch1;
8489eb67
RS
1237
1238 immediate_quit = 1;
1239 QUIT;
1240
195d1361
RS
1241 SETUP_SYNTAX_TABLE (from, count);
1242
8489eb67
RS
1243 while (count > 0)
1244 {
1245 while (1)
1246 {
1247 if (from == end)
1248 {
1249 immediate_quit = 0;
1250 return 0;
1251 }
195d1361 1252 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1253 ch0 = FETCH_CHAR (from_byte);
93da5fff 1254 code = SYNTAX (ch0);
6a140a74 1255 INC_BOTH (from, from_byte);
8489eb67
RS
1256 if (words_include_escapes
1257 && (code == Sescape || code == Scharquote))
1258 break;
1259 if (code == Sword)
1260 break;
8489eb67 1261 }
93da5fff
KH
1262 /* Now CH0 is a character which begins a word and FROM is the
1263 position of the next character. */
8489eb67
RS
1264 while (1)
1265 {
1266 if (from == end) break;
195d1361 1267 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1268 ch1 = FETCH_CHAR (from_byte);
93da5fff 1269 code = SYNTAX (ch1);
8489eb67
RS
1270 if (!(words_include_escapes
1271 && (code == Sescape || code == Scharquote)))
93da5fff 1272 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1273 break;
6a140a74 1274 INC_BOTH (from, from_byte);
93da5fff 1275 ch0 = ch1;
8489eb67
RS
1276 }
1277 count--;
1278 }
1279 while (count < 0)
1280 {
1281 while (1)
1282 {
1283 if (from == beg)
1284 {
1285 immediate_quit = 0;
1286 return 0;
1287 }
6a140a74 1288 DEC_BOTH (from, from_byte);
195d1361 1289 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1290 ch1 = FETCH_CHAR (from_byte);
93da5fff 1291 code = SYNTAX (ch1);
8489eb67
RS
1292 if (words_include_escapes
1293 && (code == Sescape || code == Scharquote))
1294 break;
1295 if (code == Sword)
1296 break;
8489eb67 1297 }
93da5fff
KH
1298 /* Now CH1 is a character which ends a word and FROM is the
1299 position of it. */
8489eb67
RS
1300 while (1)
1301 {
6a140a74
RS
1302 int temp_byte;
1303
1304 if (from == beg)
1305 break;
1306 temp_byte = dec_bytepos (from_byte);
195d1361 1307 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1308 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1309 code = SYNTAX (ch0);
8489eb67
RS
1310 if (!(words_include_escapes
1311 && (code == Sescape || code == Scharquote)))
93da5fff 1312 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1313 break;
6a140a74 1314 DEC_BOTH (from, from_byte);
93da5fff 1315 ch1 = ch0;
8489eb67
RS
1316 }
1317 count++;
1318 }
1319
1320 immediate_quit = 0;
1321
1322 return from;
1323}
1324
1325DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1326 "Move point forward ARG words (backward if ARG is negative).\n\
1327Normally returns t.\n\
2c6ea900 1328If an edge of the buffer or a field boundary is reached, point is left there\n\
e6d8341f
GM
1329and the function returns nil. Field boundaries are not noticed if\n\
1330`inhibit-field-text-motion' is non-nil.")
8489eb67
RS
1331 (count)
1332 Lisp_Object count;
1333{
2c6ea900 1334 int orig_val, val;
8489eb67
RS
1335 CHECK_NUMBER (count, 0);
1336
2c6ea900
RS
1337 val = orig_val = scan_words (PT, XINT (count));
1338 if (! orig_val)
1339 val = XINT (count) > 0 ? ZV : BEGV;
5878ee6f 1340
b8855607 1341 /* Avoid jumping out of an input field. */
64aa4eb1 1342 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
070fe99c 1343 Qt, Qnil, Qnil));
64aa4eb1 1344
8489eb67 1345 SET_PT (val);
e6d8341f 1346 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1347}
1348\f
195d1361
RS
1349Lisp_Object skip_chars ();
1350
1351DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1352 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1353STRING is like the inside of a `[...]' in a regular expression\n\
74cf0beb
RS
1354except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1355 (but not as the end of a range; quoting is never needed there).\n\
195d1361
RS
1356Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1357With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1358Returns the distance traveled, either zero or positive.")
1359 (string, lim)
1360 Lisp_Object string, lim;
1361{
1362 return skip_chars (1, 0, string, lim);
1363}
1364
1365DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1366 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1367See `skip-chars-forward' for details.\n\
1368Returns the distance traveled, either zero or negative.")
1369 (string, lim)
1370 Lisp_Object string, lim;
1371{
1372 return skip_chars (0, 0, string, lim);
1373}
1374
1375DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1376 "Move point forward across chars in specified syntax classes.\n\
1377SYNTAX is a string of syntax code characters.\n\
1378Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1379If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1380This function returns the distance traveled, either zero or positive.")
1381 (syntax, lim)
1382 Lisp_Object syntax, lim;
1383{
1384 return skip_chars (1, 1, syntax, lim);
1385}
1386
1387DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1388 "Move point backward across chars in specified syntax classes.\n\
1389SYNTAX is a string of syntax code characters.\n\
1390Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1391If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1392This function returns the distance traveled, either zero or negative.")
1393 (syntax, lim)
1394 Lisp_Object syntax, lim;
1395{
1396 return skip_chars (0, 1, syntax, lim);
1397}
1398
6a140a74 1399static Lisp_Object
195d1361
RS
1400skip_chars (forwardp, syntaxp, string, lim)
1401 int forwardp, syntaxp;
1402 Lisp_Object string, lim;
1403{
195d1361
RS
1404 register unsigned int c;
1405 unsigned char fastmap[0400];
7e68b0ea 1406 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1407 of which codes don't fit in FASTMAP. In that case, set the
1408 ranges of characters in CHAR_RANGES. */
5df4982e 1409 int *char_ranges;
7e68b0ea 1410 int n_char_ranges = 0;
195d1361 1411 int negate = 0;
4101e6fe 1412 register int i, i_byte;
7e68b0ea 1413 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1414 int string_multibyte;
1415 int size_byte;
82d497fc
KH
1416 unsigned char *str;
1417 int len;
195d1361
RS
1418
1419 CHECK_STRING (string, 0);
5df4982e 1420 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1674d9a2 1421 string_multibyte = STRING_MULTIBYTE (string);
82d497fc 1422 str = XSTRING (string)->data;
1674d9a2 1423 size_byte = STRING_BYTES (XSTRING (string));
195d1361 1424
82d497fc
KH
1425 /* Adjust the multibyteness of the string to that of the buffer. */
1426 if (multibyte != string_multibyte)
1427 {
1428 int nbytes;
1429
1430 if (multibyte)
1431 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1432 XSTRING (string)->size);
1433 else
1434 nbytes = XSTRING (string)->size;
1435 if (nbytes != size_byte)
1436 {
1437 str = (unsigned char *) alloca (nbytes);
e39091b6 1438 copy_text (XSTRING (string)->data, str, size_byte,
82d497fc
KH
1439 string_multibyte, multibyte);
1440 size_byte = nbytes;
1441 }
1442 }
1443
195d1361
RS
1444 if (NILP (lim))
1445 XSETINT (lim, forwardp ? ZV : BEGV);
1446 else
6a140a74 1447 CHECK_NUMBER_COERCE_MARKER (lim, 0);
195d1361
RS
1448
1449 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1450 if (XINT (lim) > ZV)
1451 XSETFASTINT (lim, ZV);
1452 if (XINT (lim) < BEGV)
1453 XSETFASTINT (lim, BEGV);
1454
195d1361
RS
1455 bzero (fastmap, sizeof fastmap);
1456
82d497fc 1457 i_byte = 0;
4101e6fe 1458
13090112 1459 if (i_byte < size_byte
4101e6fe 1460 && XSTRING (string)->data[0] == '^')
195d1361 1461 {
82d497fc 1462 negate = 1; i_byte++;
195d1361
RS
1463 }
1464
1465 /* Find the characters specified and set their elements of fastmap.
1466 If syntaxp, each character counts as itself.
1467 Otherwise, handle backslashes and ranges specially. */
1468
13090112 1469 while (i_byte < size_byte)
195d1361 1470 {
82d497fc
KH
1471 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1472 i_byte += len;
4101e6fe 1473
195d1361 1474 if (syntaxp)
4101e6fe 1475 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1476 else
1477 {
1478 if (c == '\\')
1479 {
13090112 1480 if (i_byte == size_byte)
4101e6fe
RS
1481 break;
1482
82d497fc
KH
1483 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1484 i_byte += len;
195d1361 1485 }
13090112 1486 if (i_byte < size_byte
82d497fc 1487 && str[i_byte] == '-')
195d1361 1488 {
9690d026 1489 unsigned int c2;
4101e6fe
RS
1490
1491 /* Skip over the dash. */
82d497fc 1492 i_byte++;
4101e6fe 1493
13090112 1494 if (i_byte == size_byte)
4101e6fe
RS
1495 break;
1496
1497 /* Get the end of the range. */
82d497fc
KH
1498 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1499 i_byte += len;
7e68b0ea 1500
4101e6fe 1501 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1502 {
1503 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1504 {
7ae339a7
KH
1505 /* Handle a range starting with a character of
1506 less than 256, and ending with a character of
1507 not less than 256. Split that into two
1508 ranges, the low one ending at 0377, and the
1509 high one starting at the smallest character
1510 in the charset of C2 and ending at C2. */
e39091b6
KH
1511 int charset = CHAR_CHARSET (c2);
1512 int c1 = MAKE_CHAR (charset, 0, 0);
1513
e39091b6
KH
1514 char_ranges[n_char_ranges++] = c1;
1515 char_ranges[n_char_ranges++] = c2;
465cf84c 1516 c2 = 0377;
e39091b6 1517 }
06274af5
KH
1518 while (c <= c2)
1519 {
1520 fastmap[c] = 1;
1521 c++;
1522 }
1523 }
9690d026 1524 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1525 {
9690d026
KH
1526 char_ranges[n_char_ranges++] = c;
1527 char_ranges[n_char_ranges++] = c2;
195d1361 1528 }
195d1361
RS
1529 }
1530 else
7e68b0ea 1531 {
e39091b6
KH
1532 if (SINGLE_BYTE_CHAR_P (c))
1533 fastmap[c] = 1;
1534 else
7e68b0ea 1535 {
4101e6fe
RS
1536 char_ranges[n_char_ranges++] = c;
1537 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1538 }
1539 }
195d1361
RS
1540 }
1541 }
1542
9690d026 1543 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1544 if (negate)
1545 for (i = 0; i < sizeof fastmap; i++)
9690d026 1546 fastmap[i] ^= 1;
195d1361
RS
1547
1548 {
1549 int start_point = PT;
1550 int pos = PT;
6a140a74 1551 int pos_byte = PT_BYTE;
195d1361
RS
1552
1553 immediate_quit = 1;
1554 if (syntaxp)
1555 {
1556 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1557 if (forwardp)
1558 {
7e68b0ea 1559 if (multibyte)
195d1361 1560 {
ee0cdb48
KH
1561 if (pos < XINT (lim))
1562 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1563 {
1564 /* Since we already checked for multibyteness,
1565 avoid using INC_BOTH which checks again. */
1566 INC_POS (pos_byte);
1567 pos++;
1568 if (pos >= XINT (lim))
1569 break;
1570 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1571 }
7e68b0ea
RS
1572 }
1573 else
1574 {
1575 while (pos < XINT (lim)
1576 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1577 {
1578 pos++;
1579 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1580 }
195d1361
RS
1581 }
1582 }
1583 else
1584 {
7e68b0ea 1585 if (multibyte)
195d1361 1586 {
7e68b0ea
RS
1587 while (pos > XINT (lim))
1588 {
6a140a74 1589 int savepos = pos_byte;
ef316cf0
RS
1590 /* Since we already checked for multibyteness,
1591 avoid using DEC_BOTH which checks again. */
1592 pos--;
1593 DEC_POS (pos_byte);
4c8535eb 1594 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
6a140a74 1595 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
7e68b0ea 1596 {
6a140a74
RS
1597 pos++;
1598 pos_byte = savepos;
7e68b0ea
RS
1599 break;
1600 }
7e68b0ea
RS
1601 }
1602 }
1603 else
1604 {
ee0cdb48
KH
1605 if (pos > XINT (lim))
1606 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1607 {
1608 pos--;
1609 if (pos <= XINT (lim))
4c8535eb 1610 break;
ee0cdb48
KH
1611 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1612 }
195d1361
RS
1613 }
1614 }
1615 }
1616 else
1617 {
1618 if (forwardp)
1619 {
7e68b0ea 1620 if (multibyte)
9690d026 1621 while (pos < XINT (lim))
7e68b0ea 1622 {
9690d026
KH
1623 c = FETCH_MULTIBYTE_CHAR (pos_byte);
1624 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1625 {
9690d026
KH
1626 if (!fastmap[c])
1627 break;
1628 }
1629 else
1630 {
1631 /* If we are looking at a multibyte character,
1632 we must look up the character in the table
1633 CHAR_RANGES. If there's no data in the
1634 table, that character is not what we want to
1635 skip. */
1636
e39091b6
KH
1637 /* The following code do the right thing even if
1638 n_char_ranges is zero (i.e. no data in
1639 CHAR_RANGES). */
7e68b0ea 1640 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1641 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1642 break;
1643 if (!(negate ^ (i < n_char_ranges)))
1644 break;
7e68b0ea 1645 }
e39091b6 1646 INC_BOTH (pos, pos_byte);
7e68b0ea
RS
1647 }
1648 else
1649 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1650 pos++;
195d1361
RS
1651 }
1652 else
1653 {
7e68b0ea
RS
1654 if (multibyte)
1655 while (pos > XINT (lim))
1656 {
e39091b6
KH
1657 int prev_pos_byte = pos_byte;
1658
1659 DEC_POS (prev_pos_byte);
9690d026
KH
1660 c = FETCH_MULTIBYTE_CHAR (prev_pos_byte);
1661 if (SINGLE_BYTE_CHAR_P (c))
1662 {
1663 if (!fastmap[c])
1664 break;
1665 }
1666 else
7e68b0ea 1667 {
9690d026 1668 /* See the comment in the previous similar code. */
e39091b6
KH
1669 for (i = 0; i < n_char_ranges; i += 2)
1670 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1671 break;
1672 if (!(negate ^ (i < n_char_ranges)))
1673 break;
7e68b0ea 1674 }
e39091b6
KH
1675 pos--;
1676 pos_byte = prev_pos_byte;
7e68b0ea
RS
1677 }
1678 else
1679 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1680 pos--;
195d1361
RS
1681 }
1682 }
7e68b0ea 1683
6a140a74
RS
1684#if 0 /* Not needed now that a position in mid-character
1685 cannot be specified in Lisp. */
7e68b0ea
RS
1686 if (multibyte
1687 /* INC_POS or DEC_POS might have moved POS over LIM. */
1688 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1689 pos = XINT (lim);
6a140a74 1690#endif
7e68b0ea 1691
6a140a74
RS
1692 if (! multibyte)
1693 pos_byte = pos;
1694
1695 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1696 immediate_quit = 0;
1697
1698 return make_number (PT - start_point);
1699 }
1700}
1701\f
95ff8dfc
RS
1702/* Jump over a comment, assuming we are at the beginning of one.
1703 FROM is the current position.
1704 FROM_BYTE is the bytepos corresponding to FROM.
1705 Do not move past STOP (a charpos).
1706 The comment over which we have to jump is of style STYLE
1707 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1708 NESTING should be positive to indicate the nesting at the beginning
1709 for nested comments and should be zero or negative else.
1710 ST_COMMENT_STYLE cannot be nested.
1711 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1712 (or 0 If the search cannot start in the middle of a two-character).
1713
1714 If successful, return 1 and store the charpos of the comment's end
1715 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1716 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1717 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1718 (as defined for state.incomment) in *INCOMMENT_PTR.
1719
1720 The comment end is the last character of the comment rather than the
1721 character just after the comment.
1722
1723 Global syntax data is assumed to initially be valid for FROM and
1724 remains valid for forward search starting at the returned position. */
1725
1726static int
1727forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1728 charpos_ptr, bytepos_ptr, incomment_ptr)
1729 int from, from_byte, stop;
1730 int nesting, style, prev_syntax;
1731 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1732{
1733 register int c, c1;
1734 register enum syntaxcode code;
1735 register int syntax;
1736
1737 if (nesting <= 0) nesting = -1;
1738
1739 /* Enter the loop in the middle so that we find
1740 a 2-char comment ender if we start in the middle of it. */
1741 syntax = prev_syntax;
1742 if (syntax != 0) goto forw_incomment;
1743
1744 while (1)
1745 {
1746 if (from == stop)
1747 {
1748 *incomment_ptr = nesting;
1749 *charpos_ptr = from;
1750 *bytepos_ptr = from_byte;
1751 return 0;
1752 }
1753 c = FETCH_CHAR (from_byte);
1754 syntax = SYNTAX_WITH_FLAGS (c);
1755 code = syntax & 0xff;
1756 if (code == Sendcomment
1757 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1758 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1759 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1760 /* we have encountered a comment end of the same style
1761 as the comment sequence which began this comment
1762 section */
1763 break;
1764 if (code == Scomment_fence
1765 && style == ST_COMMENT_STYLE)
1766 /* we have encountered a comment end of the same style
1767 as the comment sequence which began this comment
1768 section. */
1769 break;
1770 if (nesting > 0
1771 && code == Scomment
abf8a9ff 1772 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1773 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1774 /* we have encountered a nested comment of the same style
1775 as the comment sequence which began this comment section */
1776 nesting++;
1777 INC_BOTH (from, from_byte);
1778 UPDATE_SYNTAX_TABLE_FORWARD (from);
1779
1780 forw_incomment:
1781 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1782 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1783 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1784 SYNTAX_COMEND_SECOND (c1))
1785 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1786 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1787 {
1788 if (--nesting <= 0)
1789 /* we have encountered a comment end of the same style
1790 as the comment sequence which began this comment
1791 section */
1792 break;
1793 else
1794 {
1795 INC_BOTH (from, from_byte);
1796 UPDATE_SYNTAX_TABLE_FORWARD (from);
1797 }
1798 }
95ff8dfc
RS
1799 if (nesting > 0
1800 && from < stop
1801 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1802 && (c1 = FETCH_CHAR (from_byte),
1803 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1804 && SYNTAX_COMSTART_SECOND (c1))
1805 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1806 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1807 /* we have encountered a nested comment of the same style
1808 as the comment sequence which began this comment
1809 section */
1810 {
1811 INC_BOTH (from, from_byte);
1812 UPDATE_SYNTAX_TABLE_FORWARD (from);
1813 nesting++;
1814 }
1815 }
1816 *charpos_ptr = from;
1817 *bytepos_ptr = from_byte;
1818 return 1;
1819}
1820
b3cfe0c8
RS
1821DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1822 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 1823Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 1824Set point to where scanning stops.\n\
b3cfe0c8
RS
1825If N comments are found as expected, with nothing except whitespace\n\
1826between them, return t; otherwise return nil.")
1827 (count)
840f481c 1828 Lisp_Object count;
b3cfe0c8
RS
1829{
1830 register int from;
6a140a74 1831 int from_byte;
b3cfe0c8 1832 register int stop;
8ea151b2 1833 register int c, c1;
b3cfe0c8
RS
1834 register enum syntaxcode code;
1835 int comstyle = 0; /* style of comment encountered */
95ff8dfc 1836 int comnested = 0; /* whether the comment is nestable or not */
be720845 1837 int found;
840f481c 1838 int count1;
6a140a74 1839 int out_charpos, out_bytepos;
95ff8dfc 1840 int dummy;
840f481c
RS
1841
1842 CHECK_NUMBER (count, 0);
1843 count1 = XINT (count);
195d1361 1844 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1845
1846 immediate_quit = 1;
1847 QUIT;
1848
1849 from = PT;
6a140a74 1850 from_byte = PT_BYTE;
b3cfe0c8 1851
195d1361 1852 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1853 while (count1 > 0)
b3cfe0c8 1854 {
04882296 1855 do
b3cfe0c8 1856 {
f902a008
RS
1857 int comstart_first;
1858
04882296
KH
1859 if (from == stop)
1860 {
ef316cf0 1861 SET_PT_BOTH (from, from_byte);
b7e6e612 1862 immediate_quit = 0;
04882296
KH
1863 return Qnil;
1864 }
6a140a74 1865 c = FETCH_CHAR (from_byte);
b3cfe0c8 1866 code = SYNTAX (c);
f902a008 1867 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 1868 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 1869 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 1870 INC_BOTH (from, from_byte);
f902a008 1871 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 1872 if (from < stop && comstart_first
6a140a74 1873 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 1874 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1875 {
7fc8191e
RS
1876 /* We have encountered a comment start sequence and we
1877 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1878 the comment style this sequence begins so that later,
1879 only a comment end of the same style actually ends
7fc8191e 1880 the comment section. */
b3cfe0c8 1881 code = Scomment;
8ea151b2 1882 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 1883 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 1884 INC_BOTH (from, from_byte);
95ff8dfc 1885 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 1886 }
04882296 1887 }
abf8a9ff 1888 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 1889
da71ab91
KH
1890 if (code == Scomment_fence)
1891 comstyle = ST_COMMENT_STYLE;
1892 else if (code != Scomment)
04882296
KH
1893 {
1894 immediate_quit = 0;
6a140a74 1895 DEC_BOTH (from, from_byte);
ef316cf0 1896 SET_PT_BOTH (from, from_byte);
04882296
KH
1897 return Qnil;
1898 }
1899 /* We're at the start of a comment. */
95ff8dfc
RS
1900 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1901 &out_charpos, &out_bytepos, &dummy);
1902 from = out_charpos; from_byte = out_bytepos;
1903 if (!found)
04882296 1904 {
95ff8dfc
RS
1905 immediate_quit = 0;
1906 SET_PT_BOTH (from, from_byte);
1907 return Qnil;
b3cfe0c8 1908 }
95ff8dfc
RS
1909 INC_BOTH (from, from_byte);
1910 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 1911 /* We have skipped one comment. */
840f481c 1912 count1--;
b3cfe0c8
RS
1913 }
1914
840f481c 1915 while (count1 < 0)
b3cfe0c8 1916 {
b9145dbb 1917 while (1)
b3cfe0c8 1918 {
c65adb44 1919 int quoted;
f902a008 1920
b9145dbb
RS
1921 if (from <= stop)
1922 {
6a140a74 1923 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
1924 immediate_quit = 0;
1925 return Qnil;
1926 }
b3cfe0c8 1927
6a140a74 1928 DEC_BOTH (from, from_byte);
f902a008 1929 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 1930 quoted = char_quoted (from, from_byte);
b3cfe0c8 1931 if (quoted)
195d1361 1932 {
6a140a74
RS
1933 DEC_BOTH (from, from_byte);
1934 goto leave;
195d1361 1935 }
6a140a74 1936 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
1937 code = SYNTAX (c);
1938 comstyle = 0;
95ff8dfc 1939 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
1940 if (code == Sendcomment)
1941 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 1942 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 1943 && prev_char_comend_first (from, from_byte)
89c6809a 1944 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 1945 {
7fc8191e 1946 /* We must record the comment style encountered so that
b3cfe0c8 1947 later, we can match only the proper comment begin
7fc8191e 1948 sequence of the same style. */
6a140a74 1949 DEC_BOTH (from, from_byte);
f902a008
RS
1950 code = Sendcomment;
1951 /* Calling char_quoted, above, set up global syntax position
1952 at the new value of FROM. */
95ff8dfc
RS
1953 c1 = FETCH_CHAR (from_byte);
1954 comstyle = SYNTAX_COMMENT_STYLE (c1);
1955 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
1956 }
1957
195d1361
RS
1958 if (code == Scomment_fence)
1959 {
1960 /* Skip until first preceding unquoted comment_fence. */
6a140a74 1961 int found = 0, ini = from, ini_byte = from_byte;
195d1361 1962
6a140a74 1963 while (1)
195d1361 1964 {
6a140a74
RS
1965 DEC_BOTH (from, from_byte);
1966 if (from == stop)
1967 break;
195d1361 1968 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
1969 c = FETCH_CHAR (from_byte);
1970 if (SYNTAX (c) == Scomment_fence
1971 && !char_quoted (from, from_byte))
195d1361
RS
1972 {
1973 found = 1;
1974 break;
1975 }
1976 }
1977 if (found == 0)
1978 {
1979 from = ini; /* Set point to ini + 1. */
6a140a74 1980 from_byte = ini_byte;
195d1361
RS
1981 goto leave;
1982 }
1983 }
1984 else if (code == Sendcomment)
b3cfe0c8 1985 {
95ff8dfc 1986 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 1987 &out_charpos, &out_bytepos);
1aa963c8
SM
1988 if (found == -1)
1989 {
abf8a9ff
SM
1990 if (c == '\n')
1991 /* This end-of-line is not an end-of-comment.
1992 Treat it like a whitespace.
1993 CC-mode (and maybe others) relies on this behavior. */
1994 ;
1995 else
1996 {
1997 /* Failure: we should go back to the end of this
1998 not-quite-endcomment. */
1999 if (SYNTAX(c) != code)
2000 /* It was a two-char Sendcomment. */
2001 INC_BOTH (from, from_byte);
2002 goto leave;
2003 }
1aa963c8 2004 }
2b927d02 2005 else
abf8a9ff
SM
2006 {
2007 /* We have skipped one comment. */
2008 from = out_charpos, from_byte = out_bytepos;
2009 break;
2010 }
b3cfe0c8 2011 }
abf8a9ff 2012 else if (code != Swhitespace)
b3cfe0c8 2013 {
195d1361 2014 leave:
b3cfe0c8 2015 immediate_quit = 0;
6a140a74 2016 INC_BOTH (from, from_byte);
ef316cf0 2017 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2018 return Qnil;
2019 }
2020 }
2021
840f481c 2022 count1++;
b3cfe0c8
RS
2023 }
2024
ef316cf0 2025 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2026 immediate_quit = 0;
2027 return Qt;
2028}
2029\f
bd25db08 2030/* Return syntax code of character C if C is a single byte character
db9f48c3 2031 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2032
2033#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2034 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2035 ? SYNTAX (c) : Ssymbol)
2036
6a140a74 2037static Lisp_Object
8489eb67
RS
2038scan_lists (from, count, depth, sexpflag)
2039 register int from;
2040 int count, depth, sexpflag;
2041{
2042 Lisp_Object val;
195d1361 2043 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2044 register int c, c1;
2045 int stringterm;
8489eb67
RS
2046 int quoted;
2047 int mathexit = 0;
93da5fff 2048 register enum syntaxcode code, temp_code;
195d1361 2049 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2050 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2051 int comnested = 0; /* whether the comment is nestable or not */
93da5fff 2052 int temp_pos;
7bf5e9e4 2053 int last_good = from;
195d1361 2054 int found;
c3907a7e 2055 int from_byte;
6a140a74 2056 int out_bytepos, out_charpos;
95ff8dfc 2057 int temp, dummy;
bd25db08 2058 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2059
2060 if (depth > 0) min_depth = 0;
2061
c3907a7e
RS
2062 if (from > ZV) from = ZV;
2063 if (from < BEGV) from = BEGV;
2064
2065 from_byte = CHAR_TO_BYTE (from);
2066
8489eb67
RS
2067 immediate_quit = 1;
2068 QUIT;
2069
195d1361 2070 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2071 while (count > 0)
2072 {
8489eb67
RS
2073 while (from < stop)
2074 {
f902a008 2075 int comstart_first, prefix;
195d1361 2076 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2077 c = FETCH_CHAR (from_byte);
bd25db08 2078 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2079 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2080 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2081 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2082 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2083 if (depth == min_depth)
2084 last_good = from;
6a140a74 2085 INC_BOTH (from, from_byte);
195d1361 2086 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2087 if (from < stop && comstart_first
6a140a74 2088 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
8489eb67 2089 && parse_sexp_ignore_comments)
e5d4f4dc
RS
2090 {
2091 /* we have encountered a comment start sequence and we
195d1361 2092 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2093 the comment style this sequence begins so that later,
2094 only a comment end of the same style actually ends
2095 the comment section */
2096 code = Scomment;
95ff8dfc
RS
2097 c1 = FETCH_CHAR (from_byte);
2098 comstyle = SYNTAX_COMMENT_STYLE (c1);
2099 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2100 INC_BOTH (from, from_byte);
f902a008 2101 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc
RS
2102 }
2103
f902a008 2104 if (prefix)
8489eb67
RS
2105 continue;
2106
0220c518 2107 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2108 {
2109 case Sescape:
2110 case Scharquote:
2111 if (from == stop) goto lose;
6a140a74 2112 INC_BOTH (from, from_byte);
8489eb67
RS
2113 /* treat following character as a word constituent */
2114 case Sword:
2115 case Ssymbol:
2116 if (depth || !sexpflag) break;
195d1361 2117 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2118 while (from < stop)
2119 {
195d1361 2120 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2121
2122 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2123 c = FETCH_CHAR (from_byte);
2124 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2125 switch (temp)
8489eb67
RS
2126 {
2127 case Scharquote:
2128 case Sescape:
6a140a74 2129 INC_BOTH (from, from_byte);
8489eb67
RS
2130 if (from == stop) goto lose;
2131 break;
2132 case Sword:
2133 case Ssymbol:
2134 case Squote:
2135 break;
2136 default:
2137 goto done;
2138 }
6a140a74 2139 INC_BOTH (from, from_byte);
8489eb67
RS
2140 }
2141 goto done;
2142
195d1361 2143 case Scomment_fence:
95ff8dfc
RS
2144 comstyle = ST_COMMENT_STYLE;
2145 /* FALLTHROUGH */
2146 case Scomment:
8489eb67 2147 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2148 UPDATE_SYNTAX_TABLE_FORWARD (from);
2149 found = forw_comment (from, from_byte, stop,
2150 comnested, comstyle, 0,
2151 &out_charpos, &out_bytepos, &dummy);
2152 from = out_charpos, from_byte = out_bytepos;
2153 if (!found)
8489eb67 2154 {
95ff8dfc
RS
2155 if (depth == 0)
2156 goto done;
2157 goto lose;
8489eb67 2158 }
95ff8dfc
RS
2159 INC_BOTH (from, from_byte);
2160 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2161 break;
2162
2163 case Smath:
2164 if (!sexpflag)
2165 break;
6a140a74
RS
2166 if (from != stop && c == FETCH_CHAR (from_byte))
2167 {
2168 INC_BOTH (from, from_byte);
2169 }
8489eb67
RS
2170 if (mathexit)
2171 {
2172 mathexit = 0;
2173 goto close1;
2174 }
2175 mathexit = 1;
2176
2177 case Sopen:
2178 if (!++depth) goto done;
2179 break;
2180
2181 case Sclose:
2182 close1:
2183 if (!--depth) goto done;
2184 if (depth < min_depth)
7bf5e9e4
RS
2185 Fsignal (Qscan_error,
2186 Fcons (build_string ("Containing expression ends prematurely"),
2187 Fcons (make_number (last_good),
2188 Fcons (make_number (from), Qnil))));
8489eb67
RS
2189 break;
2190
2191 case Sstring:
195d1361 2192 case Sstring_fence:
ef316cf0 2193 temp_pos = dec_bytepos (from_byte);
93da5fff 2194 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2195 while (1)
2196 {
2197 if (from >= stop) goto lose;
195d1361 2198 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2199 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2200 if (code == Sstring
2201 ? (c == stringterm
2202 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2203 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2204 break;
7c7ca3d2
RS
2205
2206 /* Some compilers can't handle this inside the switch. */
bd25db08 2207 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2208 switch (temp)
8489eb67
RS
2209 {
2210 case Scharquote:
2211 case Sescape:
6a140a74 2212 INC_BOTH (from, from_byte);
8489eb67 2213 }
6a140a74 2214 INC_BOTH (from, from_byte);
8489eb67 2215 }
6a140a74 2216 INC_BOTH (from, from_byte);
8489eb67
RS
2217 if (!depth && sexpflag) goto done;
2218 break;
2219 }
2220 }
2221
2222 /* Reached end of buffer. Error if within object, return nil if between */
2223 if (depth) goto lose;
2224
2225 immediate_quit = 0;
2226 return Qnil;
2227
2228 /* End of object reached */
2229 done:
2230 count--;
2231 }
2232
2233 while (count < 0)
2234 {
8489eb67
RS
2235 while (from > stop)
2236 {
6a140a74 2237 DEC_BOTH (from, from_byte);
195d1361 2238 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2239 c = FETCH_CHAR (from_byte);
bd25db08 2240 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2241 if (depth == min_depth)
2242 last_good = from;
7fc8191e 2243 comstyle = 0;
95ff8dfc 2244 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2245 if (code == Sendcomment)
2246 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2247 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2248 && prev_char_comend_first (from, from_byte)
8489eb67 2249 && parse_sexp_ignore_comments)
e5d4f4dc 2250 {
f902a008 2251 /* We must record the comment style encountered so that
e5d4f4dc 2252 later, we can match only the proper comment begin
f902a008 2253 sequence of the same style. */
6a140a74 2254 DEC_BOTH (from, from_byte);
f902a008
RS
2255 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2256 code = Sendcomment;
95ff8dfc
RS
2257 c1 = FETCH_CHAR (from_byte);
2258 comstyle = SYNTAX_COMMENT_STYLE (c1);
2259 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc
RS
2260 }
2261
9828a477 2262 /* Quoting turns anything except a comment-ender
e6365855 2263 into a word character. Note that this cannot be true
f902a008 2264 if we decremented FROM in the if-statement above. */
9828a477
KH
2265 if (code != Sendcomment && char_quoted (from, from_byte))
2266 code = Sword;
2267 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2268 continue;
2269
9828a477 2270 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2271 {
2272 case Sword:
2273 case Ssymbol:
9828a477
KH
2274 case Sescape:
2275 case Scharquote:
8489eb67 2276 if (depth || !sexpflag) break;
195d1361
RS
2277 /* This word counts as a sexp; count object finished
2278 after passing it. */
8489eb67
RS
2279 while (from > stop)
2280 {
6a140a74 2281 temp_pos = from_byte;
ef316cf0
RS
2282 if (! NILP (current_buffer->enable_multibyte_characters))
2283 DEC_POS (temp_pos);
2284 else
2285 temp_pos--;
6a140a74 2286 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2287 c1 = FETCH_CHAR (temp_pos);
bd25db08 2288 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2289 /* Don't allow comment-end to be quoted. */
2290 if (temp_code == Sendcomment)
2291 goto done2;
6a140a74 2292 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2293 if (quoted)
93da5fff 2294 {
6a140a74 2295 DEC_BOTH (from, from_byte);
ef316cf0 2296 temp_pos = dec_bytepos (temp_pos);
6a140a74 2297 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2298 }
2299 c1 = FETCH_CHAR (temp_pos);
bd25db08 2300 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2301 if (! (quoted || temp_code == Sword
2302 || temp_code == Ssymbol
2303 || temp_code == Squote))
8489eb67 2304 goto done2;
6a140a74 2305 DEC_BOTH (from, from_byte);
8489eb67
RS
2306 }
2307 goto done2;
2308
2309 case Smath:
2310 if (!sexpflag)
2311 break;
ef316cf0 2312 temp_pos = dec_bytepos (from_byte);
6a140a74 2313 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2314 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2315 DEC_BOTH (from, from_byte);
8489eb67
RS
2316 if (mathexit)
2317 {
2318 mathexit = 0;
2319 goto open2;
2320 }
2321 mathexit = 1;
2322
2323 case Sclose:
2324 if (!++depth) goto done2;
2325 break;
2326
2327 case Sopen:
2328 open2:
2329 if (!--depth) goto done2;
2330 if (depth < min_depth)
7bf5e9e4
RS
2331 Fsignal (Qscan_error,
2332 Fcons (build_string ("Containing expression ends prematurely"),
2333 Fcons (make_number (last_good),
2334 Fcons (make_number (from), Qnil))));
8489eb67
RS
2335 break;
2336
2337 case Sendcomment:
2338 if (!parse_sexp_ignore_comments)
2339 break;
95ff8dfc 2340 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2341 &out_charpos, &out_bytepos);
1aa963c8
SM
2342 /* FIXME: if found == -1, then it really wasn't a comment-end.
2343 For single-char Sendcomment, we can't do much about it apart
2344 from skipping the char.
2345 For 2-char endcomments, we could try again, taking both
2346 chars as separate entities, but it's a lot of trouble
2347 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2348 if (found != -1)
2349 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2350 break;
2351
195d1361
RS
2352 case Scomment_fence:
2353 case Sstring_fence:
2354 while (1)
2355 {
6a140a74 2356 DEC_BOTH (from, from_byte);
195d1361
RS
2357 if (from == stop) goto lose;
2358 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2359 if (!char_quoted (from, from_byte)
bd25db08
KH
2360 && (c = FETCH_CHAR (from_byte),
2361 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2362 break;
2363 }
2364 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2365 break;
2366
8489eb67 2367 case Sstring:
6a140a74 2368 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2369 while (1)
2370 {
2371 if (from == stop) goto lose;
6a140a74 2372 temp_pos = from_byte;
ef316cf0
RS
2373 if (! NILP (current_buffer->enable_multibyte_characters))
2374 DEC_POS (temp_pos);
2375 else
2376 temp_pos--;
6a140a74
RS
2377 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2378 if (!char_quoted (from - 1, temp_pos)
42ebfa31
SM
2379 && stringterm == (c = FETCH_CHAR (temp_pos))
2380 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2381 break;
6a140a74 2382 DEC_BOTH (from, from_byte);
8489eb67 2383 }
6a140a74 2384 DEC_BOTH (from, from_byte);
8489eb67
RS
2385 if (!depth && sexpflag) goto done2;
2386 break;
2387 }
2388 }
2389
2390 /* Reached start of buffer. Error if within object, return nil if between */
2391 if (depth) goto lose;
2392
2393 immediate_quit = 0;
2394 return Qnil;
2395
2396 done2:
2397 count++;
2398 }
2399
2400
2401 immediate_quit = 0;
1e142fb7 2402 XSETFASTINT (val, from);
8489eb67
RS
2403 return val;
2404
2405 lose:
7bf5e9e4
RS
2406 Fsignal (Qscan_error,
2407 Fcons (build_string ("Unbalanced parentheses"),
2408 Fcons (make_number (last_good),
2409 Fcons (make_number (from), Qnil))));
2410
8489eb67
RS
2411 /* NOTREACHED */
2412}
2413
8489eb67
RS
2414DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2415 "Scan from character number FROM by COUNT lists.\n\
2416Returns the character number of the position thus found.\n\
2417\n\
2418If DEPTH is nonzero, paren depth begins counting from that value,\n\
2419only places where the depth in parentheses becomes zero\n\
2420are candidates for stopping; COUNT such places are counted.\n\
2421Thus, a positive value for DEPTH means go out levels.\n\
2422\n\
2423Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2424\n\
2425If the beginning or end of (the accessible part of) the buffer is reached\n\
2426and the depth is wrong, an error is signaled.\n\
2427If the depth is right but the count is not used up, nil is returned.")
2428 (from, count, depth)
2429 Lisp_Object from, count, depth;
2430{
2431 CHECK_NUMBER (from, 0);
2432 CHECK_NUMBER (count, 1);
2433 CHECK_NUMBER (depth, 2);
2434
2435 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2436}
2437
2438DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2439 "Scan from character number FROM by COUNT balanced expressions.\n\
2440If COUNT is negative, scan backwards.\n\
2441Returns the character number of the position thus found.\n\
2442\n\
2443Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2444\n\
2445If the beginning or end of (the accessible part of) the buffer is reached\n\
2446in the middle of a parenthetical grouping, an error is signaled.\n\
2447If the beginning or end is reached between groupings\n\
2448but before count is used up, nil is returned.")
2449 (from, count)
2450 Lisp_Object from, count;
2451{
2452 CHECK_NUMBER (from, 0);
2453 CHECK_NUMBER (count, 1);
2454
2455 return scan_lists (XINT (from), XINT (count), 0, 1);
2456}
2457
2458DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2459 0, 0, 0,
2460 "Move point backward over any number of chars with prefix syntax.\n\
2461This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2462 ()
2463{
2464 int beg = BEGV;
6a140a74
RS
2465 int opoint = PT;
2466 int opoint_byte = PT_BYTE;
6ec8bbd2 2467 int pos = PT;
6a140a74 2468 int pos_byte = PT_BYTE;
93da5fff 2469 int c;
93da5fff 2470
f902a008 2471 if (pos <= beg)
195d1361 2472 {
f902a008
RS
2473 SET_PT_BOTH (opoint, opoint_byte);
2474
2475 return Qnil;
195d1361 2476 }
8489eb67 2477
f902a008
RS
2478 SETUP_SYNTAX_TABLE (pos, -1);
2479
6a140a74
RS
2480 DEC_BOTH (pos, pos_byte);
2481
1fd3172d 2482 while (!char_quoted (pos, pos_byte)
195d1361 2483 /* Previous statement updates syntax table. */
6a140a74 2484 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2485 || SYNTAX_PREFIX (c)))
2486 {
1fd3172d
RS
2487 opoint = pos;
2488 opoint_byte = pos_byte;
2489
2490 if (pos + 1 > beg)
2491 DEC_BOTH (pos, pos_byte);
93da5fff 2492 }
8489eb67 2493
6a140a74 2494 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2495
2496 return Qnil;
2497}
2498\f
6a140a74 2499/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2500 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2501 and return a description of the state of the parse at END.
c81a3712 2502 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2503 If COMMENTSTOP is 1, stop at the start of a comment.
2504 If COMMENTSTOP is -1, stop at the start or end of a comment,
2505 after the beginning of a string, or after the end of a string. */
8489eb67 2506
340f92b5 2507static void
6a140a74 2508scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2509 stopbefore, oldstate, commentstop)
e5d4f4dc 2510 struct lisp_parse_state *stateptr;
8489eb67
RS
2511 register int from;
2512 int end, targetdepth, stopbefore;
2513 Lisp_Object oldstate;
c81a3712 2514 int commentstop;
8489eb67
RS
2515{
2516 struct lisp_parse_state state;
2517
2518 register enum syntaxcode code;
95ff8dfc
RS
2519 int c1;
2520 int comnested;
8489eb67
RS
2521 struct level { int last, prev; };
2522 struct level levelstart[100];
2523 register struct level *curlevel = levelstart;
2524 struct level *endlevel = levelstart + 100;
8489eb67
RS
2525 register int depth; /* Paren depth of current scanning location.
2526 level - levelstart equals this except
2527 when the depth becomes negative. */
2528 int mindepth; /* Lowest DEPTH value seen. */
2529 int start_quoted = 0; /* Nonzero means starting after a char quote */
2530 Lisp_Object tem;
93da5fff 2531 int prev_from; /* Keep one character before FROM. */
6a140a74 2532 int prev_from_byte;
1fd3172d 2533 int prev_from_syntax;
195d1361
RS
2534 int boundary_stop = commentstop == -1;
2535 int nofence;
95ff8dfc
RS
2536 int found;
2537 int out_bytepos, out_charpos;
7c7ca3d2 2538 int temp;
93da5fff
KH
2539
2540 prev_from = from;
6a140a74
RS
2541 prev_from_byte = from_byte;
2542 if (from != BEGV)
2543 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2544
2545 /* Use this macro instead of `from++'. */
6a140a74
RS
2546#define INC_FROM \
2547do { prev_from = from; \
2548 prev_from_byte = from_byte; \
1fd3172d
RS
2549 prev_from_syntax \
2550 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
ef316cf0 2551 INC_BOTH (from, from_byte); \
1fd3172d 2552 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2553 } while (0)
8489eb67
RS
2554
2555 immediate_quit = 1;
2556 QUIT;
2557
265a9e55 2558 if (NILP (oldstate))
8489eb67
RS
2559 {
2560 depth = 0;
2561 state.instring = -1;
2562 state.incomment = 0;
195d1361
RS
2563 state.comstyle = 0; /* comment style a by default. */
2564 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2565 }
2566 else
2567 {
2568 tem = Fcar (oldstate);
265a9e55 2569 if (!NILP (tem))
8489eb67
RS
2570 depth = XINT (tem);
2571 else
2572 depth = 0;
2573
2574 oldstate = Fcdr (oldstate);
2575 oldstate = Fcdr (oldstate);
2576 oldstate = Fcdr (oldstate);
2577 tem = Fcar (oldstate);
195d1361 2578 /* Check whether we are inside string_fence-style string: */
e76f1c44
GM
2579 state.instring = (!NILP (tem)
2580 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2581 : -1);
8489eb67
RS
2582
2583 oldstate = Fcdr (oldstate);
2584 tem = Fcar (oldstate);
e76f1c44
GM
2585 state.incomment = (!NILP (tem)
2586 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2587 : 0);
8489eb67
RS
2588
2589 oldstate = Fcdr (oldstate);
2590 tem = Fcar (oldstate);
265a9e55 2591 start_quoted = !NILP (tem);
e5d4f4dc 2592
95ff8dfc 2593 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2594 style a. If it is non-nil, we are in comment style b */
2595 oldstate = Fcdr (oldstate);
e5d4f4dc 2596 oldstate = Fcdr (oldstate);
195d1361 2597 tem = Fcar (oldstate);
e76f1c44
GM
2598 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2599 ? ST_COMMENT_STYLE : 1);
195d1361 2600
e5d4f4dc 2601 oldstate = Fcdr (oldstate);
e5d4f4dc 2602 tem = Fcar (oldstate);
195d1361 2603 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2604 oldstate = Fcdr (oldstate);
2605 tem = Fcar (oldstate);
2606 while (!NILP (tem)) /* >= second enclosing sexps. */
2607 {
2608 /* curlevel++->last ran into compiler bug on Apollo */
2609 curlevel->last = XINT (Fcar (tem));
2610 if (++curlevel == endlevel)
02010917 2611 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2612 curlevel->prev = -1;
2613 curlevel->last = -1;
2614 tem = Fcdr (tem);
2615 }
8489eb67
RS
2616 }
2617 state.quoted = 0;
2618 mindepth = depth;
2619
2620 curlevel->prev = -1;
2621 curlevel->last = -1;
2622
326b283b
RS
2623 SETUP_SYNTAX_TABLE (prev_from, 1);
2624 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2625 UPDATE_SYNTAX_TABLE_FORWARD (from);
2626
195d1361 2627 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2628
326b283b
RS
2629 if (state.incomment)
2630 goto startincomment;
8489eb67
RS
2631 if (state.instring >= 0)
2632 {
195d1361 2633 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2634 if (start_quoted)
2635 goto startquotedinstring;
8489eb67
RS
2636 goto startinstring;
2637 }
326b283b
RS
2638 else if (start_quoted)
2639 goto startquoted;
1fd3172d 2640
ccf89641 2641#if 0 /* This seems to be redundant with the identical code above. */
1fd3172d
RS
2642 SETUP_SYNTAX_TABLE (prev_from, 1);
2643 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2644 UPDATE_SYNTAX_TABLE_FORWARD (from);
ccf89641 2645#endif
1fd3172d 2646
8489eb67
RS
2647 while (from < end)
2648 {
93da5fff 2649 INC_FROM;
1fd3172d 2650 code = prev_from_syntax & 0xff;
4c920633 2651
8f9dc2ed 2652 if (code == Scomment)
95ff8dfc 2653 {
d355bd8a 2654 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
95ff8dfc
RS
2655 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2656 1 : -1);
2657 state.comstr_start = prev_from;
2658 }
4c920633 2659 else if (code == Scomment_fence)
e5d4f4dc
RS
2660 {
2661 /* Record the comment style we have entered so that only
2662 the comment-end sequence of the same style actually
2663 terminates the comment section. */
95ff8dfc
RS
2664 state.comstyle = ST_COMMENT_STYLE;
2665 state.incomment = -1;
195d1361 2666 state.comstr_start = prev_from;
e5d4f4dc 2667 code = Scomment;
e5d4f4dc 2668 }
4c920633 2669 else if (from < end)
1fd3172d 2670 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
95ff8dfc
RS
2671 if (c1 = FETCH_CHAR (from_byte),
2672 SYNTAX_COMSTART_SECOND (c1))
1fd3172d 2673 /* Duplicate code to avoid a complex if-expression
4c920633
RS
2674 which causes trouble for the SGI compiler. */
2675 {
2676 /* Record the comment style we have entered so that only
2677 the comment-end sequence of the same style actually
2678 terminates the comment section. */
95ff8dfc
RS
2679 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2680 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2681 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2682 state.incomment = comnested ? 1 : -1;
4c920633 2683 state.comstr_start = prev_from;
95ff8dfc 2684 INC_FROM;
4c920633
RS
2685 code = Scomment;
2686 }
e5d4f4dc 2687
1fd3172d 2688 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2689 continue;
0220c518 2690 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2691 {
2692 case Sescape:
2693 case Scharquote:
2694 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2695 curlevel->last = prev_from;
8489eb67
RS
2696 startquoted:
2697 if (from == end) goto endquoted;
93da5fff 2698 INC_FROM;
8489eb67
RS
2699 goto symstarted;
2700 /* treat following character as a word constituent */
2701 case Sword:
2702 case Ssymbol:
2703 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2704 curlevel->last = prev_from;
8489eb67
RS
2705 symstarted:
2706 while (from < end)
2707 {
7c7ca3d2
RS
2708 /* Some compilers can't handle this inside the switch. */
2709 temp = SYNTAX (FETCH_CHAR (from_byte));
2710 switch (temp)
8489eb67
RS
2711 {
2712 case Scharquote:
2713 case Sescape:
93da5fff 2714 INC_FROM;
8489eb67
RS
2715 if (from == end) goto endquoted;
2716 break;
2717 case Sword:
2718 case Ssymbol:
2719 case Squote:
2720 break;
2721 default:
2722 goto symdone;
2723 }
93da5fff 2724 INC_FROM;
8489eb67
RS
2725 }
2726 symdone:
2727 curlevel->prev = curlevel->last;
2728 break;
2729
2730 case Scomment:
195d1361 2731 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2732 startincomment:
2733 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2734 that we find a 2-char comment ender even if we start in the
e6365855 2735 middle of it. We don't want to do that if we're just at the
d355bd8a 2736 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2737 found = forw_comment (from, from_byte, end,
2738 state.incomment, state.comstyle,
e6365855
SM
2739 (from == BEGV || from < state.comstr_start + 3)
2740 ? 0 : prev_from_syntax,
95ff8dfc
RS
2741 &out_charpos, &out_bytepos, &state.incomment);
2742 from = out_charpos; from_byte = out_bytepos;
2743 /* Beware! prev_from and friends are invalid now.
2744 Luckily, the `done' doesn't use them and the INC_FROM
2745 sets them to a sane value without looking at them. */
2746 if (!found) goto done;
d355bd8a 2747 INC_FROM;
8489eb67 2748 state.incomment = 0;
e5d4f4dc 2749 state.comstyle = 0; /* reset the comment style */
195d1361 2750 if (boundary_stop) goto done;
8489eb67
RS
2751 break;
2752
2753 case Sopen:
2754 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2755 depth++;
2756 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2757 curlevel->last = prev_from;
8489eb67 2758 if (++curlevel == endlevel)
02010917 2759 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2760 curlevel->prev = -1;
2761 curlevel->last = -1;
30844415 2762 if (targetdepth == depth) goto done;
8489eb67
RS
2763 break;
2764
2765 case Sclose:
2766 depth--;
2767 if (depth < mindepth)
2768 mindepth = depth;
2769 if (curlevel != levelstart)
2770 curlevel--;
2771 curlevel->prev = curlevel->last;
30844415 2772 if (targetdepth == depth) goto done;
8489eb67
RS
2773 break;
2774
2775 case Sstring:
195d1361
RS
2776 case Sstring_fence:
2777 state.comstr_start = from - 1;
8489eb67 2778 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2779 curlevel->last = prev_from;
195d1361 2780 state.instring = (code == Sstring
6a140a74 2781 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2782 : ST_STRING_STYLE);
2783 if (boundary_stop) goto done;
8489eb67 2784 startinstring:
195d1361 2785 {
644ea4df 2786 nofence = state.instring != ST_STRING_STYLE;
195d1361 2787
644ea4df
RS
2788 while (1)
2789 {
2790 int c;
195d1361 2791
644ea4df 2792 if (from >= end) goto done;
6a140a74 2793 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2794 /* Some compilers can't handle this inside the switch. */
2795 temp = SYNTAX (c);
ccf89641
KH
2796
2797 /* Check TEMP here so that if the char has
2798 a syntax-table property which says it is NOT
2799 a string character, it does not end the string. */
2800 if (nofence && c == state.instring && temp == Sstring)
2801 break;
2802
7c7ca3d2 2803 switch (temp)
644ea4df
RS
2804 {
2805 case Sstring_fence:
2806 if (!nofence) goto string_end;
2807 break;
2808 case Scharquote:
2809 case Sescape:
2810 INC_FROM;
2811 startquotedinstring:
2812 if (from >= end) goto endquoted;
195d1361 2813 }
644ea4df
RS
2814 INC_FROM;
2815 }
195d1361
RS
2816 }
2817 string_end:
8489eb67
RS
2818 state.instring = -1;
2819 curlevel->prev = curlevel->last;
93da5fff 2820 INC_FROM;
195d1361 2821 if (boundary_stop) goto done;
8489eb67
RS
2822 break;
2823
2824 case Smath:
2825 break;
2826 }
2827 }
2828 goto done;
2829
2830 stop: /* Here if stopping before start of sexp. */
93da5fff 2831 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2832 goto done; /* but return the position before it. */
2833
2834 endquoted:
2835 state.quoted = 1;
2836 done:
2837 state.depth = depth;
2838 state.mindepth = mindepth;
2839 state.thislevelstart = curlevel->prev;
2840 state.prevlevelstart
2841 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2842 state.location = from;
1a102a58
RS
2843 state.levelstarts = Qnil;
2844 while (--curlevel >= levelstart)
2845 state.levelstarts = Fcons (make_number (curlevel->last),
2846 state.levelstarts);
8489eb67
RS
2847 immediate_quit = 0;
2848
e5d4f4dc 2849 *stateptr = state;
8489eb67
RS
2850}
2851
2852/* This comment supplies the doc string for parse-partial-sexp,
2853 for make-docfile to see. We cannot put this in the real DEFUN
2854 due to limits in the Unix cpp.
2855
c81a3712 2856DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
2857 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2858Parsing stops at TO or when certain criteria are met;\n\
2859 point is set to where parsing stops.\n\
2860If fifth arg STATE is omitted or nil,\n\
2861 parsing assumes that FROM is the beginning of a function.\n\
1a102a58 2862Value is a list of ten elements describing final state of parsing:\n\
af50f9e5
RS
2863 0. depth in parens.\n\
2864 1. character address of start of innermost containing list; nil if none.\n\
2865 2. character address of start of last complete sexp terminated.\n\
2866 3. non-nil if inside a string.\n\
195d1361 2867 (it is the character that will terminate the string,\n\
bff37d2a 2868 or t if the string should be terminated by a generic string delimiter.)\n\
95ff8dfc
RS
2869 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2870 else an integer (the current comment nesting).\n\
af50f9e5
RS
2871 5. t if following a quote character.\n\
2872 6. the minimum paren-depth encountered during this scan.\n\
943e731c 2873 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
bff37d2a
RS
2874 should be terminated by a generic comment delimiter.\n\
2875 8. character address of start of comment or string; nil if not in one.\n\
1a102a58 2876 9. Intermediate data for continuation of parsing (subject to change).\n\
8489eb67
RS
2877If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2878in parentheses becomes equal to TARGETDEPTH.\n\
2879Fourth arg STOPBEFORE non-nil means stop when come to\n\
2880 any character that starts a sexp.\n\
bff37d2a
RS
2881Fifth arg STATE is a nine-element list like what this function returns.\n\
2882 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2883 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2884Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
943e731c
DL
2885 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2886 string, or after end of a comment or a string.")
c81a3712 2887 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
2888*/
2889
c81a3712 2890DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 2891 0 /* See immediately above */)
c81a3712
RS
2892 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2893 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2894{
2895 struct lisp_parse_state state;
2896 int target;
2897
265a9e55 2898 if (!NILP (targetdepth))
8489eb67
RS
2899 {
2900 CHECK_NUMBER (targetdepth, 3);
2901 target = XINT (targetdepth);
2902 }
2903 else
2904 target = -100000; /* We won't reach this depth */
2905
2906 validate_region (&from, &to);
6a140a74
RS
2907 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2908 XINT (to),
c81a3712 2909 target, !NILP (stopbefore), oldstate,
195d1361
RS
2910 (NILP (commentstop)
2911 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2912
2913 SET_PT (state.location);
2914
2915 return Fcons (make_number (state.depth),
2916 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2917 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
195d1361
RS
2918 Fcons (state.instring >= 0
2919 ? (state.instring == ST_STRING_STYLE
2920 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
2921 Fcons (state.incomment < 0 ? Qt :
2922 (state.incomment == 0 ? Qnil :
2923 make_number (state.incomment)),
8489eb67 2924 Fcons (state.quoted ? Qt : Qnil,
bff37d2a
RS
2925 Fcons (make_number (state.mindepth),
2926 Fcons ((state.comstyle
2927 ? (state.comstyle == ST_COMMENT_STYLE
2928 ? Qsyntax_table : Qt) :
2929 Qnil),
0beaf54f
DL
2930 Fcons (((state.incomment
2931 || (state.instring >= 0))
bff37d2a
RS
2932 ? make_number (state.comstr_start)
2933 : Qnil),
1a102a58 2934 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
2935}
2936\f
dfcf069d 2937void
8489eb67
RS
2938init_syntax_once ()
2939{
78f9a1f7 2940 register int i, c;
8ea151b2 2941 Lisp_Object temp;
8489eb67 2942
5ebaddf5
RS
2943 /* This has to be done here, before we call Fmake_char_table. */
2944 Qsyntax_table = intern ("syntax-table");
2945 staticpro (&Qsyntax_table);
2946
2947 /* Intern this now in case it isn't already done.
2948 Setting this variable twice is harmless.
2949 But don't staticpro it here--that is done in alloc.c. */
2950 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2951
93da5fff 2952 /* Create objects which can be shared among syntax tables. */
42ebfa31 2953 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
2954 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2955 XVECTOR (Vsyntax_code_object)->contents[i]
2956 = Fcons (make_number (i), Qnil);
2957
5ebaddf5
RS
2958 /* Now we are ready to set up this property, so we can
2959 create syntax tables. */
2960 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2961
93da5fff 2962 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2963
5ebaddf5 2964 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2965
93da5fff 2966 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 2967 for (i = 'a'; i <= 'z'; i++)
8ea151b2 2968 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2969 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 2970 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2971 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
2972 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2973
2974 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2975 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2976
2977 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2978 Fcons (make_number (Sopen), make_number (')')));
2979 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2980 Fcons (make_number (Sclose), make_number ('(')));
2981 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2982 Fcons (make_number (Sopen), make_number (']')));
2983 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2984 Fcons (make_number (Sclose), make_number ('[')));
2985 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2986 Fcons (make_number (Sopen), make_number ('}')));
2987 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2988 Fcons (make_number (Sclose), make_number ('{')));
2989 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2990 Fcons (make_number ((int) Sstring), Qnil));
2991 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2992 Fcons (make_number ((int) Sescape), Qnil));
2993
93da5fff 2994 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 2995 for (i = 0; i < 10; i++)
78f9a1f7
KH
2996 {
2997 c = "_-+*/&|<>="[i];
2998 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2999 }
8489eb67 3000
93da5fff 3001 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3002 for (i = 0; i < 12; i++)
78f9a1f7
KH
3003 {
3004 c = ".,;:?!#@~^'`"[i];
3005 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3006 }
bd25db08
KH
3007
3008 /* All multibyte characters have syntax `word' by default. */
3009 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3010 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3011 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3012}
3013
dfcf069d 3014void
8489eb67
RS
3015syms_of_syntax ()
3016{
3017 Qsyntax_table_p = intern ("syntax-table-p");
3018 staticpro (&Qsyntax_table_p);
3019
93da5fff
KH
3020 staticpro (&Vsyntax_code_object);
3021
7bf5e9e4
RS
3022 Qscan_error = intern ("scan-error");
3023 staticpro (&Qscan_error);
3024 Fput (Qscan_error, Qerror_conditions,
f3be100f 3025 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3026 Fput (Qscan_error, Qerror_message,
3027 build_string ("Scan error"));
3028
8489eb67
RS
3029 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3030 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
3031
195d1361
RS
3032 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3033 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
3034The value of this property should be either a syntax table, or a cons\n\
3035of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
3036syntax code, MATCHCHAR being nil or the character to match (which is\n\
3037relevant only for open/close type.");
3038
8489eb67
RS
3039 words_include_escapes = 0;
3040 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3041 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
3042
bd25db08
KH
3043 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3044 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
3045 multibyte_syntax_as_symbol = 0;
3046
f4ed767f
GM
3047 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3048 &open_paren_in_column_0_is_defun_start,
3049 "Non-nil means an open paren in column 0 denotes the start of a defun.");
3050 open_paren_in_column_0_is_defun_start = 1;
3051
8489eb67
RS
3052 defsubr (&Ssyntax_table_p);
3053 defsubr (&Ssyntax_table);
3054 defsubr (&Sstandard_syntax_table);
3055 defsubr (&Scopy_syntax_table);
3056 defsubr (&Sset_syntax_table);
3057 defsubr (&Schar_syntax);
beefa22e 3058 defsubr (&Smatching_paren);
c65adb44 3059 defsubr (&Sstring_to_syntax);
8489eb67
RS
3060 defsubr (&Smodify_syntax_entry);
3061 defsubr (&Sdescribe_syntax);
3062
3063 defsubr (&Sforward_word);
3064
195d1361
RS
3065 defsubr (&Sskip_chars_forward);
3066 defsubr (&Sskip_chars_backward);
3067 defsubr (&Sskip_syntax_forward);
3068 defsubr (&Sskip_syntax_backward);
3069
b3cfe0c8 3070 defsubr (&Sforward_comment);
8489eb67
RS
3071 defsubr (&Sscan_lists);
3072 defsubr (&Sscan_sexps);
3073 defsubr (&Sbackward_prefix_chars);
3074 defsubr (&Sparse_partial_sexp);
3075}