(mail-signature): Allow expression as value.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b29ff194 2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
195d1361
RS
28
29/* Make syntax table lookup grant data in gl_state. */
30#define SYNTAX_ENTRY_VIA_PROPERTY
31
8489eb67 32#include "syntax.h"
195d1361
RS
33#include "intervals.h"
34
35/* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39#define ST_COMMENT_STYLE (256 + 1)
40#define ST_STRING_STYLE (256 + 2)
93da5fff 41#include "category.h"
8489eb67 42
7bf5e9e4 43Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
44
45int words_include_escapes;
195d1361 46int parse_sexp_lookup_properties;
8489eb67 47
8ea151b2
RS
48/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
49 if not compiled with GCC. No need to mark it, since it is used
50 only very temporarily. */
51Lisp_Object syntax_temp;
52
e5d4f4dc
RS
53/* This is the internal form of the parse state used in parse-partial-sexp. */
54
55struct lisp_parse_state
56 {
195d1361
RS
57 int depth; /* Depth at end of parsing. */
58 int instring; /* -1 if not within string, else desired terminator. */
59 int incomment; /* Nonzero if within a comment at end of parsing. */
60 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
61 int quoted; /* Nonzero if just after an escape char at end of parsing */
62 int thislevelstart; /* Char number of most recent start-of-expression at current level */
63 int prevlevelstart; /* Char number of start of containing expression */
195d1361 64 int location; /* Char number at which parsing stopped. */
e5d4f4dc 65 int mindepth; /* Minimum depth seen while scanning. */
195d1361 66 int comstr_start; /* Position just after last comment/string starter. */
e5d4f4dc
RS
67 };
68\f
37bef230
RS
69/* These variables are a cache for finding the start of a defun.
70 find_start_pos is the place for which the defun start was found.
71 find_start_value is the defun start position found for it.
6a140a74 72 find_start_value_byte is the corresponding byte position.
37bef230
RS
73 find_start_buffer is the buffer it was found in.
74 find_start_begv is the BEGV value when it was found.
75 find_start_modiff is the value of MODIFF when it was found. */
76
77static int find_start_pos;
78static int find_start_value;
6a140a74 79static int find_start_value_byte;
37bef230
RS
80static struct buffer *find_start_buffer;
81static int find_start_begv;
82static int find_start_modiff;
6a140a74
RS
83
84
85static int find_defun_start P_ ((int, int));
86static int back_comment P_ ((int, int, int, int, int *, int *));
87static int char_quoted P_ ((int, int));
88static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
89static Lisp_Object scan_lists P_ ((int, int, int, int));
90static void scan_sexps_forward P_ ((struct lisp_parse_state *,
91 int, int, int, int,
92 int, Lisp_Object, int));
195d1361
RS
93\f
94
95struct gl_state_s gl_state; /* Global state of syntax parser. */
96
97INTERVAL interval_of ();
98#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
99 to scan to property-change. */
100
6a140a74
RS
101/* Update gl_state to an appropriate interval which contains CHARPOS. The
102 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 103 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 104 valid at start, the rest is filled basing on OBJECT.
195d1361 105
6a140a74 106 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
107 direction than the intervals - or in an interval. We update the
108 current syntax-table basing on the property of this interval, and
6a140a74 109 update the interval to start further than CHARPOS - or be
195d1361 110 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 111 charpos to call this subroutine again - or be before/after the
195d1361
RS
112 start/end of OBJECT. */
113
114void
6a140a74
RS
115update_syntax_table (charpos, count, init, object)
116 int charpos, count, init;
195d1361
RS
117 Lisp_Object object;
118{
119 Lisp_Object tmp_table;
120 int cnt = 0, doing_extra = 0, invalidate = 1;
121 INTERVAL i, oldi;
122
123 if (init)
124 {
125 gl_state.start = gl_state.b_property;
126 gl_state.stop = gl_state.e_property;
6a140a74 127 gl_state.forward_i = interval_of (charpos, object);
195d1361
RS
128 i = gl_state.backward_i = gl_state.forward_i;
129 gl_state.left_ok = gl_state.right_ok = 1;
130 invalidate = 0;
131 if (NULL_INTERVAL_P (i))
132 return;
d80f4cc7
RS
133 /* interval_of () updates only ->position of the return value,
134 update the parents manually to speed up update_interval. */
135 while (!NULL_PARENT (i))
136 {
137 if (AM_RIGHT_CHILD (i))
138 i->parent->position = i->position
139 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
140 - TOTAL_LENGTH (i->parent)
141 + LEFT_TOTAL_LENGTH (i->parent);
142 else
143 i->parent->position = i->position - LEFT_TOTAL_LENGTH (i)
144 + TOTAL_LENGTH (i);
145 i = i->parent;
146 }
147 i = gl_state.forward_i;
ee0cdb48
KH
148 gl_state.b_property = i->position - 1 - gl_state.offset;
149 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
150 goto update;
151 }
152 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
153
6a140a74
RS
154 /* We are guarantied to be called with CHARPOS either in i,
155 or further off. */
195d1361
RS
156 if (NULL_INTERVAL_P (i))
157 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 158 else if (charpos < i->position) /* Move left. */
195d1361
RS
159 {
160 if (count > 0)
161 error ("Error in syntax_table logic for intervals <-.");
162 /* Update the interval. */
6a140a74 163 i = update_interval (i, charpos);
195d1361
RS
164 if (oldi->position != INTERVAL_LAST_POS (i))
165 {
166 invalidate = 0;
167 gl_state.right_ok = 1; /* Invalidate the other end. */
168 gl_state.forward_i = i;
ee0cdb48 169 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
170 }
171 }
6a140a74 172 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
173 {
174 if (count < 0)
175 error ("Error in syntax_table logic for intervals ->.");
176 /* Update the interval. */
6a140a74 177 i = update_interval (i, charpos);
195d1361
RS
178 if (i->position != INTERVAL_LAST_POS (oldi))
179 {
180 invalidate = 0;
181 gl_state.left_ok = 1; /* Invalidate the other end. */
182 gl_state.backward_i = i;
ee0cdb48 183 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
184 }
185 }
186 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
187 {
188 /* We do not need to recalculate tmp_table. */
189 tmp_table = gl_state.old_prop;
190 }
191
192 update:
193 tmp_table = textget (i->plist, Qsyntax_table);
194
195 if (invalidate)
196 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
197
198 if (invalidate) /* Did not get to adjacent interval. */
199 { /* with the same table => */
200 /* invalidate the old range. */
201 if (count > 0)
202 {
203 gl_state.backward_i = i;
204 gl_state.left_ok = 1; /* Invalidate the other end. */
ee0cdb48 205 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
206 }
207 else
208 {
209 gl_state.forward_i = i;
210 gl_state.right_ok = 1; /* Invalidate the other end. */
ee0cdb48 211 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
212 }
213 }
37bef230 214
195d1361
RS
215 gl_state.current_syntax_table = tmp_table;
216 gl_state.old_prop = tmp_table;
2e34157c 217 if (EQ (Fsyntax_table_p (tmp_table), Qt))
195d1361
RS
218 {
219 gl_state.use_global = 0;
220 }
221 else if (CONSP (tmp_table))
222 {
223 gl_state.use_global = 1;
224 gl_state.global_code = tmp_table;
225 }
226 else
227 {
228 gl_state.use_global = 0;
229 gl_state.current_syntax_table = current_buffer->syntax_table;
230 }
231
232 while (!NULL_INTERVAL_P (i))
233 {
234 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
235 {
236 if (count > 0)
237 gl_state.right_ok = 0;
238 else
239 gl_state.left_ok = 0;
240 break;
241 }
242 else if (cnt == INTERVALS_AT_ONCE)
243 {
244 if (count > 0)
245 gl_state.right_ok = 1;
246 else
247 gl_state.left_ok = 1;
248 break;
249 }
250 cnt++;
251 i = count > 0 ? next_interval (i) : previous_interval (i);
252 }
253 if (NULL_INTERVAL_P (i))
254 { /* This property goes to the end. */
255 if (count > 0)
256 gl_state.e_property = gl_state.stop;
257 else
258 gl_state.b_property = gl_state.start;
259 }
260 else
261 {
262 if (count > 0)
263 {
ee0cdb48 264 gl_state.e_property = i->position - gl_state.offset;
195d1361
RS
265 gl_state.forward_i = i;
266 }
267 else
268 {
ee0cdb48 269 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
195d1361
RS
270 gl_state.backward_i = i;
271 }
272 }
273}
274\f
6a140a74
RS
275/* Returns TRUE if char at CHARPOS is quoted.
276 Global syntax-table data should be set up already to be good at CHARPOS
277 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
278
279static int
6a140a74
RS
280char_quoted (charpos, bytepos)
281 register int charpos, bytepos;
195d1361
RS
282{
283 register enum syntaxcode code;
284 register int beg = BEGV;
285 register int quoted = 0;
6a140a74
RS
286 int orig = charpos;
287
288 DEC_BOTH (charpos, bytepos);
195d1361 289
6a140a74 290 while (bytepos >= beg)
195d1361 291 {
6a140a74
RS
292 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
293 code = SYNTAX (FETCH_CHAR (bytepos));
294 if (! (code == Scharquote || code == Sescape))
295 break;
296
297 DEC_BOTH (charpos, bytepos);
298 quoted = !quoted;
195d1361 299 }
6a140a74
RS
300
301 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
302 return quoted;
303}
6a140a74
RS
304
305/* Return the bytepos one character after BYTEPOS.
306 We assume that BYTEPOS is not at the end of the buffer. */
307
308INLINE int
309inc_bytepos (bytepos)
310 int bytepos;
311{
ef316cf0
RS
312 if (NILP (current_buffer->enable_multibyte_characters))
313 return bytepos + 1;
314
6a140a74
RS
315 INC_POS (bytepos);
316 return bytepos;
317}
318
319/* Return the bytepos one character before BYTEPOS.
320 We assume that BYTEPOS is not at the start of the buffer. */
321
322INLINE int
323dec_bytepos (bytepos)
324 int bytepos;
325{
ef316cf0
RS
326 if (NILP (current_buffer->enable_multibyte_characters))
327 return bytepos - 1;
328
6a140a74
RS
329 DEC_POS (bytepos);
330 return bytepos;
331}
195d1361 332\f
37bef230
RS
333/* Find a defun-start that is the last one before POS (or nearly the last).
334 We record what we find, so that another call in the same area
195d1361
RS
335 can return the same value right away.
336
337 There is no promise at which position the global syntax data is
338 valid on return from the subroutine, so the caller should explicitly
339 update the global data. */
37bef230
RS
340
341static int
6a140a74
RS
342find_defun_start (pos, pos_byte)
343 int pos, pos_byte;
37bef230
RS
344{
345 int tem;
346 int shortage;
6a140a74 347 int opoint = PT, opoint_byte = PT_BYTE;
37bef230
RS
348
349 /* Use previous finding, if it's valid and applies to this inquiry. */
350 if (current_buffer == find_start_buffer
351 /* Reuse the defun-start even if POS is a little farther on.
352 POS might be in the next defun, but that's ok.
353 Our value may not be the best possible, but will still be usable. */
354 && pos <= find_start_pos + 1000
355 && pos >= find_start_value
356 && BEGV == find_start_begv
357 && MODIFF == find_start_modiff)
358 return find_start_value;
359
360 /* Back up to start of line. */
6a140a74 361 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 362
195d1361
RS
363 /* We optimize syntax-table lookup for rare updates. Thus we accept
364 only those `^\s(' which are good in global _and_ text-property
365 syntax-tables. */
366 gl_state.current_syntax_table = current_buffer->syntax_table;
367 gl_state.use_global = 0;
6a140a74 368 while (PT > BEGV)
37bef230
RS
369 {
370 /* Open-paren at start of line means we found our defun-start. */
6a140a74 371 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
195d1361 372 {
6a140a74
RS
373 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
374 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
195d1361
RS
375 break;
376 /* Now fallback to the default value. */
377 gl_state.current_syntax_table = current_buffer->syntax_table;
378 gl_state.use_global = 0;
379 }
37bef230 380 /* Move to beg of previous line. */
6a140a74 381 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
382 }
383
384 /* Record what we found, for the next try. */
6a140a74
RS
385 find_start_value = PT;
386 find_start_value_byte = PT_BYTE;
37bef230
RS
387 find_start_buffer = current_buffer;
388 find_start_modiff = MODIFF;
389 find_start_begv = BEGV;
390 find_start_pos = pos;
391
6a140a74
RS
392 TEMP_SET_PT_BOTH (opoint, opoint_byte);
393
37bef230
RS
394 return find_start_value;
395}
396\f
6a140a74
RS
397/* Checks whether charpos FROM is at the end of a comment.
398 FROM_BYTE is the bytepos corresponding to FROM.
399 Do not move back before STOP.
400
401 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
402 return -1 otherwise.
403
404 If successful, store the charpos of the comment's beginning
405 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
406
407 Global syntax data remains valid for backward search starting at
408 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
409
410static int
6a140a74
RS
411back_comment (from, from_byte, stop, comstyle, charpos_ptr, bytepos_ptr)
412 int from, from_byte, stop;
413 int comstyle;
414 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
415{
416 /* Look back, counting the parity of string-quotes,
417 and recording the comment-starters seen.
418 When we reach a safe place, assume that's not in a string;
419 then step the main scan to the earliest comment-starter seen
420 an even number of string quotes away from the safe place.
421
422 OFROM[I] is position of the earliest comment-starter seen
423 which is I+2X quotes from the comment-end.
424 PARITY is current parity of quotes from the comment end. */
425 int parity = 0;
426 int my_stringend = 0;
427 int string_lossage = 0;
428 int comment_end = from;
6a140a74 429 int comment_end_byte = from_byte;
195d1361 430 int comstart_pos = 0;
6a140a74 431 int comstart_byte;
eb35b628
RS
432 /* Value that PARITY had, when we reached the position
433 in COMSTART_POS. */
434 int comstart_parity = 0;
195d1361 435 int scanstart = from - 1;
eb35b628
RS
436 /* Place where the containing defun starts,
437 or 0 if we didn't come across it yet. */
438 int defun_start = 0;
439 int defun_start_byte = 0;
195d1361 440 register enum syntaxcode code;
ea315ed6 441 int c;
195d1361
RS
442
443 /* At beginning of range to scan, we're outside of strings;
444 that determines quote parity to the comment-end. */
445 while (from != stop)
446 {
6a140a74
RS
447 int temp_byte;
448
195d1361 449 /* Move back and examine a character. */
6a140a74 450 DEC_BOTH (from, from_byte);
195d1361
RS
451 UPDATE_SYNTAX_TABLE_BACKWARD (from);
452
6a140a74 453 c = FETCH_CHAR (from_byte);
195d1361
RS
454 code = SYNTAX (c);
455
527a32d9 456 /* If this char is the second of a 2-char comment end sequence,
195d1361
RS
457 back up and give the pair the appropriate syntax. */
458 if (from > stop && SYNTAX_COMEND_SECOND (c)
6a140a74
RS
459 && (temp_byte = dec_bytepos (from_byte),
460 SYNTAX_COMEND_FIRST (FETCH_CHAR (temp_byte))))
195d1361
RS
461 {
462 code = Sendcomment;
6a140a74 463 DEC_BOTH (from, from_byte);
195d1361
RS
464 /* This is apparently the best we can do: */
465 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 466 c = FETCH_CHAR (from_byte);
195d1361
RS
467 }
468
469 /* If this char starts a 2-char comment start sequence,
470 treat it like a 1-char comment starter. */
527a32d9 471 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
6a140a74
RS
472 && (temp_byte = inc_bytepos (from_byte),
473 (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
474 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))))
527a32d9 475 code = Scomment;
195d1361 476
9828a477
KH
477 /* Ignore escaped characters, except comment-enders. */
478 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
479 continue;
480
481 /* Track parity of quotes. */
482 if (code == Sstring)
483 {
484 parity ^= 1;
485 if (my_stringend == 0)
486 my_stringend = c;
487 /* If we have two kinds of string delimiters.
488 There's no way to grok this scanning backwards. */
489 else if (my_stringend != c)
490 string_lossage = 1;
491 }
492
493 if (code == Sstring_fence || code == Scomment_fence)
494 {
495 parity ^= 1;
496 if (my_stringend == 0)
6a140a74
RS
497 my_stringend
498 = code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
195d1361
RS
499 /* If we have two kinds of string delimiters.
500 There's no way to grok this scanning backwards. */
501 else if (my_stringend != (code == Sstring_fence
502 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
503 string_lossage = 1;
504 }
505
506 /* Record comment-starters according to that
507 quote-parity to the comment-end. */
508 if (code == Scomment)
509 {
510 comstart_parity = parity;
511 comstart_pos = from;
6a140a74 512 comstart_byte = from_byte;
195d1361
RS
513 }
514
515 /* If we find another earlier comment-ender,
516 any comment-starts earlier than that don't count
517 (because they go with the earlier comment-ender). */
518 if (code == Sendcomment
6a140a74 519 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
195d1361
RS
520 break;
521
522 /* Assume a defun-start point is outside of strings. */
523 if (code == Sopen
6a140a74
RS
524 && (from == stop
525 || (temp_byte = dec_bytepos (from_byte),
526 FETCH_CHAR (temp_byte) == '\n')))
eb35b628
RS
527 {
528 defun_start = from;
529 defun_start_byte = from_byte;
530 break;
531 }
195d1361
RS
532 }
533
534 if (comstart_pos == 0)
535 {
536 from = comment_end;
6a140a74 537 from_byte = comment_end_byte;
195d1361
RS
538 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
539 }
540 /* If the earliest comment starter
541 is followed by uniform paired string quotes or none,
542 we know it can't be inside a string
543 since if it were then the comment ender would be inside one.
544 So it does start a comment. Skip back to it. */
545 else if (comstart_parity == 0 && !string_lossage)
546 {
547 from = comstart_pos;
6a140a74 548 from_byte = comstart_byte;
195d1361
RS
549 /* Globals are correct now. */
550 }
551 else
552 {
553 /* We had two kinds of string delimiters mixed up
554 together. Decode this going forwards.
555 Scan fwd from the previous comment ender
556 to the one in question; this records where we
557 last passed a comment starter. */
558 struct lisp_parse_state state;
eb35b628
RS
559 /* If we did not already find the defun start, find it now. */
560 if (defun_start == 0)
561 {
562 defun_start = find_defun_start (comment_end, comment_end_byte);
563 defun_start_byte = find_start_value_byte;
564 }
6a140a74 565 scan_sexps_forward (&state,
eb35b628 566 defun_start, defun_start_byte,
195d1361
RS
567 comment_end - 1, -10000, 0, Qnil, 0);
568 if (state.incomment)
569 {
570 /* scan_sexps_forward changed the direction of search in
571 global variables, so we need to update it completely. */
572
573 from = state.comstr_start;
574 }
575 else
576 {
577 from = comment_end;
578 }
6a140a74 579 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
580 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
581 }
582
6a140a74
RS
583 *charpos_ptr = from;
584 *bytepos_ptr = from_byte;
585
195d1361
RS
586 return from;
587}
588\f
8489eb67 589DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
2203e1e8 590 "Return t if OBJECT is a syntax table.\n\
8ea151b2 591Currently, any char-table counts as a syntax table.")
2203e1e8
EN
592 (object)
593 Lisp_Object object;
8489eb67 594{
2203e1e8 595 if (CHAR_TABLE_P (object)
e704cb4b 596 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
597 return Qt;
598 return Qnil;
599}
600
8ea151b2 601static void
8489eb67
RS
602check_syntax_table (obj)
603 Lisp_Object obj;
604{
d1be9f0f 605 if (!(CHAR_TABLE_P (obj)
e704cb4b 606 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 607 wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
608}
609
8489eb67
RS
610DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
611 "Return the current syntax table.\n\
612This is the one specified by the current buffer.")
613 ()
614{
615 return current_buffer->syntax_table;
616}
617
618DEFUN ("standard-syntax-table", Fstandard_syntax_table,
619 Sstandard_syntax_table, 0, 0, 0,
620 "Return the standard syntax table.\n\
621This is the one used for new buffers.")
622 ()
623{
624 return Vstandard_syntax_table;
625}
626
627DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
628 "Construct a new syntax table and return it.\n\
629It is a copy of the TABLE, which defaults to the standard syntax table.")
630 (table)
631 Lisp_Object table;
632{
8ea151b2
RS
633 Lisp_Object copy;
634
265a9e55 635 if (!NILP (table))
8ea151b2
RS
636 check_syntax_table (table);
637 else
638 table = Vstandard_syntax_table;
639
640 copy = Fcopy_sequence (table);
0f867324
RS
641
642 /* Only the standard syntax table should have a default element.
643 Other syntax tables should inherit from parents instead. */
644 XCHAR_TABLE (copy)->defalt = Qnil;
645
646 /* Copied syntax tables should all have parents.
647 If we copied one with no parent, such as the standard syntax table,
648 use the standard syntax table as the copy's parent. */
649 if (NILP (XCHAR_TABLE (copy)->parent))
650 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 651 return copy;
8489eb67
RS
652}
653
654DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
655 "Select a new syntax table for the current buffer.\n\
656One argument, a syntax table.")
657 (table)
658 Lisp_Object table;
659{
8ea151b2 660 check_syntax_table (table);
8489eb67
RS
661 current_buffer->syntax_table = table;
662 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
663 current_buffer->local_var_flags
664 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
665 return table;
666}
667\f
668/* Convert a letter which signifies a syntax code
669 into the code it signifies.
195d1361 670 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
671
672unsigned char syntax_spec_code[0400] =
673 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
674 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
675 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
676 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 677 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
678 (char) Smath, 0377, 0377, (char) Squote,
679 (char) Sopen, (char) Sclose, 0377, 0377,
680 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
681 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
682 0377, 0377, 0377, 0377,
683 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 684 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
685 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
686 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
687 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
688 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
689 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
690 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 691 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
692 };
693
195d1361 694/* Indexed by syntax code, give the letter that describes it. */
8489eb67 695
195d1361 696char syntax_code_spec[16] =
8489eb67 697 {
195d1361
RS
698 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
699 '!', '|'
8489eb67 700 };
93da5fff
KH
701
702/* Indexed by syntax code, give the object (cons of syntax code and
703 nil) to be stored in syntax table. Since these objects can be
704 shared among syntax tables, we generate them in advance. By
705 sharing objects, the function `describe-syntax' can give a more
706 compact listing. */
707static Lisp_Object Vsyntax_code_object;
708
8489eb67 709\f
8ea151b2
RS
710/* Look up the value for CHARACTER in syntax table TABLE's parent
711 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
712 for CHARACTER. It's actually used only when not compiled with GCC. */
713
714Lisp_Object
715syntax_parent_lookup (table, character)
716 Lisp_Object table;
717 int character;
718{
719 Lisp_Object value;
720
721 while (1)
722 {
723 table = XCHAR_TABLE (table)->parent;
724 if (NILP (table))
725 return Qnil;
726
727 value = XCHAR_TABLE (table)->contents[character];
728 if (!NILP (value))
729 return value;
730 }
731}
732
8489eb67 733DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
2203e1e8
EN
734 "Return the syntax code of CHARACTER, described by a character.\n\
735For example, if CHARACTER is a word constituent,\n\
736the character `w' is returned.\n\
8489eb67
RS
737The characters that correspond to various syntax codes\n\
738are listed in the documentation of `modify-syntax-entry'.")
2203e1e8
EN
739 (character)
740 Lisp_Object character;
8489eb67 741{
8ea151b2 742 int char_int;
195d1361
RS
743 gl_state.current_syntax_table = current_buffer->syntax_table;
744
745 gl_state.use_global = 0;
2203e1e8
EN
746 CHECK_NUMBER (character, 0);
747 char_int = XINT (character);
8ea151b2 748 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
749}
750
751DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
2203e1e8
EN
752 "Return the matching parenthesis of CHARACTER, or nil if none.")
753 (character)
754 Lisp_Object character;
beefa22e 755{
8ea151b2 756 int char_int, code;
195d1361
RS
757 gl_state.current_syntax_table = current_buffer->syntax_table;
758 gl_state.use_global = 0;
2203e1e8
EN
759 CHECK_NUMBER (character, 0);
760 char_int = XINT (character);
8ea151b2 761 code = SYNTAX (char_int);
a8bd7cd8 762 if (code == Sopen || code == Sclose)
2e34157c 763 return SYNTAX_MATCH (char_int);
beefa22e 764 return Qnil;
8489eb67
RS
765}
766
767/* This comment supplies the doc string for modify-syntax-entry,
768 for make-docfile to see. We cannot put this in the real DEFUN
769 due to limits in the Unix cpp.
770
2fcea188 771DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
772 "Set syntax for character CHAR according to string S.\n\
773The syntax is changed only for table TABLE, which defaults to\n\
774 the current buffer's syntax table.\n\
775The first character of S should be one of the following:\n\
32676c08
JB
776 Space or - whitespace syntax. w word constituent.\n\
777 _ symbol constituent. . punctuation.\n\
778 ( open-parenthesis. ) close-parenthesis.\n\
779 \" string quote. \\ escape.\n\
780 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 781 < comment starter. > comment ender.\n\
6cb71bf6 782 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 783\n\
8489eb67
RS
784Only single-character comment start and end sequences are represented thus.\n\
785Two-character sequences are represented as described below.\n\
786The second character of S is the matching parenthesis,\n\
787 used only if the first character is `(' or `)'.\n\
788Any additional characters are flags.\n\
e5d4f4dc 789Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
2203e1e8
EN
790 1 means CHAR is the start of a two-char comment start sequence.\n\
791 2 means CHAR is the second character of such a sequence.\n\
792 3 means CHAR is the start of a two-char comment end sequence.\n\
793 4 means CHAR is the second character of such a sequence.\n\
e5d4f4dc 794\n\
195d1361 795There can be up to two orthogonal comment sequences. This is to support\n\
e5d4f4dc 796language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
797a, but you can set the comment sequence style to b (on the second character\n\
798of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc 799this flag:\n\
2203e1e8 800 b means CHAR is part of comment sequence b.\n\
e5d4f4dc 801\n\
2203e1e8 802 p means CHAR is a prefix character for `backward-prefix-chars';\n\
e5d4f4dc 803 such characters are treated as whitespace when they occur\n\
8489eb67 804 between expressions.")
2fcea188 805 (char, s, table)
8489eb67
RS
806*/
807
808DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
809 /* I really don't know why this is interactive
810 help-form should at least be made useful whilst reading the second arg
811 */
812 "cSet syntax for character: \nsSet syntax for %s to: ",
813 0 /* See immediately above */)
814 (c, newentry, syntax_table)
815 Lisp_Object c, newentry, syntax_table;
816{
8ea151b2 817 register unsigned char *p;
8489eb67 818 register enum syntaxcode code;
247e20a8 819 int val;
8ea151b2 820 Lisp_Object match;
8489eb67
RS
821
822 CHECK_NUMBER (c, 0);
823 CHECK_STRING (newentry, 1);
8ea151b2 824
265a9e55 825 if (NILP (syntax_table))
8489eb67
RS
826 syntax_table = current_buffer->syntax_table;
827 else
8ea151b2 828 check_syntax_table (syntax_table);
8489eb67
RS
829
830 p = XSTRING (newentry)->data;
831 code = (enum syntaxcode) syntax_spec_code[*p++];
832 if (((int) code & 0377) == 0377)
2e34157c 833 error ("invalid syntax description letter: %c", p[-1]);
8489eb67 834
8ea151b2
RS
835 if (code == Sinherit)
836 {
e704cb4b 837 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
8ea151b2
RS
838 return Qnil;
839 }
840
841 if (*p)
d1be9f0f 842 {
93da5fff
KH
843 int len;
844 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
845 len);
846 XSETINT (match, character);
d1be9f0f
RS
847 if (XFASTINT (match) == ' ')
848 match = Qnil;
93da5fff 849 p += len;
d1be9f0f
RS
850 }
851 else
8ea151b2 852 match = Qnil;
8489eb67 853
8ea151b2 854 val = (int) code;
8489eb67
RS
855 while (*p)
856 switch (*p++)
857 {
858 case '1':
247e20a8 859 val |= 1 << 16;
8489eb67
RS
860 break;
861
862 case '2':
247e20a8 863 val |= 1 << 17;
8489eb67
RS
864 break;
865
866 case '3':
247e20a8 867 val |= 1 << 18;
8489eb67
RS
868 break;
869
870 case '4':
247e20a8 871 val |= 1 << 19;
8489eb67
RS
872 break;
873
874 case 'p':
247e20a8 875 val |= 1 << 20;
8489eb67 876 break;
e5d4f4dc
RS
877
878 case 'b':
247e20a8 879 val |= 1 << 21;
e5d4f4dc 880 break;
8489eb67
RS
881 }
882
93da5fff
KH
883 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
884 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
885 else
886 /* Since we can't use a shared object, let's make a new one. */
887 newentry = Fcons (make_number (val), match);
888
e704cb4b 889 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
8489eb67
RS
890
891 return Qnil;
892}
893\f
894/* Dump syntax table to buffer in human-readable format */
895
340f92b5 896static void
8489eb67
RS
897describe_syntax (value)
898 Lisp_Object value;
899{
900 register enum syntaxcode code;
e5d4f4dc 901 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67 902 char str[2];
8ea151b2 903 Lisp_Object first, match_lisp;
8489eb67
RS
904
905 Findent_to (make_number (16), make_number (1));
906
8ea151b2
RS
907 if (NILP (value))
908 {
93da5fff 909 insert_string ("default\n");
8ea151b2
RS
910 return;
911 }
912
908b7fea
KH
913 if (CHAR_TABLE_P (value))
914 {
915 insert_string ("deeper char-table ...\n");
916 return;
917 }
918
8ea151b2
RS
919 if (!CONSP (value))
920 {
93da5fff 921 insert_string ("invalid\n");
8ea151b2
RS
922 return;
923 }
924
925 first = XCONS (value)->car;
926 match_lisp = XCONS (value)->cdr;
927
928 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 929 {
93da5fff 930 insert_string ("invalid\n");
8489eb67
RS
931 return;
932 }
933
e704cb4b 934 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
935 start1 = (XINT (first) >> 16) & 1;
936 start2 = (XINT (first) >> 17) & 1;
937 end1 = (XINT (first) >> 18) & 1;
938 end2 = (XINT (first) >> 19) & 1;
939 prefix = (XINT (first) >> 20) & 1;
940 comstyle = (XINT (first) >> 21) & 1;
8489eb67
RS
941
942 if ((int) code < 0 || (int) code >= (int) Smax)
943 {
944 insert_string ("invalid");
945 return;
946 }
947 desc = syntax_code_spec[(int) code];
948
949 str[0] = desc, str[1] = 0;
950 insert (str, 1);
951
93da5fff
KH
952 if (NILP (match_lisp))
953 insert (" ", 1);
954 else
955 insert_char (XINT (match_lisp));
8489eb67 956
8489eb67
RS
957 if (start1)
958 insert ("1", 1);
959 if (start2)
960 insert ("2", 1);
961
962 if (end1)
963 insert ("3", 1);
964 if (end2)
965 insert ("4", 1);
966
967 if (prefix)
968 insert ("p", 1);
e5d4f4dc
RS
969 if (comstyle)
970 insert ("b", 1);
8489eb67
RS
971
972 insert_string ("\twhich means: ");
973
0220c518 974 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
975 {
976 case Swhitespace:
977 insert_string ("whitespace"); break;
978 case Spunct:
979 insert_string ("punctuation"); break;
980 case Sword:
981 insert_string ("word"); break;
982 case Ssymbol:
983 insert_string ("symbol"); break;
984 case Sopen:
985 insert_string ("open"); break;
986 case Sclose:
987 insert_string ("close"); break;
988 case Squote:
989 insert_string ("quote"); break;
990 case Sstring:
991 insert_string ("string"); break;
992 case Smath:
993 insert_string ("math"); break;
994 case Sescape:
995 insert_string ("escape"); break;
996 case Scharquote:
997 insert_string ("charquote"); break;
998 case Scomment:
999 insert_string ("comment"); break;
1000 case Sendcomment:
1001 insert_string ("endcomment"); break;
1002 default:
1003 insert_string ("invalid");
1004 return;
1005 }
1006
8ea151b2 1007 if (!NILP (match_lisp))
8489eb67
RS
1008 {
1009 insert_string (", matches ");
8ea151b2 1010 insert_char (XINT (match_lisp));
8489eb67
RS
1011 }
1012
1013 if (start1)
1014 insert_string (",\n\t is the first character of a comment-start sequence");
1015 if (start2)
1016 insert_string (",\n\t is the second character of a comment-start sequence");
1017
1018 if (end1)
1019 insert_string (",\n\t is the first character of a comment-end sequence");
1020 if (end2)
1021 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1022 if (comstyle)
1023 insert_string (" (comment style b)");
1024
8489eb67
RS
1025 if (prefix)
1026 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1027
1028 insert_string ("\n");
1029}
1030
340f92b5 1031static Lisp_Object
8489eb67
RS
1032describe_syntax_1 (vector)
1033 Lisp_Object vector;
1034{
1035 struct buffer *old = current_buffer;
1036 set_buffer_internal (XBUFFER (Vstandard_output));
e41819d4 1037 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
93da5fff
KH
1038 while (! NILP (XCHAR_TABLE (vector)->parent))
1039 {
1040 vector = XCHAR_TABLE (vector)->parent;
1041 insert_string ("\nThe parent syntax table is:");
e41819d4
RS
1042 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1043 (int *) 0, 0);
93da5fff
KH
1044 }
1045
36cd82fe 1046 call0 (intern ("help-mode"));
8489eb67
RS
1047 set_buffer_internal (old);
1048 return Qnil;
1049}
1050
1051DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1052 "Describe the syntax specifications in the syntax table.\n\
1053The descriptions are inserted in a buffer, which is then displayed.")
1054 ()
1055{
1056 internal_with_output_to_temp_buffer
1057 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1058
1059 return Qnil;
1060}
1061\f
195d1361
RS
1062int parse_sexp_ignore_comments;
1063
8489eb67
RS
1064/* Return the position across COUNT words from FROM.
1065 If that many words cannot be found before the end of the buffer, return 0.
1066 COUNT negative means scan backward and stop at word beginning. */
1067
dfcf069d 1068int
8489eb67
RS
1069scan_words (from, count)
1070 register int from, count;
1071{
1072 register int beg = BEGV;
1073 register int end = ZV;
6a140a74 1074 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1075 register enum syntaxcode code;
1076 int ch0, ch1;
8489eb67
RS
1077
1078 immediate_quit = 1;
1079 QUIT;
1080
195d1361
RS
1081 SETUP_SYNTAX_TABLE (from, count);
1082
8489eb67
RS
1083 while (count > 0)
1084 {
1085 while (1)
1086 {
1087 if (from == end)
1088 {
1089 immediate_quit = 0;
1090 return 0;
1091 }
195d1361 1092 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1093 ch0 = FETCH_CHAR (from_byte);
93da5fff 1094 code = SYNTAX (ch0);
6a140a74 1095 INC_BOTH (from, from_byte);
8489eb67
RS
1096 if (words_include_escapes
1097 && (code == Sescape || code == Scharquote))
1098 break;
1099 if (code == Sword)
1100 break;
8489eb67 1101 }
93da5fff
KH
1102 /* Now CH0 is a character which begins a word and FROM is the
1103 position of the next character. */
8489eb67
RS
1104 while (1)
1105 {
1106 if (from == end) break;
195d1361 1107 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1108 ch1 = FETCH_CHAR (from_byte);
93da5fff 1109 code = SYNTAX (ch1);
8489eb67
RS
1110 if (!(words_include_escapes
1111 && (code == Sescape || code == Scharquote)))
93da5fff 1112 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1113 break;
6a140a74 1114 INC_BOTH (from, from_byte);
93da5fff 1115 ch0 = ch1;
8489eb67
RS
1116 }
1117 count--;
1118 }
1119 while (count < 0)
1120 {
1121 while (1)
1122 {
1123 if (from == beg)
1124 {
1125 immediate_quit = 0;
1126 return 0;
1127 }
6a140a74 1128 DEC_BOTH (from, from_byte);
195d1361 1129 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1130 ch1 = FETCH_CHAR (from_byte);
93da5fff 1131 code = SYNTAX (ch1);
8489eb67
RS
1132 if (words_include_escapes
1133 && (code == Sescape || code == Scharquote))
1134 break;
1135 if (code == Sword)
1136 break;
8489eb67 1137 }
93da5fff
KH
1138 /* Now CH1 is a character which ends a word and FROM is the
1139 position of it. */
8489eb67
RS
1140 while (1)
1141 {
6a140a74
RS
1142 int temp_byte;
1143
1144 if (from == beg)
1145 break;
1146 temp_byte = dec_bytepos (from_byte);
195d1361 1147 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1148 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1149 code = SYNTAX (ch0);
8489eb67
RS
1150 if (!(words_include_escapes
1151 && (code == Sescape || code == Scharquote)))
93da5fff 1152 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1153 break;
6a140a74 1154 DEC_BOTH (from, from_byte);
93da5fff 1155 ch1 = ch0;
8489eb67
RS
1156 }
1157 count++;
1158 }
1159
1160 immediate_quit = 0;
1161
1162 return from;
1163}
1164
1165DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1166 "Move point forward ARG words (backward if ARG is negative).\n\
1167Normally returns t.\n\
1168If an edge of the buffer is reached, point is left there\n\
1169and nil is returned.")
1170 (count)
1171 Lisp_Object count;
1172{
1173 int val;
1174 CHECK_NUMBER (count, 0);
1175
6ec8bbd2 1176 if (!(val = scan_words (PT, XINT (count))))
8489eb67
RS
1177 {
1178 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1179 return Qnil;
1180 }
1181 SET_PT (val);
1182 return Qt;
1183}
1184\f
195d1361
RS
1185Lisp_Object skip_chars ();
1186
1187DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1188 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1189STRING is like the inside of a `[...]' in a regular expression\n\
1190except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1191Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1192With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1193Returns the distance traveled, either zero or positive.")
1194 (string, lim)
1195 Lisp_Object string, lim;
1196{
1197 return skip_chars (1, 0, string, lim);
1198}
1199
1200DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1201 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1202See `skip-chars-forward' for details.\n\
1203Returns the distance traveled, either zero or negative.")
1204 (string, lim)
1205 Lisp_Object string, lim;
1206{
1207 return skip_chars (0, 0, string, lim);
1208}
1209
1210DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1211 "Move point forward across chars in specified syntax classes.\n\
1212SYNTAX is a string of syntax code characters.\n\
1213Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1214If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1215This function returns the distance traveled, either zero or positive.")
1216 (syntax, lim)
1217 Lisp_Object syntax, lim;
1218{
1219 return skip_chars (1, 1, syntax, lim);
1220}
1221
1222DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1223 "Move point backward across chars in specified syntax classes.\n\
1224SYNTAX is a string of syntax code characters.\n\
1225Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1226If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1227This function returns the distance traveled, either zero or negative.")
1228 (syntax, lim)
1229 Lisp_Object syntax, lim;
1230{
1231 return skip_chars (0, 1, syntax, lim);
1232}
1233
6a140a74 1234static Lisp_Object
195d1361
RS
1235skip_chars (forwardp, syntaxp, string, lim)
1236 int forwardp, syntaxp;
1237 Lisp_Object string, lim;
1238{
1239 register unsigned char *p, *pend;
1240 register unsigned int c;
7e68b0ea 1241 register int ch;
195d1361 1242 unsigned char fastmap[0400];
7e68b0ea
RS
1243 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1244 of which codes don't fit in FASTMAP. In that case, we set the
1245 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1246 and set the actual ranges of characters in CHAR_RANGES. In the
1247 form "X-Y" of STRING, both X and Y must belong to the same
1248 character set because a range striding across character sets is
1249 meaningless. */
5df4982e 1250 int *char_ranges;
7e68b0ea 1251 int n_char_ranges = 0;
195d1361 1252 int negate = 0;
4101e6fe 1253 register int i, i_byte;
7e68b0ea 1254 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4101e6fe 1255 int string_multibyte = STRING_MULTIBYTE (string);
195d1361
RS
1256
1257 CHECK_STRING (string, 0);
5df4982e 1258 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
195d1361
RS
1259
1260 if (NILP (lim))
1261 XSETINT (lim, forwardp ? ZV : BEGV);
1262 else
6a140a74 1263 CHECK_NUMBER_COERCE_MARKER (lim, 0);
195d1361
RS
1264
1265 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1266 if (XINT (lim) > ZV)
1267 XSETFASTINT (lim, ZV);
1268 if (XINT (lim) < BEGV)
1269 XSETFASTINT (lim, BEGV);
1270
195d1361
RS
1271 bzero (fastmap, sizeof fastmap);
1272
4101e6fe
RS
1273 i = 0, i_byte = 0;
1274
1275 if (i < XSTRING (string)->size
1276 && XSTRING (string)->data[0] == '^')
195d1361 1277 {
4101e6fe 1278 negate = 1; i++, i_byte++;
195d1361
RS
1279 }
1280
1281 /* Find the characters specified and set their elements of fastmap.
1282 If syntaxp, each character counts as itself.
1283 Otherwise, handle backslashes and ranges specially. */
1284
4101e6fe 1285 while (i < XSTRING (string)->size)
195d1361 1286 {
4101e6fe
RS
1287 int c_leading_code;
1288
1289 if (string_multibyte)
7e68b0ea 1290 {
4101e6fe
RS
1291 c_leading_code = XSTRING (string)->data[i_byte];
1292 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
7e68b0ea
RS
1293 }
1294 else
4101e6fe
RS
1295 c = c_leading_code = XSTRING (string)->data[i++];
1296
1297 /* Convert multibyteness between what the string has
1298 and what the buffer has. */
1299 if (multibyte)
b29ff194 1300 c = unibyte_char_to_multibyte (c);
4101e6fe
RS
1301 else
1302 c &= 0377;
1303
195d1361 1304 if (syntaxp)
4101e6fe 1305 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1306 else
1307 {
1308 if (c == '\\')
1309 {
4101e6fe
RS
1310 if (i == XSTRING (string)->size)
1311 break;
1312
1313 if (string_multibyte)
1314 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1315 else
1316 c = XSTRING (string)->data[i++];
195d1361 1317 }
9828a477 1318 if (i < XSTRING (string)->size && XSTRING (string)->data[i] == '-')
195d1361 1319 {
4101e6fe
RS
1320 unsigned int c2;
1321
1322 /* Skip over the dash. */
1323 i++, i_byte++;
1324
1325 if (i == XSTRING (string)->size)
1326 break;
1327
1328 /* Get the end of the range. */
1329 if (string_multibyte)
1330 FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
1331 else
1332 c2 = XSTRING (string)->data[i++];
7e68b0ea 1333
4101e6fe
RS
1334 if (SINGLE_BYTE_CHAR_P (c))
1335 while (c <= c2)
7e68b0ea
RS
1336 {
1337 fastmap[c] = 1;
1338 c++;
1339 }
1340 else
195d1361 1341 {
4101e6fe
RS
1342 fastmap[c_leading_code] = 1;
1343 if (c <= c2)
1344 {
1345 char_ranges[n_char_ranges++] = c;
1346 char_ranges[n_char_ranges++] = c2;
1347 }
195d1361 1348 }
195d1361
RS
1349 }
1350 else
7e68b0ea 1351 {
4101e6fe
RS
1352 fastmap[c_leading_code] = 1;
1353 if (!SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1354 {
4101e6fe
RS
1355 char_ranges[n_char_ranges++] = c;
1356 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1357 }
1358 }
195d1361
RS
1359 }
1360 }
1361
7e68b0ea
RS
1362 /* If ^ was the first character, complement the fastmap. In
1363 addition, as all multibyte characters have possibility of
1364 matching, set all entries for base leading codes, which is
1365 harmless even if SYNTAXP is 1. */
195d1361
RS
1366
1367 if (negate)
1368 for (i = 0; i < sizeof fastmap; i++)
7e68b0ea
RS
1369 {
1370 if (!multibyte || !BASE_LEADING_CODE_P (i))
1371 fastmap[i] ^= 1;
1372 else
1373 fastmap[i] = 1;
1374 }
195d1361
RS
1375
1376 {
1377 int start_point = PT;
1378 int pos = PT;
6a140a74 1379 int pos_byte = PT_BYTE;
195d1361
RS
1380
1381 immediate_quit = 1;
1382 if (syntaxp)
1383 {
1384 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1385 if (forwardp)
1386 {
7e68b0ea 1387 if (multibyte)
195d1361 1388 {
ee0cdb48
KH
1389 if (pos < XINT (lim))
1390 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1391 {
1392 /* Since we already checked for multibyteness,
1393 avoid using INC_BOTH which checks again. */
1394 INC_POS (pos_byte);
1395 pos++;
1396 if (pos >= XINT (lim))
1397 break;
1398 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1399 }
7e68b0ea
RS
1400 }
1401 else
1402 {
1403 while (pos < XINT (lim)
1404 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1405 {
1406 pos++;
1407 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1408 }
195d1361
RS
1409 }
1410 }
1411 else
1412 {
7e68b0ea 1413 if (multibyte)
195d1361 1414 {
7e68b0ea
RS
1415 while (pos > XINT (lim))
1416 {
6a140a74 1417 int savepos = pos_byte;
ef316cf0
RS
1418 /* Since we already checked for multibyteness,
1419 avoid using DEC_BOTH which checks again. */
1420 pos--;
1421 DEC_POS (pos_byte);
4c8535eb 1422 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
6a140a74 1423 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
7e68b0ea 1424 {
6a140a74
RS
1425 pos++;
1426 pos_byte = savepos;
7e68b0ea
RS
1427 break;
1428 }
7e68b0ea
RS
1429 }
1430 }
1431 else
1432 {
ee0cdb48
KH
1433 if (pos > XINT (lim))
1434 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1435 {
1436 pos--;
1437 if (pos <= XINT (lim))
4c8535eb 1438 break;
ee0cdb48
KH
1439 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1440 }
195d1361
RS
1441 }
1442 }
1443 }
1444 else
1445 {
1446 if (forwardp)
1447 {
7e68b0ea 1448 if (multibyte)
6a140a74 1449 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
7e68b0ea
RS
1450 {
1451 if (!BASE_LEADING_CODE_P (c))
6a140a74 1452 INC_BOTH (pos, pos_byte);
7e68b0ea
RS
1453 else if (n_char_ranges)
1454 {
1455 /* We much check CHAR_RANGES for a multibyte
1456 character. */
6a140a74 1457 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
7e68b0ea
RS
1458 for (i = 0; i < n_char_ranges; i += 2)
1459 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1460 break;
1461 if (!(negate ^ (i < n_char_ranges)))
1462 break;
1463
6a140a74 1464 INC_BOTH (pos, pos_byte);
7e68b0ea
RS
1465 }
1466 else
1467 {
1468 if (!negate) break;
6a140a74 1469 INC_BOTH (pos, pos_byte);
7e68b0ea
RS
1470 }
1471 }
1472 else
1473 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1474 pos++;
195d1361
RS
1475 }
1476 else
1477 {
7e68b0ea
RS
1478 if (multibyte)
1479 while (pos > XINT (lim))
1480 {
6a140a74
RS
1481 int savepos = pos_byte;
1482 DEC_BOTH (pos, pos_byte);
1483 if (fastmap[(c = FETCH_BYTE (pos_byte))])
7e68b0ea
RS
1484 {
1485 if (!BASE_LEADING_CODE_P (c))
1486 ;
1487 else if (n_char_ranges)
1488 {
1489 /* We much check CHAR_RANGES for a multibyte
1490 character. */
6a140a74 1491 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
7e68b0ea
RS
1492 for (i = 0; i < n_char_ranges; i += 2)
1493 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1494 break;
1495 if (!(negate ^ (i < n_char_ranges)))
1496 {
6a140a74
RS
1497 pos++;
1498 pos_byte = savepos;
7e68b0ea
RS
1499 break;
1500 }
1501 }
1502 else
1503 if (!negate)
1504 {
6a140a74
RS
1505 pos++;
1506 pos_byte = savepos;
7e68b0ea
RS
1507 break;
1508 }
1509 }
1510 else
1511 {
6a140a74
RS
1512 pos++;
1513 pos_byte = savepos;
7e68b0ea
RS
1514 break;
1515 }
1516 }
1517 else
1518 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1519 pos--;
195d1361
RS
1520 }
1521 }
7e68b0ea 1522
6a140a74
RS
1523#if 0 /* Not needed now that a position in mid-character
1524 cannot be specified in Lisp. */
7e68b0ea
RS
1525 if (multibyte
1526 /* INC_POS or DEC_POS might have moved POS over LIM. */
1527 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1528 pos = XINT (lim);
6a140a74 1529#endif
7e68b0ea 1530
6a140a74
RS
1531 if (! multibyte)
1532 pos_byte = pos;
1533
1534 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1535 immediate_quit = 0;
1536
1537 return make_number (PT - start_point);
1538 }
1539}
1540\f
b3cfe0c8
RS
1541DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1542 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 1543Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 1544Set point to where scanning stops.\n\
b3cfe0c8
RS
1545If N comments are found as expected, with nothing except whitespace\n\
1546between them, return t; otherwise return nil.")
1547 (count)
840f481c 1548 Lisp_Object count;
b3cfe0c8
RS
1549{
1550 register int from;
6a140a74 1551 int from_byte;
b3cfe0c8 1552 register int stop;
8ea151b2 1553 register int c, c1;
b3cfe0c8
RS
1554 register enum syntaxcode code;
1555 int comstyle = 0; /* style of comment encountered */
be720845 1556 int found;
840f481c 1557 int count1;
93da5fff 1558 int temp_pos;
6a140a74 1559 int out_charpos, out_bytepos;
840f481c
RS
1560
1561 CHECK_NUMBER (count, 0);
1562 count1 = XINT (count);
195d1361 1563 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1564
1565 immediate_quit = 1;
1566 QUIT;
1567
1568 from = PT;
6a140a74 1569 from_byte = PT_BYTE;
b3cfe0c8 1570
195d1361 1571 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1572 while (count1 > 0)
b3cfe0c8 1573 {
04882296 1574 do
b3cfe0c8 1575 {
04882296
KH
1576 if (from == stop)
1577 {
ef316cf0 1578 SET_PT_BOTH (from, from_byte);
b7e6e612 1579 immediate_quit = 0;
04882296
KH
1580 return Qnil;
1581 }
195d1361 1582 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1583 c = FETCH_CHAR (from_byte);
b3cfe0c8 1584 code = SYNTAX (c);
6a140a74 1585 INC_BOTH (from, from_byte);
b3cfe0c8
RS
1586 comstyle = 0;
1587 if (from < stop && SYNTAX_COMSTART_FIRST (c)
6a140a74 1588 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 1589 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1590 {
7fc8191e
RS
1591 /* We have encountered a comment start sequence and we
1592 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1593 the comment style this sequence begins so that later,
1594 only a comment end of the same style actually ends
7fc8191e 1595 the comment section. */
b3cfe0c8 1596 code = Scomment;
8ea151b2 1597 comstyle = SYNTAX_COMMENT_STYLE (c1);
6a140a74 1598 INC_BOTH (from, from_byte);
b3cfe0c8 1599 }
04882296
KH
1600 }
1601 while (code == Swhitespace || code == Sendcomment);
6a140a74 1602
195d1361 1603 if (code != Scomment && code != Scomment_fence)
04882296
KH
1604 {
1605 immediate_quit = 0;
6a140a74 1606 DEC_BOTH (from, from_byte);
ef316cf0 1607 SET_PT_BOTH (from, from_byte);
04882296
KH
1608 return Qnil;
1609 }
1610 /* We're at the start of a comment. */
1611 while (1)
1612 {
1613 if (from == stop)
b3cfe0c8
RS
1614 {
1615 immediate_quit = 0;
ef316cf0 1616 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
1617 return Qnil;
1618 }
195d1361 1619 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74
RS
1620 c = FETCH_CHAR (from_byte);
1621 INC_BOTH (from, from_byte);
04882296
KH
1622 if (SYNTAX (c) == Sendcomment
1623 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1624 /* we have encountered a comment end of the same style
1625 as the comment sequence which began this comment
1626 section */
1627 break;
195d1361
RS
1628 if (SYNTAX (c) == Scomment_fence
1629 && comstyle == ST_COMMENT_STYLE)
1630 /* we have encountered a comment end of the same style
1631 as the comment sequence which began this comment
1632 section. */
1633 break;
04882296 1634 if (from < stop && SYNTAX_COMEND_FIRST (c)
6a140a74 1635 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 1636 SYNTAX_COMEND_SECOND (c1))
04882296
KH
1637 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1638 /* we have encountered a comment end of the same style
1639 as the comment sequence which began this comment
1640 section */
6a140a74
RS
1641 {
1642 INC_BOTH (from, from_byte);
1643 break;
1644 }
b3cfe0c8 1645 }
04882296 1646 /* We have skipped one comment. */
840f481c 1647 count1--;
b3cfe0c8
RS
1648 }
1649
840f481c 1650 while (count1 < 0)
b3cfe0c8 1651 {
b9145dbb 1652 while (1)
b3cfe0c8
RS
1653 {
1654 int quoted;
b9145dbb
RS
1655 if (from <= stop)
1656 {
6a140a74 1657 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
1658 immediate_quit = 0;
1659 return Qnil;
1660 }
b3cfe0c8 1661
6a140a74
RS
1662 DEC_BOTH (from, from_byte);
1663 quoted = char_quoted (from, from_byte);
b3cfe0c8 1664 if (quoted)
195d1361 1665 {
6a140a74
RS
1666 DEC_BOTH (from, from_byte);
1667 goto leave;
195d1361
RS
1668 }
1669 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1670 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
1671 code = SYNTAX (c);
1672 comstyle = 0;
7fc8191e
RS
1673 if (code == Sendcomment)
1674 comstyle = SYNTAX_COMMENT_STYLE (c);
ef316cf0 1675 temp_pos = dec_bytepos (from_byte);
b3cfe0c8 1676 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff 1677 && (c1 = FETCH_CHAR (temp_pos),
8ea151b2 1678 SYNTAX_COMEND_FIRST (c1))
6a140a74 1679 && !char_quoted (from - 1, temp_pos))
b3cfe0c8 1680 {
7fc8191e 1681 /* We must record the comment style encountered so that
b3cfe0c8 1682 later, we can match only the proper comment begin
7fc8191e 1683 sequence of the same style. */
b3cfe0c8 1684 code = Sendcomment;
8ea151b2 1685 comstyle = SYNTAX_COMMENT_STYLE (c1);
6a140a74 1686 DEC_BOTH (from, from_byte);
b3cfe0c8 1687 }
ae03af34
RS
1688 if (from > stop && SYNTAX_COMSTART_SECOND (c)
1689 && (c1 = FETCH_CHAR (temp_pos),
1690 SYNTAX_COMSTART_FIRST (c1))
6a140a74 1691 && !char_quoted (from - 1, temp_pos))
ae03af34
RS
1692 {
1693 /* We must record the comment style encountered so that
1694 later, we can match only the proper comment begin
1695 sequence of the same style. */
1696 code = Scomment;
6a140a74 1697 DEC_BOTH (from, from_byte);
ae03af34 1698 }
b3cfe0c8 1699
195d1361
RS
1700 if (code == Scomment_fence)
1701 {
1702 /* Skip until first preceding unquoted comment_fence. */
6a140a74 1703 int found = 0, ini = from, ini_byte = from_byte;
195d1361 1704
6a140a74 1705 while (1)
195d1361 1706 {
6a140a74
RS
1707 DEC_BOTH (from, from_byte);
1708 if (from == stop)
1709 break;
195d1361 1710 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
1711 c = FETCH_CHAR (from_byte);
1712 if (SYNTAX (c) == Scomment_fence
1713 && !char_quoted (from, from_byte))
195d1361
RS
1714 {
1715 found = 1;
1716 break;
1717 }
1718 }
1719 if (found == 0)
1720 {
1721 from = ini; /* Set point to ini + 1. */
6a140a74 1722 from_byte = ini_byte;
195d1361
RS
1723 goto leave;
1724 }
1725 }
1726 else if (code == Sendcomment)
b3cfe0c8 1727 {
6a140a74
RS
1728 found = back_comment (from, from_byte, stop, comstyle,
1729 &out_charpos, &out_bytepos);
1730 if (found != -1)
1731 from = out_charpos, from_byte = out_bytepos;
7fc8191e
RS
1732 /* We have skipped one comment. */
1733 break;
b3cfe0c8 1734 }
195d1361 1735 else if (code != Swhitespace && code != Scomment)
b3cfe0c8 1736 {
195d1361 1737 leave:
b3cfe0c8 1738 immediate_quit = 0;
6a140a74 1739 INC_BOTH (from, from_byte);
ef316cf0 1740 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
1741 return Qnil;
1742 }
1743 }
1744
840f481c 1745 count1++;
b3cfe0c8
RS
1746 }
1747
ef316cf0 1748 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
1749 immediate_quit = 0;
1750 return Qt;
1751}
1752\f
6a140a74 1753static Lisp_Object
8489eb67
RS
1754scan_lists (from, count, depth, sexpflag)
1755 register int from;
1756 int count, depth, sexpflag;
1757{
1758 Lisp_Object val;
195d1361 1759 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
1760 register int c, c1;
1761 int stringterm;
8489eb67
RS
1762 int quoted;
1763 int mathexit = 0;
93da5fff 1764 register enum syntaxcode code, temp_code;
195d1361 1765 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 1766 int comstyle = 0; /* style of comment encountered */
93da5fff 1767 int temp_pos;
7bf5e9e4 1768 int last_good = from;
195d1361 1769 int found;
6a140a74
RS
1770 int from_byte = CHAR_TO_BYTE (from);
1771 int out_bytepos, out_charpos;
8489eb67
RS
1772
1773 if (depth > 0) min_depth = 0;
1774
1775 immediate_quit = 1;
1776 QUIT;
1777
195d1361 1778 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
1779 while (count > 0)
1780 {
8489eb67
RS
1781 while (from < stop)
1782 {
195d1361 1783 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1784 c = FETCH_CHAR (from_byte);
e5d4f4dc 1785 code = SYNTAX (c);
7bf5e9e4
RS
1786 if (depth == min_depth)
1787 last_good = from;
6a140a74 1788 INC_BOTH (from, from_byte);
195d1361 1789 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67 1790 if (from < stop && SYNTAX_COMSTART_FIRST (c)
6a140a74 1791 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
8489eb67 1792 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1793 {
1794 /* we have encountered a comment start sequence and we
195d1361 1795 are ignoring all text inside comments. We must record
e5d4f4dc
RS
1796 the comment style this sequence begins so that later,
1797 only a comment end of the same style actually ends
1798 the comment section */
1799 code = Scomment;
6a140a74
RS
1800 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
1801 INC_BOTH (from, from_byte);
e5d4f4dc
RS
1802 }
1803
195d1361 1804 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
1805 if (SYNTAX_PREFIX (c))
1806 continue;
1807
0220c518 1808 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1809 {
1810 case Sescape:
1811 case Scharquote:
1812 if (from == stop) goto lose;
6a140a74 1813 INC_BOTH (from, from_byte);
8489eb67
RS
1814 /* treat following character as a word constituent */
1815 case Sword:
1816 case Ssymbol:
1817 if (depth || !sexpflag) break;
195d1361 1818 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
1819 while (from < stop)
1820 {
195d1361 1821 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1822 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
8489eb67
RS
1823 {
1824 case Scharquote:
1825 case Sescape:
6a140a74 1826 INC_BOTH (from, from_byte);
8489eb67
RS
1827 if (from == stop) goto lose;
1828 break;
1829 case Sword:
1830 case Ssymbol:
1831 case Squote:
1832 break;
1833 default:
1834 goto done;
1835 }
6a140a74 1836 INC_BOTH (from, from_byte);
8489eb67
RS
1837 }
1838 goto done;
1839
1840 case Scomment:
195d1361 1841 case Scomment_fence:
8489eb67
RS
1842 if (!parse_sexp_ignore_comments) break;
1843 while (1)
1844 {
c01f7ec0
RS
1845 if (from == stop)
1846 {
1847 if (depth == 0)
1848 goto done;
1849 goto lose;
1850 }
195d1361 1851 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1852 c = FETCH_CHAR (from_byte);
195d1361
RS
1853 if (code == Scomment
1854 ? (SYNTAX (c) == Sendcomment
1855 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1856 : (SYNTAX (c) == Scomment_fence))
e5d4f4dc
RS
1857 /* we have encountered a comment end of the same style
1858 as the comment sequence which began this comment
1859 section */
8489eb67 1860 break;
6a140a74 1861 INC_BOTH (from, from_byte);
8489eb67 1862 if (from < stop && SYNTAX_COMEND_FIRST (c)
6a140a74 1863 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte))
195d1361
RS
1864 && SYNTAX_COMMENT_STYLE (c) == comstyle
1865 && code == Scomment)
e5d4f4dc
RS
1866 /* we have encountered a comment end of the same style
1867 as the comment sequence which began this comment
1868 section */
6a140a74
RS
1869 {
1870 INC_BOTH (from, from_byte);
1871 break;
1872 }
8489eb67
RS
1873 }
1874 break;
1875
1876 case Smath:
1877 if (!sexpflag)
1878 break;
6a140a74
RS
1879 if (from != stop && c == FETCH_CHAR (from_byte))
1880 {
1881 INC_BOTH (from, from_byte);
1882 }
8489eb67
RS
1883 if (mathexit)
1884 {
1885 mathexit = 0;
1886 goto close1;
1887 }
1888 mathexit = 1;
1889
1890 case Sopen:
1891 if (!++depth) goto done;
1892 break;
1893
1894 case Sclose:
1895 close1:
1896 if (!--depth) goto done;
1897 if (depth < min_depth)
7bf5e9e4
RS
1898 Fsignal (Qscan_error,
1899 Fcons (build_string ("Containing expression ends prematurely"),
1900 Fcons (make_number (last_good),
1901 Fcons (make_number (from), Qnil))));
8489eb67
RS
1902 break;
1903
1904 case Sstring:
195d1361 1905 case Sstring_fence:
ef316cf0 1906 temp_pos = dec_bytepos (from_byte);
93da5fff 1907 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
1908 while (1)
1909 {
1910 if (from >= stop) goto lose;
195d1361
RS
1911 UPDATE_SYNTAX_TABLE_FORWARD (from);
1912 if (code == Sstring
6a140a74
RS
1913 ? (FETCH_CHAR (from_byte) == stringterm)
1914 : SYNTAX (FETCH_CHAR (from_byte)) == Sstring_fence)
195d1361 1915 break;
6a140a74 1916 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
8489eb67
RS
1917 {
1918 case Scharquote:
1919 case Sescape:
6a140a74 1920 INC_BOTH (from, from_byte);
8489eb67 1921 }
6a140a74 1922 INC_BOTH (from, from_byte);
8489eb67 1923 }
6a140a74 1924 INC_BOTH (from, from_byte);
8489eb67
RS
1925 if (!depth && sexpflag) goto done;
1926 break;
1927 }
1928 }
1929
1930 /* Reached end of buffer. Error if within object, return nil if between */
1931 if (depth) goto lose;
1932
1933 immediate_quit = 0;
1934 return Qnil;
1935
1936 /* End of object reached */
1937 done:
1938 count--;
1939 }
1940
1941 while (count < 0)
1942 {
8489eb67
RS
1943 while (from > stop)
1944 {
6a140a74 1945 DEC_BOTH (from, from_byte);
195d1361 1946 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1947 c = FETCH_CHAR (from_byte);
8489eb67 1948 code = SYNTAX (c);
7bf5e9e4
RS
1949 if (depth == min_depth)
1950 last_good = from;
7fc8191e
RS
1951 comstyle = 0;
1952 if (code == Sendcomment)
1953 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 1954 temp_pos = from_byte;
ef316cf0
RS
1955 if (! NILP (current_buffer->enable_multibyte_characters))
1956 DEC_POS (temp_pos);
1957 else
1958 temp_pos--;
8489eb67 1959 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff 1960 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
8489eb67 1961 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1962 {
1963 /* we must record the comment style encountered so that
1964 later, we can match only the proper comment begin
1965 sequence of the same style */
1966 code = Sendcomment;
93da5fff 1967 comstyle = SYNTAX_COMMENT_STYLE (c1);
6a140a74 1968 DEC_BOTH (from, from_byte);
e5d4f4dc
RS
1969 }
1970
9828a477
KH
1971 /* Quoting turns anything except a comment-ender
1972 into a word character. */
1973 if (code != Sendcomment && char_quoted (from, from_byte))
1974 code = Sword;
1975 else if (SYNTAX_PREFIX (c))
8489eb67
RS
1976 continue;
1977
9828a477 1978 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1979 {
1980 case Sword:
1981 case Ssymbol:
9828a477
KH
1982 case Sescape:
1983 case Scharquote:
8489eb67 1984 if (depth || !sexpflag) break;
195d1361
RS
1985 /* This word counts as a sexp; count object finished
1986 after passing it. */
8489eb67
RS
1987 while (from > stop)
1988 {
6a140a74 1989 temp_pos = from_byte;
ef316cf0
RS
1990 if (! NILP (current_buffer->enable_multibyte_characters))
1991 DEC_POS (temp_pos);
1992 else
1993 temp_pos--;
6a140a74 1994 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477
KH
1995 c1 = FETCH_CHAR (temp_pos);
1996 temp_code = SYNTAX (c1);
1997 /* Don't allow comment-end to be quoted. */
1998 if (temp_code == Sendcomment)
1999 goto done2;
6a140a74 2000 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2001 if (quoted)
93da5fff 2002 {
6a140a74 2003 DEC_BOTH (from, from_byte);
ef316cf0 2004 temp_pos = dec_bytepos (temp_pos);
6a140a74 2005 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2006 }
2007 c1 = FETCH_CHAR (temp_pos);
2008 temp_code = SYNTAX (c1);
2009 if (! (quoted || temp_code == Sword
2010 || temp_code == Ssymbol
2011 || temp_code == Squote))
8489eb67 2012 goto done2;
6a140a74 2013 DEC_BOTH (from, from_byte);
8489eb67
RS
2014 }
2015 goto done2;
2016
2017 case Smath:
2018 if (!sexpflag)
2019 break;
ef316cf0 2020 temp_pos = dec_bytepos (from_byte);
6a140a74 2021 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2022 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2023 DEC_BOTH (from, from_byte);
8489eb67
RS
2024 if (mathexit)
2025 {
2026 mathexit = 0;
2027 goto open2;
2028 }
2029 mathexit = 1;
2030
2031 case Sclose:
2032 if (!++depth) goto done2;
2033 break;
2034
2035 case Sopen:
2036 open2:
2037 if (!--depth) goto done2;
2038 if (depth < min_depth)
7bf5e9e4
RS
2039 Fsignal (Qscan_error,
2040 Fcons (build_string ("Containing expression ends prematurely"),
2041 Fcons (make_number (last_good),
2042 Fcons (make_number (from), Qnil))));
8489eb67
RS
2043 break;
2044
2045 case Sendcomment:
2046 if (!parse_sexp_ignore_comments)
2047 break;
6a140a74
RS
2048 found = back_comment (from, from_byte, stop, comstyle,
2049 &out_charpos, &out_bytepos);
2050 if (found != -1)
2051 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2052 break;
2053
195d1361
RS
2054 case Scomment_fence:
2055 case Sstring_fence:
2056 while (1)
2057 {
6a140a74 2058 DEC_BOTH (from, from_byte);
195d1361
RS
2059 if (from == stop) goto lose;
2060 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2061 if (!char_quoted (from, from_byte)
2062 && SYNTAX (FETCH_CHAR (from_byte)) == code)
195d1361
RS
2063 break;
2064 }
2065 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2066 break;
2067
8489eb67 2068 case Sstring:
6a140a74 2069 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2070 while (1)
2071 {
2072 if (from == stop) goto lose;
6a140a74 2073 temp_pos = from_byte;
ef316cf0
RS
2074 if (! NILP (current_buffer->enable_multibyte_characters))
2075 DEC_POS (temp_pos);
2076 else
2077 temp_pos--;
6a140a74
RS
2078 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2079 if (!char_quoted (from - 1, temp_pos)
93da5fff 2080 && stringterm == FETCH_CHAR (temp_pos))
8489eb67 2081 break;
6a140a74 2082 DEC_BOTH (from, from_byte);
8489eb67 2083 }
6a140a74 2084 DEC_BOTH (from, from_byte);
8489eb67
RS
2085 if (!depth && sexpflag) goto done2;
2086 break;
2087 }
2088 }
2089
2090 /* Reached start of buffer. Error if within object, return nil if between */
2091 if (depth) goto lose;
2092
2093 immediate_quit = 0;
2094 return Qnil;
2095
2096 done2:
2097 count++;
2098 }
2099
2100
2101 immediate_quit = 0;
1e142fb7 2102 XSETFASTINT (val, from);
8489eb67
RS
2103 return val;
2104
2105 lose:
7bf5e9e4
RS
2106 Fsignal (Qscan_error,
2107 Fcons (build_string ("Unbalanced parentheses"),
2108 Fcons (make_number (last_good),
2109 Fcons (make_number (from), Qnil))));
2110
8489eb67
RS
2111 /* NOTREACHED */
2112}
2113
8489eb67
RS
2114DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2115 "Scan from character number FROM by COUNT lists.\n\
2116Returns the character number of the position thus found.\n\
2117\n\
2118If DEPTH is nonzero, paren depth begins counting from that value,\n\
2119only places where the depth in parentheses becomes zero\n\
2120are candidates for stopping; COUNT such places are counted.\n\
2121Thus, a positive value for DEPTH means go out levels.\n\
2122\n\
2123Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2124\n\
2125If the beginning or end of (the accessible part of) the buffer is reached\n\
2126and the depth is wrong, an error is signaled.\n\
2127If the depth is right but the count is not used up, nil is returned.")
2128 (from, count, depth)
2129 Lisp_Object from, count, depth;
2130{
2131 CHECK_NUMBER (from, 0);
2132 CHECK_NUMBER (count, 1);
2133 CHECK_NUMBER (depth, 2);
2134
2135 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2136}
2137
2138DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2139 "Scan from character number FROM by COUNT balanced expressions.\n\
2140If COUNT is negative, scan backwards.\n\
2141Returns the character number of the position thus found.\n\
2142\n\
2143Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2144\n\
2145If the beginning or end of (the accessible part of) the buffer is reached\n\
2146in the middle of a parenthetical grouping, an error is signaled.\n\
2147If the beginning or end is reached between groupings\n\
2148but before count is used up, nil is returned.")
2149 (from, count)
2150 Lisp_Object from, count;
2151{
2152 CHECK_NUMBER (from, 0);
2153 CHECK_NUMBER (count, 1);
2154
2155 return scan_lists (XINT (from), XINT (count), 0, 1);
2156}
2157
2158DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2159 0, 0, 0,
2160 "Move point backward over any number of chars with prefix syntax.\n\
2161This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2162 ()
2163{
2164 int beg = BEGV;
6a140a74
RS
2165 int opoint = PT;
2166 int opoint_byte = PT_BYTE;
6ec8bbd2 2167 int pos = PT;
6a140a74 2168 int pos_byte = PT_BYTE;
93da5fff 2169 int c;
93da5fff 2170
195d1361
RS
2171 if (pos > beg)
2172 {
2173 SETUP_SYNTAX_TABLE (pos, -1);
2174 }
8489eb67 2175
6a140a74
RS
2176 DEC_BOTH (pos, pos_byte);
2177
1fd3172d 2178 while (!char_quoted (pos, pos_byte)
195d1361 2179 /* Previous statement updates syntax table. */
6a140a74 2180 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2181 || SYNTAX_PREFIX (c)))
2182 {
1fd3172d
RS
2183 opoint = pos;
2184 opoint_byte = pos_byte;
2185
2186 if (pos + 1 > beg)
2187 DEC_BOTH (pos, pos_byte);
93da5fff 2188 }
8489eb67 2189
6a140a74 2190 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2191
2192 return Qnil;
2193}
2194\f
6a140a74 2195/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2196 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2197 and return a description of the state of the parse at END.
c81a3712 2198 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2199 If COMMENTSTOP is 1, stop at the start of a comment.
2200 If COMMENTSTOP is -1, stop at the start or end of a comment,
2201 after the beginning of a string, or after the end of a string. */
8489eb67 2202
340f92b5 2203static void
6a140a74 2204scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2205 stopbefore, oldstate, commentstop)
e5d4f4dc 2206 struct lisp_parse_state *stateptr;
8489eb67
RS
2207 register int from;
2208 int end, targetdepth, stopbefore;
2209 Lisp_Object oldstate;
c81a3712 2210 int commentstop;
8489eb67
RS
2211{
2212 struct lisp_parse_state state;
2213
2214 register enum syntaxcode code;
2215 struct level { int last, prev; };
2216 struct level levelstart[100];
2217 register struct level *curlevel = levelstart;
2218 struct level *endlevel = levelstart + 100;
93da5fff 2219 int prev;
8489eb67
RS
2220 register int depth; /* Paren depth of current scanning location.
2221 level - levelstart equals this except
2222 when the depth becomes negative. */
2223 int mindepth; /* Lowest DEPTH value seen. */
2224 int start_quoted = 0; /* Nonzero means starting after a char quote */
2225 Lisp_Object tem;
93da5fff 2226 int prev_from; /* Keep one character before FROM. */
6a140a74 2227 int prev_from_byte;
1fd3172d 2228 int prev_from_syntax;
195d1361
RS
2229 int boundary_stop = commentstop == -1;
2230 int nofence;
93da5fff
KH
2231
2232 prev_from = from;
6a140a74
RS
2233 prev_from_byte = from_byte;
2234 if (from != BEGV)
2235 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2236
2237 /* Use this macro instead of `from++'. */
6a140a74
RS
2238#define INC_FROM \
2239do { prev_from = from; \
2240 prev_from_byte = from_byte; \
1fd3172d
RS
2241 prev_from_syntax \
2242 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
ef316cf0 2243 INC_BOTH (from, from_byte); \
1fd3172d 2244 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2245 } while (0)
8489eb67
RS
2246
2247 immediate_quit = 1;
2248 QUIT;
2249
265a9e55 2250 if (NILP (oldstate))
8489eb67
RS
2251 {
2252 depth = 0;
2253 state.instring = -1;
2254 state.incomment = 0;
195d1361
RS
2255 state.comstyle = 0; /* comment style a by default. */
2256 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2257 }
2258 else
2259 {
2260 tem = Fcar (oldstate);
265a9e55 2261 if (!NILP (tem))
8489eb67
RS
2262 depth = XINT (tem);
2263 else
2264 depth = 0;
2265
2266 oldstate = Fcdr (oldstate);
2267 oldstate = Fcdr (oldstate);
2268 oldstate = Fcdr (oldstate);
2269 tem = Fcar (oldstate);
195d1361
RS
2270 /* Check whether we are inside string_fence-style string: */
2271 state.instring = ( !NILP (tem)
2272 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2273 : -1);
8489eb67
RS
2274
2275 oldstate = Fcdr (oldstate);
2276 tem = Fcar (oldstate);
265a9e55 2277 state.incomment = !NILP (tem);
8489eb67
RS
2278
2279 oldstate = Fcdr (oldstate);
2280 tem = Fcar (oldstate);
265a9e55 2281 start_quoted = !NILP (tem);
e5d4f4dc
RS
2282
2283 /* if the eight element of the list is nil, we are in comment
195d1361
RS
2284 style a. If it is non-nil, we are in comment style b */
2285 oldstate = Fcdr (oldstate);
e5d4f4dc 2286 oldstate = Fcdr (oldstate);
195d1361
RS
2287 tem = Fcar (oldstate);
2288 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2289 ? ST_COMMENT_STYLE : 1 );
2290
e5d4f4dc 2291 oldstate = Fcdr (oldstate);
e5d4f4dc 2292 tem = Fcar (oldstate);
195d1361 2293 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
8489eb67
RS
2294 }
2295 state.quoted = 0;
2296 mindepth = depth;
2297
2298 curlevel->prev = -1;
2299 curlevel->last = -1;
2300
195d1361 2301 /* Enter the loop at a place appropriate for initial state. */
8489eb67
RS
2302
2303 if (state.incomment) goto startincomment;
2304 if (state.instring >= 0)
2305 {
195d1361 2306 nofence = state.instring != ST_STRING_STYLE;
8489eb67
RS
2307 if (start_quoted) goto startquotedinstring;
2308 goto startinstring;
2309 }
2310 if (start_quoted) goto startquoted;
2311
1fd3172d
RS
2312
2313 SETUP_SYNTAX_TABLE (prev_from, 1);
2314 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2315 UPDATE_SYNTAX_TABLE_FORWARD (from);
2316
8489eb67
RS
2317 while (from < end)
2318 {
93da5fff 2319 INC_FROM;
1fd3172d 2320 code = prev_from_syntax & 0xff;
4c920633 2321
8f9dc2ed 2322 if (code == Scomment)
195d1361 2323 state.comstr_start = prev_from;
4c920633 2324 else if (code == Scomment_fence)
e5d4f4dc
RS
2325 {
2326 /* Record the comment style we have entered so that only
2327 the comment-end sequence of the same style actually
2328 terminates the comment section. */
195d1361
RS
2329 state.comstyle = ( code == Scomment_fence
2330 ? ST_COMMENT_STYLE
6a140a74 2331 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)));
195d1361 2332 state.comstr_start = prev_from;
1fd3172d
RS
2333 if (code != Scomment_fence)
2334 INC_FROM;
e5d4f4dc 2335 code = Scomment;
e5d4f4dc 2336 }
4c920633 2337 else if (from < end)
1fd3172d 2338 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
6a140a74 2339 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte)))
1fd3172d 2340 /* Duplicate code to avoid a complex if-expression
4c920633
RS
2341 which causes trouble for the SGI compiler. */
2342 {
2343 /* Record the comment style we have entered so that only
2344 the comment-end sequence of the same style actually
2345 terminates the comment section. */
2346 state.comstyle = ( code == Scomment_fence
2347 ? ST_COMMENT_STYLE
6a140a74 2348 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)));
4c920633 2349 state.comstr_start = prev_from;
1fd3172d
RS
2350 if (code != Scomment_fence)
2351 INC_FROM;
4c920633
RS
2352 code = Scomment;
2353 }
e5d4f4dc 2354
1fd3172d 2355 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2356 continue;
0220c518 2357 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2358 {
2359 case Sescape:
2360 case Scharquote:
2361 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2362 curlevel->last = prev_from;
8489eb67
RS
2363 startquoted:
2364 if (from == end) goto endquoted;
93da5fff 2365 INC_FROM;
8489eb67
RS
2366 goto symstarted;
2367 /* treat following character as a word constituent */
2368 case Sword:
2369 case Ssymbol:
2370 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2371 curlevel->last = prev_from;
8489eb67
RS
2372 symstarted:
2373 while (from < end)
2374 {
6a140a74 2375 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
8489eb67
RS
2376 {
2377 case Scharquote:
2378 case Sescape:
93da5fff 2379 INC_FROM;
8489eb67
RS
2380 if (from == end) goto endquoted;
2381 break;
2382 case Sword:
2383 case Ssymbol:
2384 case Squote:
2385 break;
2386 default:
2387 goto symdone;
2388 }
93da5fff 2389 INC_FROM;
8489eb67
RS
2390 }
2391 symdone:
2392 curlevel->prev = curlevel->last;
2393 break;
2394
5a28e48c 2395 startincomment:
195d1361 2396 if (commentstop == 1)
5a28e48c
RS
2397 goto done;
2398 if (from != BEGV)
2399 {
2400 /* Enter the loop in the middle so that we find
2401 a 2-char comment ender if we start in the middle of it. */
5a28e48c
RS
2402 goto startincomment_1;
2403 }
2404 /* At beginning of buffer, enter the loop the ordinary way. */
195d1361
RS
2405 state.incomment = 1;
2406 goto commentloop;
5a28e48c 2407
8489eb67
RS
2408 case Scomment:
2409 state.incomment = 1;
195d1361
RS
2410 if (commentstop || boundary_stop) goto done;
2411 commentloop:
8489eb67
RS
2412 while (1)
2413 {
2414 if (from == end) goto done;
6a140a74 2415 prev = FETCH_CHAR (from_byte);
e5d4f4dc
RS
2416 if (SYNTAX (prev) == Sendcomment
2417 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2418 /* Only terminate the comment section if the endcomment
2419 of the same style as the start sequence has been
2420 encountered. */
8489eb67 2421 break;
195d1361
RS
2422 if (state.comstyle == ST_COMMENT_STYLE
2423 && SYNTAX (prev) == Scomment_fence)
2424 break;
93da5fff 2425 INC_FROM;
5a28e48c 2426 startincomment_1:
1fd3172d 2427 if (from < end && SYNTAX_FLAGS_COMEND_FIRST (prev_from_syntax)
6a140a74 2428 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte))
1fd3172d
RS
2429 && (SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax)
2430 == state.comstyle))
e5d4f4dc
RS
2431 /* Only terminate the comment section if the end-comment
2432 sequence of the same style as the start sequence has
2433 been encountered. */
6a140a74 2434 break;
8489eb67 2435 }
195d1361 2436 INC_FROM;
8489eb67 2437 state.incomment = 0;
e5d4f4dc 2438 state.comstyle = 0; /* reset the comment style */
195d1361 2439 if (boundary_stop) goto done;
8489eb67
RS
2440 break;
2441
2442 case Sopen:
2443 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2444 depth++;
2445 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2446 curlevel->last = prev_from;
8489eb67
RS
2447 if (++curlevel == endlevel)
2448 error ("Nesting too deep for parser");
2449 curlevel->prev = -1;
2450 curlevel->last = -1;
30844415 2451 if (targetdepth == depth) goto done;
8489eb67
RS
2452 break;
2453
2454 case Sclose:
2455 depth--;
2456 if (depth < mindepth)
2457 mindepth = depth;
2458 if (curlevel != levelstart)
2459 curlevel--;
2460 curlevel->prev = curlevel->last;
30844415 2461 if (targetdepth == depth) goto done;
8489eb67
RS
2462 break;
2463
2464 case Sstring:
195d1361
RS
2465 case Sstring_fence:
2466 state.comstr_start = from - 1;
8489eb67 2467 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2468 curlevel->last = prev_from;
195d1361 2469 state.instring = (code == Sstring
6a140a74 2470 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2471 : ST_STRING_STYLE);
2472 if (boundary_stop) goto done;
8489eb67 2473 startinstring:
195d1361 2474 {
644ea4df 2475 nofence = state.instring != ST_STRING_STYLE;
195d1361 2476
644ea4df
RS
2477 while (1)
2478 {
2479 int c;
195d1361 2480
644ea4df 2481 if (from >= end) goto done;
6a140a74 2482 c = FETCH_CHAR (from_byte);
644ea4df 2483 if (nofence && c == state.instring) break;
644ea4df
RS
2484 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2485 {
2486 case Sstring_fence:
2487 if (!nofence) goto string_end;
2488 break;
2489 case Scharquote:
2490 case Sescape:
2491 INC_FROM;
2492 startquotedinstring:
2493 if (from >= end) goto endquoted;
195d1361 2494 }
644ea4df
RS
2495 INC_FROM;
2496 }
195d1361
RS
2497 }
2498 string_end:
8489eb67
RS
2499 state.instring = -1;
2500 curlevel->prev = curlevel->last;
93da5fff 2501 INC_FROM;
195d1361 2502 if (boundary_stop) goto done;
8489eb67
RS
2503 break;
2504
2505 case Smath:
2506 break;
2507 }
2508 }
2509 goto done;
2510
2511 stop: /* Here if stopping before start of sexp. */
93da5fff 2512 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2513 goto done; /* but return the position before it. */
2514
2515 endquoted:
2516 state.quoted = 1;
2517 done:
2518 state.depth = depth;
2519 state.mindepth = mindepth;
2520 state.thislevelstart = curlevel->prev;
2521 state.prevlevelstart
2522 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2523 state.location = from;
2524 immediate_quit = 0;
2525
e5d4f4dc 2526 *stateptr = state;
8489eb67
RS
2527}
2528
2529/* This comment supplies the doc string for parse-partial-sexp,
2530 for make-docfile to see. We cannot put this in the real DEFUN
2531 due to limits in the Unix cpp.
2532
c81a3712 2533DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
2534 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2535Parsing stops at TO or when certain criteria are met;\n\
2536 point is set to where parsing stops.\n\
2537If fifth arg STATE is omitted or nil,\n\
2538 parsing assumes that FROM is the beginning of a function.\n\
195d1361 2539Value is a list of nine elements describing final state of parsing:\n\
af50f9e5
RS
2540 0. depth in parens.\n\
2541 1. character address of start of innermost containing list; nil if none.\n\
2542 2. character address of start of last complete sexp terminated.\n\
2543 3. non-nil if inside a string.\n\
195d1361 2544 (it is the character that will terminate the string,\n\
bff37d2a 2545 or t if the string should be terminated by a generic string delimiter.)\n\
af50f9e5
RS
2546 4. t if inside a comment.\n\
2547 5. t if following a quote character.\n\
2548 6. the minimum paren-depth encountered during this scan.\n\
bff37d2a
RS
2549 7. t if in a comment of style b; `syntax-table' if the comment\n\
2550 should be terminated by a generic comment delimiter.\n\
2551 8. character address of start of comment or string; nil if not in one.\n\
8489eb67
RS
2552If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2553in parentheses becomes equal to TARGETDEPTH.\n\
2554Fourth arg STOPBEFORE non-nil means stop when come to\n\
2555 any character that starts a sexp.\n\
bff37d2a
RS
2556Fifth arg STATE is a nine-element list like what this function returns.\n\
2557 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2558 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2559Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2560 If it is `syntax-table', stop after the start of a comment or a string,\n\
2561 or after end of a comment or a string.")
c81a3712 2562 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
2563*/
2564
c81a3712 2565DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 2566 0 /* See immediately above */)
c81a3712
RS
2567 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2568 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2569{
2570 struct lisp_parse_state state;
2571 int target;
2572
265a9e55 2573 if (!NILP (targetdepth))
8489eb67
RS
2574 {
2575 CHECK_NUMBER (targetdepth, 3);
2576 target = XINT (targetdepth);
2577 }
2578 else
2579 target = -100000; /* We won't reach this depth */
2580
2581 validate_region (&from, &to);
6a140a74
RS
2582 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2583 XINT (to),
c81a3712 2584 target, !NILP (stopbefore), oldstate,
195d1361
RS
2585 (NILP (commentstop)
2586 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2587
2588 SET_PT (state.location);
2589
2590 return Fcons (make_number (state.depth),
2591 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2592 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
195d1361
RS
2593 Fcons (state.instring >= 0
2594 ? (state.instring == ST_STRING_STYLE
2595 ? Qt : make_number (state.instring)) : Qnil,
8489eb67
RS
2596 Fcons (state.incomment ? Qt : Qnil,
2597 Fcons (state.quoted ? Qt : Qnil,
bff37d2a
RS
2598 Fcons (make_number (state.mindepth),
2599 Fcons ((state.comstyle
2600 ? (state.comstyle == ST_COMMENT_STYLE
2601 ? Qsyntax_table : Qt) :
2602 Qnil),
2603 Fcons ((state.incomment || state.instring
2604 ? make_number (state.comstr_start)
2605 : Qnil),
2606 Qnil)))))))));
8489eb67
RS
2607}
2608\f
dfcf069d 2609void
8489eb67
RS
2610init_syntax_once ()
2611{
78f9a1f7 2612 register int i, c;
8ea151b2 2613 Lisp_Object temp;
8489eb67 2614
5ebaddf5
RS
2615 /* This has to be done here, before we call Fmake_char_table. */
2616 Qsyntax_table = intern ("syntax-table");
2617 staticpro (&Qsyntax_table);
2618
2619 /* Intern this now in case it isn't already done.
2620 Setting this variable twice is harmless.
2621 But don't staticpro it here--that is done in alloc.c. */
2622 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2623
93da5fff 2624 /* Create objects which can be shared among syntax tables. */
44d8a793 2625 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
93da5fff
KH
2626 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2627 XVECTOR (Vsyntax_code_object)->contents[i]
2628 = Fcons (make_number (i), Qnil);
2629
5ebaddf5
RS
2630 /* Now we are ready to set up this property, so we can
2631 create syntax tables. */
2632 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2633
93da5fff 2634 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2635
5ebaddf5 2636 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2637
93da5fff 2638 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 2639 for (i = 'a'; i <= 'z'; i++)
8ea151b2 2640 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2641 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 2642 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2643 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
2644 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2645
2646 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2647 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2648
2649 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2650 Fcons (make_number (Sopen), make_number (')')));
2651 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2652 Fcons (make_number (Sclose), make_number ('(')));
2653 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2654 Fcons (make_number (Sopen), make_number (']')));
2655 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2656 Fcons (make_number (Sclose), make_number ('[')));
2657 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2658 Fcons (make_number (Sopen), make_number ('}')));
2659 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2660 Fcons (make_number (Sclose), make_number ('{')));
2661 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2662 Fcons (make_number ((int) Sstring), Qnil));
2663 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2664 Fcons (make_number ((int) Sescape), Qnil));
2665
93da5fff 2666 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 2667 for (i = 0; i < 10; i++)
78f9a1f7
KH
2668 {
2669 c = "_-+*/&|<>="[i];
2670 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2671 }
8489eb67 2672
93da5fff 2673 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 2674 for (i = 0; i < 12; i++)
78f9a1f7
KH
2675 {
2676 c = ".,;:?!#@~^'`"[i];
2677 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2678 }
8489eb67
RS
2679}
2680
dfcf069d 2681void
8489eb67
RS
2682syms_of_syntax ()
2683{
2684 Qsyntax_table_p = intern ("syntax-table-p");
2685 staticpro (&Qsyntax_table_p);
2686
93da5fff
KH
2687 staticpro (&Vsyntax_code_object);
2688
7bf5e9e4
RS
2689 Qscan_error = intern ("scan-error");
2690 staticpro (&Qscan_error);
2691 Fput (Qscan_error, Qerror_conditions,
2692 Fcons (Qerror, Qnil));
2693 Fput (Qscan_error, Qerror_message,
2694 build_string ("Scan error"));
2695
8489eb67
RS
2696 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2697 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2698
195d1361
RS
2699 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2700 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2701The value of this property should be either a syntax table, or a cons\n\
2702of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2703syntax code, MATCHCHAR being nil or the character to match (which is\n\
2704relevant only for open/close type.");
2705
8489eb67
RS
2706 words_include_escapes = 0;
2707 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2708 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2709
2710 defsubr (&Ssyntax_table_p);
2711 defsubr (&Ssyntax_table);
2712 defsubr (&Sstandard_syntax_table);
2713 defsubr (&Scopy_syntax_table);
2714 defsubr (&Sset_syntax_table);
2715 defsubr (&Schar_syntax);
beefa22e 2716 defsubr (&Smatching_paren);
8489eb67
RS
2717 defsubr (&Smodify_syntax_entry);
2718 defsubr (&Sdescribe_syntax);
2719
2720 defsubr (&Sforward_word);
2721
195d1361
RS
2722 defsubr (&Sskip_chars_forward);
2723 defsubr (&Sskip_chars_backward);
2724 defsubr (&Sskip_syntax_forward);
2725 defsubr (&Sskip_syntax_backward);
2726
b3cfe0c8 2727 defsubr (&Sforward_comment);
8489eb67
RS
2728 defsubr (&Sscan_lists);
2729 defsubr (&Sscan_sexps);
2730 defsubr (&Sbackward_prefix_chars);
2731 defsubr (&Sparse_partial_sexp);
2732}