(update_syntax_table): Properly update `position' field of used intervals.
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
47
48 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
49 if not compiled with GCC. No need to mark it, since it is used
50 only very temporarily. */
51 Lisp_Object syntax_temp;
52
53 /* This is the internal form of the parse state used in parse-partial-sexp. */
54
55 struct lisp_parse_state
56 {
57 int depth; /* Depth at end of parsing. */
58 int instring; /* -1 if not within string, else desired terminator. */
59 int incomment; /* Nonzero if within a comment at end of parsing. */
60 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
61 int quoted; /* Nonzero if just after an escape char at end of parsing */
62 int thislevelstart; /* Char number of most recent start-of-expression at current level */
63 int prevlevelstart; /* Char number of start of containing expression */
64 int location; /* Char number at which parsing stopped. */
65 int mindepth; /* Minimum depth seen while scanning. */
66 int comstr_start; /* Position just after last comment/string starter. */
67 };
68 \f
69 /* These variables are a cache for finding the start of a defun.
70 find_start_pos is the place for which the defun start was found.
71 find_start_value is the defun start position found for it.
72 find_start_value_byte is the corresponding byte position.
73 find_start_buffer is the buffer it was found in.
74 find_start_begv is the BEGV value when it was found.
75 find_start_modiff is the value of MODIFF when it was found. */
76
77 static int find_start_pos;
78 static int find_start_value;
79 static int find_start_value_byte;
80 static struct buffer *find_start_buffer;
81 static int find_start_begv;
82 static int find_start_modiff;
83
84
85 static int find_defun_start P_ ((int, int));
86 static int back_comment P_ ((int, int, int, int, int *, int *));
87 static int char_quoted P_ ((int, int));
88 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
89 static Lisp_Object scan_lists P_ ((int, int, int, int));
90 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
91 int, int, int, int,
92 int, Lisp_Object, int));
93 \f
94
95 struct gl_state_s gl_state; /* Global state of syntax parser. */
96
97 INTERVAL interval_of ();
98 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
99 to scan to property-change. */
100
101 /* Update gl_state to an appropriate interval which contains CHARPOS. The
102 sign of COUNT give the relative position of CHARPOS wrt the previously
103 valid interval. If INIT, only [be]_property fields of gl_state are
104 valid at start, the rest is filled basing on OBJECT.
105
106 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
107 direction than the intervals - or in an interval. We update the
108 current syntax-table basing on the property of this interval, and
109 update the interval to start further than CHARPOS - or be
110 NULL_INTERVAL. We also update lim_property to be the next value of
111 charpos to call this subroutine again - or be before/after the
112 start/end of OBJECT. */
113
114 void
115 update_syntax_table (charpos, count, init, object)
116 int charpos, count, init;
117 Lisp_Object object;
118 {
119 Lisp_Object tmp_table;
120 int cnt = 0, doing_extra = 0, invalidate = 1;
121 INTERVAL i, oldi;
122
123 if (init)
124 {
125 gl_state.start = gl_state.b_property;
126 gl_state.stop = gl_state.e_property;
127 gl_state.forward_i = interval_of (charpos, object);
128 i = gl_state.backward_i = gl_state.forward_i;
129 gl_state.left_ok = gl_state.right_ok = 1;
130 invalidate = 0;
131 if (NULL_INTERVAL_P (i))
132 return;
133 /* interval_of () updates only ->position of the return value,
134 update the parents manually to speed up update_interval. */
135 while (!NULL_PARENT (i))
136 {
137 if (AM_RIGHT_CHILD (i))
138 i->parent->position = i->position
139 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
140 - TOTAL_LENGTH (i->parent)
141 + LEFT_TOTAL_LENGTH (i->parent);
142 else
143 i->parent->position = i->position - LEFT_TOTAL_LENGTH (i)
144 + TOTAL_LENGTH (i);
145 i = i->parent;
146 }
147 i = gl_state.forward_i;
148 gl_state.b_property = i->position - 1 - gl_state.offset;
149 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
150 goto update;
151 }
152 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
153
154 /* We are guarantied to be called with CHARPOS either in i,
155 or further off. */
156 if (NULL_INTERVAL_P (i))
157 error ("Error in syntax_table logic for to-the-end intervals");
158 else if (charpos < i->position) /* Move left. */
159 {
160 if (count > 0)
161 error ("Error in syntax_table logic for intervals <-.");
162 /* Update the interval. */
163 i = update_interval (i, charpos);
164 if (oldi->position != INTERVAL_LAST_POS (i))
165 {
166 invalidate = 0;
167 gl_state.right_ok = 1; /* Invalidate the other end. */
168 gl_state.forward_i = i;
169 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
170 }
171 }
172 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
173 {
174 if (count < 0)
175 error ("Error in syntax_table logic for intervals ->.");
176 /* Update the interval. */
177 i = update_interval (i, charpos);
178 if (i->position != INTERVAL_LAST_POS (oldi))
179 {
180 invalidate = 0;
181 gl_state.left_ok = 1; /* Invalidate the other end. */
182 gl_state.backward_i = i;
183 gl_state.b_property = i->position - 1 - gl_state.offset;
184 }
185 }
186 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
187 {
188 /* We do not need to recalculate tmp_table. */
189 tmp_table = gl_state.old_prop;
190 }
191
192 update:
193 tmp_table = textget (i->plist, Qsyntax_table);
194
195 if (invalidate)
196 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
197
198 if (invalidate) /* Did not get to adjacent interval. */
199 { /* with the same table => */
200 /* invalidate the old range. */
201 if (count > 0)
202 {
203 gl_state.backward_i = i;
204 gl_state.left_ok = 1; /* Invalidate the other end. */
205 gl_state.b_property = i->position - 1 - gl_state.offset;
206 }
207 else
208 {
209 gl_state.forward_i = i;
210 gl_state.right_ok = 1; /* Invalidate the other end. */
211 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
212 }
213 }
214
215 gl_state.current_syntax_table = tmp_table;
216 gl_state.old_prop = tmp_table;
217 if (EQ (Fsyntax_table_p (tmp_table), Qt))
218 {
219 gl_state.use_global = 0;
220 }
221 else if (CONSP (tmp_table))
222 {
223 gl_state.use_global = 1;
224 gl_state.global_code = tmp_table;
225 }
226 else
227 {
228 gl_state.use_global = 0;
229 gl_state.current_syntax_table = current_buffer->syntax_table;
230 }
231
232 while (!NULL_INTERVAL_P (i))
233 {
234 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
235 {
236 if (count > 0)
237 gl_state.right_ok = 0;
238 else
239 gl_state.left_ok = 0;
240 break;
241 }
242 else if (cnt == INTERVALS_AT_ONCE)
243 {
244 if (count > 0)
245 gl_state.right_ok = 1;
246 else
247 gl_state.left_ok = 1;
248 break;
249 }
250 cnt++;
251 i = count > 0 ? next_interval (i) : previous_interval (i);
252 }
253 if (NULL_INTERVAL_P (i))
254 { /* This property goes to the end. */
255 if (count > 0)
256 gl_state.e_property = gl_state.stop;
257 else
258 gl_state.b_property = gl_state.start;
259 }
260 else
261 {
262 if (count > 0)
263 {
264 gl_state.e_property = i->position - gl_state.offset;
265 gl_state.forward_i = i;
266 }
267 else
268 {
269 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
270 gl_state.backward_i = i;
271 }
272 }
273 }
274 \f
275 /* Returns TRUE if char at CHARPOS is quoted.
276 Global syntax-table data should be set up already to be good at CHARPOS
277 or after. On return global syntax data is good for lookup at CHARPOS. */
278
279 static int
280 char_quoted (charpos, bytepos)
281 register int charpos, bytepos;
282 {
283 register enum syntaxcode code;
284 register int beg = BEGV;
285 register int quoted = 0;
286 int orig = charpos;
287
288 DEC_BOTH (charpos, bytepos);
289
290 while (bytepos >= beg)
291 {
292 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
293 code = SYNTAX (FETCH_CHAR (bytepos));
294 if (! (code == Scharquote || code == Sescape))
295 break;
296
297 DEC_BOTH (charpos, bytepos);
298 quoted = !quoted;
299 }
300
301 UPDATE_SYNTAX_TABLE (orig);
302 return quoted;
303 }
304
305 /* Return the bytepos one character after BYTEPOS.
306 We assume that BYTEPOS is not at the end of the buffer. */
307
308 INLINE int
309 inc_bytepos (bytepos)
310 int bytepos;
311 {
312 if (NILP (current_buffer->enable_multibyte_characters))
313 return bytepos + 1;
314
315 INC_POS (bytepos);
316 return bytepos;
317 }
318
319 /* Return the bytepos one character before BYTEPOS.
320 We assume that BYTEPOS is not at the start of the buffer. */
321
322 INLINE int
323 dec_bytepos (bytepos)
324 int bytepos;
325 {
326 if (NILP (current_buffer->enable_multibyte_characters))
327 return bytepos - 1;
328
329 DEC_POS (bytepos);
330 return bytepos;
331 }
332 \f
333 /* Find a defun-start that is the last one before POS (or nearly the last).
334 We record what we find, so that another call in the same area
335 can return the same value right away.
336
337 There is no promise at which position the global syntax data is
338 valid on return from the subroutine, so the caller should explicitly
339 update the global data. */
340
341 static int
342 find_defun_start (pos, pos_byte)
343 int pos, pos_byte;
344 {
345 int tem;
346 int shortage;
347 int opoint = PT, opoint_byte = PT_BYTE;
348
349 /* Use previous finding, if it's valid and applies to this inquiry. */
350 if (current_buffer == find_start_buffer
351 /* Reuse the defun-start even if POS is a little farther on.
352 POS might be in the next defun, but that's ok.
353 Our value may not be the best possible, but will still be usable. */
354 && pos <= find_start_pos + 1000
355 && pos >= find_start_value
356 && BEGV == find_start_begv
357 && MODIFF == find_start_modiff)
358 return find_start_value;
359
360 /* Back up to start of line. */
361 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
362
363 /* We optimize syntax-table lookup for rare updates. Thus we accept
364 only those `^\s(' which are good in global _and_ text-property
365 syntax-tables. */
366 gl_state.current_syntax_table = current_buffer->syntax_table;
367 gl_state.use_global = 0;
368 while (PT > BEGV)
369 {
370 /* Open-paren at start of line means we found our defun-start. */
371 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
372 {
373 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
374 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
375 break;
376 /* Now fallback to the default value. */
377 gl_state.current_syntax_table = current_buffer->syntax_table;
378 gl_state.use_global = 0;
379 }
380 /* Move to beg of previous line. */
381 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
382 }
383
384 /* Record what we found, for the next try. */
385 find_start_value = PT;
386 find_start_value_byte = PT_BYTE;
387 find_start_buffer = current_buffer;
388 find_start_modiff = MODIFF;
389 find_start_begv = BEGV;
390 find_start_pos = pos;
391
392 TEMP_SET_PT_BOTH (opoint, opoint_byte);
393
394 return find_start_value;
395 }
396 \f
397 /* Checks whether charpos FROM is at the end of a comment.
398 FROM_BYTE is the bytepos corresponding to FROM.
399 Do not move back before STOP.
400
401 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
402 return -1 otherwise.
403
404 If successful, store the charpos of the comment's beginning
405 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
406
407 Global syntax data remains valid for backward search starting at
408 the returned value (or at FROM, if the search was not successful). */
409
410 static int
411 back_comment (from, from_byte, stop, comstyle, charpos_ptr, bytepos_ptr)
412 int from, from_byte, stop;
413 int comstyle;
414 int *charpos_ptr, *bytepos_ptr;
415 {
416 /* Look back, counting the parity of string-quotes,
417 and recording the comment-starters seen.
418 When we reach a safe place, assume that's not in a string;
419 then step the main scan to the earliest comment-starter seen
420 an even number of string quotes away from the safe place.
421
422 OFROM[I] is position of the earliest comment-starter seen
423 which is I+2X quotes from the comment-end.
424 PARITY is current parity of quotes from the comment end. */
425 int parity = 0;
426 int my_stringend = 0;
427 int string_lossage = 0;
428 int comment_end = from;
429 int comment_end_byte = from_byte;
430 int comstart_pos = 0;
431 int comstart_byte;
432 /* Value that PARITY had, when we reached the position
433 in COMSTART_POS. */
434 int comstart_parity = 0;
435 int scanstart = from - 1;
436 /* Place where the containing defun starts,
437 or 0 if we didn't come across it yet. */
438 int defun_start = 0;
439 int defun_start_byte = 0;
440 register enum syntaxcode code;
441 int c;
442
443 /* At beginning of range to scan, we're outside of strings;
444 that determines quote parity to the comment-end. */
445 while (from != stop)
446 {
447 int temp_byte;
448
449 /* Move back and examine a character. */
450 DEC_BOTH (from, from_byte);
451 UPDATE_SYNTAX_TABLE_BACKWARD (from);
452
453 c = FETCH_CHAR (from_byte);
454 code = SYNTAX (c);
455
456 /* If this char is the second of a 2-char comment end sequence,
457 back up and give the pair the appropriate syntax. */
458 if (from > stop && SYNTAX_COMEND_SECOND (c)
459 && (temp_byte = dec_bytepos (from_byte),
460 SYNTAX_COMEND_FIRST (FETCH_CHAR (temp_byte))))
461 {
462 code = Sendcomment;
463 DEC_BOTH (from, from_byte);
464 /* This is apparently the best we can do: */
465 UPDATE_SYNTAX_TABLE_BACKWARD (from);
466 c = FETCH_CHAR (from_byte);
467 }
468
469 /* If this char starts a 2-char comment start sequence,
470 treat it like a 1-char comment starter. */
471 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
472 && (temp_byte = inc_bytepos (from_byte),
473 (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
474 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))))
475 code = Scomment;
476
477 /* Ignore escaped characters, except comment-enders. */
478 if (code != Sendcomment && char_quoted (from, from_byte))
479 continue;
480
481 /* Track parity of quotes. */
482 if (code == Sstring)
483 {
484 parity ^= 1;
485 if (my_stringend == 0)
486 my_stringend = c;
487 /* If we have two kinds of string delimiters.
488 There's no way to grok this scanning backwards. */
489 else if (my_stringend != c)
490 string_lossage = 1;
491 }
492
493 if (code == Sstring_fence || code == Scomment_fence)
494 {
495 parity ^= 1;
496 if (my_stringend == 0)
497 my_stringend
498 = code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
499 /* If we have two kinds of string delimiters.
500 There's no way to grok this scanning backwards. */
501 else if (my_stringend != (code == Sstring_fence
502 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
503 string_lossage = 1;
504 }
505
506 /* Record comment-starters according to that
507 quote-parity to the comment-end. */
508 if (code == Scomment)
509 {
510 comstart_parity = parity;
511 comstart_pos = from;
512 comstart_byte = from_byte;
513 }
514
515 /* If we find another earlier comment-ender,
516 any comment-starts earlier than that don't count
517 (because they go with the earlier comment-ender). */
518 if (code == Sendcomment
519 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
520 break;
521
522 /* Assume a defun-start point is outside of strings. */
523 if (code == Sopen
524 && (from == stop
525 || (temp_byte = dec_bytepos (from_byte),
526 FETCH_CHAR (temp_byte) == '\n')))
527 {
528 defun_start = from;
529 defun_start_byte = from_byte;
530 break;
531 }
532 }
533
534 if (comstart_pos == 0)
535 {
536 from = comment_end;
537 from_byte = comment_end_byte;
538 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
539 }
540 /* If the earliest comment starter
541 is followed by uniform paired string quotes or none,
542 we know it can't be inside a string
543 since if it were then the comment ender would be inside one.
544 So it does start a comment. Skip back to it. */
545 else if (comstart_parity == 0 && !string_lossage)
546 {
547 from = comstart_pos;
548 from_byte = comstart_byte;
549 /* Globals are correct now. */
550 }
551 else
552 {
553 /* We had two kinds of string delimiters mixed up
554 together. Decode this going forwards.
555 Scan fwd from the previous comment ender
556 to the one in question; this records where we
557 last passed a comment starter. */
558 struct lisp_parse_state state;
559 /* If we did not already find the defun start, find it now. */
560 if (defun_start == 0)
561 {
562 defun_start = find_defun_start (comment_end, comment_end_byte);
563 defun_start_byte = find_start_value_byte;
564 }
565 scan_sexps_forward (&state,
566 defun_start, defun_start_byte,
567 comment_end - 1, -10000, 0, Qnil, 0);
568 if (state.incomment)
569 {
570 /* scan_sexps_forward changed the direction of search in
571 global variables, so we need to update it completely. */
572
573 from = state.comstr_start;
574 }
575 else
576 {
577 from = comment_end;
578 }
579 from_byte = CHAR_TO_BYTE (from);
580 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
581 }
582
583 *charpos_ptr = from;
584 *bytepos_ptr = from_byte;
585
586 return from;
587 }
588 \f
589 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
590 "Return t if OBJECT is a syntax table.\n\
591 Currently, any char-table counts as a syntax table.")
592 (object)
593 Lisp_Object object;
594 {
595 if (CHAR_TABLE_P (object)
596 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
597 return Qt;
598 return Qnil;
599 }
600
601 static void
602 check_syntax_table (obj)
603 Lisp_Object obj;
604 {
605 if (!(CHAR_TABLE_P (obj)
606 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
607 wrong_type_argument (Qsyntax_table_p, obj);
608 }
609
610 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
611 "Return the current syntax table.\n\
612 This is the one specified by the current buffer.")
613 ()
614 {
615 return current_buffer->syntax_table;
616 }
617
618 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
619 Sstandard_syntax_table, 0, 0, 0,
620 "Return the standard syntax table.\n\
621 This is the one used for new buffers.")
622 ()
623 {
624 return Vstandard_syntax_table;
625 }
626
627 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
628 "Construct a new syntax table and return it.\n\
629 It is a copy of the TABLE, which defaults to the standard syntax table.")
630 (table)
631 Lisp_Object table;
632 {
633 Lisp_Object copy;
634
635 if (!NILP (table))
636 check_syntax_table (table);
637 else
638 table = Vstandard_syntax_table;
639
640 copy = Fcopy_sequence (table);
641
642 /* Only the standard syntax table should have a default element.
643 Other syntax tables should inherit from parents instead. */
644 XCHAR_TABLE (copy)->defalt = Qnil;
645
646 /* Copied syntax tables should all have parents.
647 If we copied one with no parent, such as the standard syntax table,
648 use the standard syntax table as the copy's parent. */
649 if (NILP (XCHAR_TABLE (copy)->parent))
650 Fset_char_table_parent (copy, Vstandard_syntax_table);
651 return copy;
652 }
653
654 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
655 "Select a new syntax table for the current buffer.\n\
656 One argument, a syntax table.")
657 (table)
658 Lisp_Object table;
659 {
660 check_syntax_table (table);
661 current_buffer->syntax_table = table;
662 /* Indicate that this buffer now has a specified syntax table. */
663 current_buffer->local_var_flags
664 |= XFASTINT (buffer_local_flags.syntax_table);
665 return table;
666 }
667 \f
668 /* Convert a letter which signifies a syntax code
669 into the code it signifies.
670 This is used by modify-syntax-entry, and other things. */
671
672 unsigned char syntax_spec_code[0400] =
673 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
674 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
675 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
676 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
677 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
678 (char) Smath, 0377, 0377, (char) Squote,
679 (char) Sopen, (char) Sclose, 0377, 0377,
680 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
681 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
682 0377, 0377, 0377, 0377,
683 (char) Scomment, 0377, (char) Sendcomment, 0377,
684 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
685 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
686 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
687 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
688 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
689 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
690 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
691 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
692 };
693
694 /* Indexed by syntax code, give the letter that describes it. */
695
696 char syntax_code_spec[16] =
697 {
698 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
699 '!', '|'
700 };
701
702 /* Indexed by syntax code, give the object (cons of syntax code and
703 nil) to be stored in syntax table. Since these objects can be
704 shared among syntax tables, we generate them in advance. By
705 sharing objects, the function `describe-syntax' can give a more
706 compact listing. */
707 static Lisp_Object Vsyntax_code_object;
708
709 \f
710 /* Look up the value for CHARACTER in syntax table TABLE's parent
711 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
712 for CHARACTER. It's actually used only when not compiled with GCC. */
713
714 Lisp_Object
715 syntax_parent_lookup (table, character)
716 Lisp_Object table;
717 int character;
718 {
719 Lisp_Object value;
720
721 while (1)
722 {
723 table = XCHAR_TABLE (table)->parent;
724 if (NILP (table))
725 return Qnil;
726
727 value = XCHAR_TABLE (table)->contents[character];
728 if (!NILP (value))
729 return value;
730 }
731 }
732
733 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
734 "Return the syntax code of CHARACTER, described by a character.\n\
735 For example, if CHARACTER is a word constituent,\n\
736 the character `w' is returned.\n\
737 The characters that correspond to various syntax codes\n\
738 are listed in the documentation of `modify-syntax-entry'.")
739 (character)
740 Lisp_Object character;
741 {
742 int char_int;
743 gl_state.current_syntax_table = current_buffer->syntax_table;
744
745 gl_state.use_global = 0;
746 CHECK_NUMBER (character, 0);
747 char_int = XINT (character);
748 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
749 }
750
751 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
752 "Return the matching parenthesis of CHARACTER, or nil if none.")
753 (character)
754 Lisp_Object character;
755 {
756 int char_int, code;
757 gl_state.current_syntax_table = current_buffer->syntax_table;
758 gl_state.use_global = 0;
759 CHECK_NUMBER (character, 0);
760 char_int = XINT (character);
761 code = SYNTAX (char_int);
762 if (code == Sopen || code == Sclose)
763 return SYNTAX_MATCH (char_int);
764 return Qnil;
765 }
766
767 /* This comment supplies the doc string for modify-syntax-entry,
768 for make-docfile to see. We cannot put this in the real DEFUN
769 due to limits in the Unix cpp.
770
771 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
772 "Set syntax for character CHAR according to string S.\n\
773 The syntax is changed only for table TABLE, which defaults to\n\
774 the current buffer's syntax table.\n\
775 The first character of S should be one of the following:\n\
776 Space or - whitespace syntax. w word constituent.\n\
777 _ symbol constituent. . punctuation.\n\
778 ( open-parenthesis. ) close-parenthesis.\n\
779 \" string quote. \\ escape.\n\
780 $ paired delimiter. ' expression quote or prefix operator.\n\
781 < comment starter. > comment ender.\n\
782 / character-quote. @ inherit from `standard-syntax-table'.\n\
783 \n\
784 Only single-character comment start and end sequences are represented thus.\n\
785 Two-character sequences are represented as described below.\n\
786 The second character of S is the matching parenthesis,\n\
787 used only if the first character is `(' or `)'.\n\
788 Any additional characters are flags.\n\
789 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
790 1 means CHAR is the start of a two-char comment start sequence.\n\
791 2 means CHAR is the second character of such a sequence.\n\
792 3 means CHAR is the start of a two-char comment end sequence.\n\
793 4 means CHAR is the second character of such a sequence.\n\
794 \n\
795 There can be up to two orthogonal comment sequences. This is to support\n\
796 language modes such as C++. By default, all comment sequences are of style\n\
797 a, but you can set the comment sequence style to b (on the second character\n\
798 of a comment-start, or the first character of a comment-end sequence) using\n\
799 this flag:\n\
800 b means CHAR is part of comment sequence b.\n\
801 \n\
802 p means CHAR is a prefix character for `backward-prefix-chars';\n\
803 such characters are treated as whitespace when they occur\n\
804 between expressions.")
805 (char, s, table)
806 */
807
808 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
809 /* I really don't know why this is interactive
810 help-form should at least be made useful whilst reading the second arg
811 */
812 "cSet syntax for character: \nsSet syntax for %s to: ",
813 0 /* See immediately above */)
814 (c, newentry, syntax_table)
815 Lisp_Object c, newentry, syntax_table;
816 {
817 register unsigned char *p;
818 register enum syntaxcode code;
819 int val;
820 Lisp_Object match;
821
822 CHECK_NUMBER (c, 0);
823 CHECK_STRING (newentry, 1);
824
825 if (NILP (syntax_table))
826 syntax_table = current_buffer->syntax_table;
827 else
828 check_syntax_table (syntax_table);
829
830 p = XSTRING (newentry)->data;
831 code = (enum syntaxcode) syntax_spec_code[*p++];
832 if (((int) code & 0377) == 0377)
833 error ("invalid syntax description letter: %c", p[-1]);
834
835 if (code == Sinherit)
836 {
837 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
838 return Qnil;
839 }
840
841 if (*p)
842 {
843 int len;
844 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
845 len);
846 XSETINT (match, character);
847 if (XFASTINT (match) == ' ')
848 match = Qnil;
849 p += len;
850 }
851 else
852 match = Qnil;
853
854 val = (int) code;
855 while (*p)
856 switch (*p++)
857 {
858 case '1':
859 val |= 1 << 16;
860 break;
861
862 case '2':
863 val |= 1 << 17;
864 break;
865
866 case '3':
867 val |= 1 << 18;
868 break;
869
870 case '4':
871 val |= 1 << 19;
872 break;
873
874 case 'p':
875 val |= 1 << 20;
876 break;
877
878 case 'b':
879 val |= 1 << 21;
880 break;
881 }
882
883 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
884 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
885 else
886 /* Since we can't use a shared object, let's make a new one. */
887 newentry = Fcons (make_number (val), match);
888
889 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
890
891 return Qnil;
892 }
893 \f
894 /* Dump syntax table to buffer in human-readable format */
895
896 static void
897 describe_syntax (value)
898 Lisp_Object value;
899 {
900 register enum syntaxcode code;
901 char desc, match, start1, start2, end1, end2, prefix, comstyle;
902 char str[2];
903 Lisp_Object first, match_lisp;
904
905 Findent_to (make_number (16), make_number (1));
906
907 if (NILP (value))
908 {
909 insert_string ("default\n");
910 return;
911 }
912
913 if (CHAR_TABLE_P (value))
914 {
915 insert_string ("deeper char-table ...\n");
916 return;
917 }
918
919 if (!CONSP (value))
920 {
921 insert_string ("invalid\n");
922 return;
923 }
924
925 first = XCONS (value)->car;
926 match_lisp = XCONS (value)->cdr;
927
928 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
929 {
930 insert_string ("invalid\n");
931 return;
932 }
933
934 code = (enum syntaxcode) (XINT (first) & 0377);
935 start1 = (XINT (first) >> 16) & 1;
936 start2 = (XINT (first) >> 17) & 1;
937 end1 = (XINT (first) >> 18) & 1;
938 end2 = (XINT (first) >> 19) & 1;
939 prefix = (XINT (first) >> 20) & 1;
940 comstyle = (XINT (first) >> 21) & 1;
941
942 if ((int) code < 0 || (int) code >= (int) Smax)
943 {
944 insert_string ("invalid");
945 return;
946 }
947 desc = syntax_code_spec[(int) code];
948
949 str[0] = desc, str[1] = 0;
950 insert (str, 1);
951
952 if (NILP (match_lisp))
953 insert (" ", 1);
954 else
955 insert_char (XINT (match_lisp));
956
957 if (start1)
958 insert ("1", 1);
959 if (start2)
960 insert ("2", 1);
961
962 if (end1)
963 insert ("3", 1);
964 if (end2)
965 insert ("4", 1);
966
967 if (prefix)
968 insert ("p", 1);
969 if (comstyle)
970 insert ("b", 1);
971
972 insert_string ("\twhich means: ");
973
974 switch (SWITCH_ENUM_CAST (code))
975 {
976 case Swhitespace:
977 insert_string ("whitespace"); break;
978 case Spunct:
979 insert_string ("punctuation"); break;
980 case Sword:
981 insert_string ("word"); break;
982 case Ssymbol:
983 insert_string ("symbol"); break;
984 case Sopen:
985 insert_string ("open"); break;
986 case Sclose:
987 insert_string ("close"); break;
988 case Squote:
989 insert_string ("quote"); break;
990 case Sstring:
991 insert_string ("string"); break;
992 case Smath:
993 insert_string ("math"); break;
994 case Sescape:
995 insert_string ("escape"); break;
996 case Scharquote:
997 insert_string ("charquote"); break;
998 case Scomment:
999 insert_string ("comment"); break;
1000 case Sendcomment:
1001 insert_string ("endcomment"); break;
1002 default:
1003 insert_string ("invalid");
1004 return;
1005 }
1006
1007 if (!NILP (match_lisp))
1008 {
1009 insert_string (", matches ");
1010 insert_char (XINT (match_lisp));
1011 }
1012
1013 if (start1)
1014 insert_string (",\n\t is the first character of a comment-start sequence");
1015 if (start2)
1016 insert_string (",\n\t is the second character of a comment-start sequence");
1017
1018 if (end1)
1019 insert_string (",\n\t is the first character of a comment-end sequence");
1020 if (end2)
1021 insert_string (",\n\t is the second character of a comment-end sequence");
1022 if (comstyle)
1023 insert_string (" (comment style b)");
1024
1025 if (prefix)
1026 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1027
1028 insert_string ("\n");
1029 }
1030
1031 static Lisp_Object
1032 describe_syntax_1 (vector)
1033 Lisp_Object vector;
1034 {
1035 struct buffer *old = current_buffer;
1036 set_buffer_internal (XBUFFER (Vstandard_output));
1037 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1038 while (! NILP (XCHAR_TABLE (vector)->parent))
1039 {
1040 vector = XCHAR_TABLE (vector)->parent;
1041 insert_string ("\nThe parent syntax table is:");
1042 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1043 (int *) 0, 0);
1044 }
1045
1046 call0 (intern ("help-mode"));
1047 set_buffer_internal (old);
1048 return Qnil;
1049 }
1050
1051 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1052 "Describe the syntax specifications in the syntax table.\n\
1053 The descriptions are inserted in a buffer, which is then displayed.")
1054 ()
1055 {
1056 internal_with_output_to_temp_buffer
1057 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1058
1059 return Qnil;
1060 }
1061 \f
1062 int parse_sexp_ignore_comments;
1063
1064 /* Return the position across COUNT words from FROM.
1065 If that many words cannot be found before the end of the buffer, return 0.
1066 COUNT negative means scan backward and stop at word beginning. */
1067
1068 scan_words (from, count)
1069 register int from, count;
1070 {
1071 register int beg = BEGV;
1072 register int end = ZV;
1073 register int from_byte = CHAR_TO_BYTE (from);
1074 register enum syntaxcode code;
1075 int ch0, ch1;
1076
1077 immediate_quit = 1;
1078 QUIT;
1079
1080 SETUP_SYNTAX_TABLE (from, count);
1081
1082 while (count > 0)
1083 {
1084 while (1)
1085 {
1086 if (from == end)
1087 {
1088 immediate_quit = 0;
1089 return 0;
1090 }
1091 UPDATE_SYNTAX_TABLE_FORWARD (from);
1092 ch0 = FETCH_CHAR (from_byte);
1093 code = SYNTAX (ch0);
1094 INC_BOTH (from, from_byte);
1095 if (words_include_escapes
1096 && (code == Sescape || code == Scharquote))
1097 break;
1098 if (code == Sword)
1099 break;
1100 }
1101 /* Now CH0 is a character which begins a word and FROM is the
1102 position of the next character. */
1103 while (1)
1104 {
1105 if (from == end) break;
1106 UPDATE_SYNTAX_TABLE_FORWARD (from);
1107 ch1 = FETCH_CHAR (from_byte);
1108 code = SYNTAX (ch1);
1109 if (!(words_include_escapes
1110 && (code == Sescape || code == Scharquote)))
1111 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1112 break;
1113 INC_BOTH (from, from_byte);
1114 ch0 = ch1;
1115 }
1116 count--;
1117 }
1118 while (count < 0)
1119 {
1120 while (1)
1121 {
1122 if (from == beg)
1123 {
1124 immediate_quit = 0;
1125 return 0;
1126 }
1127 DEC_BOTH (from, from_byte);
1128 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1129 ch1 = FETCH_CHAR (from_byte);
1130 code = SYNTAX (ch1);
1131 if (words_include_escapes
1132 && (code == Sescape || code == Scharquote))
1133 break;
1134 if (code == Sword)
1135 break;
1136 }
1137 /* Now CH1 is a character which ends a word and FROM is the
1138 position of it. */
1139 while (1)
1140 {
1141 int temp_byte;
1142
1143 if (from == beg)
1144 break;
1145 temp_byte = dec_bytepos (from_byte);
1146 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1147 ch0 = FETCH_CHAR (temp_byte);
1148 code = SYNTAX (ch0);
1149 if (!(words_include_escapes
1150 && (code == Sescape || code == Scharquote)))
1151 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1152 break;
1153 DEC_BOTH (from, from_byte);
1154 ch1 = ch0;
1155 }
1156 count++;
1157 }
1158
1159 immediate_quit = 0;
1160
1161 return from;
1162 }
1163
1164 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1165 "Move point forward ARG words (backward if ARG is negative).\n\
1166 Normally returns t.\n\
1167 If an edge of the buffer is reached, point is left there\n\
1168 and nil is returned.")
1169 (count)
1170 Lisp_Object count;
1171 {
1172 int val;
1173 CHECK_NUMBER (count, 0);
1174
1175 if (!(val = scan_words (PT, XINT (count))))
1176 {
1177 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1178 return Qnil;
1179 }
1180 SET_PT (val);
1181 return Qt;
1182 }
1183 \f
1184 Lisp_Object skip_chars ();
1185
1186 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1187 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1188 STRING is like the inside of a `[...]' in a regular expression\n\
1189 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1190 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1191 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1192 Returns the distance traveled, either zero or positive.")
1193 (string, lim)
1194 Lisp_Object string, lim;
1195 {
1196 return skip_chars (1, 0, string, lim);
1197 }
1198
1199 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1200 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1201 See `skip-chars-forward' for details.\n\
1202 Returns the distance traveled, either zero or negative.")
1203 (string, lim)
1204 Lisp_Object string, lim;
1205 {
1206 return skip_chars (0, 0, string, lim);
1207 }
1208
1209 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1210 "Move point forward across chars in specified syntax classes.\n\
1211 SYNTAX is a string of syntax code characters.\n\
1212 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1213 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1214 This function returns the distance traveled, either zero or positive.")
1215 (syntax, lim)
1216 Lisp_Object syntax, lim;
1217 {
1218 return skip_chars (1, 1, syntax, lim);
1219 }
1220
1221 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1222 "Move point backward across chars in specified syntax classes.\n\
1223 SYNTAX is a string of syntax code characters.\n\
1224 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1225 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1226 This function returns the distance traveled, either zero or negative.")
1227 (syntax, lim)
1228 Lisp_Object syntax, lim;
1229 {
1230 return skip_chars (0, 1, syntax, lim);
1231 }
1232
1233 static Lisp_Object
1234 skip_chars (forwardp, syntaxp, string, lim)
1235 int forwardp, syntaxp;
1236 Lisp_Object string, lim;
1237 {
1238 register unsigned char *p, *pend;
1239 register unsigned int c;
1240 register int ch;
1241 unsigned char fastmap[0400];
1242 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1243 of which codes don't fit in FASTMAP. In that case, we set the
1244 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1245 and set the actual ranges of characters in CHAR_RANGES. In the
1246 form "X-Y" of STRING, both X and Y must belong to the same
1247 character set because a range striding across character sets is
1248 meaningless. */
1249 int *char_ranges;
1250 int n_char_ranges = 0;
1251 int negate = 0;
1252 register int i, i_byte;
1253 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1254 int string_multibyte = STRING_MULTIBYTE (string);
1255
1256 CHECK_STRING (string, 0);
1257 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1258
1259 if (NILP (lim))
1260 XSETINT (lim, forwardp ? ZV : BEGV);
1261 else
1262 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1263
1264 /* In any case, don't allow scan outside bounds of buffer. */
1265 if (XINT (lim) > ZV)
1266 XSETFASTINT (lim, ZV);
1267 if (XINT (lim) < BEGV)
1268 XSETFASTINT (lim, BEGV);
1269
1270 bzero (fastmap, sizeof fastmap);
1271
1272 i = 0, i_byte = 0;
1273
1274 if (i < XSTRING (string)->size
1275 && XSTRING (string)->data[0] == '^')
1276 {
1277 negate = 1; i++, i_byte++;
1278 }
1279
1280 /* Find the characters specified and set their elements of fastmap.
1281 If syntaxp, each character counts as itself.
1282 Otherwise, handle backslashes and ranges specially. */
1283
1284 while (i < XSTRING (string)->size)
1285 {
1286 int c_leading_code;
1287
1288 if (string_multibyte)
1289 {
1290 c_leading_code = XSTRING (string)->data[i_byte];
1291 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1292 }
1293 else
1294 c = c_leading_code = XSTRING (string)->data[i++];
1295
1296 /* Convert multibyteness between what the string has
1297 and what the buffer has. */
1298 if (multibyte)
1299 c = unibyte_char_to_multibyte (c);
1300 else
1301 c &= 0377;
1302
1303 if (syntaxp)
1304 fastmap[syntax_spec_code[c & 0377]] = 1;
1305 else
1306 {
1307 if (c == '\\')
1308 {
1309 if (i == XSTRING (string)->size)
1310 break;
1311
1312 if (string_multibyte)
1313 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1314 else
1315 c = XSTRING (string)->data[i++];
1316 }
1317 if (i < XSTRING (string)->size && XSTRING (string)->data[i] == '-')
1318 {
1319 unsigned int c2;
1320
1321 /* Skip over the dash. */
1322 i++, i_byte++;
1323
1324 if (i == XSTRING (string)->size)
1325 break;
1326
1327 /* Get the end of the range. */
1328 if (string_multibyte)
1329 FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
1330 else
1331 c2 = XSTRING (string)->data[i++];
1332
1333 if (SINGLE_BYTE_CHAR_P (c))
1334 while (c <= c2)
1335 {
1336 fastmap[c] = 1;
1337 c++;
1338 }
1339 else
1340 {
1341 fastmap[c_leading_code] = 1;
1342 if (c <= c2)
1343 {
1344 char_ranges[n_char_ranges++] = c;
1345 char_ranges[n_char_ranges++] = c2;
1346 }
1347 }
1348 }
1349 else
1350 {
1351 fastmap[c_leading_code] = 1;
1352 if (!SINGLE_BYTE_CHAR_P (c))
1353 {
1354 char_ranges[n_char_ranges++] = c;
1355 char_ranges[n_char_ranges++] = c;
1356 }
1357 }
1358 }
1359 }
1360
1361 /* If ^ was the first character, complement the fastmap. In
1362 addition, as all multibyte characters have possibility of
1363 matching, set all entries for base leading codes, which is
1364 harmless even if SYNTAXP is 1. */
1365
1366 if (negate)
1367 for (i = 0; i < sizeof fastmap; i++)
1368 {
1369 if (!multibyte || !BASE_LEADING_CODE_P (i))
1370 fastmap[i] ^= 1;
1371 else
1372 fastmap[i] = 1;
1373 }
1374
1375 {
1376 int start_point = PT;
1377 int pos = PT;
1378 int pos_byte = PT_BYTE;
1379
1380 immediate_quit = 1;
1381 if (syntaxp)
1382 {
1383 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1384 if (forwardp)
1385 {
1386 if (multibyte)
1387 {
1388 if (pos < XINT (lim))
1389 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1390 {
1391 /* Since we already checked for multibyteness,
1392 avoid using INC_BOTH which checks again. */
1393 INC_POS (pos_byte);
1394 pos++;
1395 if (pos >= XINT (lim))
1396 break;
1397 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1398 }
1399 }
1400 else
1401 {
1402 while (pos < XINT (lim)
1403 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1404 {
1405 pos++;
1406 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1407 }
1408 }
1409 }
1410 else
1411 {
1412 if (multibyte)
1413 {
1414 while (pos > XINT (lim))
1415 {
1416 int savepos = pos_byte;
1417 /* Since we already checked for multibyteness,
1418 avoid using DEC_BOTH which checks again. */
1419 pos--;
1420 DEC_POS (pos_byte);
1421 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1422 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1423 {
1424 pos++;
1425 pos_byte = savepos;
1426 break;
1427 }
1428 }
1429 }
1430 else
1431 {
1432 if (pos > XINT (lim))
1433 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1434 {
1435 pos--;
1436 if (pos <= XINT (lim))
1437 break;
1438 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1439 }
1440 }
1441 }
1442 }
1443 else
1444 {
1445 if (forwardp)
1446 {
1447 if (multibyte)
1448 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
1449 {
1450 if (!BASE_LEADING_CODE_P (c))
1451 INC_BOTH (pos, pos_byte);
1452 else if (n_char_ranges)
1453 {
1454 /* We much check CHAR_RANGES for a multibyte
1455 character. */
1456 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1457 for (i = 0; i < n_char_ranges; i += 2)
1458 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1459 break;
1460 if (!(negate ^ (i < n_char_ranges)))
1461 break;
1462
1463 INC_BOTH (pos, pos_byte);
1464 }
1465 else
1466 {
1467 if (!negate) break;
1468 INC_BOTH (pos, pos_byte);
1469 }
1470 }
1471 else
1472 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1473 pos++;
1474 }
1475 else
1476 {
1477 if (multibyte)
1478 while (pos > XINT (lim))
1479 {
1480 int savepos = pos_byte;
1481 DEC_BOTH (pos, pos_byte);
1482 if (fastmap[(c = FETCH_BYTE (pos_byte))])
1483 {
1484 if (!BASE_LEADING_CODE_P (c))
1485 ;
1486 else if (n_char_ranges)
1487 {
1488 /* We much check CHAR_RANGES for a multibyte
1489 character. */
1490 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1491 for (i = 0; i < n_char_ranges; i += 2)
1492 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1493 break;
1494 if (!(negate ^ (i < n_char_ranges)))
1495 {
1496 pos++;
1497 pos_byte = savepos;
1498 break;
1499 }
1500 }
1501 else
1502 if (!negate)
1503 {
1504 pos++;
1505 pos_byte = savepos;
1506 break;
1507 }
1508 }
1509 else
1510 {
1511 pos++;
1512 pos_byte = savepos;
1513 break;
1514 }
1515 }
1516 else
1517 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1518 pos--;
1519 }
1520 }
1521
1522 #if 0 /* Not needed now that a position in mid-character
1523 cannot be specified in Lisp. */
1524 if (multibyte
1525 /* INC_POS or DEC_POS might have moved POS over LIM. */
1526 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1527 pos = XINT (lim);
1528 #endif
1529
1530 if (! multibyte)
1531 pos_byte = pos;
1532
1533 SET_PT_BOTH (pos, pos_byte);
1534 immediate_quit = 0;
1535
1536 return make_number (PT - start_point);
1537 }
1538 }
1539 \f
1540 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1541 "Move forward across up to N comments. If N is negative, move backward.\n\
1542 Stop scanning if we find something other than a comment or whitespace.\n\
1543 Set point to where scanning stops.\n\
1544 If N comments are found as expected, with nothing except whitespace\n\
1545 between them, return t; otherwise return nil.")
1546 (count)
1547 Lisp_Object count;
1548 {
1549 register int from;
1550 int from_byte;
1551 register int stop;
1552 register int c, c1;
1553 register enum syntaxcode code;
1554 int comstyle = 0; /* style of comment encountered */
1555 int found;
1556 int count1;
1557 int temp_pos;
1558 int out_charpos, out_bytepos;
1559
1560 CHECK_NUMBER (count, 0);
1561 count1 = XINT (count);
1562 stop = count1 > 0 ? ZV : BEGV;
1563
1564 immediate_quit = 1;
1565 QUIT;
1566
1567 from = PT;
1568 from_byte = PT_BYTE;
1569
1570 SETUP_SYNTAX_TABLE (from, count1);
1571 while (count1 > 0)
1572 {
1573 do
1574 {
1575 if (from == stop)
1576 {
1577 SET_PT_BOTH (from, from_byte);
1578 immediate_quit = 0;
1579 return Qnil;
1580 }
1581 UPDATE_SYNTAX_TABLE_FORWARD (from);
1582 c = FETCH_CHAR (from_byte);
1583 code = SYNTAX (c);
1584 INC_BOTH (from, from_byte);
1585 comstyle = 0;
1586 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1587 && (c1 = FETCH_CHAR (from_byte),
1588 SYNTAX_COMSTART_SECOND (c1)))
1589 {
1590 /* We have encountered a comment start sequence and we
1591 are ignoring all text inside comments. We must record
1592 the comment style this sequence begins so that later,
1593 only a comment end of the same style actually ends
1594 the comment section. */
1595 code = Scomment;
1596 comstyle = SYNTAX_COMMENT_STYLE (c1);
1597 INC_BOTH (from, from_byte);
1598 }
1599 }
1600 while (code == Swhitespace || code == Sendcomment);
1601
1602 if (code != Scomment && code != Scomment_fence)
1603 {
1604 immediate_quit = 0;
1605 DEC_BOTH (from, from_byte);
1606 SET_PT_BOTH (from, from_byte);
1607 return Qnil;
1608 }
1609 /* We're at the start of a comment. */
1610 while (1)
1611 {
1612 if (from == stop)
1613 {
1614 immediate_quit = 0;
1615 SET_PT_BOTH (from, from_byte);
1616 return Qnil;
1617 }
1618 UPDATE_SYNTAX_TABLE_FORWARD (from);
1619 c = FETCH_CHAR (from_byte);
1620 INC_BOTH (from, from_byte);
1621 if (SYNTAX (c) == Sendcomment
1622 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1623 /* we have encountered a comment end of the same style
1624 as the comment sequence which began this comment
1625 section */
1626 break;
1627 if (SYNTAX (c) == Scomment_fence
1628 && comstyle == ST_COMMENT_STYLE)
1629 /* we have encountered a comment end of the same style
1630 as the comment sequence which began this comment
1631 section. */
1632 break;
1633 if (from < stop && SYNTAX_COMEND_FIRST (c)
1634 && (c1 = FETCH_CHAR (from_byte),
1635 SYNTAX_COMEND_SECOND (c1))
1636 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1637 /* we have encountered a comment end of the same style
1638 as the comment sequence which began this comment
1639 section */
1640 {
1641 INC_BOTH (from, from_byte);
1642 break;
1643 }
1644 }
1645 /* We have skipped one comment. */
1646 count1--;
1647 }
1648
1649 while (count1 < 0)
1650 {
1651 while (1)
1652 {
1653 int quoted;
1654 if (from <= stop)
1655 {
1656 SET_PT_BOTH (BEGV, BEGV_BYTE);
1657 immediate_quit = 0;
1658 return Qnil;
1659 }
1660
1661 DEC_BOTH (from, from_byte);
1662 quoted = char_quoted (from, from_byte);
1663 if (quoted)
1664 {
1665 DEC_BOTH (from, from_byte);
1666 goto leave;
1667 }
1668 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1669 c = FETCH_CHAR (from_byte);
1670 code = SYNTAX (c);
1671 comstyle = 0;
1672 if (code == Sendcomment)
1673 comstyle = SYNTAX_COMMENT_STYLE (c);
1674 temp_pos = dec_bytepos (from_byte);
1675 if (from > stop && SYNTAX_COMEND_SECOND (c)
1676 && (c1 = FETCH_CHAR (temp_pos),
1677 SYNTAX_COMEND_FIRST (c1))
1678 && !char_quoted (from - 1, temp_pos))
1679 {
1680 /* We must record the comment style encountered so that
1681 later, we can match only the proper comment begin
1682 sequence of the same style. */
1683 code = Sendcomment;
1684 comstyle = SYNTAX_COMMENT_STYLE (c1);
1685 DEC_BOTH (from, from_byte);
1686 }
1687 if (from > stop && SYNTAX_COMSTART_SECOND (c)
1688 && (c1 = FETCH_CHAR (temp_pos),
1689 SYNTAX_COMSTART_FIRST (c1))
1690 && !char_quoted (from - 1, temp_pos))
1691 {
1692 /* We must record the comment style encountered so that
1693 later, we can match only the proper comment begin
1694 sequence of the same style. */
1695 code = Scomment;
1696 DEC_BOTH (from, from_byte);
1697 }
1698
1699 if (code == Scomment_fence)
1700 {
1701 /* Skip until first preceding unquoted comment_fence. */
1702 int found = 0, ini = from, ini_byte = from_byte;
1703
1704 while (1)
1705 {
1706 DEC_BOTH (from, from_byte);
1707 if (from == stop)
1708 break;
1709 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1710 c = FETCH_CHAR (from_byte);
1711 if (SYNTAX (c) == Scomment_fence
1712 && !char_quoted (from, from_byte))
1713 {
1714 found = 1;
1715 break;
1716 }
1717 }
1718 if (found == 0)
1719 {
1720 from = ini; /* Set point to ini + 1. */
1721 from_byte = ini_byte;
1722 goto leave;
1723 }
1724 }
1725 else if (code == Sendcomment)
1726 {
1727 found = back_comment (from, from_byte, stop, comstyle,
1728 &out_charpos, &out_bytepos);
1729 if (found != -1)
1730 from = out_charpos, from_byte = out_bytepos;
1731 /* We have skipped one comment. */
1732 break;
1733 }
1734 else if (code != Swhitespace && code != Scomment)
1735 {
1736 leave:
1737 immediate_quit = 0;
1738 INC_BOTH (from, from_byte);
1739 SET_PT_BOTH (from, from_byte);
1740 return Qnil;
1741 }
1742 }
1743
1744 count1++;
1745 }
1746
1747 SET_PT_BOTH (from, from_byte);
1748 immediate_quit = 0;
1749 return Qt;
1750 }
1751 \f
1752 static Lisp_Object
1753 scan_lists (from, count, depth, sexpflag)
1754 register int from;
1755 int count, depth, sexpflag;
1756 {
1757 Lisp_Object val;
1758 register int stop = count > 0 ? ZV : BEGV;
1759 register int c, c1;
1760 int stringterm;
1761 int quoted;
1762 int mathexit = 0;
1763 register enum syntaxcode code, temp_code;
1764 int min_depth = depth; /* Err out if depth gets less than this. */
1765 int comstyle = 0; /* style of comment encountered */
1766 int temp_pos;
1767 int last_good = from;
1768 int found;
1769 int from_byte = CHAR_TO_BYTE (from);
1770 int out_bytepos, out_charpos;
1771
1772 if (depth > 0) min_depth = 0;
1773
1774 immediate_quit = 1;
1775 QUIT;
1776
1777 SETUP_SYNTAX_TABLE (from, count);
1778 while (count > 0)
1779 {
1780 while (from < stop)
1781 {
1782 UPDATE_SYNTAX_TABLE_FORWARD (from);
1783 c = FETCH_CHAR (from_byte);
1784 code = SYNTAX (c);
1785 if (depth == min_depth)
1786 last_good = from;
1787 INC_BOTH (from, from_byte);
1788 UPDATE_SYNTAX_TABLE_FORWARD (from);
1789 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1790 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
1791 && parse_sexp_ignore_comments)
1792 {
1793 /* we have encountered a comment start sequence and we
1794 are ignoring all text inside comments. We must record
1795 the comment style this sequence begins so that later,
1796 only a comment end of the same style actually ends
1797 the comment section */
1798 code = Scomment;
1799 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
1800 INC_BOTH (from, from_byte);
1801 }
1802
1803 UPDATE_SYNTAX_TABLE_FORWARD (from);
1804 if (SYNTAX_PREFIX (c))
1805 continue;
1806
1807 switch (SWITCH_ENUM_CAST (code))
1808 {
1809 case Sescape:
1810 case Scharquote:
1811 if (from == stop) goto lose;
1812 INC_BOTH (from, from_byte);
1813 /* treat following character as a word constituent */
1814 case Sword:
1815 case Ssymbol:
1816 if (depth || !sexpflag) break;
1817 /* This word counts as a sexp; return at end of it. */
1818 while (from < stop)
1819 {
1820 UPDATE_SYNTAX_TABLE_FORWARD (from);
1821 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
1822 {
1823 case Scharquote:
1824 case Sescape:
1825 INC_BOTH (from, from_byte);
1826 if (from == stop) goto lose;
1827 break;
1828 case Sword:
1829 case Ssymbol:
1830 case Squote:
1831 break;
1832 default:
1833 goto done;
1834 }
1835 INC_BOTH (from, from_byte);
1836 }
1837 goto done;
1838
1839 case Scomment:
1840 case Scomment_fence:
1841 if (!parse_sexp_ignore_comments) break;
1842 while (1)
1843 {
1844 if (from == stop)
1845 {
1846 if (depth == 0)
1847 goto done;
1848 goto lose;
1849 }
1850 UPDATE_SYNTAX_TABLE_FORWARD (from);
1851 c = FETCH_CHAR (from_byte);
1852 if (code == Scomment
1853 ? (SYNTAX (c) == Sendcomment
1854 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1855 : (SYNTAX (c) == Scomment_fence))
1856 /* we have encountered a comment end of the same style
1857 as the comment sequence which began this comment
1858 section */
1859 break;
1860 INC_BOTH (from, from_byte);
1861 if (from < stop && SYNTAX_COMEND_FIRST (c)
1862 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte))
1863 && SYNTAX_COMMENT_STYLE (c) == comstyle
1864 && code == Scomment)
1865 /* we have encountered a comment end of the same style
1866 as the comment sequence which began this comment
1867 section */
1868 {
1869 INC_BOTH (from, from_byte);
1870 break;
1871 }
1872 }
1873 break;
1874
1875 case Smath:
1876 if (!sexpflag)
1877 break;
1878 if (from != stop && c == FETCH_CHAR (from_byte))
1879 {
1880 INC_BOTH (from, from_byte);
1881 }
1882 if (mathexit)
1883 {
1884 mathexit = 0;
1885 goto close1;
1886 }
1887 mathexit = 1;
1888
1889 case Sopen:
1890 if (!++depth) goto done;
1891 break;
1892
1893 case Sclose:
1894 close1:
1895 if (!--depth) goto done;
1896 if (depth < min_depth)
1897 Fsignal (Qscan_error,
1898 Fcons (build_string ("Containing expression ends prematurely"),
1899 Fcons (make_number (last_good),
1900 Fcons (make_number (from), Qnil))));
1901 break;
1902
1903 case Sstring:
1904 case Sstring_fence:
1905 temp_pos = dec_bytepos (from_byte);
1906 stringterm = FETCH_CHAR (temp_pos);
1907 while (1)
1908 {
1909 if (from >= stop) goto lose;
1910 UPDATE_SYNTAX_TABLE_FORWARD (from);
1911 if (code == Sstring
1912 ? (FETCH_CHAR (from_byte) == stringterm)
1913 : SYNTAX (FETCH_CHAR (from_byte)) == Sstring_fence)
1914 break;
1915 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
1916 {
1917 case Scharquote:
1918 case Sescape:
1919 INC_BOTH (from, from_byte);
1920 }
1921 INC_BOTH (from, from_byte);
1922 }
1923 INC_BOTH (from, from_byte);
1924 if (!depth && sexpflag) goto done;
1925 break;
1926 }
1927 }
1928
1929 /* Reached end of buffer. Error if within object, return nil if between */
1930 if (depth) goto lose;
1931
1932 immediate_quit = 0;
1933 return Qnil;
1934
1935 /* End of object reached */
1936 done:
1937 count--;
1938 }
1939
1940 while (count < 0)
1941 {
1942 while (from > stop)
1943 {
1944 DEC_BOTH (from, from_byte);
1945 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1946 c = FETCH_CHAR (from_byte);
1947 code = SYNTAX (c);
1948 if (depth == min_depth)
1949 last_good = from;
1950 comstyle = 0;
1951 if (code == Sendcomment)
1952 comstyle = SYNTAX_COMMENT_STYLE (c);
1953 temp_pos = from_byte;
1954 if (! NILP (current_buffer->enable_multibyte_characters))
1955 DEC_POS (temp_pos);
1956 else
1957 temp_pos--;
1958 if (from > stop && SYNTAX_COMEND_SECOND (c)
1959 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1960 && parse_sexp_ignore_comments)
1961 {
1962 /* we must record the comment style encountered so that
1963 later, we can match only the proper comment begin
1964 sequence of the same style */
1965 code = Sendcomment;
1966 comstyle = SYNTAX_COMMENT_STYLE (c1);
1967 DEC_BOTH (from, from_byte);
1968 }
1969
1970 /* Quoting turns anything except a comment-ender
1971 into a word character. */
1972 if (code != Sendcomment && char_quoted (from, from_byte))
1973 code = Sword;
1974 else if (SYNTAX_PREFIX (c))
1975 continue;
1976
1977 switch (SWITCH_ENUM_CAST (code))
1978 {
1979 case Sword:
1980 case Ssymbol:
1981 case Sescape:
1982 case Scharquote:
1983 if (depth || !sexpflag) break;
1984 /* This word counts as a sexp; count object finished
1985 after passing it. */
1986 while (from > stop)
1987 {
1988 temp_pos = from_byte;
1989 if (! NILP (current_buffer->enable_multibyte_characters))
1990 DEC_POS (temp_pos);
1991 else
1992 temp_pos--;
1993 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
1994 c1 = FETCH_CHAR (temp_pos);
1995 temp_code = SYNTAX (c1);
1996 /* Don't allow comment-end to be quoted. */
1997 if (temp_code == Sendcomment)
1998 goto done2;
1999 quoted = char_quoted (from - 1, temp_pos);
2000 if (quoted)
2001 {
2002 DEC_BOTH (from, from_byte);
2003 temp_pos = dec_bytepos (temp_pos);
2004 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2005 }
2006 c1 = FETCH_CHAR (temp_pos);
2007 temp_code = SYNTAX (c1);
2008 if (! (quoted || temp_code == Sword
2009 || temp_code == Ssymbol
2010 || temp_code == Squote))
2011 goto done2;
2012 DEC_BOTH (from, from_byte);
2013 }
2014 goto done2;
2015
2016 case Smath:
2017 if (!sexpflag)
2018 break;
2019 temp_pos = dec_bytepos (from_byte);
2020 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2021 if (from != stop && c == FETCH_CHAR (temp_pos))
2022 DEC_BOTH (from, from_byte);
2023 if (mathexit)
2024 {
2025 mathexit = 0;
2026 goto open2;
2027 }
2028 mathexit = 1;
2029
2030 case Sclose:
2031 if (!++depth) goto done2;
2032 break;
2033
2034 case Sopen:
2035 open2:
2036 if (!--depth) goto done2;
2037 if (depth < min_depth)
2038 Fsignal (Qscan_error,
2039 Fcons (build_string ("Containing expression ends prematurely"),
2040 Fcons (make_number (last_good),
2041 Fcons (make_number (from), Qnil))));
2042 break;
2043
2044 case Sendcomment:
2045 if (!parse_sexp_ignore_comments)
2046 break;
2047 found = back_comment (from, from_byte, stop, comstyle,
2048 &out_charpos, &out_bytepos);
2049 if (found != -1)
2050 from = out_charpos, from_byte = out_bytepos;
2051 break;
2052
2053 case Scomment_fence:
2054 case Sstring_fence:
2055 while (1)
2056 {
2057 DEC_BOTH (from, from_byte);
2058 if (from == stop) goto lose;
2059 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2060 if (!char_quoted (from, from_byte)
2061 && SYNTAX (FETCH_CHAR (from_byte)) == code)
2062 break;
2063 }
2064 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2065 break;
2066
2067 case Sstring:
2068 stringterm = FETCH_CHAR (from_byte);
2069 while (1)
2070 {
2071 if (from == stop) goto lose;
2072 temp_pos = from_byte;
2073 if (! NILP (current_buffer->enable_multibyte_characters))
2074 DEC_POS (temp_pos);
2075 else
2076 temp_pos--;
2077 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2078 if (!char_quoted (from - 1, temp_pos)
2079 && stringterm == FETCH_CHAR (temp_pos))
2080 break;
2081 DEC_BOTH (from, from_byte);
2082 }
2083 DEC_BOTH (from, from_byte);
2084 if (!depth && sexpflag) goto done2;
2085 break;
2086 }
2087 }
2088
2089 /* Reached start of buffer. Error if within object, return nil if between */
2090 if (depth) goto lose;
2091
2092 immediate_quit = 0;
2093 return Qnil;
2094
2095 done2:
2096 count++;
2097 }
2098
2099
2100 immediate_quit = 0;
2101 XSETFASTINT (val, from);
2102 return val;
2103
2104 lose:
2105 Fsignal (Qscan_error,
2106 Fcons (build_string ("Unbalanced parentheses"),
2107 Fcons (make_number (last_good),
2108 Fcons (make_number (from), Qnil))));
2109
2110 /* NOTREACHED */
2111 }
2112
2113 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2114 "Scan from character number FROM by COUNT lists.\n\
2115 Returns the character number of the position thus found.\n\
2116 \n\
2117 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2118 only places where the depth in parentheses becomes zero\n\
2119 are candidates for stopping; COUNT such places are counted.\n\
2120 Thus, a positive value for DEPTH means go out levels.\n\
2121 \n\
2122 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2123 \n\
2124 If the beginning or end of (the accessible part of) the buffer is reached\n\
2125 and the depth is wrong, an error is signaled.\n\
2126 If the depth is right but the count is not used up, nil is returned.")
2127 (from, count, depth)
2128 Lisp_Object from, count, depth;
2129 {
2130 CHECK_NUMBER (from, 0);
2131 CHECK_NUMBER (count, 1);
2132 CHECK_NUMBER (depth, 2);
2133
2134 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2135 }
2136
2137 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2138 "Scan from character number FROM by COUNT balanced expressions.\n\
2139 If COUNT is negative, scan backwards.\n\
2140 Returns the character number of the position thus found.\n\
2141 \n\
2142 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2143 \n\
2144 If the beginning or end of (the accessible part of) the buffer is reached\n\
2145 in the middle of a parenthetical grouping, an error is signaled.\n\
2146 If the beginning or end is reached between groupings\n\
2147 but before count is used up, nil is returned.")
2148 (from, count)
2149 Lisp_Object from, count;
2150 {
2151 CHECK_NUMBER (from, 0);
2152 CHECK_NUMBER (count, 1);
2153
2154 return scan_lists (XINT (from), XINT (count), 0, 1);
2155 }
2156
2157 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2158 0, 0, 0,
2159 "Move point backward over any number of chars with prefix syntax.\n\
2160 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2161 ()
2162 {
2163 int beg = BEGV;
2164 int opoint = PT;
2165 int opoint_byte = PT_BYTE;
2166 int pos = PT;
2167 int pos_byte = PT_BYTE;
2168 int c;
2169
2170 if (pos > beg)
2171 {
2172 SETUP_SYNTAX_TABLE (pos, -1);
2173 }
2174
2175 DEC_BOTH (pos, pos_byte);
2176
2177 while (pos + 1 > beg && !char_quoted (pos, pos_byte)
2178 /* Previous statement updates syntax table. */
2179 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2180 || SYNTAX_PREFIX (c)))
2181 {
2182 DEC_BOTH (pos, pos_byte);
2183 }
2184
2185 SET_PT_BOTH (opoint, opoint_byte);
2186
2187 return Qnil;
2188 }
2189 \f
2190 /* Parse forward from FROM / FROM_BYTE to END,
2191 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2192 and return a description of the state of the parse at END.
2193 If STOPBEFORE is nonzero, stop at the start of an atom.
2194 If COMMENTSTOP is 1, stop at the start of a comment.
2195 If COMMENTSTOP is -1, stop at the start or end of a comment,
2196 after the beginning of a string, or after the end of a string. */
2197
2198 static void
2199 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2200 stopbefore, oldstate, commentstop)
2201 struct lisp_parse_state *stateptr;
2202 register int from;
2203 int end, targetdepth, stopbefore;
2204 Lisp_Object oldstate;
2205 int commentstop;
2206 {
2207 struct lisp_parse_state state;
2208
2209 register enum syntaxcode code;
2210 struct level { int last, prev; };
2211 struct level levelstart[100];
2212 register struct level *curlevel = levelstart;
2213 struct level *endlevel = levelstart + 100;
2214 int prev;
2215 register int depth; /* Paren depth of current scanning location.
2216 level - levelstart equals this except
2217 when the depth becomes negative. */
2218 int mindepth; /* Lowest DEPTH value seen. */
2219 int start_quoted = 0; /* Nonzero means starting after a char quote */
2220 Lisp_Object tem;
2221 int prev_from; /* Keep one character before FROM. */
2222 int prev_from_byte;
2223 int boundary_stop = commentstop == -1;
2224 int nofence;
2225
2226 prev_from = from;
2227 prev_from_byte = from_byte;
2228 if (from != BEGV)
2229 DEC_BOTH (prev_from, prev_from_byte);
2230
2231 /* Use this macro instead of `from++'. */
2232 #define INC_FROM \
2233 do { prev_from = from; \
2234 prev_from_byte = from_byte; \
2235 INC_BOTH (from, from_byte); \
2236 } while (0)
2237
2238 immediate_quit = 1;
2239 QUIT;
2240
2241 SETUP_SYNTAX_TABLE (from, 1);
2242
2243 if (NILP (oldstate))
2244 {
2245 depth = 0;
2246 state.instring = -1;
2247 state.incomment = 0;
2248 state.comstyle = 0; /* comment style a by default. */
2249 state.comstr_start = -1; /* no comment/string seen. */
2250 }
2251 else
2252 {
2253 tem = Fcar (oldstate);
2254 if (!NILP (tem))
2255 depth = XINT (tem);
2256 else
2257 depth = 0;
2258
2259 oldstate = Fcdr (oldstate);
2260 oldstate = Fcdr (oldstate);
2261 oldstate = Fcdr (oldstate);
2262 tem = Fcar (oldstate);
2263 /* Check whether we are inside string_fence-style string: */
2264 state.instring = ( !NILP (tem)
2265 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2266 : -1);
2267
2268 oldstate = Fcdr (oldstate);
2269 tem = Fcar (oldstate);
2270 state.incomment = !NILP (tem);
2271
2272 oldstate = Fcdr (oldstate);
2273 tem = Fcar (oldstate);
2274 start_quoted = !NILP (tem);
2275
2276 /* if the eight element of the list is nil, we are in comment
2277 style a. If it is non-nil, we are in comment style b */
2278 oldstate = Fcdr (oldstate);
2279 oldstate = Fcdr (oldstate);
2280 tem = Fcar (oldstate);
2281 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2282 ? ST_COMMENT_STYLE : 1 );
2283
2284 oldstate = Fcdr (oldstate);
2285 tem = Fcar (oldstate);
2286 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2287 }
2288 state.quoted = 0;
2289 mindepth = depth;
2290
2291 curlevel->prev = -1;
2292 curlevel->last = -1;
2293
2294 /* Enter the loop at a place appropriate for initial state. */
2295
2296 if (state.incomment) goto startincomment;
2297 if (state.instring >= 0)
2298 {
2299 nofence = state.instring != ST_STRING_STYLE;
2300 if (start_quoted) goto startquotedinstring;
2301 goto startinstring;
2302 }
2303 if (start_quoted) goto startquoted;
2304
2305 while (from < end)
2306 {
2307 UPDATE_SYNTAX_TABLE_FORWARD (from);
2308 code = SYNTAX (FETCH_CHAR (from_byte));
2309 INC_FROM;
2310
2311 if (code == Scomment)
2312 state.comstr_start = prev_from;
2313 else if (code == Scomment_fence)
2314 {
2315 /* Record the comment style we have entered so that only
2316 the comment-end sequence of the same style actually
2317 terminates the comment section. */
2318 state.comstyle = ( code == Scomment_fence
2319 ? ST_COMMENT_STYLE
2320 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)));
2321 state.comstr_start = prev_from;
2322 if (code != Scomment_fence) INC_FROM;
2323 code = Scomment;
2324 }
2325 else if (from < end)
2326 if (SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from_byte)))
2327 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte)))
2328 /* Duplicate code to avoid a very complex if-expression
2329 which causes trouble for the SGI compiler. */
2330 {
2331 /* Record the comment style we have entered so that only
2332 the comment-end sequence of the same style actually
2333 terminates the comment section. */
2334 state.comstyle = ( code == Scomment_fence
2335 ? ST_COMMENT_STYLE
2336 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)));
2337 state.comstr_start = prev_from;
2338 if (code != Scomment_fence) INC_FROM;
2339 code = Scomment;
2340 }
2341
2342 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from_byte)))
2343 continue;
2344 switch (SWITCH_ENUM_CAST (code))
2345 {
2346 case Sescape:
2347 case Scharquote:
2348 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2349 curlevel->last = prev_from;
2350 startquoted:
2351 if (from == end) goto endquoted;
2352 INC_FROM;
2353 goto symstarted;
2354 /* treat following character as a word constituent */
2355 case Sword:
2356 case Ssymbol:
2357 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2358 curlevel->last = prev_from;
2359 symstarted:
2360 while (from < end)
2361 {
2362 UPDATE_SYNTAX_TABLE_FORWARD (from);
2363 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from_byte))))
2364 {
2365 case Scharquote:
2366 case Sescape:
2367 INC_FROM;
2368 if (from == end) goto endquoted;
2369 break;
2370 case Sword:
2371 case Ssymbol:
2372 case Squote:
2373 break;
2374 default:
2375 goto symdone;
2376 }
2377 INC_FROM;
2378 }
2379 symdone:
2380 curlevel->prev = curlevel->last;
2381 break;
2382
2383 startincomment:
2384 if (commentstop == 1)
2385 goto done;
2386 if (from != BEGV)
2387 {
2388 /* Enter the loop in the middle so that we find
2389 a 2-char comment ender if we start in the middle of it. */
2390 prev = FETCH_CHAR (prev_from_byte);
2391 goto startincomment_1;
2392 }
2393 /* At beginning of buffer, enter the loop the ordinary way. */
2394 state.incomment = 1;
2395 goto commentloop;
2396
2397 case Scomment:
2398 state.incomment = 1;
2399 if (commentstop || boundary_stop) goto done;
2400 commentloop:
2401 while (1)
2402 {
2403 if (from == end) goto done;
2404 UPDATE_SYNTAX_TABLE_FORWARD (from);
2405 prev = FETCH_CHAR (from_byte);
2406 if (SYNTAX (prev) == Sendcomment
2407 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2408 /* Only terminate the comment section if the endcomment
2409 of the same style as the start sequence has been
2410 encountered. */
2411 break;
2412 if (state.comstyle == ST_COMMENT_STYLE
2413 && SYNTAX (prev) == Scomment_fence)
2414 break;
2415 INC_FROM;
2416 startincomment_1:
2417 if (from < end && SYNTAX_COMEND_FIRST (prev)
2418 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte))
2419 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2420 /* Only terminate the comment section if the end-comment
2421 sequence of the same style as the start sequence has
2422 been encountered. */
2423 break;
2424 }
2425 INC_FROM;
2426 state.incomment = 0;
2427 state.comstyle = 0; /* reset the comment style */
2428 if (boundary_stop) goto done;
2429 break;
2430
2431 case Sopen:
2432 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2433 depth++;
2434 /* curlevel++->last ran into compiler bug on Apollo */
2435 curlevel->last = prev_from;
2436 if (++curlevel == endlevel)
2437 error ("Nesting too deep for parser");
2438 curlevel->prev = -1;
2439 curlevel->last = -1;
2440 if (targetdepth == depth) goto done;
2441 break;
2442
2443 case Sclose:
2444 depth--;
2445 if (depth < mindepth)
2446 mindepth = depth;
2447 if (curlevel != levelstart)
2448 curlevel--;
2449 curlevel->prev = curlevel->last;
2450 if (targetdepth == depth) goto done;
2451 break;
2452
2453 case Sstring:
2454 case Sstring_fence:
2455 state.comstr_start = from - 1;
2456 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2457 curlevel->last = prev_from;
2458 state.instring = (code == Sstring
2459 ? (FETCH_CHAR (prev_from_byte))
2460 : ST_STRING_STYLE);
2461 if (boundary_stop) goto done;
2462 startinstring:
2463 {
2464 nofence = state.instring != ST_STRING_STYLE;
2465
2466 while (1)
2467 {
2468 int c;
2469
2470 if (from >= end) goto done;
2471 c = FETCH_CHAR (from_byte);
2472 if (nofence && c == state.instring) break;
2473 UPDATE_SYNTAX_TABLE_FORWARD (from);
2474 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2475 {
2476 case Sstring_fence:
2477 if (!nofence) goto string_end;
2478 break;
2479 case Scharquote:
2480 case Sescape:
2481 INC_FROM;
2482 startquotedinstring:
2483 if (from >= end) goto endquoted;
2484 }
2485 INC_FROM;
2486 }
2487 }
2488 string_end:
2489 state.instring = -1;
2490 curlevel->prev = curlevel->last;
2491 INC_FROM;
2492 if (boundary_stop) goto done;
2493 break;
2494
2495 case Smath:
2496 break;
2497 }
2498 }
2499 goto done;
2500
2501 stop: /* Here if stopping before start of sexp. */
2502 from = prev_from; /* We have just fetched the char that starts it; */
2503 goto done; /* but return the position before it. */
2504
2505 endquoted:
2506 state.quoted = 1;
2507 done:
2508 state.depth = depth;
2509 state.mindepth = mindepth;
2510 state.thislevelstart = curlevel->prev;
2511 state.prevlevelstart
2512 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2513 state.location = from;
2514 immediate_quit = 0;
2515
2516 *stateptr = state;
2517 }
2518
2519 /* This comment supplies the doc string for parse-partial-sexp,
2520 for make-docfile to see. We cannot put this in the real DEFUN
2521 due to limits in the Unix cpp.
2522
2523 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2524 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2525 Parsing stops at TO or when certain criteria are met;\n\
2526 point is set to where parsing stops.\n\
2527 If fifth arg STATE is omitted or nil,\n\
2528 parsing assumes that FROM is the beginning of a function.\n\
2529 Value is a list of nine elements describing final state of parsing:\n\
2530 0. depth in parens.\n\
2531 1. character address of start of innermost containing list; nil if none.\n\
2532 2. character address of start of last complete sexp terminated.\n\
2533 3. non-nil if inside a string.\n\
2534 (it is the character that will terminate the string,\n\
2535 or t if the string should be terminated by a generic string delimiter.)\n\
2536 4. t if inside a comment.\n\
2537 5. t if following a quote character.\n\
2538 6. the minimum paren-depth encountered during this scan.\n\
2539 7. t if in a comment of style b; `syntax-table' if the comment\n\
2540 should be terminated by a generic comment delimiter.\n\
2541 8. character address of start of comment or string; nil if not in one.\n\
2542 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2543 in parentheses becomes equal to TARGETDEPTH.\n\
2544 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2545 any character that starts a sexp.\n\
2546 Fifth arg STATE is a nine-element list like what this function returns.\n\
2547 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2548 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2549 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2550 If it is `syntax-table', stop after the start of a comment or a string,\n\
2551 or after end of a comment or a string.")
2552 (from, to, targetdepth, stopbefore, state, commentstop)
2553 */
2554
2555 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2556 0 /* See immediately above */)
2557 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2558 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2559 {
2560 struct lisp_parse_state state;
2561 int target;
2562
2563 if (!NILP (targetdepth))
2564 {
2565 CHECK_NUMBER (targetdepth, 3);
2566 target = XINT (targetdepth);
2567 }
2568 else
2569 target = -100000; /* We won't reach this depth */
2570
2571 validate_region (&from, &to);
2572 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2573 XINT (to),
2574 target, !NILP (stopbefore), oldstate,
2575 (NILP (commentstop)
2576 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2577
2578 SET_PT (state.location);
2579
2580 return Fcons (make_number (state.depth),
2581 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2582 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2583 Fcons (state.instring >= 0
2584 ? (state.instring == ST_STRING_STYLE
2585 ? Qt : make_number (state.instring)) : Qnil,
2586 Fcons (state.incomment ? Qt : Qnil,
2587 Fcons (state.quoted ? Qt : Qnil,
2588 Fcons (make_number (state.mindepth),
2589 Fcons ((state.comstyle
2590 ? (state.comstyle == ST_COMMENT_STYLE
2591 ? Qsyntax_table : Qt) :
2592 Qnil),
2593 Fcons ((state.incomment || state.instring
2594 ? make_number (state.comstr_start)
2595 : Qnil),
2596 Qnil)))))))));
2597 }
2598 \f
2599 init_syntax_once ()
2600 {
2601 register int i, c;
2602 Lisp_Object temp;
2603
2604 /* This has to be done here, before we call Fmake_char_table. */
2605 Qsyntax_table = intern ("syntax-table");
2606 staticpro (&Qsyntax_table);
2607
2608 /* Intern this now in case it isn't already done.
2609 Setting this variable twice is harmless.
2610 But don't staticpro it here--that is done in alloc.c. */
2611 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2612
2613 /* Create objects which can be shared among syntax tables. */
2614 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2615 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2616 XVECTOR (Vsyntax_code_object)->contents[i]
2617 = Fcons (make_number (i), Qnil);
2618
2619 /* Now we are ready to set up this property, so we can
2620 create syntax tables. */
2621 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2622
2623 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2624
2625 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2626
2627 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2628 for (i = 'a'; i <= 'z'; i++)
2629 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2630 for (i = 'A'; i <= 'Z'; i++)
2631 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2632 for (i = '0'; i <= '9'; i++)
2633 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2634
2635 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2637
2638 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2639 Fcons (make_number (Sopen), make_number (')')));
2640 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2641 Fcons (make_number (Sclose), make_number ('(')));
2642 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2643 Fcons (make_number (Sopen), make_number (']')));
2644 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2645 Fcons (make_number (Sclose), make_number ('[')));
2646 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2647 Fcons (make_number (Sopen), make_number ('}')));
2648 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2649 Fcons (make_number (Sclose), make_number ('{')));
2650 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2651 Fcons (make_number ((int) Sstring), Qnil));
2652 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2653 Fcons (make_number ((int) Sescape), Qnil));
2654
2655 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2656 for (i = 0; i < 10; i++)
2657 {
2658 c = "_-+*/&|<>="[i];
2659 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2660 }
2661
2662 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2663 for (i = 0; i < 12; i++)
2664 {
2665 c = ".,;:?!#@~^'`"[i];
2666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2667 }
2668 }
2669
2670 syms_of_syntax ()
2671 {
2672 Qsyntax_table_p = intern ("syntax-table-p");
2673 staticpro (&Qsyntax_table_p);
2674
2675 staticpro (&Vsyntax_code_object);
2676
2677 Qscan_error = intern ("scan-error");
2678 staticpro (&Qscan_error);
2679 Fput (Qscan_error, Qerror_conditions,
2680 Fcons (Qerror, Qnil));
2681 Fput (Qscan_error, Qerror_message,
2682 build_string ("Scan error"));
2683
2684 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2685 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2686
2687 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2688 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2689 The value of this property should be either a syntax table, or a cons\n\
2690 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2691 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2692 relevant only for open/close type.");
2693
2694 words_include_escapes = 0;
2695 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2696 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2697
2698 defsubr (&Ssyntax_table_p);
2699 defsubr (&Ssyntax_table);
2700 defsubr (&Sstandard_syntax_table);
2701 defsubr (&Scopy_syntax_table);
2702 defsubr (&Sset_syntax_table);
2703 defsubr (&Schar_syntax);
2704 defsubr (&Smatching_paren);
2705 defsubr (&Smodify_syntax_entry);
2706 defsubr (&Sdescribe_syntax);
2707
2708 defsubr (&Sforward_word);
2709
2710 defsubr (&Sskip_chars_forward);
2711 defsubr (&Sskip_chars_backward);
2712 defsubr (&Sskip_syntax_forward);
2713 defsubr (&Sskip_syntax_backward);
2714
2715 defsubr (&Sforward_comment);
2716 defsubr (&Sscan_lists);
2717 defsubr (&Sscan_sexps);
2718 defsubr (&Sbackward_prefix_chars);
2719 defsubr (&Sparse_partial_sexp);
2720 }