Sync to HEAD
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999, 2004 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "character.h"
28 #include "keymap.h"
29
30 /* Make syntax table lookup grant data in gl_state. */
31 #define SYNTAX_ENTRY_VIA_PROPERTY
32
33 #include "syntax.h"
34 #include "intervals.h"
35
36 /* We use these constants in place for comment-style and
37 string-ender-char to distinguish comments/strings started by
38 comment_fence and string_fence codes. */
39
40 #define ST_COMMENT_STYLE (256 + 1)
41 #define ST_STRING_STYLE (256 + 2)
42 #include "category.h"
43
44 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
45
46 int words_include_escapes;
47 int parse_sexp_lookup_properties;
48
49 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
50 int multibyte_syntax_as_symbol;
51
52 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
53 if not compiled with GCC. No need to mark it, since it is used
54 only very temporarily. */
55 Lisp_Object syntax_temp;
56
57 /* Non-zero means an open parenthesis in column 0 is always considered
58 to be the start of a defun. Zero means an open parenthesis in
59 column 0 has no special meaning. */
60
61 int open_paren_in_column_0_is_defun_start;
62
63 /* This is the internal form of the parse state used in parse-partial-sexp. */
64
65 struct lisp_parse_state
66 {
67 int depth; /* Depth at end of parsing. */
68 int instring; /* -1 if not within string, else desired terminator. */
69 int incomment; /* -1 if in unnestable comment else comment nesting */
70 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
71 int quoted; /* Nonzero if just after an escape char at end of parsing */
72 int thislevelstart; /* Char number of most recent start-of-expression at current level */
73 int prevlevelstart; /* Char number of start of containing expression */
74 int location; /* Char number at which parsing stopped. */
75 int mindepth; /* Minimum depth seen while scanning. */
76 int comstr_start; /* Position just after last comment/string starter. */
77 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
78 of levels (starting from outermost). */
79 };
80 \f
81 /* These variables are a cache for finding the start of a defun.
82 find_start_pos is the place for which the defun start was found.
83 find_start_value is the defun start position found for it.
84 find_start_value_byte is the corresponding byte position.
85 find_start_buffer is the buffer it was found in.
86 find_start_begv is the BEGV value when it was found.
87 find_start_modiff is the value of MODIFF when it was found. */
88
89 static int find_start_pos;
90 static int find_start_value;
91 static int find_start_value_byte;
92 static struct buffer *find_start_buffer;
93 static int find_start_begv;
94 static int find_start_modiff;
95
96
97 static int find_defun_start P_ ((int, int));
98 static int back_comment P_ ((int, int, int, int, int, int *, int *));
99 static int char_quoted P_ ((int, int));
100 static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
101 static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
102 static Lisp_Object scan_lists P_ ((int, int, int, int));
103 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
104 int, int, int, int,
105 int, Lisp_Object, int));
106 \f
107
108 struct gl_state_s gl_state; /* Global state of syntax parser. */
109
110 INTERVAL interval_of ();
111 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
112 to scan to property-change. */
113
114 /* Update gl_state to an appropriate interval which contains CHARPOS. The
115 sign of COUNT give the relative position of CHARPOS wrt the previously
116 valid interval. If INIT, only [be]_property fields of gl_state are
117 valid at start, the rest is filled basing on OBJECT.
118
119 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
120 direction than the intervals - or in an interval. We update the
121 current syntax-table basing on the property of this interval, and
122 update the interval to start further than CHARPOS - or be
123 NULL_INTERVAL. We also update lim_property to be the next value of
124 charpos to call this subroutine again - or be before/after the
125 start/end of OBJECT. */
126
127 void
128 update_syntax_table (charpos, count, init, object)
129 int charpos, count, init;
130 Lisp_Object object;
131 {
132 Lisp_Object tmp_table;
133 int cnt = 0, invalidate = 1;
134 INTERVAL i, oldi;
135
136 if (init)
137 {
138 gl_state.old_prop = Qnil;
139 gl_state.start = gl_state.b_property;
140 gl_state.stop = gl_state.e_property;
141 i = interval_of (charpos, object);
142 gl_state.backward_i = gl_state.forward_i = i;
143 invalidate = 0;
144 if (NULL_INTERVAL_P (i))
145 return;
146 /* interval_of updates only ->position of the return value, so
147 update the parents manually to speed up update_interval. */
148 while (!NULL_PARENT (i))
149 {
150 if (AM_RIGHT_CHILD (i))
151 INTERVAL_PARENT (i)->position = i->position
152 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
153 - TOTAL_LENGTH (INTERVAL_PARENT (i))
154 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
155 else
156 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
157 + TOTAL_LENGTH (i);
158 i = INTERVAL_PARENT (i);
159 }
160 i = gl_state.forward_i;
161 gl_state.b_property = i->position - gl_state.offset;
162 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
163 goto update;
164 }
165 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
166
167 /* We are guaranteed to be called with CHARPOS either in i,
168 or further off. */
169 if (NULL_INTERVAL_P (i))
170 error ("Error in syntax_table logic for to-the-end intervals");
171 else if (charpos < i->position) /* Move left. */
172 {
173 if (count > 0)
174 error ("Error in syntax_table logic for intervals <-");
175 /* Update the interval. */
176 i = update_interval (i, charpos);
177 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
178 {
179 invalidate = 0;
180 gl_state.forward_i = i;
181 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
182 }
183 }
184 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
185 {
186 if (count < 0)
187 error ("Error in syntax_table logic for intervals ->");
188 /* Update the interval. */
189 i = update_interval (i, charpos);
190 if (i->position != gl_state.e_property)
191 {
192 invalidate = 0;
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - gl_state.offset;
195 }
196 }
197
198 update:
199 tmp_table = textget (i->plist, Qsyntax_table);
200
201 if (invalidate)
202 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
203
204 if (invalidate) /* Did not get to adjacent interval. */
205 { /* with the same table => */
206 /* invalidate the old range. */
207 if (count > 0)
208 {
209 gl_state.backward_i = i;
210 gl_state.b_property = i->position - gl_state.offset;
211 }
212 else
213 {
214 gl_state.forward_i = i;
215 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
216 }
217 }
218
219 if (!EQ (tmp_table, gl_state.old_prop))
220 {
221 gl_state.current_syntax_table = tmp_table;
222 gl_state.old_prop = tmp_table;
223 if (EQ (Fsyntax_table_p (tmp_table), Qt))
224 {
225 gl_state.use_global = 0;
226 }
227 else if (CONSP (tmp_table))
228 {
229 gl_state.use_global = 1;
230 gl_state.global_code = tmp_table;
231 }
232 else
233 {
234 gl_state.use_global = 0;
235 gl_state.current_syntax_table = current_buffer->syntax_table;
236 }
237 }
238
239 while (!NULL_INTERVAL_P (i))
240 {
241 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
242 {
243 if (count > 0)
244 {
245 gl_state.e_property = i->position - gl_state.offset;
246 gl_state.forward_i = i;
247 }
248 else
249 {
250 gl_state.b_property = i->position + LENGTH (i) - gl_state.offset;
251 gl_state.backward_i = i;
252 }
253 return;
254 }
255 else if (cnt == INTERVALS_AT_ONCE)
256 {
257 if (count > 0)
258 {
259 gl_state.e_property = i->position + LENGTH (i) - gl_state.offset;
260 gl_state.forward_i = i;
261 }
262 else
263 {
264 gl_state.b_property = i->position - gl_state.offset;
265 gl_state.backward_i = i;
266 }
267 return;
268 }
269 cnt++;
270 i = count > 0 ? next_interval (i) : previous_interval (i);
271 }
272 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
273 if (count > 0)
274 gl_state.e_property = gl_state.stop;
275 else
276 gl_state.b_property = gl_state.start;
277 }
278 \f
279 /* Returns TRUE if char at CHARPOS is quoted.
280 Global syntax-table data should be set up already to be good at CHARPOS
281 or after. On return global syntax data is good for lookup at CHARPOS. */
282
283 static int
284 char_quoted (charpos, bytepos)
285 register int charpos, bytepos;
286 {
287 register enum syntaxcode code;
288 register int beg = BEGV;
289 register int quoted = 0;
290 int orig = charpos;
291
292 DEC_BOTH (charpos, bytepos);
293
294 while (bytepos >= beg)
295 {
296 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
297 code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
298 if (! (code == Scharquote || code == Sescape))
299 break;
300
301 DEC_BOTH (charpos, bytepos);
302 quoted = !quoted;
303 }
304
305 UPDATE_SYNTAX_TABLE (orig);
306 return quoted;
307 }
308
309 /* Return the bytepos one character after BYTEPOS.
310 We assume that BYTEPOS is not at the end of the buffer. */
311
312 INLINE int
313 inc_bytepos (bytepos)
314 int bytepos;
315 {
316 if (NILP (current_buffer->enable_multibyte_characters))
317 return bytepos + 1;
318
319 INC_POS (bytepos);
320 return bytepos;
321 }
322
323 /* Return the bytepos one character before BYTEPOS.
324 We assume that BYTEPOS is not at the start of the buffer. */
325
326 INLINE int
327 dec_bytepos (bytepos)
328 int bytepos;
329 {
330 if (NILP (current_buffer->enable_multibyte_characters))
331 return bytepos - 1;
332
333 DEC_POS (bytepos);
334 return bytepos;
335 }
336 \f
337 /* Return a defun-start position before before POS and not too far before.
338 It should be the last one before POS, or nearly the last.
339
340 When open_paren_in_column_0_is_defun_start is nonzero,
341 only the beginning of the buffer is treated as a defun-start.
342
343 We record the information about where the scan started
344 and what its result was, so that another call in the same area
345 can return the same value very quickly.
346
347 There is no promise at which position the global syntax data is
348 valid on return from the subroutine, so the caller should explicitly
349 update the global data. */
350
351 static int
352 find_defun_start (pos, pos_byte)
353 int pos, pos_byte;
354 {
355 int opoint = PT, opoint_byte = PT_BYTE;
356
357 if (!open_paren_in_column_0_is_defun_start)
358 {
359 find_start_value_byte = BEGV_BYTE;
360 return BEGV;
361 }
362
363 /* Use previous finding, if it's valid and applies to this inquiry. */
364 if (current_buffer == find_start_buffer
365 /* Reuse the defun-start even if POS is a little farther on.
366 POS might be in the next defun, but that's ok.
367 Our value may not be the best possible, but will still be usable. */
368 && pos <= find_start_pos + 1000
369 && pos >= find_start_value
370 && BEGV == find_start_begv
371 && MODIFF == find_start_modiff)
372 return find_start_value;
373
374 /* Back up to start of line. */
375 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
376
377 /* We optimize syntax-table lookup for rare updates. Thus we accept
378 only those `^\s(' which are good in global _and_ text-property
379 syntax-tables. */
380 gl_state.current_syntax_table = current_buffer->syntax_table;
381 gl_state.use_global = 0;
382 while (PT > BEGV)
383 {
384 /* Open-paren at start of line means we may have found our
385 defun-start. */
386 if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
387 {
388 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
389 if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
390 break;
391 /* Now fallback to the default value. */
392 gl_state.current_syntax_table = current_buffer->syntax_table;
393 gl_state.use_global = 0;
394 }
395 /* Move to beg of previous line. */
396 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
397 }
398
399 /* Record what we found, for the next try. */
400 find_start_value = PT;
401 find_start_value_byte = PT_BYTE;
402 find_start_buffer = current_buffer;
403 find_start_modiff = MODIFF;
404 find_start_begv = BEGV;
405 find_start_pos = pos;
406
407 TEMP_SET_PT_BOTH (opoint, opoint_byte);
408
409 return find_start_value;
410 }
411 \f
412 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
413
414 static int
415 prev_char_comend_first (pos, pos_byte)
416 int pos, pos_byte;
417 {
418 int c, val;
419
420 DEC_BOTH (pos, pos_byte);
421 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
422 c = FETCH_CHAR (pos_byte);
423 val = SYNTAX_COMEND_FIRST (c);
424 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
425 return val;
426 }
427
428 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
429
430 /* static int
431 * prev_char_comstart_first (pos, pos_byte)
432 * int pos, pos_byte;
433 * {
434 * int c, val;
435 *
436 * DEC_BOTH (pos, pos_byte);
437 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
438 * c = FETCH_CHAR (pos_byte);
439 * val = SYNTAX_COMSTART_FIRST (c);
440 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
441 * return val;
442 * } */
443
444 /* Checks whether charpos FROM is at the end of a comment.
445 FROM_BYTE is the bytepos corresponding to FROM.
446 Do not move back before STOP.
447
448 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
449 return -1 otherwise.
450
451 If successful, store the charpos of the comment's beginning
452 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
453
454 Global syntax data remains valid for backward search starting at
455 the returned value (or at FROM, if the search was not successful). */
456
457 static int
458 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
459 int from, from_byte, stop;
460 int comnested, comstyle;
461 int *charpos_ptr, *bytepos_ptr;
462 {
463 /* Look back, counting the parity of string-quotes,
464 and recording the comment-starters seen.
465 When we reach a safe place, assume that's not in a string;
466 then step the main scan to the earliest comment-starter seen
467 an even number of string quotes away from the safe place.
468
469 OFROM[I] is position of the earliest comment-starter seen
470 which is I+2X quotes from the comment-end.
471 PARITY is current parity of quotes from the comment end. */
472 int string_style = -1; /* Presumed outside of any string. */
473 int string_lossage = 0;
474 /* Not a real lossage: indicates that we have passed a matching comment
475 starter plus a non-matching comment-ender, meaning that any matching
476 comment-starter we might see later could be a false positive (hidden
477 inside another comment).
478 Test case: { a (* b } c (* d *) */
479 int comment_lossage = 0;
480 int comment_end = from;
481 int comment_end_byte = from_byte;
482 int comstart_pos = 0;
483 int comstart_byte;
484 /* Place where the containing defun starts,
485 or 0 if we didn't come across it yet. */
486 int defun_start = 0;
487 int defun_start_byte = 0;
488 register enum syntaxcode code;
489 int nesting = 1; /* current comment nesting */
490 int c;
491 int syntax = 0;
492
493 /* FIXME: A }} comment-ender style leads to incorrect behavior
494 in the case of {{ c }}} because we ignore the last two chars which are
495 assumed to be comment-enders although they aren't. */
496
497 /* At beginning of range to scan, we're outside of strings;
498 that determines quote parity to the comment-end. */
499 while (from != stop)
500 {
501 int temp_byte, prev_syntax;
502 int com2start, com2end;
503
504 /* Move back and examine a character. */
505 DEC_BOTH (from, from_byte);
506 UPDATE_SYNTAX_TABLE_BACKWARD (from);
507
508 prev_syntax = syntax;
509 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
510 syntax = SYNTAX_WITH_FLAGS (c);
511 code = SYNTAX (c);
512
513 /* Check for 2-char comment markers. */
514 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
515 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
516 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
517 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
518 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
519 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
520 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
521
522 /* Nasty cases with overlapping 2-char comment markers:
523 - snmp-mode: -- c -- foo -- c --
524 --- c --
525 ------ c --
526 - c-mode: *||*
527 |* *|* *|
528 |*| |* |*|
529 /// */
530
531 /* If a 2-char comment sequence partly overlaps with another,
532 we don't try to be clever. */
533 if (from > stop && (com2end || com2start))
534 {
535 int next = from, next_byte = from_byte, next_c, next_syntax;
536 DEC_BOTH (next, next_byte);
537 UPDATE_SYNTAX_TABLE_BACKWARD (next);
538 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
539 next_syntax = SYNTAX_WITH_FLAGS (next_c);
540 if (((com2start || comnested)
541 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
542 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
543 || ((com2end || comnested)
544 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
545 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
546 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
547 goto lossage;
548 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
549 }
550
551 if (com2start && comstart_pos == 0)
552 /* We're looking at a comment starter. But it might be a comment
553 ender as well (see snmp-mode). The first time we see one, we
554 need to consider it as a comment starter,
555 and the subsequent times as a comment ender. */
556 com2end = 0;
557
558 /* Turn a 2-char comment sequences into the appropriate syntax. */
559 if (com2end)
560 code = Sendcomment;
561 else if (com2start)
562 code = Scomment;
563 /* Ignore comment starters of a different style. */
564 else if (code == Scomment
565 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
566 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
567 continue;
568
569 /* Ignore escaped characters, except comment-enders. */
570 if (code != Sendcomment && char_quoted (from, from_byte))
571 continue;
572
573 switch (code)
574 {
575 case Sstring_fence:
576 case Scomment_fence:
577 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
578 case Sstring:
579 /* Track parity of quotes. */
580 if (string_style == -1)
581 /* Entering a string. */
582 string_style = c;
583 else if (string_style == c)
584 /* Leaving the string. */
585 string_style = -1;
586 else
587 /* If we have two kinds of string delimiters.
588 There's no way to grok this scanning backwards. */
589 string_lossage = 1;
590 break;
591
592 case Scomment:
593 /* We've already checked that it is the relevant comstyle. */
594 if (string_style != -1 || comment_lossage || string_lossage)
595 /* There are odd string quotes involved, so let's be careful.
596 Test case in Pascal: " { " a { " } */
597 goto lossage;
598
599 if (!comnested)
600 {
601 /* Record best comment-starter so far. */
602 comstart_pos = from;
603 comstart_byte = from_byte;
604 }
605 else if (--nesting <= 0)
606 /* nested comments have to be balanced, so we don't need to
607 keep looking for earlier ones. We use here the same (slightly
608 incorrect) reasoning as below: since it is followed by uniform
609 paired string quotes, this comment-start has to be outside of
610 strings, else the comment-end itself would be inside a string. */
611 goto done;
612 break;
613
614 case Sendcomment:
615 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
616 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
617 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
618 /* This is the same style of comment ender as ours. */
619 {
620 if (comnested)
621 nesting++;
622 else
623 /* Anything before that can't count because it would match
624 this comment-ender rather than ours. */
625 from = stop; /* Break out of the loop. */
626 }
627 else if (comstart_pos != 0 || c != '\n')
628 /* We're mixing comment styles here, so we'd better be careful.
629 The (comstart_pos != 0 || c != '\n') check is not quite correct
630 (we should just always set comment_lossage), but removing it
631 would imply that any multiline comment in C would go through
632 lossage, which seems overkill.
633 The failure should only happen in the rare cases such as
634 { (* } *) */
635 comment_lossage = 1;
636 break;
637
638 case Sopen:
639 /* Assume a defun-start point is outside of strings. */
640 if (open_paren_in_column_0_is_defun_start
641 && (from == stop
642 || (temp_byte = dec_bytepos (from_byte),
643 FETCH_CHAR (temp_byte) == '\n')))
644 {
645 defun_start = from;
646 defun_start_byte = from_byte;
647 from = stop; /* Break out of the loop. */
648 }
649 break;
650
651 default:
652 break;
653 }
654 }
655
656 if (comstart_pos == 0)
657 {
658 from = comment_end;
659 from_byte = comment_end_byte;
660 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
661 }
662 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
663 or `done'), then we've found the beginning of the non-nested comment. */
664 else if (1) /* !comnested */
665 {
666 from = comstart_pos;
667 from_byte = comstart_byte;
668 /* Globals are correct now. */
669 }
670 else
671 {
672 struct lisp_parse_state state;
673 lossage:
674 /* We had two kinds of string delimiters mixed up
675 together. Decode this going forwards.
676 Scan fwd from a known safe place (beginning-of-defun)
677 to the one in question; this records where we
678 last passed a comment starter. */
679 /* If we did not already find the defun start, find it now. */
680 if (defun_start == 0)
681 {
682 defun_start = find_defun_start (comment_end, comment_end_byte);
683 defun_start_byte = find_start_value_byte;
684 }
685 do
686 {
687 scan_sexps_forward (&state,
688 defun_start, defun_start_byte,
689 comment_end, -10000, 0, Qnil, 0);
690 defun_start = comment_end;
691 if (state.incomment == (comnested ? 1 : -1)
692 && state.comstyle == comstyle)
693 from = state.comstr_start;
694 else
695 {
696 from = comment_end;
697 if (state.incomment)
698 /* If comment_end is inside some other comment, maybe ours
699 is nested, so we need to try again from within the
700 surrounding comment. Example: { a (* " *) */
701 {
702 /* FIXME: We should advance by one or two chars. */
703 defun_start = state.comstr_start + 2;
704 defun_start_byte = CHAR_TO_BYTE (defun_start);
705 }
706 }
707 } while (defun_start < comment_end);
708
709 from_byte = CHAR_TO_BYTE (from);
710 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
711 }
712
713 done:
714 *charpos_ptr = from;
715 *bytepos_ptr = from_byte;
716
717 return (from == comment_end) ? -1 : from;
718 }
719 \f
720 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
721 doc: /* Return t if OBJECT is a syntax table.
722 Currently, any char-table counts as a syntax table. */)
723 (object)
724 Lisp_Object object;
725 {
726 if (CHAR_TABLE_P (object)
727 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
728 return Qt;
729 return Qnil;
730 }
731
732 static void
733 check_syntax_table (obj)
734 Lisp_Object obj;
735 {
736 if (!(CHAR_TABLE_P (obj)
737 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
738 wrong_type_argument (Qsyntax_table_p, obj);
739 }
740
741 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
742 doc: /* Return the current syntax table.
743 This is the one specified by the current buffer. */)
744 ()
745 {
746 return current_buffer->syntax_table;
747 }
748
749 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
750 Sstandard_syntax_table, 0, 0, 0,
751 doc: /* Return the standard syntax table.
752 This is the one used for new buffers. */)
753 ()
754 {
755 return Vstandard_syntax_table;
756 }
757
758 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
759 doc: /* Construct a new syntax table and return it.
760 It is a copy of the TABLE, which defaults to the standard syntax table. */)
761 (table)
762 Lisp_Object table;
763 {
764 Lisp_Object copy;
765
766 if (!NILP (table))
767 check_syntax_table (table);
768 else
769 table = Vstandard_syntax_table;
770
771 copy = Fcopy_sequence (table);
772
773 /* Only the standard syntax table should have a default element.
774 Other syntax tables should inherit from parents instead. */
775 XCHAR_TABLE (copy)->defalt = Qnil;
776
777 /* Copied syntax tables should all have parents.
778 If we copied one with no parent, such as the standard syntax table,
779 use the standard syntax table as the copy's parent. */
780 if (NILP (XCHAR_TABLE (copy)->parent))
781 Fset_char_table_parent (copy, Vstandard_syntax_table);
782 return copy;
783 }
784
785 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
786 doc: /* Select a new syntax table for the current buffer.
787 One argument, a syntax table. */)
788 (table)
789 Lisp_Object table;
790 {
791 int idx;
792 check_syntax_table (table);
793 current_buffer->syntax_table = table;
794 /* Indicate that this buffer now has a specified syntax table. */
795 idx = PER_BUFFER_VAR_IDX (syntax_table);
796 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
797 return table;
798 }
799 \f
800 /* Convert a letter which signifies a syntax code
801 into the code it signifies.
802 This is used by modify-syntax-entry, and other things. */
803
804 unsigned char syntax_spec_code[0400] =
805 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
806 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
807 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
808 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
809 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
810 (char) Smath, 0377, 0377, (char) Squote,
811 (char) Sopen, (char) Sclose, 0377, 0377,
812 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
813 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
814 0377, 0377, 0377, 0377,
815 (char) Scomment, 0377, (char) Sendcomment, 0377,
816 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
819 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
820 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
823 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
824 };
825
826 /* Indexed by syntax code, give the letter that describes it. */
827
828 char syntax_code_spec[16] =
829 {
830 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
831 '!', '|'
832 };
833
834 /* Indexed by syntax code, give the object (cons of syntax code and
835 nil) to be stored in syntax table. Since these objects can be
836 shared among syntax tables, we generate them in advance. By
837 sharing objects, the function `describe-syntax' can give a more
838 compact listing. */
839 static Lisp_Object Vsyntax_code_object;
840
841 \f
842 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
843 doc: /* Return the syntax code of CHARACTER, described by a character.
844 For example, if CHARACTER is a word constituent,
845 the character `w' is returned.
846 The characters that correspond to various syntax codes
847 are listed in the documentation of `modify-syntax-entry'. */)
848 (character)
849 Lisp_Object character;
850 {
851 int char_int;
852 gl_state.current_syntax_table = current_buffer->syntax_table;
853
854 gl_state.use_global = 0;
855 CHECK_NUMBER (character);
856 char_int = XINT (character);
857 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
858 }
859
860 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
861 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
862 (character)
863 Lisp_Object character;
864 {
865 int char_int, code;
866 gl_state.current_syntax_table = current_buffer->syntax_table;
867 gl_state.use_global = 0;
868 CHECK_NUMBER (character);
869 char_int = XINT (character);
870 code = SYNTAX (char_int);
871 if (code == Sopen || code == Sclose)
872 return SYNTAX_MATCH (char_int);
873 return Qnil;
874 }
875
876 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
877 doc: /* Convert a syntax specification STRING into syntax cell form.
878 STRING should be a string as it is allowed as argument of
879 `modify-syntax-entry'. Value is the equivalent cons cell
880 (CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
881 text property. */)
882 (string)
883 Lisp_Object string;
884 {
885 register const unsigned char *p;
886 register enum syntaxcode code;
887 int val;
888 Lisp_Object match;
889
890 CHECK_STRING (string);
891
892 p = SDATA (string);
893 code = (enum syntaxcode) syntax_spec_code[*p++];
894 if (((int) code & 0377) == 0377)
895 error ("invalid syntax description letter: %c", p[-1]);
896
897 if (code == Sinherit)
898 return Qnil;
899
900 if (*p)
901 {
902 int len;
903 int character = (STRING_CHAR_AND_LENGTH
904 (p, SBYTES (string) - 1, len));
905 XSETINT (match, character);
906 if (XFASTINT (match) == ' ')
907 match = Qnil;
908 p += len;
909 }
910 else
911 match = Qnil;
912
913 val = (int) code;
914 while (*p)
915 switch (*p++)
916 {
917 case '1':
918 val |= 1 << 16;
919 break;
920
921 case '2':
922 val |= 1 << 17;
923 break;
924
925 case '3':
926 val |= 1 << 18;
927 break;
928
929 case '4':
930 val |= 1 << 19;
931 break;
932
933 case 'p':
934 val |= 1 << 20;
935 break;
936
937 case 'b':
938 val |= 1 << 21;
939 break;
940
941 case 'n':
942 val |= 1 << 22;
943 break;
944 }
945
946 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
947 return XVECTOR (Vsyntax_code_object)->contents[val];
948 else
949 /* Since we can't use a shared object, let's make a new one. */
950 return Fcons (make_number (val), match);
951 }
952
953 /* I really don't know why this is interactive
954 help-form should at least be made useful whilst reading the second arg. */
955 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
956 "cSet syntax for character: \nsSet syntax for %s to: ",
957 doc: /* Set syntax for character CHAR according to string NEWENTRY.
958 The syntax is changed only for table SYNTAX_TABLE, which defaults to
959 the current buffer's syntax table.
960 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
961 in the range MIN and MAX are changed.
962 The first character of NEWENTRY should be one of the following:
963 Space or - whitespace syntax. w word constituent.
964 _ symbol constituent. . punctuation.
965 ( open-parenthesis. ) close-parenthesis.
966 " string quote. \\ escape.
967 $ paired delimiter. ' expression quote or prefix operator.
968 < comment starter. > comment ender.
969 / character-quote. @ inherit from `standard-syntax-table'.
970 | generic string fence. ! generic comment fence.
971
972 Only single-character comment start and end sequences are represented thus.
973 Two-character sequences are represented as described below.
974 The second character of NEWENTRY is the matching parenthesis,
975 used only if the first character is `(' or `)'.
976 Any additional characters are flags.
977 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
978 1 means CHAR is the start of a two-char comment start sequence.
979 2 means CHAR is the second character of such a sequence.
980 3 means CHAR is the start of a two-char comment end sequence.
981 4 means CHAR is the second character of such a sequence.
982
983 There can be up to two orthogonal comment sequences. This is to support
984 language modes such as C++. By default, all comment sequences are of style
985 a, but you can set the comment sequence style to b (on the second character
986 of a comment-start, or the first character of a comment-end sequence) using
987 this flag:
988 b means CHAR is part of comment sequence b.
989 n means CHAR is part of a nestable comment sequence.
990
991 p means CHAR is a prefix character for `backward-prefix-chars';
992 such characters are treated as whitespace when they occur
993 between expressions.
994 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
995 (c, newentry, syntax_table)
996 Lisp_Object c, newentry, syntax_table;
997 {
998 if (CONSP (c))
999 {
1000 CHECK_CHARACTER_CAR (c);
1001 CHECK_CHARACTER_CDR (c);
1002 }
1003 else
1004 CHECK_CHARACTER (c);
1005
1006 if (NILP (syntax_table))
1007 syntax_table = current_buffer->syntax_table;
1008 else
1009 check_syntax_table (syntax_table);
1010
1011 newentry = Fstring_to_syntax (newentry);
1012 if (CONSP (c))
1013 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1014 else
1015 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1016 return Qnil;
1017 }
1018 \f
1019 /* Dump syntax table to buffer in human-readable format */
1020
1021 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1022 Sinternal_describe_syntax_value, 1, 1, 0,
1023 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1024 (syntax)
1025 Lisp_Object syntax;
1026 {
1027 register enum syntaxcode code;
1028 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1029 char str[2];
1030 Lisp_Object first, match_lisp, value = syntax;
1031
1032 if (NILP (value))
1033 {
1034 insert_string ("default");
1035 return syntax;
1036 }
1037
1038 if (CHAR_TABLE_P (value))
1039 {
1040 insert_string ("deeper char-table ...");
1041 return syntax;
1042 }
1043
1044 if (!CONSP (value))
1045 {
1046 insert_string ("invalid");
1047 return syntax;
1048 }
1049
1050 first = XCAR (value);
1051 match_lisp = XCDR (value);
1052
1053 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1054 {
1055 insert_string ("invalid");
1056 return syntax;
1057 }
1058
1059 code = (enum syntaxcode) (XINT (first) & 0377);
1060 start1 = (XINT (first) >> 16) & 1;
1061 start2 = (XINT (first) >> 17) & 1;
1062 end1 = (XINT (first) >> 18) & 1;
1063 end2 = (XINT (first) >> 19) & 1;
1064 prefix = (XINT (first) >> 20) & 1;
1065 comstyle = (XINT (first) >> 21) & 1;
1066 comnested = (XINT (first) >> 22) & 1;
1067
1068 if ((int) code < 0 || (int) code >= (int) Smax)
1069 {
1070 insert_string ("invalid");
1071 return syntax;
1072 }
1073 desc = syntax_code_spec[(int) code];
1074
1075 str[0] = desc, str[1] = 0;
1076 insert (str, 1);
1077
1078 if (NILP (match_lisp))
1079 insert (" ", 1);
1080 else
1081 insert_char (XINT (match_lisp));
1082
1083 if (start1)
1084 insert ("1", 1);
1085 if (start2)
1086 insert ("2", 1);
1087
1088 if (end1)
1089 insert ("3", 1);
1090 if (end2)
1091 insert ("4", 1);
1092
1093 if (prefix)
1094 insert ("p", 1);
1095 if (comstyle)
1096 insert ("b", 1);
1097 if (comnested)
1098 insert ("n", 1);
1099
1100 insert_string ("\twhich means: ");
1101
1102 switch (SWITCH_ENUM_CAST (code))
1103 {
1104 case Swhitespace:
1105 insert_string ("whitespace"); break;
1106 case Spunct:
1107 insert_string ("punctuation"); break;
1108 case Sword:
1109 insert_string ("word"); break;
1110 case Ssymbol:
1111 insert_string ("symbol"); break;
1112 case Sopen:
1113 insert_string ("open"); break;
1114 case Sclose:
1115 insert_string ("close"); break;
1116 case Squote:
1117 insert_string ("prefix"); break;
1118 case Sstring:
1119 insert_string ("string"); break;
1120 case Smath:
1121 insert_string ("math"); break;
1122 case Sescape:
1123 insert_string ("escape"); break;
1124 case Scharquote:
1125 insert_string ("charquote"); break;
1126 case Scomment:
1127 insert_string ("comment"); break;
1128 case Sendcomment:
1129 insert_string ("endcomment"); break;
1130 case Sinherit:
1131 insert_string ("inherit"); break;
1132 case Scomment_fence:
1133 insert_string ("comment fence"); break;
1134 case Sstring_fence:
1135 insert_string ("string fence"); break;
1136 default:
1137 insert_string ("invalid");
1138 return syntax;
1139 }
1140
1141 if (!NILP (match_lisp))
1142 {
1143 insert_string (", matches ");
1144 insert_char (XINT (match_lisp));
1145 }
1146
1147 if (start1)
1148 insert_string (",\n\t is the first character of a comment-start sequence");
1149 if (start2)
1150 insert_string (",\n\t is the second character of a comment-start sequence");
1151
1152 if (end1)
1153 insert_string (",\n\t is the first character of a comment-end sequence");
1154 if (end2)
1155 insert_string (",\n\t is the second character of a comment-end sequence");
1156 if (comstyle)
1157 insert_string (" (comment style b)");
1158 if (comnested)
1159 insert_string (" (nestable)");
1160
1161 if (prefix)
1162 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1163
1164 return syntax;
1165 }
1166 \f
1167 int parse_sexp_ignore_comments;
1168
1169 /* Char-table of functions that find the next or previous word
1170 boundary. */
1171 Lisp_Object Vfind_word_boundary_function_table;
1172
1173 /* Return the position across COUNT words from FROM.
1174 If that many words cannot be found before the end of the buffer, return 0.
1175 COUNT negative means scan backward and stop at word beginning. */
1176
1177 int
1178 scan_words (from, count)
1179 register int from, count;
1180 {
1181 register int beg = BEGV;
1182 register int end = ZV;
1183 register int from_byte = CHAR_TO_BYTE (from);
1184 register enum syntaxcode code;
1185 int ch0, ch1;
1186 Lisp_Object func, script, pos;
1187
1188 immediate_quit = 1;
1189 QUIT;
1190
1191 SETUP_SYNTAX_TABLE (from, count);
1192
1193 while (count > 0)
1194 {
1195 while (1)
1196 {
1197 if (from == end)
1198 {
1199 immediate_quit = 0;
1200 return 0;
1201 }
1202 UPDATE_SYNTAX_TABLE_FORWARD (from);
1203 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1204 code = SYNTAX (ch0);
1205 INC_BOTH (from, from_byte);
1206 if (words_include_escapes
1207 && (code == Sescape || code == Scharquote))
1208 break;
1209 if (code == Sword)
1210 break;
1211 }
1212 /* Now CH0 is a character which begins a word and FROM is the
1213 position of the next character. */
1214 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1215 if (! NILP (Ffboundp (func)))
1216 {
1217 pos = call2 (func, make_number (from - 1), make_number (end));
1218 if (INTEGERP (pos) && XINT (pos) > from)
1219 {
1220 from = XINT (pos);
1221 from_byte = CHAR_TO_BYTE (from);
1222 }
1223 }
1224 else
1225 {
1226 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1227 while (1)
1228 {
1229 if (from == end) break;
1230 UPDATE_SYNTAX_TABLE_FORWARD (from);
1231 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1232 code = SYNTAX (ch1);
1233 if ((code != Sword
1234 && (! words_include_escapes
1235 || (code != Sescape && code != Scharquote)))
1236 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
1237 break;
1238 INC_BOTH (from, from_byte);
1239 ch0 = ch1;
1240 }
1241 }
1242 count--;
1243 }
1244 while (count < 0)
1245 {
1246 while (1)
1247 {
1248 if (from == beg)
1249 {
1250 immediate_quit = 0;
1251 return 0;
1252 }
1253 DEC_BOTH (from, from_byte);
1254 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1255 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1256 code = SYNTAX (ch1);
1257 if (words_include_escapes
1258 && (code == Sescape || code == Scharquote))
1259 break;
1260 if (code == Sword)
1261 break;
1262 }
1263 /* Now CH1 is a character which ends a word and FROM is the
1264 position of it. */
1265 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1266 if (! NILP (Ffboundp (func)))
1267 {
1268 pos = call2 (func, make_number (from), make_number (beg));
1269 if (INTEGERP (pos) && XINT (pos) < from)
1270 {
1271 from = XINT (pos);
1272 from_byte = CHAR_TO_BYTE (from);
1273 }
1274 }
1275 else
1276 {
1277 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1278 while (1)
1279 {
1280 int temp_byte;
1281
1282 if (from == beg)
1283 break;
1284 temp_byte = dec_bytepos (from_byte);
1285 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1286 ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
1287 code = SYNTAX (ch0);
1288 if ((code != Sword
1289 && (! words_include_escapes
1290 || (code != Sescape && code != Scharquote)))
1291 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
1292 break;
1293 DEC_BOTH (from, from_byte);
1294 ch1 = ch0;
1295 }
1296 }
1297 count++;
1298 }
1299
1300 immediate_quit = 0;
1301
1302 return from;
1303 }
1304
1305 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
1306 doc: /* Move point forward ARG words (backward if ARG is negative).
1307 Normally returns t.
1308 If an edge of the buffer or a field boundary is reached, point is left there
1309 and the function returns nil. Field boundaries are not noticed if
1310 `inhibit-field-text-motion' is non-nil. */)
1311 (arg)
1312 Lisp_Object arg;
1313 {
1314 int orig_val, val;
1315
1316 if (NILP (arg))
1317 XSETFASTINT (arg, 1);
1318 else
1319 CHECK_NUMBER (arg);
1320
1321 val = orig_val = scan_words (PT, XINT (arg));
1322 if (! orig_val)
1323 val = XINT (arg) > 0 ? ZV : BEGV;
1324
1325 /* Avoid jumping out of an input field. */
1326 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1327 Qt, Qnil, Qnil));
1328
1329 SET_PT (val);
1330 return val == orig_val ? Qt : Qnil;
1331 }
1332 \f
1333 Lisp_Object skip_chars ();
1334
1335 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1336 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1337 STRING is like the inside of a `[...]' in a regular expression
1338 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1339 (but not as the end of a range; quoting is never needed there).
1340 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1341 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1342 Returns the distance traveled, either zero or positive.
1343 Note that char classes, e.g. `[:alpha:]', are not currently supported;
1344 they will be treated as literals. */)
1345 (string, lim)
1346 Lisp_Object string, lim;
1347 {
1348 return skip_chars (1, string, lim);
1349 }
1350
1351 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1352 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1353 See `skip-chars-forward' for details.
1354 Returns the distance traveled, either zero or negative. */)
1355 (string, lim)
1356 Lisp_Object string, lim;
1357 {
1358 return skip_chars (0, string, lim);
1359 }
1360
1361 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1362 doc: /* Move point forward across chars in specified syntax classes.
1363 SYNTAX is a string of syntax code characters.
1364 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1365 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1366 This function returns the distance traveled, either zero or positive. */)
1367 (syntax, lim)
1368 Lisp_Object syntax, lim;
1369 {
1370 return skip_syntaxes (1, syntax, lim);
1371 }
1372
1373 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1374 doc: /* Move point backward across chars in specified syntax classes.
1375 SYNTAX is a string of syntax code characters.
1376 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1377 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1378 This function returns the distance traveled, either zero or negative. */)
1379 (syntax, lim)
1380 Lisp_Object syntax, lim;
1381 {
1382 return skip_syntaxes (0, syntax, lim);
1383 }
1384
1385 static Lisp_Object
1386 skip_chars (forwardp, string, lim)
1387 int forwardp;
1388 Lisp_Object string, lim;
1389 {
1390 register unsigned int c;
1391 unsigned char fastmap[0400];
1392 /* Store the ranges of non-ASCII characters. */
1393 int *char_ranges;
1394 int n_char_ranges = 0;
1395 int negate = 0;
1396 register int i, i_byte;
1397 /* Set to 1 if the current buffer is multibyte and the region
1398 contains non-ASCII chars. */
1399 int multibyte;
1400 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1401 chars. */
1402 int string_multibyte;
1403 int size_byte;
1404 const unsigned char *str;
1405 int len;
1406
1407 CHECK_STRING (string);
1408
1409 if (NILP (lim))
1410 XSETINT (lim, forwardp ? ZV : BEGV);
1411 else
1412 CHECK_NUMBER_COERCE_MARKER (lim);
1413
1414 /* In any case, don't allow scan outside bounds of buffer. */
1415 if (XINT (lim) > ZV)
1416 XSETFASTINT (lim, ZV);
1417 if (XINT (lim) < BEGV)
1418 XSETFASTINT (lim, BEGV);
1419
1420 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1421 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1422 string_multibyte = SBYTES (string) > SCHARS (string);
1423
1424 bzero (fastmap, sizeof fastmap);
1425
1426 str = SDATA (string);
1427 size_byte = SBYTES (string);
1428
1429 i_byte = 0;
1430 if (i_byte < size_byte
1431 && SREF (string, 0) == '^')
1432 {
1433 negate = 1; i_byte++;
1434 }
1435
1436 /* Find the characters specified and set their elements of fastmap.
1437 Handle backslashes and ranges specially.
1438
1439 If STRING contains non-ASCII characters, setup char_ranges for
1440 them and use fastmap only for their leading codes. */
1441
1442 if (! string_multibyte)
1443 {
1444 int string_has_eight_bit = 0;
1445
1446 /* At first setup fastmap. */
1447 while (i_byte < size_byte)
1448 {
1449 c = str[i_byte++];
1450
1451 if (c == '\\')
1452 {
1453 if (i_byte == size_byte)
1454 break;
1455
1456 c = str[i_byte++];
1457 }
1458 /* Treat `-' as range character only if another character
1459 follows. */
1460 if (i_byte + 1 < size_byte
1461 && str[i_byte] == '-')
1462 {
1463 unsigned int c2;
1464
1465 /* Skip over the dash. */
1466 i_byte++;
1467
1468 /* Get the end of the range. */
1469 c2 = str[i_byte++];
1470 if (c2 == '\\'
1471 && i_byte < size_byte)
1472 c2 = str[i_byte++];
1473
1474 if (c <= c2)
1475 {
1476 while (c <= c2)
1477 fastmap[c++] = 1;
1478 if (! ASCII_CHAR_P (c2))
1479 string_has_eight_bit = 1;
1480 }
1481 }
1482 else
1483 {
1484 fastmap[c] = 1;
1485 if (! ASCII_CHAR_P (c))
1486 string_has_eight_bit = 1;
1487 }
1488 }
1489
1490 /* If the current range is multibyte and STRING contains
1491 eight-bit chars, arrange fastmap and setup char_ranges for
1492 the corresponding multibyte chars. */
1493 if (multibyte && string_has_eight_bit)
1494 {
1495 unsigned char fastmap2[0400];
1496 int range_start_byte, range_start_char;
1497
1498 bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
1499 bzero (fastmap + 0200, 0200);
1500 /* We are sure that this loop stops. */
1501 for (i = 0200; ! fastmap2[i]; i++);
1502 c = unibyte_char_to_multibyte (i);
1503 fastmap[CHAR_LEADING_CODE (c)] = 1;
1504 range_start_byte = i;
1505 range_start_char = c;
1506 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1507 for (i = 129; i < 0400; i++)
1508 {
1509 c = unibyte_char_to_multibyte (i);
1510 fastmap[CHAR_LEADING_CODE (c)] = 1;
1511 if (i - range_start_byte != c - range_start_char)
1512 {
1513 char_ranges[n_char_ranges++] = range_start_char;
1514 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1515 + range_start_char);
1516 range_start_byte = i;
1517 range_start_char = c;
1518 }
1519 }
1520 char_ranges[n_char_ranges++] = range_start_char;
1521 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1522 + range_start_char);
1523 }
1524 }
1525 else /* STRING is multibyte */
1526 {
1527 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1528
1529 while (i_byte < size_byte)
1530 {
1531 unsigned char leading_code;
1532
1533 leading_code = str[i_byte];
1534 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1535 i_byte += len;
1536
1537 if (c == '\\')
1538 {
1539 if (i_byte == size_byte)
1540 break;
1541
1542 leading_code = str[i_byte];
1543 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1544 size_byte - i_byte, len);
1545 i_byte += len;
1546 }
1547 /* Treat `-' as range character only if another character
1548 follows. */
1549 if (i_byte + 1 < size_byte
1550 && str[i_byte] == '-')
1551 {
1552 unsigned int c2;
1553 unsigned char leading_code2;
1554
1555 /* Skip over the dash. */
1556 i_byte++;
1557
1558 /* Get the end of the range. */
1559 leading_code2 = str[i_byte];
1560 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1561 size_byte - i_byte, len);
1562 i_byte += len;
1563
1564 if (c2 == '\\'
1565 && i_byte < size_byte)
1566 {
1567 leading_code2 = str[i_byte];
1568 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1569 i_byte += len;
1570 }
1571
1572 if (c > c2)
1573 continue;
1574 if (ASCII_CHAR_P (c))
1575 {
1576 while (c <= c2 && c < 0x80)
1577 fastmap[c++] = 1;
1578 leading_code = CHAR_LEADING_CODE (c);
1579 }
1580 if (! ASCII_CHAR_P (c))
1581 {
1582 while (leading_code <= leading_code2)
1583 fastmap[leading_code++] = 1;
1584 if (c <= c2)
1585 {
1586 char_ranges[n_char_ranges++] = c;
1587 char_ranges[n_char_ranges++] = c2;
1588 }
1589 }
1590 }
1591 else
1592 {
1593 if (ASCII_CHAR_P (c))
1594 fastmap[c] = 1;
1595 else
1596 {
1597 fastmap[leading_code] = 1;
1598 char_ranges[n_char_ranges++] = c;
1599 char_ranges[n_char_ranges++] = c;
1600 }
1601 }
1602 }
1603
1604 /* If the current range is unibyte and STRING contains non-ASCII
1605 chars, arrange fastmap for the corresponding unibyte
1606 chars. */
1607
1608 if (! multibyte && n_char_ranges > 0)
1609 {
1610 bzero (fastmap + 0200, 0200);
1611 for (i = 0; i < n_char_ranges; i += 2)
1612 {
1613 int c1 = char_ranges[i];
1614 int c2 = char_ranges[i + 1];
1615
1616 for (; c1 <= c2; c1++)
1617 fastmap[CHAR_TO_BYTE8 (c1)] = 1;
1618 }
1619 }
1620 }
1621
1622 /* If ^ was the first character, complement the fastmap. */
1623 if (negate)
1624 {
1625 if (! multibyte)
1626 for (i = 0; i < sizeof fastmap; i++)
1627 fastmap[i] ^= 1;
1628 else
1629 {
1630 for (i = 0; i < 0200; i++)
1631 fastmap[i] ^= 1;
1632 /* All non-ASCII chars possibly match. */
1633 for (; i < sizeof fastmap; i++)
1634 fastmap[i] = 1;
1635 }
1636 }
1637
1638 {
1639 int start_point = PT;
1640 int pos = PT;
1641 int pos_byte = PT_BYTE;
1642 unsigned char *p = PT_ADDR, *endp, *stop;
1643
1644 if (forwardp)
1645 {
1646 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1647 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1648 }
1649 else
1650 {
1651 endp = CHAR_POS_ADDR (XINT (lim));
1652 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1653 }
1654
1655 immediate_quit = 1;
1656 if (forwardp)
1657 {
1658 if (multibyte)
1659 while (1)
1660 {
1661 int nbytes;
1662
1663 if (p >= stop)
1664 {
1665 if (p >= endp)
1666 break;
1667 p = GAP_END_ADDR;
1668 stop = endp;
1669 }
1670 if (! fastmap[*p])
1671 break;
1672 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1673 if (! ASCII_CHAR_P (c))
1674 {
1675 /* As we are looking at a multibyte character, we
1676 must look up the character in the table
1677 CHAR_RANGES. If there's no data in the table,
1678 that character is not what we want to skip. */
1679
1680 /* The following code do the right thing even if
1681 n_char_ranges is zero (i.e. no data in
1682 CHAR_RANGES). */
1683 for (i = 0; i < n_char_ranges; i += 2)
1684 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1685 break;
1686 if (!(negate ^ (i < n_char_ranges)))
1687 break;
1688 }
1689 p += nbytes, pos++, pos_byte += nbytes;
1690 }
1691 else
1692 while (1)
1693 {
1694 if (p >= stop)
1695 {
1696 if (p >= endp)
1697 break;
1698 p = GAP_END_ADDR;
1699 stop = endp;
1700 }
1701 if (!fastmap[*p])
1702 break;
1703 p++, pos++, pos_byte++;
1704 }
1705 }
1706 else
1707 {
1708 if (multibyte)
1709 while (1)
1710 {
1711 unsigned char *prev_p;
1712
1713 if (p <= stop)
1714 {
1715 if (p <= endp)
1716 break;
1717 p = GPT_ADDR;
1718 stop = endp;
1719 }
1720 prev_p = p;
1721 while (--p >= stop && ! CHAR_HEAD_P (*p));
1722 if (! fastmap[*p])
1723 break;
1724 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1725 if (! ASCII_CHAR_P (c))
1726 {
1727 /* See the comment in the previous similar code. */
1728 for (i = 0; i < n_char_ranges; i += 2)
1729 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1730 break;
1731 if (!(negate ^ (i < n_char_ranges)))
1732 break;
1733 }
1734 pos--, pos_byte -= prev_p - p;
1735 }
1736 else
1737 while (1)
1738 {
1739 if (p <= stop)
1740 {
1741 if (p <= endp)
1742 break;
1743 p = GPT_ADDR;
1744 stop = endp;
1745 }
1746 if (!fastmap[p[-1]])
1747 break;
1748 p--, pos--, pos_byte--;
1749 }
1750 }
1751
1752 SET_PT_BOTH (pos, pos_byte);
1753 immediate_quit = 0;
1754
1755 return make_number (PT - start_point);
1756 }
1757 }
1758
1759
1760 static Lisp_Object
1761 skip_syntaxes (forwardp, string, lim)
1762 int forwardp;
1763 Lisp_Object string, lim;
1764 {
1765 register unsigned int c;
1766 unsigned char fastmap[0400];
1767 int negate = 0;
1768 register int i, i_byte;
1769 int multibyte;
1770 int size_byte;
1771 unsigned char *str;
1772
1773 CHECK_STRING (string);
1774
1775 if (NILP (lim))
1776 XSETINT (lim, forwardp ? ZV : BEGV);
1777 else
1778 CHECK_NUMBER_COERCE_MARKER (lim);
1779
1780 /* In any case, don't allow scan outside bounds of buffer. */
1781 if (XINT (lim) > ZV)
1782 XSETFASTINT (lim, ZV);
1783 if (XINT (lim) < BEGV)
1784 XSETFASTINT (lim, BEGV);
1785
1786 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1787 return Qnil;
1788
1789 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1790 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1791
1792 bzero (fastmap, sizeof fastmap);
1793
1794 if (SBYTES (string) > SCHARS (string))
1795 /* As this is very rare case (syntax spec is ASCII only), don't
1796 consider efficiency. */
1797 string = string_make_unibyte (string);
1798
1799 str = SDATA (string);
1800 size_byte = SBYTES (string);
1801
1802 i_byte = 0;
1803 if (i_byte < size_byte
1804 && SREF (string, 0) == '^')
1805 {
1806 negate = 1; i_byte++;
1807 }
1808
1809 /* Find the syntaxes specified and set their elements of fastmap. */
1810
1811 while (i_byte < size_byte)
1812 {
1813 c = str[i_byte++];
1814 fastmap[syntax_spec_code[c]] = 1;
1815 }
1816
1817 /* If ^ was the first character, complement the fastmap. */
1818 if (negate)
1819 for (i = 0; i < sizeof fastmap; i++)
1820 fastmap[i] ^= 1;
1821
1822 {
1823 int start_point = PT;
1824 int pos = PT;
1825 int pos_byte = PT_BYTE;
1826 unsigned char *p = PT_ADDR, *endp, *stop;
1827
1828 if (forwardp)
1829 {
1830 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1831 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1832 }
1833 else
1834 {
1835 endp = CHAR_POS_ADDR (XINT (lim));
1836 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1837 }
1838
1839 immediate_quit = 1;
1840 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1841 if (forwardp)
1842 {
1843 if (multibyte)
1844 {
1845 while (1)
1846 {
1847 int nbytes;
1848
1849 if (p >= stop)
1850 {
1851 if (p >= endp)
1852 break;
1853 p = GAP_END_ADDR;
1854 stop = endp;
1855 }
1856 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1857 if (! fastmap[(int) SYNTAX (c)])
1858 break;
1859 p += nbytes, pos++, pos_byte += nbytes;
1860 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1861 }
1862 }
1863 else
1864 {
1865 while (1)
1866 {
1867 if (p >= stop)
1868 {
1869 if (p >= endp)
1870 break;
1871 p = GAP_END_ADDR;
1872 stop = endp;
1873 }
1874 if (! fastmap[(int) SYNTAX (*p)])
1875 break;
1876 p++, pos++, pos_byte++;
1877 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1878 }
1879 }
1880 }
1881 else
1882 {
1883 if (multibyte)
1884 {
1885 while (1)
1886 {
1887 unsigned char *prev_p;
1888
1889 if (p <= stop)
1890 {
1891 if (p <= endp)
1892 break;
1893 p = GPT_ADDR;
1894 stop = endp;
1895 }
1896 prev_p = p;
1897 while (--p >= stop && ! CHAR_HEAD_P (*p));
1898 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1899 if (! fastmap[(int) SYNTAX (c)])
1900 break;
1901 pos--, pos_byte -= prev_p - p;
1902 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1903 }
1904 }
1905 else
1906 {
1907 while (1)
1908 {
1909 if (p <= stop)
1910 {
1911 if (p <= endp)
1912 break;
1913 p = GPT_ADDR;
1914 stop = endp;
1915 }
1916 if (! fastmap[(int) SYNTAX (p[-1])])
1917 break;
1918 p--, pos--, pos_byte--;
1919 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1920 }
1921 }
1922 }
1923
1924 SET_PT_BOTH (pos, pos_byte);
1925 immediate_quit = 0;
1926
1927 return make_number (PT - start_point);
1928 }
1929 }
1930 \f
1931 /* Jump over a comment, assuming we are at the beginning of one.
1932 FROM is the current position.
1933 FROM_BYTE is the bytepos corresponding to FROM.
1934 Do not move past STOP (a charpos).
1935 The comment over which we have to jump is of style STYLE
1936 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1937 NESTING should be positive to indicate the nesting at the beginning
1938 for nested comments and should be zero or negative else.
1939 ST_COMMENT_STYLE cannot be nested.
1940 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1941 (or 0 If the search cannot start in the middle of a two-character).
1942
1943 If successful, return 1 and store the charpos of the comment's end
1944 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1945 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1946 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1947 (as defined for state.incomment) in *INCOMMENT_PTR.
1948
1949 The comment end is the last character of the comment rather than the
1950 character just after the comment.
1951
1952 Global syntax data is assumed to initially be valid for FROM and
1953 remains valid for forward search starting at the returned position. */
1954
1955 static int
1956 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1957 charpos_ptr, bytepos_ptr, incomment_ptr)
1958 int from, from_byte, stop;
1959 int nesting, style, prev_syntax;
1960 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1961 {
1962 register int c, c1;
1963 register enum syntaxcode code;
1964 register int syntax;
1965
1966 if (nesting <= 0) nesting = -1;
1967
1968 /* Enter the loop in the middle so that we find
1969 a 2-char comment ender if we start in the middle of it. */
1970 syntax = prev_syntax;
1971 if (syntax != 0) goto forw_incomment;
1972
1973 while (1)
1974 {
1975 if (from == stop)
1976 {
1977 *incomment_ptr = nesting;
1978 *charpos_ptr = from;
1979 *bytepos_ptr = from_byte;
1980 return 0;
1981 }
1982 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1983 syntax = SYNTAX_WITH_FLAGS (c);
1984 code = syntax & 0xff;
1985 if (code == Sendcomment
1986 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1987 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1988 (nesting > 0 && --nesting == 0) : nesting < 0))
1989 /* we have encountered a comment end of the same style
1990 as the comment sequence which began this comment
1991 section */
1992 break;
1993 if (code == Scomment_fence
1994 && style == ST_COMMENT_STYLE)
1995 /* we have encountered a comment end of the same style
1996 as the comment sequence which began this comment
1997 section. */
1998 break;
1999 if (nesting > 0
2000 && code == Scomment
2001 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2002 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2003 /* we have encountered a nested comment of the same style
2004 as the comment sequence which began this comment section */
2005 nesting++;
2006 INC_BOTH (from, from_byte);
2007 UPDATE_SYNTAX_TABLE_FORWARD (from);
2008
2009 forw_incomment:
2010 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2011 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
2012 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2013 SYNTAX_COMEND_SECOND (c1))
2014 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2015 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
2016 {
2017 if (--nesting <= 0)
2018 /* we have encountered a comment end of the same style
2019 as the comment sequence which began this comment
2020 section */
2021 break;
2022 else
2023 {
2024 INC_BOTH (from, from_byte);
2025 UPDATE_SYNTAX_TABLE_FORWARD (from);
2026 }
2027 }
2028 if (nesting > 0
2029 && from < stop
2030 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2031 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2032 SYNTAX_COMMENT_STYLE (c1) == style
2033 && SYNTAX_COMSTART_SECOND (c1))
2034 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2035 SYNTAX_COMMENT_NESTED (c1)))
2036 /* we have encountered a nested comment of the same style
2037 as the comment sequence which began this comment
2038 section */
2039 {
2040 INC_BOTH (from, from_byte);
2041 UPDATE_SYNTAX_TABLE_FORWARD (from);
2042 nesting++;
2043 }
2044 }
2045 *charpos_ptr = from;
2046 *bytepos_ptr = from_byte;
2047 return 1;
2048 }
2049
2050 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2051 doc: /*
2052 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2053 Stop scanning if we find something other than a comment or whitespace.
2054 Set point to where scanning stops.
2055 If COUNT comments are found as expected, with nothing except whitespace
2056 between them, return t; otherwise return nil. */)
2057 (count)
2058 Lisp_Object count;
2059 {
2060 register int from;
2061 int from_byte;
2062 register int stop;
2063 register int c, c1;
2064 register enum syntaxcode code;
2065 int comstyle = 0; /* style of comment encountered */
2066 int comnested = 0; /* whether the comment is nestable or not */
2067 int found;
2068 int count1;
2069 int out_charpos, out_bytepos;
2070 int dummy;
2071
2072 CHECK_NUMBER (count);
2073 count1 = XINT (count);
2074 stop = count1 > 0 ? ZV : BEGV;
2075
2076 immediate_quit = 1;
2077 QUIT;
2078
2079 from = PT;
2080 from_byte = PT_BYTE;
2081
2082 SETUP_SYNTAX_TABLE (from, count1);
2083 while (count1 > 0)
2084 {
2085 do
2086 {
2087 int comstart_first;
2088
2089 if (from == stop)
2090 {
2091 SET_PT_BOTH (from, from_byte);
2092 immediate_quit = 0;
2093 return Qnil;
2094 }
2095 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2096 code = SYNTAX (c);
2097 comstart_first = SYNTAX_COMSTART_FIRST (c);
2098 comnested = SYNTAX_COMMENT_NESTED (c);
2099 comstyle = SYNTAX_COMMENT_STYLE (c);
2100 INC_BOTH (from, from_byte);
2101 UPDATE_SYNTAX_TABLE_FORWARD (from);
2102 if (from < stop && comstart_first
2103 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2104 SYNTAX_COMSTART_SECOND (c1)))
2105 {
2106 /* We have encountered a comment start sequence and we
2107 are ignoring all text inside comments. We must record
2108 the comment style this sequence begins so that later,
2109 only a comment end of the same style actually ends
2110 the comment section. */
2111 code = Scomment;
2112 comstyle = SYNTAX_COMMENT_STYLE (c1);
2113 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2114 INC_BOTH (from, from_byte);
2115 UPDATE_SYNTAX_TABLE_FORWARD (from);
2116 }
2117 }
2118 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2119
2120 if (code == Scomment_fence)
2121 comstyle = ST_COMMENT_STYLE;
2122 else if (code != Scomment)
2123 {
2124 immediate_quit = 0;
2125 DEC_BOTH (from, from_byte);
2126 SET_PT_BOTH (from, from_byte);
2127 return Qnil;
2128 }
2129 /* We're at the start of a comment. */
2130 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2131 &out_charpos, &out_bytepos, &dummy);
2132 from = out_charpos; from_byte = out_bytepos;
2133 if (!found)
2134 {
2135 immediate_quit = 0;
2136 SET_PT_BOTH (from, from_byte);
2137 return Qnil;
2138 }
2139 INC_BOTH (from, from_byte);
2140 UPDATE_SYNTAX_TABLE_FORWARD (from);
2141 /* We have skipped one comment. */
2142 count1--;
2143 }
2144
2145 while (count1 < 0)
2146 {
2147 while (1)
2148 {
2149 int quoted;
2150
2151 if (from <= stop)
2152 {
2153 SET_PT_BOTH (BEGV, BEGV_BYTE);
2154 immediate_quit = 0;
2155 return Qnil;
2156 }
2157
2158 DEC_BOTH (from, from_byte);
2159 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2160 quoted = char_quoted (from, from_byte);
2161 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2162 code = SYNTAX (c);
2163 comstyle = 0;
2164 comnested = SYNTAX_COMMENT_NESTED (c);
2165 if (code == Sendcomment)
2166 comstyle = SYNTAX_COMMENT_STYLE (c);
2167 if (from > stop && SYNTAX_COMEND_SECOND (c)
2168 && prev_char_comend_first (from, from_byte)
2169 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2170 {
2171 /* We must record the comment style encountered so that
2172 later, we can match only the proper comment begin
2173 sequence of the same style. */
2174 DEC_BOTH (from, from_byte);
2175 code = Sendcomment;
2176 /* Calling char_quoted, above, set up global syntax position
2177 at the new value of FROM. */
2178 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2179 comstyle = SYNTAX_COMMENT_STYLE (c1);
2180 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2181 }
2182
2183 if (code == Scomment_fence)
2184 {
2185 /* Skip until first preceding unquoted comment_fence. */
2186 int found = 0, ini = from, ini_byte = from_byte;
2187
2188 while (1)
2189 {
2190 DEC_BOTH (from, from_byte);
2191 if (from == stop)
2192 break;
2193 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2194 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2195 if (SYNTAX (c) == Scomment_fence
2196 && !char_quoted (from, from_byte))
2197 {
2198 found = 1;
2199 break;
2200 }
2201 }
2202 if (found == 0)
2203 {
2204 from = ini; /* Set point to ini + 1. */
2205 from_byte = ini_byte;
2206 goto leave;
2207 }
2208 }
2209 else if (code == Sendcomment)
2210 {
2211 found = back_comment (from, from_byte, stop, comnested, comstyle,
2212 &out_charpos, &out_bytepos);
2213 if (found == -1)
2214 {
2215 if (c == '\n')
2216 /* This end-of-line is not an end-of-comment.
2217 Treat it like a whitespace.
2218 CC-mode (and maybe others) relies on this behavior. */
2219 ;
2220 else
2221 {
2222 /* Failure: we should go back to the end of this
2223 not-quite-endcomment. */
2224 if (SYNTAX(c) != code)
2225 /* It was a two-char Sendcomment. */
2226 INC_BOTH (from, from_byte);
2227 goto leave;
2228 }
2229 }
2230 else
2231 {
2232 /* We have skipped one comment. */
2233 from = out_charpos, from_byte = out_bytepos;
2234 break;
2235 }
2236 }
2237 else if (code != Swhitespace || quoted)
2238 {
2239 leave:
2240 immediate_quit = 0;
2241 INC_BOTH (from, from_byte);
2242 SET_PT_BOTH (from, from_byte);
2243 return Qnil;
2244 }
2245 }
2246
2247 count1++;
2248 }
2249
2250 SET_PT_BOTH (from, from_byte);
2251 immediate_quit = 0;
2252 return Qt;
2253 }
2254 \f
2255 /* Return syntax code of character C if C is an ASCII character
2256 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2257
2258 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2259 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
2260 ? SYNTAX (c) : Ssymbol)
2261
2262 static Lisp_Object
2263 scan_lists (from, count, depth, sexpflag)
2264 register int from;
2265 int count, depth, sexpflag;
2266 {
2267 Lisp_Object val;
2268 register int stop = count > 0 ? ZV : BEGV;
2269 register int c, c1;
2270 int stringterm;
2271 int quoted;
2272 int mathexit = 0;
2273 register enum syntaxcode code, temp_code;
2274 int min_depth = depth; /* Err out if depth gets less than this. */
2275 int comstyle = 0; /* style of comment encountered */
2276 int comnested = 0; /* whether the comment is nestable or not */
2277 int temp_pos;
2278 int last_good = from;
2279 int found;
2280 int from_byte;
2281 int out_bytepos, out_charpos;
2282 int temp, dummy;
2283 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2284
2285 if (depth > 0) min_depth = 0;
2286
2287 if (from > ZV) from = ZV;
2288 if (from < BEGV) from = BEGV;
2289
2290 from_byte = CHAR_TO_BYTE (from);
2291
2292 immediate_quit = 1;
2293 QUIT;
2294
2295 SETUP_SYNTAX_TABLE (from, count);
2296 while (count > 0)
2297 {
2298 while (from < stop)
2299 {
2300 int comstart_first, prefix;
2301 UPDATE_SYNTAX_TABLE_FORWARD (from);
2302 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2303 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2304 comstart_first = SYNTAX_COMSTART_FIRST (c);
2305 comnested = SYNTAX_COMMENT_NESTED (c);
2306 comstyle = SYNTAX_COMMENT_STYLE (c);
2307 prefix = SYNTAX_PREFIX (c);
2308 if (depth == min_depth)
2309 last_good = from;
2310 INC_BOTH (from, from_byte);
2311 UPDATE_SYNTAX_TABLE_FORWARD (from);
2312 if (from < stop && comstart_first
2313 && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte))
2314 && parse_sexp_ignore_comments)
2315 {
2316 /* we have encountered a comment start sequence and we
2317 are ignoring all text inside comments. We must record
2318 the comment style this sequence begins so that later,
2319 only a comment end of the same style actually ends
2320 the comment section */
2321 code = Scomment;
2322 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2323 comstyle = SYNTAX_COMMENT_STYLE (c1);
2324 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2325 INC_BOTH (from, from_byte);
2326 UPDATE_SYNTAX_TABLE_FORWARD (from);
2327 }
2328
2329 if (prefix)
2330 continue;
2331
2332 switch (SWITCH_ENUM_CAST (code))
2333 {
2334 case Sescape:
2335 case Scharquote:
2336 if (from == stop) goto lose;
2337 INC_BOTH (from, from_byte);
2338 /* treat following character as a word constituent */
2339 case Sword:
2340 case Ssymbol:
2341 if (depth || !sexpflag) break;
2342 /* This word counts as a sexp; return at end of it. */
2343 while (from < stop)
2344 {
2345 UPDATE_SYNTAX_TABLE_FORWARD (from);
2346
2347 /* Some compilers can't handle this inside the switch. */
2348 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2349 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2350 switch (temp)
2351 {
2352 case Scharquote:
2353 case Sescape:
2354 INC_BOTH (from, from_byte);
2355 if (from == stop) goto lose;
2356 break;
2357 case Sword:
2358 case Ssymbol:
2359 case Squote:
2360 break;
2361 default:
2362 goto done;
2363 }
2364 INC_BOTH (from, from_byte);
2365 }
2366 goto done;
2367
2368 case Scomment_fence:
2369 comstyle = ST_COMMENT_STYLE;
2370 /* FALLTHROUGH */
2371 case Scomment:
2372 if (!parse_sexp_ignore_comments) break;
2373 UPDATE_SYNTAX_TABLE_FORWARD (from);
2374 found = forw_comment (from, from_byte, stop,
2375 comnested, comstyle, 0,
2376 &out_charpos, &out_bytepos, &dummy);
2377 from = out_charpos, from_byte = out_bytepos;
2378 if (!found)
2379 {
2380 if (depth == 0)
2381 goto done;
2382 goto lose;
2383 }
2384 INC_BOTH (from, from_byte);
2385 UPDATE_SYNTAX_TABLE_FORWARD (from);
2386 break;
2387
2388 case Smath:
2389 if (!sexpflag)
2390 break;
2391 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2392 {
2393 INC_BOTH (from, from_byte);
2394 }
2395 if (mathexit)
2396 {
2397 mathexit = 0;
2398 goto close1;
2399 }
2400 mathexit = 1;
2401
2402 case Sopen:
2403 if (!++depth) goto done;
2404 break;
2405
2406 case Sclose:
2407 close1:
2408 if (!--depth) goto done;
2409 if (depth < min_depth)
2410 Fsignal (Qscan_error,
2411 Fcons (build_string ("Containing expression ends prematurely"),
2412 Fcons (make_number (last_good),
2413 Fcons (make_number (from), Qnil))));
2414 break;
2415
2416 case Sstring:
2417 case Sstring_fence:
2418 temp_pos = dec_bytepos (from_byte);
2419 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2420 while (1)
2421 {
2422 if (from >= stop) goto lose;
2423 UPDATE_SYNTAX_TABLE_FORWARD (from);
2424 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2425 if (code == Sstring
2426 ? (c == stringterm
2427 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2428 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2429 break;
2430
2431 /* Some compilers can't handle this inside the switch. */
2432 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2433 switch (temp)
2434 {
2435 case Scharquote:
2436 case Sescape:
2437 INC_BOTH (from, from_byte);
2438 }
2439 INC_BOTH (from, from_byte);
2440 }
2441 INC_BOTH (from, from_byte);
2442 if (!depth && sexpflag) goto done;
2443 break;
2444 default:
2445 /* Ignore whitespace, punctuation, quote, endcomment. */
2446 break;
2447 }
2448 }
2449
2450 /* Reached end of buffer. Error if within object, return nil if between */
2451 if (depth) goto lose;
2452
2453 immediate_quit = 0;
2454 return Qnil;
2455
2456 /* End of object reached */
2457 done:
2458 count--;
2459 }
2460
2461 while (count < 0)
2462 {
2463 while (from > stop)
2464 {
2465 DEC_BOTH (from, from_byte);
2466 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2467 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2468 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2469 if (depth == min_depth)
2470 last_good = from;
2471 comstyle = 0;
2472 comnested = SYNTAX_COMMENT_NESTED (c);
2473 if (code == Sendcomment)
2474 comstyle = SYNTAX_COMMENT_STYLE (c);
2475 if (from > stop && SYNTAX_COMEND_SECOND (c)
2476 && prev_char_comend_first (from, from_byte)
2477 && parse_sexp_ignore_comments)
2478 {
2479 /* We must record the comment style encountered so that
2480 later, we can match only the proper comment begin
2481 sequence of the same style. */
2482 DEC_BOTH (from, from_byte);
2483 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2484 code = Sendcomment;
2485 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2486 comstyle = SYNTAX_COMMENT_STYLE (c1);
2487 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2488 }
2489
2490 /* Quoting turns anything except a comment-ender
2491 into a word character. Note that this cannot be true
2492 if we decremented FROM in the if-statement above. */
2493 if (code != Sendcomment && char_quoted (from, from_byte))
2494 {
2495 DEC_BOTH (from, from_byte);
2496 code = Sword;
2497 }
2498 else if (SYNTAX_PREFIX (c))
2499 continue;
2500
2501 switch (SWITCH_ENUM_CAST (code))
2502 {
2503 case Sword:
2504 case Ssymbol:
2505 case Sescape:
2506 case Scharquote:
2507 if (depth || !sexpflag) break;
2508 /* This word counts as a sexp; count object finished
2509 after passing it. */
2510 while (from > stop)
2511 {
2512 temp_pos = from_byte;
2513 if (! NILP (current_buffer->enable_multibyte_characters))
2514 DEC_POS (temp_pos);
2515 else
2516 temp_pos--;
2517 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2518 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2519 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2520 /* Don't allow comment-end to be quoted. */
2521 if (temp_code == Sendcomment)
2522 goto done2;
2523 quoted = char_quoted (from - 1, temp_pos);
2524 if (quoted)
2525 {
2526 DEC_BOTH (from, from_byte);
2527 temp_pos = dec_bytepos (temp_pos);
2528 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2529 }
2530 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2531 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2532 if (! (quoted || temp_code == Sword
2533 || temp_code == Ssymbol
2534 || temp_code == Squote))
2535 goto done2;
2536 DEC_BOTH (from, from_byte);
2537 }
2538 goto done2;
2539
2540 case Smath:
2541 if (!sexpflag)
2542 break;
2543 temp_pos = dec_bytepos (from_byte);
2544 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2545 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2546 DEC_BOTH (from, from_byte);
2547 if (mathexit)
2548 {
2549 mathexit = 0;
2550 goto open2;
2551 }
2552 mathexit = 1;
2553
2554 case Sclose:
2555 if (!++depth) goto done2;
2556 break;
2557
2558 case Sopen:
2559 open2:
2560 if (!--depth) goto done2;
2561 if (depth < min_depth)
2562 Fsignal (Qscan_error,
2563 Fcons (build_string ("Containing expression ends prematurely"),
2564 Fcons (make_number (last_good),
2565 Fcons (make_number (from), Qnil))));
2566 break;
2567
2568 case Sendcomment:
2569 if (!parse_sexp_ignore_comments)
2570 break;
2571 found = back_comment (from, from_byte, stop, comnested, comstyle,
2572 &out_charpos, &out_bytepos);
2573 /* FIXME: if found == -1, then it really wasn't a comment-end.
2574 For single-char Sendcomment, we can't do much about it apart
2575 from skipping the char.
2576 For 2-char endcomments, we could try again, taking both
2577 chars as separate entities, but it's a lot of trouble
2578 for very little gain, so we don't bother either. -sm */
2579 if (found != -1)
2580 from = out_charpos, from_byte = out_bytepos;
2581 break;
2582
2583 case Scomment_fence:
2584 case Sstring_fence:
2585 while (1)
2586 {
2587 if (from == stop) goto lose;
2588 DEC_BOTH (from, from_byte);
2589 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2590 if (!char_quoted (from, from_byte)
2591 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2592 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2593 break;
2594 }
2595 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2596 break;
2597
2598 case Sstring:
2599 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2600 while (1)
2601 {
2602 if (from == stop) goto lose;
2603 DEC_BOTH (from, from_byte);
2604 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2605 if (!char_quoted (from, from_byte)
2606 && (stringterm
2607 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2608 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2609 break;
2610 }
2611 if (!depth && sexpflag) goto done2;
2612 break;
2613 default:
2614 /* Ignore whitespace, punctuation, quote, endcomment. */
2615 break;
2616 }
2617 }
2618
2619 /* Reached start of buffer. Error if within object, return nil if between */
2620 if (depth) goto lose;
2621
2622 immediate_quit = 0;
2623 return Qnil;
2624
2625 done2:
2626 count++;
2627 }
2628
2629
2630 immediate_quit = 0;
2631 XSETFASTINT (val, from);
2632 return val;
2633
2634 lose:
2635 Fsignal (Qscan_error,
2636 Fcons (build_string ("Unbalanced parentheses"),
2637 Fcons (make_number (last_good),
2638 Fcons (make_number (from), Qnil))));
2639
2640 /* NOTREACHED */
2641 }
2642
2643 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2644 doc: /* Scan from character number FROM by COUNT lists.
2645 Returns the character number of the position thus found.
2646
2647 If DEPTH is nonzero, paren depth begins counting from that value,
2648 only places where the depth in parentheses becomes zero
2649 are candidates for stopping; COUNT such places are counted.
2650 Thus, a positive value for DEPTH means go out levels.
2651
2652 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2653
2654 If the beginning or end of (the accessible part of) the buffer is reached
2655 and the depth is wrong, an error is signaled.
2656 If the depth is right but the count is not used up, nil is returned. */)
2657 (from, count, depth)
2658 Lisp_Object from, count, depth;
2659 {
2660 CHECK_NUMBER (from);
2661 CHECK_NUMBER (count);
2662 CHECK_NUMBER (depth);
2663
2664 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2665 }
2666
2667 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2668 doc: /* Scan from character number FROM by COUNT balanced expressions.
2669 If COUNT is negative, scan backwards.
2670 Returns the character number of the position thus found.
2671
2672 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2673
2674 If the beginning or end of (the accessible part of) the buffer is reached
2675 in the middle of a parenthetical grouping, an error is signaled.
2676 If the beginning or end is reached between groupings
2677 but before count is used up, nil is returned. */)
2678 (from, count)
2679 Lisp_Object from, count;
2680 {
2681 CHECK_NUMBER (from);
2682 CHECK_NUMBER (count);
2683
2684 return scan_lists (XINT (from), XINT (count), 0, 1);
2685 }
2686
2687 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2688 0, 0, 0,
2689 doc: /* Move point backward over any number of chars with prefix syntax.
2690 This includes chars with "quote" or "prefix" syntax (' or p). */)
2691 ()
2692 {
2693 int beg = BEGV;
2694 int opoint = PT;
2695 int opoint_byte = PT_BYTE;
2696 int pos = PT;
2697 int pos_byte = PT_BYTE;
2698 int c;
2699
2700 if (pos <= beg)
2701 {
2702 SET_PT_BOTH (opoint, opoint_byte);
2703
2704 return Qnil;
2705 }
2706
2707 SETUP_SYNTAX_TABLE (pos, -1);
2708
2709 DEC_BOTH (pos, pos_byte);
2710
2711 while (!char_quoted (pos, pos_byte)
2712 /* Previous statement updates syntax table. */
2713 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2714 || SYNTAX_PREFIX (c)))
2715 {
2716 opoint = pos;
2717 opoint_byte = pos_byte;
2718
2719 if (pos + 1 > beg)
2720 DEC_BOTH (pos, pos_byte);
2721 }
2722
2723 SET_PT_BOTH (opoint, opoint_byte);
2724
2725 return Qnil;
2726 }
2727 \f
2728 /* Parse forward from FROM / FROM_BYTE to END,
2729 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2730 and return a description of the state of the parse at END.
2731 If STOPBEFORE is nonzero, stop at the start of an atom.
2732 If COMMENTSTOP is 1, stop at the start of a comment.
2733 If COMMENTSTOP is -1, stop at the start or end of a comment,
2734 after the beginning of a string, or after the end of a string. */
2735
2736 static void
2737 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2738 stopbefore, oldstate, commentstop)
2739 struct lisp_parse_state *stateptr;
2740 register int from;
2741 int from_byte;
2742 int end, targetdepth, stopbefore;
2743 Lisp_Object oldstate;
2744 int commentstop;
2745 {
2746 struct lisp_parse_state state;
2747
2748 register enum syntaxcode code;
2749 int c1;
2750 int comnested;
2751 struct level { int last, prev; };
2752 struct level levelstart[100];
2753 register struct level *curlevel = levelstart;
2754 struct level *endlevel = levelstart + 100;
2755 register int depth; /* Paren depth of current scanning location.
2756 level - levelstart equals this except
2757 when the depth becomes negative. */
2758 int mindepth; /* Lowest DEPTH value seen. */
2759 int start_quoted = 0; /* Nonzero means starting after a char quote */
2760 Lisp_Object tem;
2761 int prev_from; /* Keep one character before FROM. */
2762 int prev_from_byte;
2763 int prev_from_syntax;
2764 int boundary_stop = commentstop == -1;
2765 int nofence;
2766 int found;
2767 int out_bytepos, out_charpos;
2768 int temp;
2769
2770 prev_from = from;
2771 prev_from_byte = from_byte;
2772 if (from != BEGV)
2773 DEC_BOTH (prev_from, prev_from_byte);
2774
2775 /* Use this macro instead of `from++'. */
2776 #define INC_FROM \
2777 do { prev_from = from; \
2778 prev_from_byte = from_byte; \
2779 prev_from_syntax \
2780 = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \
2781 INC_BOTH (from, from_byte); \
2782 if (from < end) \
2783 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2784 } while (0)
2785
2786 immediate_quit = 1;
2787 QUIT;
2788
2789 if (NILP (oldstate))
2790 {
2791 depth = 0;
2792 state.instring = -1;
2793 state.incomment = 0;
2794 state.comstyle = 0; /* comment style a by default. */
2795 state.comstr_start = -1; /* no comment/string seen. */
2796 }
2797 else
2798 {
2799 tem = Fcar (oldstate);
2800 if (!NILP (tem))
2801 depth = XINT (tem);
2802 else
2803 depth = 0;
2804
2805 oldstate = Fcdr (oldstate);
2806 oldstate = Fcdr (oldstate);
2807 oldstate = Fcdr (oldstate);
2808 tem = Fcar (oldstate);
2809 /* Check whether we are inside string_fence-style string: */
2810 state.instring = (!NILP (tem)
2811 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2812 : -1);
2813
2814 oldstate = Fcdr (oldstate);
2815 tem = Fcar (oldstate);
2816 state.incomment = (!NILP (tem)
2817 ? (INTEGERP (tem) ? XINT (tem) : -1)
2818 : 0);
2819
2820 oldstate = Fcdr (oldstate);
2821 tem = Fcar (oldstate);
2822 start_quoted = !NILP (tem);
2823
2824 /* if the eighth element of the list is nil, we are in comment
2825 style a. If it is non-nil, we are in comment style b */
2826 oldstate = Fcdr (oldstate);
2827 oldstate = Fcdr (oldstate);
2828 tem = Fcar (oldstate);
2829 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2830 ? ST_COMMENT_STYLE : 1);
2831
2832 oldstate = Fcdr (oldstate);
2833 tem = Fcar (oldstate);
2834 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2835 oldstate = Fcdr (oldstate);
2836 tem = Fcar (oldstate);
2837 while (!NILP (tem)) /* >= second enclosing sexps. */
2838 {
2839 /* curlevel++->last ran into compiler bug on Apollo */
2840 curlevel->last = XINT (Fcar (tem));
2841 if (++curlevel == endlevel)
2842 curlevel--; /* error ("Nesting too deep for parser"); */
2843 curlevel->prev = -1;
2844 curlevel->last = -1;
2845 tem = Fcdr (tem);
2846 }
2847 }
2848 state.quoted = 0;
2849 mindepth = depth;
2850
2851 curlevel->prev = -1;
2852 curlevel->last = -1;
2853
2854 SETUP_SYNTAX_TABLE (prev_from, 1);
2855 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2856 UPDATE_SYNTAX_TABLE_FORWARD (from);
2857
2858 /* Enter the loop at a place appropriate for initial state. */
2859
2860 if (state.incomment)
2861 goto startincomment;
2862 if (state.instring >= 0)
2863 {
2864 nofence = state.instring != ST_STRING_STYLE;
2865 if (start_quoted)
2866 goto startquotedinstring;
2867 goto startinstring;
2868 }
2869 else if (start_quoted)
2870 goto startquoted;
2871
2872 while (from < end)
2873 {
2874 INC_FROM;
2875 code = prev_from_syntax & 0xff;
2876
2877 if (code == Scomment)
2878 {
2879 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2880 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2881 1 : -1);
2882 state.comstr_start = prev_from;
2883 }
2884 else if (code == Scomment_fence)
2885 {
2886 /* Record the comment style we have entered so that only
2887 the comment-end sequence of the same style actually
2888 terminates the comment section. */
2889 state.comstyle = ST_COMMENT_STYLE;
2890 state.incomment = -1;
2891 state.comstr_start = prev_from;
2892 code = Scomment;
2893 }
2894 else if (from < end)
2895 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2896 if (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2897 SYNTAX_COMSTART_SECOND (c1))
2898 /* Duplicate code to avoid a complex if-expression
2899 which causes trouble for the SGI compiler. */
2900 {
2901 /* Record the comment style we have entered so that only
2902 the comment-end sequence of the same style actually
2903 terminates the comment section. */
2904 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2905 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2906 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2907 state.incomment = comnested ? 1 : -1;
2908 state.comstr_start = prev_from;
2909 INC_FROM;
2910 code = Scomment;
2911 }
2912
2913 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2914 continue;
2915 switch (SWITCH_ENUM_CAST (code))
2916 {
2917 case Sescape:
2918 case Scharquote:
2919 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2920 curlevel->last = prev_from;
2921 startquoted:
2922 if (from == end) goto endquoted;
2923 INC_FROM;
2924 goto symstarted;
2925 /* treat following character as a word constituent */
2926 case Sword:
2927 case Ssymbol:
2928 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2929 curlevel->last = prev_from;
2930 symstarted:
2931 while (from < end)
2932 {
2933 /* Some compilers can't handle this inside the switch. */
2934 temp = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (from_byte));
2935 switch (temp)
2936 {
2937 case Scharquote:
2938 case Sescape:
2939 INC_FROM;
2940 if (from == end) goto endquoted;
2941 break;
2942 case Sword:
2943 case Ssymbol:
2944 case Squote:
2945 break;
2946 default:
2947 goto symdone;
2948 }
2949 INC_FROM;
2950 }
2951 symdone:
2952 curlevel->prev = curlevel->last;
2953 break;
2954
2955 case Scomment_fence: /* Can't happen because it's handled above. */
2956 case Scomment:
2957 if (commentstop || boundary_stop) goto done;
2958 startincomment:
2959 /* The (from == BEGV) test was to enter the loop in the middle so
2960 that we find a 2-char comment ender even if we start in the
2961 middle of it. We don't want to do that if we're just at the
2962 beginning of the comment (think of (*) ... (*)). */
2963 found = forw_comment (from, from_byte, end,
2964 state.incomment, state.comstyle,
2965 (from == BEGV || from < state.comstr_start + 3)
2966 ? 0 : prev_from_syntax,
2967 &out_charpos, &out_bytepos, &state.incomment);
2968 from = out_charpos; from_byte = out_bytepos;
2969 /* Beware! prev_from and friends are invalid now.
2970 Luckily, the `done' doesn't use them and the INC_FROM
2971 sets them to a sane value without looking at them. */
2972 if (!found) goto done;
2973 INC_FROM;
2974 state.incomment = 0;
2975 state.comstyle = 0; /* reset the comment style */
2976 if (boundary_stop) goto done;
2977 break;
2978
2979 case Sopen:
2980 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2981 depth++;
2982 /* curlevel++->last ran into compiler bug on Apollo */
2983 curlevel->last = prev_from;
2984 if (++curlevel == endlevel)
2985 curlevel--; /* error ("Nesting too deep for parser"); */
2986 curlevel->prev = -1;
2987 curlevel->last = -1;
2988 if (targetdepth == depth) goto done;
2989 break;
2990
2991 case Sclose:
2992 depth--;
2993 if (depth < mindepth)
2994 mindepth = depth;
2995 if (curlevel != levelstart)
2996 curlevel--;
2997 curlevel->prev = curlevel->last;
2998 if (targetdepth == depth) goto done;
2999 break;
3000
3001 case Sstring:
3002 case Sstring_fence:
3003 state.comstr_start = from - 1;
3004 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3005 curlevel->last = prev_from;
3006 state.instring = (code == Sstring
3007 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3008 : ST_STRING_STYLE);
3009 if (boundary_stop) goto done;
3010 startinstring:
3011 {
3012 nofence = state.instring != ST_STRING_STYLE;
3013
3014 while (1)
3015 {
3016 int c;
3017
3018 if (from >= end) goto done;
3019 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3020 /* Some compilers can't handle this inside the switch. */
3021 temp = SYNTAX (c);
3022
3023 /* Check TEMP here so that if the char has
3024 a syntax-table property which says it is NOT
3025 a string character, it does not end the string. */
3026 if (nofence && c == state.instring && temp == Sstring)
3027 break;
3028
3029 switch (temp)
3030 {
3031 case Sstring_fence:
3032 if (!nofence) goto string_end;
3033 break;
3034 case Scharquote:
3035 case Sescape:
3036 INC_FROM;
3037 startquotedinstring:
3038 if (from >= end) goto endquoted;
3039 }
3040 INC_FROM;
3041 }
3042 }
3043 string_end:
3044 state.instring = -1;
3045 curlevel->prev = curlevel->last;
3046 INC_FROM;
3047 if (boundary_stop) goto done;
3048 break;
3049
3050 case Smath:
3051 /* FIXME: We should do something with it. */
3052 break;
3053 default:
3054 /* Ignore whitespace, punctuation, quote, endcomment. */
3055 break;
3056 }
3057 }
3058 goto done;
3059
3060 stop: /* Here if stopping before start of sexp. */
3061 from = prev_from; /* We have just fetched the char that starts it; */
3062 goto done; /* but return the position before it. */
3063
3064 endquoted:
3065 state.quoted = 1;
3066 done:
3067 state.depth = depth;
3068 state.mindepth = mindepth;
3069 state.thislevelstart = curlevel->prev;
3070 state.prevlevelstart
3071 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3072 state.location = from;
3073 state.levelstarts = Qnil;
3074 while (--curlevel >= levelstart)
3075 state.levelstarts = Fcons (make_number (curlevel->last),
3076 state.levelstarts);
3077 immediate_quit = 0;
3078
3079 *stateptr = state;
3080 }
3081
3082 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3083 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3084 Parsing stops at TO or when certain criteria are met;
3085 point is set to where parsing stops.
3086 If fifth arg OLDSTATE is omitted or nil,
3087 parsing assumes that FROM is the beginning of a function.
3088 Value is a list of ten elements describing final state of parsing:
3089 0. depth in parens.
3090 1. character address of start of innermost containing list; nil if none.
3091 2. character address of start of last complete sexp terminated.
3092 3. non-nil if inside a string.
3093 (it is the character that will terminate the string,
3094 or t if the string should be terminated by a generic string delimiter.)
3095 4. nil if outside a comment, t if inside a non-nestable comment,
3096 else an integer (the current comment nesting).
3097 5. t if following a quote character.
3098 6. the minimum paren-depth encountered during this scan.
3099 7. t if in a comment of style b; symbol `syntax-table' if the comment
3100 should be terminated by a generic comment delimiter.
3101 8. character address of start of comment or string; nil if not in one.
3102 9. Intermediate data for continuation of parsing (subject to change).
3103 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3104 in parentheses becomes equal to TARGETDEPTH.
3105 Fourth arg STOPBEFORE non-nil means stop when come to
3106 any character that starts a sexp.
3107 Fifth arg OLDSTATE is a nine-element list like what this function returns.
3108 It is used to initialize the state of the parse. Elements number 1, 2, 6
3109 and 8 are ignored; you can leave off element 8 (the last) entirely.
3110 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3111 If it is symbol `syntax-table', stop after the start of a comment or a
3112 string, or after end of a comment or a string. */)
3113 (from, to, targetdepth, stopbefore, oldstate, commentstop)
3114 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
3115 {
3116 struct lisp_parse_state state;
3117 int target;
3118
3119 if (!NILP (targetdepth))
3120 {
3121 CHECK_NUMBER (targetdepth);
3122 target = XINT (targetdepth);
3123 }
3124 else
3125 target = -100000; /* We won't reach this depth */
3126
3127 validate_region (&from, &to);
3128 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3129 XINT (to),
3130 target, !NILP (stopbefore), oldstate,
3131 (NILP (commentstop)
3132 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3133
3134 SET_PT (state.location);
3135
3136 return Fcons (make_number (state.depth),
3137 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3138 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
3139 Fcons (state.instring >= 0
3140 ? (state.instring == ST_STRING_STYLE
3141 ? Qt : make_number (state.instring)) : Qnil,
3142 Fcons (state.incomment < 0 ? Qt :
3143 (state.incomment == 0 ? Qnil :
3144 make_number (state.incomment)),
3145 Fcons (state.quoted ? Qt : Qnil,
3146 Fcons (make_number (state.mindepth),
3147 Fcons ((state.comstyle
3148 ? (state.comstyle == ST_COMMENT_STYLE
3149 ? Qsyntax_table : Qt) :
3150 Qnil),
3151 Fcons (((state.incomment
3152 || (state.instring >= 0))
3153 ? make_number (state.comstr_start)
3154 : Qnil),
3155 Fcons (state.levelstarts, Qnil))))))))));
3156 }
3157 \f
3158 void
3159 init_syntax_once ()
3160 {
3161 register int i, c;
3162 Lisp_Object temp;
3163
3164 /* This has to be done here, before we call Fmake_char_table. */
3165 Qsyntax_table = intern ("syntax-table");
3166 staticpro (&Qsyntax_table);
3167
3168 /* Intern this now in case it isn't already done.
3169 Setting this variable twice is harmless.
3170 But don't staticpro it here--that is done in alloc.c. */
3171 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3172
3173 /* Create objects which can be shared among syntax tables. */
3174 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3175 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3176 XVECTOR (Vsyntax_code_object)->contents[i]
3177 = Fcons (make_number (i), Qnil);
3178
3179 /* Now we are ready to set up this property, so we can
3180 create syntax tables. */
3181 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3182
3183 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3184
3185 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3186
3187 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3188 for (i = 'a'; i <= 'z'; i++)
3189 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3190 for (i = 'A'; i <= 'Z'; i++)
3191 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3192 for (i = '0'; i <= '9'; i++)
3193 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3194
3195 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3196 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3197
3198 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3199 Fcons (make_number (Sopen), make_number (')')));
3200 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3201 Fcons (make_number (Sclose), make_number ('(')));
3202 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3203 Fcons (make_number (Sopen), make_number (']')));
3204 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3205 Fcons (make_number (Sclose), make_number ('[')));
3206 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3207 Fcons (make_number (Sopen), make_number ('}')));
3208 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3209 Fcons (make_number (Sclose), make_number ('{')));
3210 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3211 Fcons (make_number ((int) Sstring), Qnil));
3212 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3213 Fcons (make_number ((int) Sescape), Qnil));
3214
3215 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3216 for (i = 0; i < 10; i++)
3217 {
3218 c = "_-+*/&|<>="[i];
3219 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3220 }
3221
3222 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3223 for (i = 0; i < 12; i++)
3224 {
3225 c = ".,;:?!#@~^'`"[i];
3226 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3227 }
3228
3229 /* All multibyte characters have syntax `word' by default. */
3230 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3231 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3232 }
3233
3234 void
3235 syms_of_syntax ()
3236 {
3237 Qsyntax_table_p = intern ("syntax-table-p");
3238 staticpro (&Qsyntax_table_p);
3239
3240 staticpro (&Vsyntax_code_object);
3241
3242 Qscan_error = intern ("scan-error");
3243 staticpro (&Qscan_error);
3244 Fput (Qscan_error, Qerror_conditions,
3245 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
3246 Fput (Qscan_error, Qerror_message,
3247 build_string ("Scan error"));
3248
3249 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3250 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3251
3252 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3253 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3254 Otherwise, that text property is simply ignored.
3255 See the info node `(elisp)Syntax Properties' for a description of the
3256 `syntax-table' property. */);
3257
3258 words_include_escapes = 0;
3259 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3260 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3261
3262 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3263 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3264 multibyte_syntax_as_symbol = 0;
3265
3266 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3267 &open_paren_in_column_0_is_defun_start,
3268 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
3269 open_paren_in_column_0_is_defun_start = 1;
3270
3271
3272 DEFVAR_LISP ("find-word-boundary-function-table",
3273 &Vfind_word_boundary_function_table,
3274 doc: /*
3275 Char table of functions to search for the word boundary.
3276 Each function is called with two arguments; POS and LIMIT.
3277 POS and LIMIT are character positions in the current buffer.
3278
3279 If POS is less than LIMIT, POS is at the first character of a word,
3280 and the return value of a function is a position after the last
3281 character of that word.
3282
3283 If POS is not less than LIMIT, POS is at the last character of a word,
3284 and the return value of a function is a position at the first
3285 character of that word.
3286
3287 In both cases, LIMIT bounds the search. */);
3288 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3289
3290 defsubr (&Ssyntax_table_p);
3291 defsubr (&Ssyntax_table);
3292 defsubr (&Sstandard_syntax_table);
3293 defsubr (&Scopy_syntax_table);
3294 defsubr (&Sset_syntax_table);
3295 defsubr (&Schar_syntax);
3296 defsubr (&Smatching_paren);
3297 defsubr (&Sstring_to_syntax);
3298 defsubr (&Smodify_syntax_entry);
3299 defsubr (&Sinternal_describe_syntax_value);
3300
3301 defsubr (&Sforward_word);
3302
3303 defsubr (&Sskip_chars_forward);
3304 defsubr (&Sskip_chars_backward);
3305 defsubr (&Sskip_syntax_forward);
3306 defsubr (&Sskip_syntax_backward);
3307
3308 defsubr (&Sforward_comment);
3309 defsubr (&Sscan_lists);
3310 defsubr (&Sscan_sexps);
3311 defsubr (&Sbackward_prefix_chars);
3312 defsubr (&Sparse_partial_sexp);
3313 }
3314
3315 /* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3316 (do not change this comment) */