*** empty log message ***
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
47
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60 int open_paren_in_column_0_is_defun_start;
61
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
63
64 struct lisp_parse_state
65 {
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
78 };
79 \f
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
94
95
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
104 \f
105
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
116
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
124
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
129 {
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
157 }
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
170 {
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (oldi->position != INTERVAL_LAST_POS (i))
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
181 }
182 }
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
184 {
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (i->position != INTERVAL_LAST_POS (oldi))
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
223 }
224 }
225
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
277 }
278 else
279 {
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
282 }
283 }
284 }
285 \f
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
289
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
293 {
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
300
301 while (bytepos >= beg)
302 {
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
310 }
311
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
314 }
315
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
322 {
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
326 INC_POS (bytepos);
327 return bytepos;
328 }
329
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
336 {
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
340 DEC_POS (bytepos);
341 return bytepos;
342 }
343 \f
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
347
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
351
352 static int
353 find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
355 {
356 int opoint = PT, opoint_byte = PT_BYTE;
357
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
368
369 /* Back up to start of line. */
370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
371
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
377 if (open_paren_in_column_0_is_defun_start)
378 {
379 while (PT > BEGV)
380 {
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
384 {
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
391 }
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
394 }
395 }
396
397 /* Record what we found, for the next try. */
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
404
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
406
407 return find_start_value;
408 }
409 \f
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
411
412 static int
413 prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
415 {
416 int c, val;
417
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
424 }
425
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
427
428 /* static int
429 * prev_char_comstart_first (pos, pos_byte)
430 * int pos, pos_byte;
431 * {
432 * int c, val;
433 *
434 * DEC_BOTH (pos, pos_byte);
435 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 * c = FETCH_CHAR (pos_byte);
437 * val = SYNTAX_COMSTART_FIRST (c);
438 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 * return val;
440 * } */
441
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
445
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
448
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
451
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
454
455 static int
456 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
457 int from, from_byte, stop;
458 int comnested, comstyle;
459 int *charpos_ptr, *bytepos_ptr;
460 {
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
466
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int string_style = -1; /* Presumed outside of any string. */
471 int string_lossage = 0;
472 /* Not a real lossage: indicates that we have passed a matching comment
473 starter plus an non-matching comment-ender, meaning that any matching
474 comment-starter we might see later could be a false positive (hidden
475 inside another comment).
476 Test case: { a (* b } c (* d *) */
477 int comment_lossage = 0;
478 int comment_end = from;
479 int comment_end_byte = from_byte;
480 int comstart_pos = 0;
481 int comstart_byte;
482 /* Place where the containing defun starts,
483 or 0 if we didn't come across it yet. */
484 int defun_start = 0;
485 int defun_start_byte = 0;
486 register enum syntaxcode code;
487 int nesting = 1; /* current comment nesting */
488 int c;
489 int syntax = 0;
490
491 /* FIXME: A }} comment-ender style leads to incorrect behavior
492 in the case of {{ c }}} because we ignore the last two chars which are
493 assumed to be comment-enders although they aren't. */
494
495 /* At beginning of range to scan, we're outside of strings;
496 that determines quote parity to the comment-end. */
497 while (from != stop)
498 {
499 int temp_byte, prev_syntax;
500 int com2start, com2end;
501
502 /* Move back and examine a character. */
503 DEC_BOTH (from, from_byte);
504 UPDATE_SYNTAX_TABLE_BACKWARD (from);
505
506 prev_syntax = syntax;
507 c = FETCH_CHAR (from_byte);
508 syntax = SYNTAX_WITH_FLAGS (c);
509 code = SYNTAX (c);
510
511 /* Check for 2-char comment markers. */
512 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
513 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
514 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
515 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
516 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
517 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
518 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
519
520 /* Nasty cases with overlapping 2-char comment markers:
521 - snmp-mode: -- c -- foo -- c --
522 --- c --
523 ------ c --
524 - c-mode: *||*
525 |* *|* *|
526 |*| |* |*|
527 /// */
528
529 /* If a 2-char comment sequence partly overlaps with another,
530 we don't try to be clever. */
531 if (from > stop && (com2end || com2start))
532 {
533 int next = from, next_byte = from_byte, next_c, next_syntax;
534 DEC_BOTH (next, next_byte);
535 UPDATE_SYNTAX_TABLE_BACKWARD (next);
536 next_c = FETCH_CHAR (next_byte);
537 next_syntax = SYNTAX_WITH_FLAGS (next_c);
538 if (((com2start || comnested)
539 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
540 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
541 || ((com2end || comnested)
542 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
543 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
544 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
545 goto lossage;
546 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
547 }
548
549 if (com2start && comstart_pos == 0)
550 /* We're looking at a comment starter. But it might be a comment
551 ender as well (see snmp-mode). The first time we see one, we
552 need to consider it as a comment starter,
553 and the subsequent times as a comment ender. */
554 com2end = 0;
555
556 /* Turn a 2-char comment sequences into the appropriate syntax. */
557 if (com2end)
558 code = Sendcomment;
559 else if (com2start)
560 code = Scomment;
561 /* Ignore comment starters of a different style. */
562 else if (code == Scomment
563 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
564 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
565 continue;
566
567 /* Ignore escaped characters, except comment-enders. */
568 if (code != Sendcomment && char_quoted (from, from_byte))
569 continue;
570
571 switch (code)
572 {
573 case Sstring_fence:
574 case Scomment_fence:
575 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
576 case Sstring:
577 /* Track parity of quotes. */
578 if (string_style == -1)
579 /* Entering a string. */
580 string_style = c;
581 else if (string_style == c)
582 /* Leaving the string. */
583 string_style = -1;
584 else
585 /* If we have two kinds of string delimiters.
586 There's no way to grok this scanning backwards. */
587 string_lossage = 1;
588 break;
589
590 case Scomment:
591 /* We've already checked that it is the relevant comstyle. */
592 if (string_style != -1 || comment_lossage || string_lossage)
593 /* There are odd string quotes involved, so let's be careful.
594 Test case in Pascal: " { " a { " } */
595 goto lossage;
596
597 if (!comnested)
598 {
599 /* Record best comment-starter so far. */
600 comstart_pos = from;
601 comstart_byte = from_byte;
602 }
603 else if (--nesting <= 0)
604 /* nested comments have to be balanced, so we don't need to
605 keep looking for earlier ones. We use here the same (slightly
606 incorrect) reasoning as below: since it is followed by uniform
607 paired string quotes, this comment-start has to be outside of
608 strings, else the comment-end itself would be inside a string. */
609 goto done;
610 break;
611
612 case Sendcomment:
613 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
614 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
615 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
616 /* This is the same style of comment ender as ours. */
617 {
618 if (comnested)
619 nesting++;
620 else
621 /* Anything before that can't count because it would match
622 this comment-ender rather than ours. */
623 from = stop; /* Break out of the loop. */
624 }
625 else if (comstart_pos != 0 || c != '\n')
626 /* We're mixing comment styles here, so we'd better be careful.
627 The (comstart_pos != 0 || c != '\n') check is not quite correct
628 (we should just always set comment_lossage), but removing it
629 would imply that any multiline comment in C would go through
630 lossage, which seems overkill.
631 The failure should only happen in the rare cases such as
632 { (* } *) */
633 comment_lossage = 1;
634 break;
635
636 case Sopen:
637 /* Assume a defun-start point is outside of strings. */
638 if (open_paren_in_column_0_is_defun_start
639 && (from == stop
640 || (temp_byte = dec_bytepos (from_byte),
641 FETCH_CHAR (temp_byte) == '\n')))
642 {
643 defun_start = from;
644 defun_start_byte = from_byte;
645 from = stop; /* Break out of the loop. */
646 }
647 break;
648
649 default:
650 break;
651 }
652 }
653
654 if (comstart_pos == 0)
655 {
656 from = comment_end;
657 from_byte = comment_end_byte;
658 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
659 }
660 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
661 or `done'), then we've found the beginning of the non-nested comment. */
662 else if (1) /* !comnested */
663 {
664 from = comstart_pos;
665 from_byte = comstart_byte;
666 /* Globals are correct now. */
667 }
668 else
669 {
670 struct lisp_parse_state state;
671 lossage:
672 /* We had two kinds of string delimiters mixed up
673 together. Decode this going forwards.
674 Scan fwd from a known safe place (beginning-of-defun)
675 to the one in question; this records where we
676 last passed a comment starter. */
677 /* If we did not already find the defun start, find it now. */
678 if (defun_start == 0)
679 {
680 defun_start = find_defun_start (comment_end, comment_end_byte);
681 defun_start_byte = find_start_value_byte;
682 }
683 do
684 {
685 scan_sexps_forward (&state,
686 defun_start, defun_start_byte,
687 comment_end, -10000, 0, Qnil, 0);
688 defun_start = comment_end;
689 if (state.incomment == (comnested ? 1 : -1)
690 && state.comstyle == comstyle)
691 from = state.comstr_start;
692 else
693 {
694 from = comment_end;
695 if (state.incomment)
696 /* If comment_end is inside some other comment, maybe ours
697 is nested, so we need to try again from within the
698 surrounding comment. Example: { a (* " *) */
699 {
700 /* FIXME: We should advance by one or two chars. */
701 defun_start = state.comstr_start + 2;
702 defun_start_byte = CHAR_TO_BYTE (defun_start);
703 }
704 }
705 } while (defun_start < comment_end);
706
707 from_byte = CHAR_TO_BYTE (from);
708 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
709 }
710
711 done:
712 *charpos_ptr = from;
713 *bytepos_ptr = from_byte;
714
715 return (from == comment_end) ? -1 : from;
716 }
717 \f
718 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
719 "Return t if OBJECT is a syntax table.\n\
720 Currently, any char-table counts as a syntax table.")
721 (object)
722 Lisp_Object object;
723 {
724 if (CHAR_TABLE_P (object)
725 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
726 return Qt;
727 return Qnil;
728 }
729
730 static void
731 check_syntax_table (obj)
732 Lisp_Object obj;
733 {
734 if (!(CHAR_TABLE_P (obj)
735 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
736 wrong_type_argument (Qsyntax_table_p, obj);
737 }
738
739 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
740 "Return the current syntax table.\n\
741 This is the one specified by the current buffer.")
742 ()
743 {
744 return current_buffer->syntax_table;
745 }
746
747 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
748 Sstandard_syntax_table, 0, 0, 0,
749 "Return the standard syntax table.\n\
750 This is the one used for new buffers.")
751 ()
752 {
753 return Vstandard_syntax_table;
754 }
755
756 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
757 "Construct a new syntax table and return it.\n\
758 It is a copy of the TABLE, which defaults to the standard syntax table.")
759 (table)
760 Lisp_Object table;
761 {
762 Lisp_Object copy;
763
764 if (!NILP (table))
765 check_syntax_table (table);
766 else
767 table = Vstandard_syntax_table;
768
769 copy = Fcopy_sequence (table);
770
771 /* Only the standard syntax table should have a default element.
772 Other syntax tables should inherit from parents instead. */
773 XCHAR_TABLE (copy)->defalt = Qnil;
774
775 /* Copied syntax tables should all have parents.
776 If we copied one with no parent, such as the standard syntax table,
777 use the standard syntax table as the copy's parent. */
778 if (NILP (XCHAR_TABLE (copy)->parent))
779 Fset_char_table_parent (copy, Vstandard_syntax_table);
780 return copy;
781 }
782
783 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
784 "Select a new syntax table for the current buffer.\n\
785 One argument, a syntax table.")
786 (table)
787 Lisp_Object table;
788 {
789 int idx;
790 check_syntax_table (table);
791 current_buffer->syntax_table = table;
792 /* Indicate that this buffer now has a specified syntax table. */
793 idx = PER_BUFFER_VAR_IDX (syntax_table);
794 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
795 return table;
796 }
797 \f
798 /* Convert a letter which signifies a syntax code
799 into the code it signifies.
800 This is used by modify-syntax-entry, and other things. */
801
802 unsigned char syntax_spec_code[0400] =
803 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
804 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
805 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
806 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
807 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
808 (char) Smath, 0377, 0377, (char) Squote,
809 (char) Sopen, (char) Sclose, 0377, 0377,
810 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
811 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
812 0377, 0377, 0377, 0377,
813 (char) Scomment, 0377, (char) Sendcomment, 0377,
814 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
815 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
817 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
819 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
820 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
821 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
822 };
823
824 /* Indexed by syntax code, give the letter that describes it. */
825
826 char syntax_code_spec[16] =
827 {
828 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
829 '!', '|'
830 };
831
832 /* Indexed by syntax code, give the object (cons of syntax code and
833 nil) to be stored in syntax table. Since these objects can be
834 shared among syntax tables, we generate them in advance. By
835 sharing objects, the function `describe-syntax' can give a more
836 compact listing. */
837 static Lisp_Object Vsyntax_code_object;
838
839 \f
840 /* Look up the value for CHARACTER in syntax table TABLE's parent
841 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
842 for CHARACTER. It's actually used only when not compiled with GCC. */
843
844 Lisp_Object
845 syntax_parent_lookup (table, character)
846 Lisp_Object table;
847 int character;
848 {
849 Lisp_Object value;
850
851 while (1)
852 {
853 table = XCHAR_TABLE (table)->parent;
854 if (NILP (table))
855 return Qnil;
856
857 value = XCHAR_TABLE (table)->contents[character];
858 if (!NILP (value))
859 return value;
860 }
861 }
862
863 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
864 "Return the syntax code of CHARACTER, described by a character.\n\
865 For example, if CHARACTER is a word constituent,\n\
866 the character `w' is returned.\n\
867 The characters that correspond to various syntax codes\n\
868 are listed in the documentation of `modify-syntax-entry'.")
869 (character)
870 Lisp_Object character;
871 {
872 int char_int;
873 gl_state.current_syntax_table = current_buffer->syntax_table;
874
875 gl_state.use_global = 0;
876 CHECK_NUMBER (character, 0);
877 char_int = XINT (character);
878 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
879 }
880
881 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
882 "Return the matching parenthesis of CHARACTER, or nil if none.")
883 (character)
884 Lisp_Object character;
885 {
886 int char_int, code;
887 gl_state.current_syntax_table = current_buffer->syntax_table;
888 gl_state.use_global = 0;
889 CHECK_NUMBER (character, 0);
890 char_int = XINT (character);
891 code = SYNTAX (char_int);
892 if (code == Sopen || code == Sclose)
893 return SYNTAX_MATCH (char_int);
894 return Qnil;
895 }
896
897 /* This comment supplies the doc string for modify-syntax-entry,
898 for make-docfile to see. We cannot put this in the real DEFUN
899 due to limits in the Unix cpp.
900
901 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
902 "Set syntax for character CHAR according to string S.\n\
903 The syntax is changed only for table TABLE, which defaults to\n\
904 the current buffer's syntax table.\n\
905 The first character of S should be one of the following:\n\
906 Space or - whitespace syntax. w word constituent.\n\
907 _ symbol constituent. . punctuation.\n\
908 ( open-parenthesis. ) close-parenthesis.\n\
909 \" string quote. \\ escape.\n\
910 $ paired delimiter. ' expression quote or prefix operator.\n\
911 < comment starter. > comment ender.\n\
912 / character-quote. @ inherit from `standard-syntax-table'.\n\
913 \n\
914 Only single-character comment start and end sequences are represented thus.\n\
915 Two-character sequences are represented as described below.\n\
916 The second character of S is the matching parenthesis,\n\
917 used only if the first character is `(' or `)'.\n\
918 Any additional characters are flags.\n\
919 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
920 1 means CHAR is the start of a two-char comment start sequence.\n\
921 2 means CHAR is the second character of such a sequence.\n\
922 3 means CHAR is the start of a two-char comment end sequence.\n\
923 4 means CHAR is the second character of such a sequence.\n\
924 \n\
925 There can be up to two orthogonal comment sequences. This is to support\n\
926 language modes such as C++. By default, all comment sequences are of style\n\
927 a, but you can set the comment sequence style to b (on the second character\n\
928 of a comment-start, or the first character of a comment-end sequence) using\n\
929 this flag:\n\
930 b means CHAR is part of comment sequence b.\n\
931 n means CHAR is part of a nestable comment sequence.\n\
932 \n\
933 p means CHAR is a prefix character for `backward-prefix-chars';\n\
934 such characters are treated as whitespace when they occur\n\
935 between expressions.")
936 (char, s, table)
937 */
938
939 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
940 /* I really don't know why this is interactive
941 help-form should at least be made useful whilst reading the second arg
942 */
943 "cSet syntax for character: \nsSet syntax for %s to: ",
944 0 /* See immediately above */)
945 (c, newentry, syntax_table)
946 Lisp_Object c, newentry, syntax_table;
947 {
948 register unsigned char *p;
949 register enum syntaxcode code;
950 int val;
951 Lisp_Object match;
952
953 CHECK_NUMBER (c, 0);
954 CHECK_STRING (newentry, 1);
955
956 if (NILP (syntax_table))
957 syntax_table = current_buffer->syntax_table;
958 else
959 check_syntax_table (syntax_table);
960
961 p = XSTRING (newentry)->data;
962 code = (enum syntaxcode) syntax_spec_code[*p++];
963 if (((int) code & 0377) == 0377)
964 error ("invalid syntax description letter: %c", p[-1]);
965
966 if (code == Sinherit)
967 {
968 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
969 return Qnil;
970 }
971
972 if (*p)
973 {
974 int len;
975 int character = (STRING_CHAR_AND_LENGTH
976 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
977 XSETINT (match, character);
978 if (XFASTINT (match) == ' ')
979 match = Qnil;
980 p += len;
981 }
982 else
983 match = Qnil;
984
985 val = (int) code;
986 while (*p)
987 switch (*p++)
988 {
989 case '1':
990 val |= 1 << 16;
991 break;
992
993 case '2':
994 val |= 1 << 17;
995 break;
996
997 case '3':
998 val |= 1 << 18;
999 break;
1000
1001 case '4':
1002 val |= 1 << 19;
1003 break;
1004
1005 case 'p':
1006 val |= 1 << 20;
1007 break;
1008
1009 case 'b':
1010 val |= 1 << 21;
1011 break;
1012
1013 case 'n':
1014 val |= 1 << 22;
1015 break;
1016 }
1017
1018 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
1019 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
1020 else
1021 /* Since we can't use a shared object, let's make a new one. */
1022 newentry = Fcons (make_number (val), match);
1023
1024 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1025
1026 return Qnil;
1027 }
1028 \f
1029 /* Dump syntax table to buffer in human-readable format */
1030
1031 static void
1032 describe_syntax (value)
1033 Lisp_Object value;
1034 {
1035 register enum syntaxcode code;
1036 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1037 char str[2];
1038 Lisp_Object first, match_lisp;
1039
1040 Findent_to (make_number (16), make_number (1));
1041
1042 if (NILP (value))
1043 {
1044 insert_string ("default\n");
1045 return;
1046 }
1047
1048 if (CHAR_TABLE_P (value))
1049 {
1050 insert_string ("deeper char-table ...\n");
1051 return;
1052 }
1053
1054 if (!CONSP (value))
1055 {
1056 insert_string ("invalid\n");
1057 return;
1058 }
1059
1060 first = XCAR (value);
1061 match_lisp = XCDR (value);
1062
1063 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1064 {
1065 insert_string ("invalid\n");
1066 return;
1067 }
1068
1069 code = (enum syntaxcode) (XINT (first) & 0377);
1070 start1 = (XINT (first) >> 16) & 1;
1071 start2 = (XINT (first) >> 17) & 1;
1072 end1 = (XINT (first) >> 18) & 1;
1073 end2 = (XINT (first) >> 19) & 1;
1074 prefix = (XINT (first) >> 20) & 1;
1075 comstyle = (XINT (first) >> 21) & 1;
1076 comnested = (XINT (first) >> 22) & 1;
1077
1078 if ((int) code < 0 || (int) code >= (int) Smax)
1079 {
1080 insert_string ("invalid");
1081 return;
1082 }
1083 desc = syntax_code_spec[(int) code];
1084
1085 str[0] = desc, str[1] = 0;
1086 insert (str, 1);
1087
1088 if (NILP (match_lisp))
1089 insert (" ", 1);
1090 else
1091 insert_char (XINT (match_lisp));
1092
1093 if (start1)
1094 insert ("1", 1);
1095 if (start2)
1096 insert ("2", 1);
1097
1098 if (end1)
1099 insert ("3", 1);
1100 if (end2)
1101 insert ("4", 1);
1102
1103 if (prefix)
1104 insert ("p", 1);
1105 if (comstyle)
1106 insert ("b", 1);
1107 if (comnested)
1108 insert ("n", 1);
1109
1110 insert_string ("\twhich means: ");
1111
1112 switch (SWITCH_ENUM_CAST (code))
1113 {
1114 case Swhitespace:
1115 insert_string ("whitespace"); break;
1116 case Spunct:
1117 insert_string ("punctuation"); break;
1118 case Sword:
1119 insert_string ("word"); break;
1120 case Ssymbol:
1121 insert_string ("symbol"); break;
1122 case Sopen:
1123 insert_string ("open"); break;
1124 case Sclose:
1125 insert_string ("close"); break;
1126 case Squote:
1127 insert_string ("quote"); break;
1128 case Sstring:
1129 insert_string ("string"); break;
1130 case Smath:
1131 insert_string ("math"); break;
1132 case Sescape:
1133 insert_string ("escape"); break;
1134 case Scharquote:
1135 insert_string ("charquote"); break;
1136 case Scomment:
1137 insert_string ("comment"); break;
1138 case Sendcomment:
1139 insert_string ("endcomment"); break;
1140 default:
1141 insert_string ("invalid");
1142 return;
1143 }
1144
1145 if (!NILP (match_lisp))
1146 {
1147 insert_string (", matches ");
1148 insert_char (XINT (match_lisp));
1149 }
1150
1151 if (start1)
1152 insert_string (",\n\t is the first character of a comment-start sequence");
1153 if (start2)
1154 insert_string (",\n\t is the second character of a comment-start sequence");
1155
1156 if (end1)
1157 insert_string (",\n\t is the first character of a comment-end sequence");
1158 if (end2)
1159 insert_string (",\n\t is the second character of a comment-end sequence");
1160 if (comstyle)
1161 insert_string (" (comment style b)");
1162 if (comnested)
1163 insert_string (" (nestable)");
1164
1165 if (prefix)
1166 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1167
1168 insert_string ("\n");
1169 }
1170
1171 static Lisp_Object
1172 describe_syntax_1 (vector)
1173 Lisp_Object vector;
1174 {
1175 struct buffer *old = current_buffer;
1176 set_buffer_internal (XBUFFER (Vstandard_output));
1177 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1178 while (! NILP (XCHAR_TABLE (vector)->parent))
1179 {
1180 vector = XCHAR_TABLE (vector)->parent;
1181 insert_string ("\nThe parent syntax table is:");
1182 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1183 (int *) 0, 0);
1184 }
1185
1186 call0 (intern ("help-mode"));
1187 set_buffer_internal (old);
1188 return Qnil;
1189 }
1190
1191 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1192 "Describe the syntax specifications in the syntax table.\n\
1193 The descriptions are inserted in a buffer, which is then displayed.")
1194 ()
1195 {
1196 internal_with_output_to_temp_buffer
1197 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1198
1199 return Qnil;
1200 }
1201 \f
1202 int parse_sexp_ignore_comments;
1203
1204 /* Return the position across COUNT words from FROM.
1205 If that many words cannot be found before the end of the buffer, return 0.
1206 COUNT negative means scan backward and stop at word beginning. */
1207
1208 int
1209 scan_words (from, count)
1210 register int from, count;
1211 {
1212 register int beg = BEGV;
1213 register int end = ZV;
1214 register int from_byte = CHAR_TO_BYTE (from);
1215 register enum syntaxcode code;
1216 int ch0, ch1;
1217
1218 immediate_quit = 1;
1219 QUIT;
1220
1221 SETUP_SYNTAX_TABLE (from, count);
1222
1223 while (count > 0)
1224 {
1225 while (1)
1226 {
1227 if (from == end)
1228 {
1229 immediate_quit = 0;
1230 return 0;
1231 }
1232 UPDATE_SYNTAX_TABLE_FORWARD (from);
1233 ch0 = FETCH_CHAR (from_byte);
1234 code = SYNTAX (ch0);
1235 INC_BOTH (from, from_byte);
1236 if (words_include_escapes
1237 && (code == Sescape || code == Scharquote))
1238 break;
1239 if (code == Sword)
1240 break;
1241 }
1242 /* Now CH0 is a character which begins a word and FROM is the
1243 position of the next character. */
1244 while (1)
1245 {
1246 if (from == end) break;
1247 UPDATE_SYNTAX_TABLE_FORWARD (from);
1248 ch1 = FETCH_CHAR (from_byte);
1249 code = SYNTAX (ch1);
1250 if (!(words_include_escapes
1251 && (code == Sescape || code == Scharquote)))
1252 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1253 break;
1254 INC_BOTH (from, from_byte);
1255 ch0 = ch1;
1256 }
1257 count--;
1258 }
1259 while (count < 0)
1260 {
1261 while (1)
1262 {
1263 if (from == beg)
1264 {
1265 immediate_quit = 0;
1266 return 0;
1267 }
1268 DEC_BOTH (from, from_byte);
1269 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1270 ch1 = FETCH_CHAR (from_byte);
1271 code = SYNTAX (ch1);
1272 if (words_include_escapes
1273 && (code == Sescape || code == Scharquote))
1274 break;
1275 if (code == Sword)
1276 break;
1277 }
1278 /* Now CH1 is a character which ends a word and FROM is the
1279 position of it. */
1280 while (1)
1281 {
1282 int temp_byte;
1283
1284 if (from == beg)
1285 break;
1286 temp_byte = dec_bytepos (from_byte);
1287 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1288 ch0 = FETCH_CHAR (temp_byte);
1289 code = SYNTAX (ch0);
1290 if (!(words_include_escapes
1291 && (code == Sescape || code == Scharquote)))
1292 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1293 break;
1294 DEC_BOTH (from, from_byte);
1295 ch1 = ch0;
1296 }
1297 count++;
1298 }
1299
1300 immediate_quit = 0;
1301
1302 return from;
1303 }
1304
1305 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1306 "Move point forward ARG words (backward if ARG is negative).\n\
1307 Normally returns t.\n\
1308 If an edge of the buffer or a field boundary is reached, point is left there\n\
1309 and the function returns nil. Field boundaries are not noticed if\n\
1310 `inhibit-field-text-motion' is non-nil.")
1311 (count)
1312 Lisp_Object count;
1313 {
1314 int orig_val, val;
1315 CHECK_NUMBER (count, 0);
1316
1317 val = orig_val = scan_words (PT, XINT (count));
1318 if (! orig_val)
1319 val = XINT (count) > 0 ? ZV : BEGV;
1320
1321 /* Avoid jumping out of an input field. */
1322 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1323 Qt, Qnil, Qnil));
1324
1325 SET_PT (val);
1326 return val == orig_val ? Qt : Qnil;
1327 }
1328 \f
1329 Lisp_Object skip_chars ();
1330
1331 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1332 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1333 STRING is like the inside of a `[...]' in a regular expression\n\
1334 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1335 (but not as the end of a range; quoting is never needed there).\n\
1336 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1337 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1338 Returns the distance traveled, either zero or positive.")
1339 (string, lim)
1340 Lisp_Object string, lim;
1341 {
1342 return skip_chars (1, 0, string, lim);
1343 }
1344
1345 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1346 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1347 See `skip-chars-forward' for details.\n\
1348 Returns the distance traveled, either zero or negative.")
1349 (string, lim)
1350 Lisp_Object string, lim;
1351 {
1352 return skip_chars (0, 0, string, lim);
1353 }
1354
1355 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1356 "Move point forward across chars in specified syntax classes.\n\
1357 SYNTAX is a string of syntax code characters.\n\
1358 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1359 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1360 This function returns the distance traveled, either zero or positive.")
1361 (syntax, lim)
1362 Lisp_Object syntax, lim;
1363 {
1364 return skip_chars (1, 1, syntax, lim);
1365 }
1366
1367 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1368 "Move point backward across chars in specified syntax classes.\n\
1369 SYNTAX is a string of syntax code characters.\n\
1370 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1371 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1372 This function returns the distance traveled, either zero or negative.")
1373 (syntax, lim)
1374 Lisp_Object syntax, lim;
1375 {
1376 return skip_chars (0, 1, syntax, lim);
1377 }
1378
1379 static Lisp_Object
1380 skip_chars (forwardp, syntaxp, string, lim)
1381 int forwardp, syntaxp;
1382 Lisp_Object string, lim;
1383 {
1384 register unsigned int c;
1385 register int ch;
1386 unsigned char fastmap[0400];
1387 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1388 of which codes don't fit in FASTMAP. In that case, set the
1389 ranges of characters in CHAR_RANGES. */
1390 int *char_ranges;
1391 int n_char_ranges = 0;
1392 int negate = 0;
1393 register int i, i_byte;
1394 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1395 int string_multibyte;
1396 int size_byte;
1397 unsigned char *str;
1398 int len;
1399
1400 CHECK_STRING (string, 0);
1401 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1402 string_multibyte = STRING_MULTIBYTE (string);
1403 str = XSTRING (string)->data;
1404 size_byte = STRING_BYTES (XSTRING (string));
1405
1406 /* Adjust the multibyteness of the string to that of the buffer. */
1407 if (multibyte != string_multibyte)
1408 {
1409 int nbytes;
1410
1411 if (multibyte)
1412 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1413 XSTRING (string)->size);
1414 else
1415 nbytes = XSTRING (string)->size;
1416 if (nbytes != size_byte)
1417 {
1418 str = (unsigned char *) alloca (nbytes);
1419 copy_text (XSTRING (string)->data, str, size_byte,
1420 string_multibyte, multibyte);
1421 size_byte = nbytes;
1422 }
1423 }
1424
1425 if (NILP (lim))
1426 XSETINT (lim, forwardp ? ZV : BEGV);
1427 else
1428 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1429
1430 /* In any case, don't allow scan outside bounds of buffer. */
1431 if (XINT (lim) > ZV)
1432 XSETFASTINT (lim, ZV);
1433 if (XINT (lim) < BEGV)
1434 XSETFASTINT (lim, BEGV);
1435
1436 bzero (fastmap, sizeof fastmap);
1437
1438 i_byte = 0;
1439
1440 if (i_byte < size_byte
1441 && XSTRING (string)->data[0] == '^')
1442 {
1443 negate = 1; i_byte++;
1444 }
1445
1446 /* Find the characters specified and set their elements of fastmap.
1447 If syntaxp, each character counts as itself.
1448 Otherwise, handle backslashes and ranges specially. */
1449
1450 while (i_byte < size_byte)
1451 {
1452 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1453 i_byte += len;
1454
1455 if (syntaxp)
1456 fastmap[syntax_spec_code[c & 0377]] = 1;
1457 else
1458 {
1459 if (c == '\\')
1460 {
1461 if (i_byte == size_byte)
1462 break;
1463
1464 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1465 i_byte += len;
1466 }
1467 if (i_byte < size_byte
1468 && str[i_byte] == '-')
1469 {
1470 unsigned int c2;
1471
1472 /* Skip over the dash. */
1473 i_byte++;
1474
1475 if (i_byte == size_byte)
1476 break;
1477
1478 /* Get the end of the range. */
1479 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1480 i_byte += len;
1481
1482 if (SINGLE_BYTE_CHAR_P (c))
1483 {
1484 if (! SINGLE_BYTE_CHAR_P (c2))
1485 {
1486 /* Handle a range such as \177-\377 in multibyte
1487 mode. Split that into two ranges, the low
1488 one ending at 0237, and the high one starting
1489 at the smallest character in the charset of
1490 C2 and ending at C2. */
1491 int charset = CHAR_CHARSET (c2);
1492 int c1 = MAKE_CHAR (charset, 0, 0);
1493
1494 char_ranges[n_char_ranges++] = c1;
1495 char_ranges[n_char_ranges++] = c2;
1496 c2 = 0237;
1497 }
1498 while (c <= c2)
1499 {
1500 fastmap[c] = 1;
1501 c++;
1502 }
1503 }
1504 else if (c <= c2) /* Both C and C2 are multibyte char. */
1505 {
1506 char_ranges[n_char_ranges++] = c;
1507 char_ranges[n_char_ranges++] = c2;
1508 }
1509 }
1510 else
1511 {
1512 if (SINGLE_BYTE_CHAR_P (c))
1513 fastmap[c] = 1;
1514 else
1515 {
1516 char_ranges[n_char_ranges++] = c;
1517 char_ranges[n_char_ranges++] = c;
1518 }
1519 }
1520 }
1521 }
1522
1523 /* If ^ was the first character, complement the fastmap. */
1524 if (negate)
1525 for (i = 0; i < sizeof fastmap; i++)
1526 fastmap[i] ^= 1;
1527
1528 {
1529 int start_point = PT;
1530 int pos = PT;
1531 int pos_byte = PT_BYTE;
1532
1533 immediate_quit = 1;
1534 if (syntaxp)
1535 {
1536 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1537 if (forwardp)
1538 {
1539 if (multibyte)
1540 {
1541 if (pos < XINT (lim))
1542 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1543 {
1544 /* Since we already checked for multibyteness,
1545 avoid using INC_BOTH which checks again. */
1546 INC_POS (pos_byte);
1547 pos++;
1548 if (pos >= XINT (lim))
1549 break;
1550 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1551 }
1552 }
1553 else
1554 {
1555 while (pos < XINT (lim)
1556 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1557 {
1558 pos++;
1559 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1560 }
1561 }
1562 }
1563 else
1564 {
1565 if (multibyte)
1566 {
1567 while (pos > XINT (lim))
1568 {
1569 int savepos = pos_byte;
1570 /* Since we already checked for multibyteness,
1571 avoid using DEC_BOTH which checks again. */
1572 pos--;
1573 DEC_POS (pos_byte);
1574 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1575 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1576 {
1577 pos++;
1578 pos_byte = savepos;
1579 break;
1580 }
1581 }
1582 }
1583 else
1584 {
1585 if (pos > XINT (lim))
1586 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1587 {
1588 pos--;
1589 if (pos <= XINT (lim))
1590 break;
1591 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1592 }
1593 }
1594 }
1595 }
1596 else
1597 {
1598 if (forwardp)
1599 {
1600 if (multibyte)
1601 while (pos < XINT (lim))
1602 {
1603 c = FETCH_MULTIBYTE_CHAR (pos_byte);
1604 if (SINGLE_BYTE_CHAR_P (c))
1605 {
1606 if (!fastmap[c])
1607 break;
1608 }
1609 else
1610 {
1611 /* If we are looking at a multibyte character,
1612 we must look up the character in the table
1613 CHAR_RANGES. If there's no data in the
1614 table, that character is not what we want to
1615 skip. */
1616
1617 /* The following code do the right thing even if
1618 n_char_ranges is zero (i.e. no data in
1619 CHAR_RANGES). */
1620 for (i = 0; i < n_char_ranges; i += 2)
1621 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1622 break;
1623 if (!(negate ^ (i < n_char_ranges)))
1624 break;
1625 }
1626 INC_BOTH (pos, pos_byte);
1627 }
1628 else
1629 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1630 pos++;
1631 }
1632 else
1633 {
1634 if (multibyte)
1635 while (pos > XINT (lim))
1636 {
1637 int prev_pos_byte = pos_byte;
1638
1639 DEC_POS (prev_pos_byte);
1640 c = FETCH_MULTIBYTE_CHAR (prev_pos_byte);
1641 if (SINGLE_BYTE_CHAR_P (c))
1642 {
1643 if (!fastmap[c])
1644 break;
1645 }
1646 else
1647 {
1648 /* See the comment in the previous similar code. */
1649 for (i = 0; i < n_char_ranges; i += 2)
1650 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1651 break;
1652 if (!(negate ^ (i < n_char_ranges)))
1653 break;
1654 }
1655 pos--;
1656 pos_byte = prev_pos_byte;
1657 }
1658 else
1659 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1660 pos--;
1661 }
1662 }
1663
1664 #if 0 /* Not needed now that a position in mid-character
1665 cannot be specified in Lisp. */
1666 if (multibyte
1667 /* INC_POS or DEC_POS might have moved POS over LIM. */
1668 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1669 pos = XINT (lim);
1670 #endif
1671
1672 if (! multibyte)
1673 pos_byte = pos;
1674
1675 SET_PT_BOTH (pos, pos_byte);
1676 immediate_quit = 0;
1677
1678 return make_number (PT - start_point);
1679 }
1680 }
1681 \f
1682 /* Jump over a comment, assuming we are at the beginning of one.
1683 FROM is the current position.
1684 FROM_BYTE is the bytepos corresponding to FROM.
1685 Do not move past STOP (a charpos).
1686 The comment over which we have to jump is of style STYLE
1687 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1688 NESTING should be positive to indicate the nesting at the beginning
1689 for nested comments and should be zero or negative else.
1690 ST_COMMENT_STYLE cannot be nested.
1691 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1692 (or 0 If the search cannot start in the middle of a two-character).
1693
1694 If successful, return 1 and store the charpos of the comment's end
1695 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1696 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1697 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1698 (as defined for state.incomment) in *INCOMMENT_PTR.
1699
1700 The comment end is the last character of the comment rather than the
1701 character just after the comment.
1702
1703 Global syntax data is assumed to initially be valid for FROM and
1704 remains valid for forward search starting at the returned position. */
1705
1706 static int
1707 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1708 charpos_ptr, bytepos_ptr, incomment_ptr)
1709 int from, from_byte, stop;
1710 int nesting, style, prev_syntax;
1711 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1712 {
1713 register int c, c1;
1714 register enum syntaxcode code;
1715 register int syntax;
1716
1717 if (nesting <= 0) nesting = -1;
1718
1719 /* Enter the loop in the middle so that we find
1720 a 2-char comment ender if we start in the middle of it. */
1721 syntax = prev_syntax;
1722 if (syntax != 0) goto forw_incomment;
1723
1724 while (1)
1725 {
1726 if (from == stop)
1727 {
1728 *incomment_ptr = nesting;
1729 *charpos_ptr = from;
1730 *bytepos_ptr = from_byte;
1731 return 0;
1732 }
1733 c = FETCH_CHAR (from_byte);
1734 syntax = SYNTAX_WITH_FLAGS (c);
1735 code = syntax & 0xff;
1736 if (code == Sendcomment
1737 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1738 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1739 (nesting > 0 && --nesting == 0) : nesting < 0))
1740 /* we have encountered a comment end of the same style
1741 as the comment sequence which began this comment
1742 section */
1743 break;
1744 if (code == Scomment_fence
1745 && style == ST_COMMENT_STYLE)
1746 /* we have encountered a comment end of the same style
1747 as the comment sequence which began this comment
1748 section. */
1749 break;
1750 if (nesting > 0
1751 && code == Scomment
1752 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
1753 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1754 /* we have encountered a nested comment of the same style
1755 as the comment sequence which began this comment section */
1756 nesting++;
1757 INC_BOTH (from, from_byte);
1758 UPDATE_SYNTAX_TABLE_FORWARD (from);
1759
1760 forw_incomment:
1761 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1762 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1763 && (c1 = FETCH_CHAR (from_byte),
1764 SYNTAX_COMEND_SECOND (c1))
1765 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1766 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
1767 {
1768 if (--nesting <= 0)
1769 /* we have encountered a comment end of the same style
1770 as the comment sequence which began this comment
1771 section */
1772 break;
1773 else
1774 {
1775 INC_BOTH (from, from_byte);
1776 UPDATE_SYNTAX_TABLE_FORWARD (from);
1777 }
1778 }
1779 if (nesting > 0
1780 && from < stop
1781 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1782 && (c1 = FETCH_CHAR (from_byte),
1783 SYNTAX_COMMENT_STYLE (c1) == style
1784 && SYNTAX_COMSTART_SECOND (c1))
1785 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1786 SYNTAX_COMMENT_NESTED (c1)))
1787 /* we have encountered a nested comment of the same style
1788 as the comment sequence which began this comment
1789 section */
1790 {
1791 INC_BOTH (from, from_byte);
1792 UPDATE_SYNTAX_TABLE_FORWARD (from);
1793 nesting++;
1794 }
1795 }
1796 *charpos_ptr = from;
1797 *bytepos_ptr = from_byte;
1798 return 1;
1799 }
1800
1801 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1802 "Move forward across up to N comments. If N is negative, move backward.\n\
1803 Stop scanning if we find something other than a comment or whitespace.\n\
1804 Set point to where scanning stops.\n\
1805 If N comments are found as expected, with nothing except whitespace\n\
1806 between them, return t; otherwise return nil.")
1807 (count)
1808 Lisp_Object count;
1809 {
1810 register int from;
1811 int from_byte;
1812 register int stop;
1813 register int c, c1;
1814 register enum syntaxcode code;
1815 int comstyle = 0; /* style of comment encountered */
1816 int comnested = 0; /* whether the comment is nestable or not */
1817 int found;
1818 int count1;
1819 int out_charpos, out_bytepos;
1820 int dummy;
1821
1822 CHECK_NUMBER (count, 0);
1823 count1 = XINT (count);
1824 stop = count1 > 0 ? ZV : BEGV;
1825
1826 immediate_quit = 1;
1827 QUIT;
1828
1829 from = PT;
1830 from_byte = PT_BYTE;
1831
1832 SETUP_SYNTAX_TABLE (from, count1);
1833 while (count1 > 0)
1834 {
1835 do
1836 {
1837 int comstart_first;
1838
1839 if (from == stop)
1840 {
1841 SET_PT_BOTH (from, from_byte);
1842 immediate_quit = 0;
1843 return Qnil;
1844 }
1845 c = FETCH_CHAR (from_byte);
1846 code = SYNTAX (c);
1847 comstart_first = SYNTAX_COMSTART_FIRST (c);
1848 comnested = SYNTAX_COMMENT_NESTED (c);
1849 comstyle = SYNTAX_COMMENT_STYLE (c);
1850 INC_BOTH (from, from_byte);
1851 UPDATE_SYNTAX_TABLE_FORWARD (from);
1852 if (from < stop && comstart_first
1853 && (c1 = FETCH_CHAR (from_byte),
1854 SYNTAX_COMSTART_SECOND (c1)))
1855 {
1856 /* We have encountered a comment start sequence and we
1857 are ignoring all text inside comments. We must record
1858 the comment style this sequence begins so that later,
1859 only a comment end of the same style actually ends
1860 the comment section. */
1861 code = Scomment;
1862 comstyle = SYNTAX_COMMENT_STYLE (c1);
1863 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1864 INC_BOTH (from, from_byte);
1865 UPDATE_SYNTAX_TABLE_FORWARD (from);
1866 }
1867 }
1868 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
1869
1870 if (code == Scomment_fence)
1871 comstyle = ST_COMMENT_STYLE;
1872 else if (code != Scomment)
1873 {
1874 immediate_quit = 0;
1875 DEC_BOTH (from, from_byte);
1876 SET_PT_BOTH (from, from_byte);
1877 return Qnil;
1878 }
1879 /* We're at the start of a comment. */
1880 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1881 &out_charpos, &out_bytepos, &dummy);
1882 from = out_charpos; from_byte = out_bytepos;
1883 if (!found)
1884 {
1885 immediate_quit = 0;
1886 SET_PT_BOTH (from, from_byte);
1887 return Qnil;
1888 }
1889 INC_BOTH (from, from_byte);
1890 UPDATE_SYNTAX_TABLE_FORWARD (from);
1891 /* We have skipped one comment. */
1892 count1--;
1893 }
1894
1895 while (count1 < 0)
1896 {
1897 while (1)
1898 {
1899 int quoted, comstart_second;
1900
1901 if (from <= stop)
1902 {
1903 SET_PT_BOTH (BEGV, BEGV_BYTE);
1904 immediate_quit = 0;
1905 return Qnil;
1906 }
1907
1908 DEC_BOTH (from, from_byte);
1909 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1910 quoted = char_quoted (from, from_byte);
1911 if (quoted)
1912 {
1913 DEC_BOTH (from, from_byte);
1914 goto leave;
1915 }
1916 c = FETCH_CHAR (from_byte);
1917 code = SYNTAX (c);
1918 comstyle = 0;
1919 comnested = SYNTAX_COMMENT_NESTED (c);
1920 if (code == Sendcomment)
1921 comstyle = SYNTAX_COMMENT_STYLE (c);
1922 if (from > stop && SYNTAX_COMEND_SECOND (c)
1923 && prev_char_comend_first (from, from_byte)
1924 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1925 {
1926 /* We must record the comment style encountered so that
1927 later, we can match only the proper comment begin
1928 sequence of the same style. */
1929 DEC_BOTH (from, from_byte);
1930 code = Sendcomment;
1931 /* Calling char_quoted, above, set up global syntax position
1932 at the new value of FROM. */
1933 c1 = FETCH_CHAR (from_byte);
1934 comstyle = SYNTAX_COMMENT_STYLE (c1);
1935 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1936 }
1937
1938 if (code == Scomment_fence)
1939 {
1940 /* Skip until first preceding unquoted comment_fence. */
1941 int found = 0, ini = from, ini_byte = from_byte;
1942
1943 while (1)
1944 {
1945 DEC_BOTH (from, from_byte);
1946 if (from == stop)
1947 break;
1948 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1949 c = FETCH_CHAR (from_byte);
1950 if (SYNTAX (c) == Scomment_fence
1951 && !char_quoted (from, from_byte))
1952 {
1953 found = 1;
1954 break;
1955 }
1956 }
1957 if (found == 0)
1958 {
1959 from = ini; /* Set point to ini + 1. */
1960 from_byte = ini_byte;
1961 goto leave;
1962 }
1963 }
1964 else if (code == Sendcomment)
1965 {
1966 found = back_comment (from, from_byte, stop, comnested, comstyle,
1967 &out_charpos, &out_bytepos);
1968 if (found == -1)
1969 {
1970 if (c == '\n')
1971 /* This end-of-line is not an end-of-comment.
1972 Treat it like a whitespace.
1973 CC-mode (and maybe others) relies on this behavior. */
1974 ;
1975 else
1976 {
1977 /* Failure: we should go back to the end of this
1978 not-quite-endcomment. */
1979 if (SYNTAX(c) != code)
1980 /* It was a two-char Sendcomment. */
1981 INC_BOTH (from, from_byte);
1982 goto leave;
1983 }
1984 }
1985 else
1986 {
1987 /* We have skipped one comment. */
1988 from = out_charpos, from_byte = out_bytepos;
1989 break;
1990 }
1991 }
1992 else if (code != Swhitespace)
1993 {
1994 leave:
1995 immediate_quit = 0;
1996 INC_BOTH (from, from_byte);
1997 SET_PT_BOTH (from, from_byte);
1998 return Qnil;
1999 }
2000 }
2001
2002 count1++;
2003 }
2004
2005 SET_PT_BOTH (from, from_byte);
2006 immediate_quit = 0;
2007 return Qt;
2008 }
2009 \f
2010 /* Return syntax code of character C if C is a single byte character
2011 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
2012
2013 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2014 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2015 ? SYNTAX (c) : Ssymbol)
2016
2017 static Lisp_Object
2018 scan_lists (from, count, depth, sexpflag)
2019 register int from;
2020 int count, depth, sexpflag;
2021 {
2022 Lisp_Object val;
2023 register int stop = count > 0 ? ZV : BEGV;
2024 register int c, c1;
2025 int stringterm;
2026 int quoted;
2027 int mathexit = 0;
2028 register enum syntaxcode code, temp_code;
2029 int min_depth = depth; /* Err out if depth gets less than this. */
2030 int comstyle = 0; /* style of comment encountered */
2031 int comnested = 0; /* whether the comment is nestable or not */
2032 int temp_pos;
2033 int last_good = from;
2034 int found;
2035 int from_byte;
2036 int out_bytepos, out_charpos;
2037 int temp, dummy;
2038 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2039
2040 if (depth > 0) min_depth = 0;
2041
2042 if (from > ZV) from = ZV;
2043 if (from < BEGV) from = BEGV;
2044
2045 from_byte = CHAR_TO_BYTE (from);
2046
2047 immediate_quit = 1;
2048 QUIT;
2049
2050 SETUP_SYNTAX_TABLE (from, count);
2051 while (count > 0)
2052 {
2053 while (from < stop)
2054 {
2055 int comstart_first, prefix;
2056 UPDATE_SYNTAX_TABLE_FORWARD (from);
2057 c = FETCH_CHAR (from_byte);
2058 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2059 comstart_first = SYNTAX_COMSTART_FIRST (c);
2060 comnested = SYNTAX_COMMENT_NESTED (c);
2061 comstyle = SYNTAX_COMMENT_STYLE (c);
2062 prefix = SYNTAX_PREFIX (c);
2063 if (depth == min_depth)
2064 last_good = from;
2065 INC_BOTH (from, from_byte);
2066 UPDATE_SYNTAX_TABLE_FORWARD (from);
2067 if (from < stop && comstart_first
2068 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2069 && parse_sexp_ignore_comments)
2070 {
2071 /* we have encountered a comment start sequence and we
2072 are ignoring all text inside comments. We must record
2073 the comment style this sequence begins so that later,
2074 only a comment end of the same style actually ends
2075 the comment section */
2076 code = Scomment;
2077 c1 = FETCH_CHAR (from_byte);
2078 comstyle = SYNTAX_COMMENT_STYLE (c1);
2079 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2080 INC_BOTH (from, from_byte);
2081 UPDATE_SYNTAX_TABLE_FORWARD (from);
2082 }
2083
2084 if (prefix)
2085 continue;
2086
2087 switch (SWITCH_ENUM_CAST (code))
2088 {
2089 case Sescape:
2090 case Scharquote:
2091 if (from == stop) goto lose;
2092 INC_BOTH (from, from_byte);
2093 /* treat following character as a word constituent */
2094 case Sword:
2095 case Ssymbol:
2096 if (depth || !sexpflag) break;
2097 /* This word counts as a sexp; return at end of it. */
2098 while (from < stop)
2099 {
2100 UPDATE_SYNTAX_TABLE_FORWARD (from);
2101
2102 /* Some compilers can't handle this inside the switch. */
2103 c = FETCH_CHAR (from_byte);
2104 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2105 switch (temp)
2106 {
2107 case Scharquote:
2108 case Sescape:
2109 INC_BOTH (from, from_byte);
2110 if (from == stop) goto lose;
2111 break;
2112 case Sword:
2113 case Ssymbol:
2114 case Squote:
2115 break;
2116 default:
2117 goto done;
2118 }
2119 INC_BOTH (from, from_byte);
2120 }
2121 goto done;
2122
2123 case Scomment_fence:
2124 comstyle = ST_COMMENT_STYLE;
2125 /* FALLTHROUGH */
2126 case Scomment:
2127 if (!parse_sexp_ignore_comments) break;
2128 UPDATE_SYNTAX_TABLE_FORWARD (from);
2129 found = forw_comment (from, from_byte, stop,
2130 comnested, comstyle, 0,
2131 &out_charpos, &out_bytepos, &dummy);
2132 from = out_charpos, from_byte = out_bytepos;
2133 if (!found)
2134 {
2135 if (depth == 0)
2136 goto done;
2137 goto lose;
2138 }
2139 INC_BOTH (from, from_byte);
2140 UPDATE_SYNTAX_TABLE_FORWARD (from);
2141 break;
2142
2143 case Smath:
2144 if (!sexpflag)
2145 break;
2146 if (from != stop && c == FETCH_CHAR (from_byte))
2147 {
2148 INC_BOTH (from, from_byte);
2149 }
2150 if (mathexit)
2151 {
2152 mathexit = 0;
2153 goto close1;
2154 }
2155 mathexit = 1;
2156
2157 case Sopen:
2158 if (!++depth) goto done;
2159 break;
2160
2161 case Sclose:
2162 close1:
2163 if (!--depth) goto done;
2164 if (depth < min_depth)
2165 Fsignal (Qscan_error,
2166 Fcons (build_string ("Containing expression ends prematurely"),
2167 Fcons (make_number (last_good),
2168 Fcons (make_number (from), Qnil))));
2169 break;
2170
2171 case Sstring:
2172 case Sstring_fence:
2173 temp_pos = dec_bytepos (from_byte);
2174 stringterm = FETCH_CHAR (temp_pos);
2175 while (1)
2176 {
2177 if (from >= stop) goto lose;
2178 UPDATE_SYNTAX_TABLE_FORWARD (from);
2179 c = FETCH_CHAR (from_byte);
2180 if (code == Sstring
2181 ? c == stringterm
2182 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2183 break;
2184
2185 /* Some compilers can't handle this inside the switch. */
2186 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2187 switch (temp)
2188 {
2189 case Scharquote:
2190 case Sescape:
2191 INC_BOTH (from, from_byte);
2192 }
2193 INC_BOTH (from, from_byte);
2194 }
2195 INC_BOTH (from, from_byte);
2196 if (!depth && sexpflag) goto done;
2197 break;
2198 }
2199 }
2200
2201 /* Reached end of buffer. Error if within object, return nil if between */
2202 if (depth) goto lose;
2203
2204 immediate_quit = 0;
2205 return Qnil;
2206
2207 /* End of object reached */
2208 done:
2209 count--;
2210 }
2211
2212 while (count < 0)
2213 {
2214 while (from > stop)
2215 {
2216 DEC_BOTH (from, from_byte);
2217 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2218 c = FETCH_CHAR (from_byte);
2219 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2220 if (depth == min_depth)
2221 last_good = from;
2222 comstyle = 0;
2223 comnested = SYNTAX_COMMENT_NESTED (c);
2224 if (code == Sendcomment)
2225 comstyle = SYNTAX_COMMENT_STYLE (c);
2226 if (from > stop && SYNTAX_COMEND_SECOND (c)
2227 && prev_char_comend_first (from, from_byte)
2228 && parse_sexp_ignore_comments)
2229 {
2230 /* We must record the comment style encountered so that
2231 later, we can match only the proper comment begin
2232 sequence of the same style. */
2233 DEC_BOTH (from, from_byte);
2234 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2235 code = Sendcomment;
2236 c1 = FETCH_CHAR (from_byte);
2237 comstyle = SYNTAX_COMMENT_STYLE (c1);
2238 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2239 }
2240
2241 /* Quoting turns anything except a comment-ender
2242 into a word character. Note that this cannot be true
2243 if we decremented FROM in the if-statement above. */
2244 if (code != Sendcomment && char_quoted (from, from_byte))
2245 code = Sword;
2246 else if (SYNTAX_PREFIX (c))
2247 continue;
2248
2249 switch (SWITCH_ENUM_CAST (code))
2250 {
2251 case Sword:
2252 case Ssymbol:
2253 case Sescape:
2254 case Scharquote:
2255 if (depth || !sexpflag) break;
2256 /* This word counts as a sexp; count object finished
2257 after passing it. */
2258 while (from > stop)
2259 {
2260 temp_pos = from_byte;
2261 if (! NILP (current_buffer->enable_multibyte_characters))
2262 DEC_POS (temp_pos);
2263 else
2264 temp_pos--;
2265 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2266 c1 = FETCH_CHAR (temp_pos);
2267 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2268 /* Don't allow comment-end to be quoted. */
2269 if (temp_code == Sendcomment)
2270 goto done2;
2271 quoted = char_quoted (from - 1, temp_pos);
2272 if (quoted)
2273 {
2274 DEC_BOTH (from, from_byte);
2275 temp_pos = dec_bytepos (temp_pos);
2276 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2277 }
2278 c1 = FETCH_CHAR (temp_pos);
2279 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2280 if (! (quoted || temp_code == Sword
2281 || temp_code == Ssymbol
2282 || temp_code == Squote))
2283 goto done2;
2284 DEC_BOTH (from, from_byte);
2285 }
2286 goto done2;
2287
2288 case Smath:
2289 if (!sexpflag)
2290 break;
2291 temp_pos = dec_bytepos (from_byte);
2292 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2293 if (from != stop && c == FETCH_CHAR (temp_pos))
2294 DEC_BOTH (from, from_byte);
2295 if (mathexit)
2296 {
2297 mathexit = 0;
2298 goto open2;
2299 }
2300 mathexit = 1;
2301
2302 case Sclose:
2303 if (!++depth) goto done2;
2304 break;
2305
2306 case Sopen:
2307 open2:
2308 if (!--depth) goto done2;
2309 if (depth < min_depth)
2310 Fsignal (Qscan_error,
2311 Fcons (build_string ("Containing expression ends prematurely"),
2312 Fcons (make_number (last_good),
2313 Fcons (make_number (from), Qnil))));
2314 break;
2315
2316 case Sendcomment:
2317 if (!parse_sexp_ignore_comments)
2318 break;
2319 found = back_comment (from, from_byte, stop, comnested, comstyle,
2320 &out_charpos, &out_bytepos);
2321 /* FIXME: if found == -1, then it really wasn't a comment-end.
2322 For single-char Sendcomment, we can't do much about it apart
2323 from skipping the char.
2324 For 2-char endcomments, we could try again, taking both
2325 chars as separate entities, but it's a lot of trouble
2326 for very little gain, so we don't bother either. -sm */
2327 if (found != -1)
2328 from = out_charpos, from_byte = out_bytepos;
2329 break;
2330
2331 case Scomment_fence:
2332 case Sstring_fence:
2333 while (1)
2334 {
2335 DEC_BOTH (from, from_byte);
2336 if (from == stop) goto lose;
2337 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2338 if (!char_quoted (from, from_byte)
2339 && (c = FETCH_CHAR (from_byte),
2340 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2341 break;
2342 }
2343 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2344 break;
2345
2346 case Sstring:
2347 stringterm = FETCH_CHAR (from_byte);
2348 while (1)
2349 {
2350 if (from == stop) goto lose;
2351 temp_pos = from_byte;
2352 if (! NILP (current_buffer->enable_multibyte_characters))
2353 DEC_POS (temp_pos);
2354 else
2355 temp_pos--;
2356 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2357 if (!char_quoted (from - 1, temp_pos)
2358 && stringterm == FETCH_CHAR (temp_pos))
2359 break;
2360 DEC_BOTH (from, from_byte);
2361 }
2362 DEC_BOTH (from, from_byte);
2363 if (!depth && sexpflag) goto done2;
2364 break;
2365 }
2366 }
2367
2368 /* Reached start of buffer. Error if within object, return nil if between */
2369 if (depth) goto lose;
2370
2371 immediate_quit = 0;
2372 return Qnil;
2373
2374 done2:
2375 count++;
2376 }
2377
2378
2379 immediate_quit = 0;
2380 XSETFASTINT (val, from);
2381 return val;
2382
2383 lose:
2384 Fsignal (Qscan_error,
2385 Fcons (build_string ("Unbalanced parentheses"),
2386 Fcons (make_number (last_good),
2387 Fcons (make_number (from), Qnil))));
2388
2389 /* NOTREACHED */
2390 }
2391
2392 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2393 "Scan from character number FROM by COUNT lists.\n\
2394 Returns the character number of the position thus found.\n\
2395 \n\
2396 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2397 only places where the depth in parentheses becomes zero\n\
2398 are candidates for stopping; COUNT such places are counted.\n\
2399 Thus, a positive value for DEPTH means go out levels.\n\
2400 \n\
2401 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2402 \n\
2403 If the beginning or end of (the accessible part of) the buffer is reached\n\
2404 and the depth is wrong, an error is signaled.\n\
2405 If the depth is right but the count is not used up, nil is returned.")
2406 (from, count, depth)
2407 Lisp_Object from, count, depth;
2408 {
2409 CHECK_NUMBER (from, 0);
2410 CHECK_NUMBER (count, 1);
2411 CHECK_NUMBER (depth, 2);
2412
2413 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2414 }
2415
2416 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2417 "Scan from character number FROM by COUNT balanced expressions.\n\
2418 If COUNT is negative, scan backwards.\n\
2419 Returns the character number of the position thus found.\n\
2420 \n\
2421 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2422 \n\
2423 If the beginning or end of (the accessible part of) the buffer is reached\n\
2424 in the middle of a parenthetical grouping, an error is signaled.\n\
2425 If the beginning or end is reached between groupings\n\
2426 but before count is used up, nil is returned.")
2427 (from, count)
2428 Lisp_Object from, count;
2429 {
2430 CHECK_NUMBER (from, 0);
2431 CHECK_NUMBER (count, 1);
2432
2433 return scan_lists (XINT (from), XINT (count), 0, 1);
2434 }
2435
2436 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2437 0, 0, 0,
2438 "Move point backward over any number of chars with prefix syntax.\n\
2439 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2440 ()
2441 {
2442 int beg = BEGV;
2443 int opoint = PT;
2444 int opoint_byte = PT_BYTE;
2445 int pos = PT;
2446 int pos_byte = PT_BYTE;
2447 int c;
2448
2449 if (pos <= beg)
2450 {
2451 SET_PT_BOTH (opoint, opoint_byte);
2452
2453 return Qnil;
2454 }
2455
2456 SETUP_SYNTAX_TABLE (pos, -1);
2457
2458 DEC_BOTH (pos, pos_byte);
2459
2460 while (!char_quoted (pos, pos_byte)
2461 /* Previous statement updates syntax table. */
2462 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2463 || SYNTAX_PREFIX (c)))
2464 {
2465 opoint = pos;
2466 opoint_byte = pos_byte;
2467
2468 if (pos + 1 > beg)
2469 DEC_BOTH (pos, pos_byte);
2470 }
2471
2472 SET_PT_BOTH (opoint, opoint_byte);
2473
2474 return Qnil;
2475 }
2476 \f
2477 /* Parse forward from FROM / FROM_BYTE to END,
2478 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2479 and return a description of the state of the parse at END.
2480 If STOPBEFORE is nonzero, stop at the start of an atom.
2481 If COMMENTSTOP is 1, stop at the start of a comment.
2482 If COMMENTSTOP is -1, stop at the start or end of a comment,
2483 after the beginning of a string, or after the end of a string. */
2484
2485 static void
2486 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2487 stopbefore, oldstate, commentstop)
2488 struct lisp_parse_state *stateptr;
2489 register int from;
2490 int end, targetdepth, stopbefore;
2491 Lisp_Object oldstate;
2492 int commentstop;
2493 {
2494 struct lisp_parse_state state;
2495
2496 register enum syntaxcode code;
2497 int c1;
2498 int comnested;
2499 struct level { int last, prev; };
2500 struct level levelstart[100];
2501 register struct level *curlevel = levelstart;
2502 struct level *endlevel = levelstart + 100;
2503 register int depth; /* Paren depth of current scanning location.
2504 level - levelstart equals this except
2505 when the depth becomes negative. */
2506 int mindepth; /* Lowest DEPTH value seen. */
2507 int start_quoted = 0; /* Nonzero means starting after a char quote */
2508 Lisp_Object tem;
2509 int prev_from; /* Keep one character before FROM. */
2510 int prev_from_byte;
2511 int prev_from_syntax;
2512 int boundary_stop = commentstop == -1;
2513 int nofence;
2514 int found;
2515 int out_bytepos, out_charpos;
2516 int temp;
2517
2518 prev_from = from;
2519 prev_from_byte = from_byte;
2520 if (from != BEGV)
2521 DEC_BOTH (prev_from, prev_from_byte);
2522
2523 /* Use this macro instead of `from++'. */
2524 #define INC_FROM \
2525 do { prev_from = from; \
2526 prev_from_byte = from_byte; \
2527 prev_from_syntax \
2528 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2529 INC_BOTH (from, from_byte); \
2530 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2531 } while (0)
2532
2533 immediate_quit = 1;
2534 QUIT;
2535
2536 if (NILP (oldstate))
2537 {
2538 depth = 0;
2539 state.instring = -1;
2540 state.incomment = 0;
2541 state.comstyle = 0; /* comment style a by default. */
2542 state.comstr_start = -1; /* no comment/string seen. */
2543 }
2544 else
2545 {
2546 tem = Fcar (oldstate);
2547 if (!NILP (tem))
2548 depth = XINT (tem);
2549 else
2550 depth = 0;
2551
2552 oldstate = Fcdr (oldstate);
2553 oldstate = Fcdr (oldstate);
2554 oldstate = Fcdr (oldstate);
2555 tem = Fcar (oldstate);
2556 /* Check whether we are inside string_fence-style string: */
2557 state.instring = (!NILP (tem)
2558 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2559 : -1);
2560
2561 oldstate = Fcdr (oldstate);
2562 tem = Fcar (oldstate);
2563 state.incomment = (!NILP (tem)
2564 ? (INTEGERP (tem) ? XINT (tem) : -1)
2565 : 0);
2566
2567 oldstate = Fcdr (oldstate);
2568 tem = Fcar (oldstate);
2569 start_quoted = !NILP (tem);
2570
2571 /* if the eighth element of the list is nil, we are in comment
2572 style a. If it is non-nil, we are in comment style b */
2573 oldstate = Fcdr (oldstate);
2574 oldstate = Fcdr (oldstate);
2575 tem = Fcar (oldstate);
2576 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2577 ? ST_COMMENT_STYLE : 1);
2578
2579 oldstate = Fcdr (oldstate);
2580 tem = Fcar (oldstate);
2581 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2582 oldstate = Fcdr (oldstate);
2583 tem = Fcar (oldstate);
2584 while (!NILP (tem)) /* >= second enclosing sexps. */
2585 {
2586 /* curlevel++->last ran into compiler bug on Apollo */
2587 curlevel->last = XINT (Fcar (tem));
2588 if (++curlevel == endlevel)
2589 curlevel--; /* error ("Nesting too deep for parser"); */
2590 curlevel->prev = -1;
2591 curlevel->last = -1;
2592 tem = Fcdr (tem);
2593 }
2594 }
2595 state.quoted = 0;
2596 mindepth = depth;
2597
2598 curlevel->prev = -1;
2599 curlevel->last = -1;
2600
2601 SETUP_SYNTAX_TABLE (prev_from, 1);
2602 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2603 UPDATE_SYNTAX_TABLE_FORWARD (from);
2604
2605 /* Enter the loop at a place appropriate for initial state. */
2606
2607 if (state.incomment)
2608 goto startincomment;
2609 if (state.instring >= 0)
2610 {
2611 nofence = state.instring != ST_STRING_STYLE;
2612 if (start_quoted)
2613 goto startquotedinstring;
2614 goto startinstring;
2615 }
2616 else if (start_quoted)
2617 goto startquoted;
2618
2619 #if 0 /* This seems to be redundant with the identical code above. */
2620 SETUP_SYNTAX_TABLE (prev_from, 1);
2621 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2622 UPDATE_SYNTAX_TABLE_FORWARD (from);
2623 #endif
2624
2625 while (from < end)
2626 {
2627 INC_FROM;
2628 code = prev_from_syntax & 0xff;
2629
2630 if (code == Scomment)
2631 {
2632 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2633 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2634 1 : -1);
2635 state.comstr_start = prev_from;
2636 }
2637 else if (code == Scomment_fence)
2638 {
2639 /* Record the comment style we have entered so that only
2640 the comment-end sequence of the same style actually
2641 terminates the comment section. */
2642 state.comstyle = ST_COMMENT_STYLE;
2643 state.incomment = -1;
2644 state.comstr_start = prev_from;
2645 code = Scomment;
2646 }
2647 else if (from < end)
2648 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2649 if (c1 = FETCH_CHAR (from_byte),
2650 SYNTAX_COMSTART_SECOND (c1))
2651 /* Duplicate code to avoid a complex if-expression
2652 which causes trouble for the SGI compiler. */
2653 {
2654 /* Record the comment style we have entered so that only
2655 the comment-end sequence of the same style actually
2656 terminates the comment section. */
2657 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2658 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2659 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2660 state.incomment = comnested ? 1 : -1;
2661 state.comstr_start = prev_from;
2662 INC_FROM;
2663 code = Scomment;
2664 }
2665
2666 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2667 continue;
2668 switch (SWITCH_ENUM_CAST (code))
2669 {
2670 case Sescape:
2671 case Scharquote:
2672 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2673 curlevel->last = prev_from;
2674 startquoted:
2675 if (from == end) goto endquoted;
2676 INC_FROM;
2677 goto symstarted;
2678 /* treat following character as a word constituent */
2679 case Sword:
2680 case Ssymbol:
2681 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2682 curlevel->last = prev_from;
2683 symstarted:
2684 while (from < end)
2685 {
2686 /* Some compilers can't handle this inside the switch. */
2687 temp = SYNTAX (FETCH_CHAR (from_byte));
2688 switch (temp)
2689 {
2690 case Scharquote:
2691 case Sescape:
2692 INC_FROM;
2693 if (from == end) goto endquoted;
2694 break;
2695 case Sword:
2696 case Ssymbol:
2697 case Squote:
2698 break;
2699 default:
2700 goto symdone;
2701 }
2702 INC_FROM;
2703 }
2704 symdone:
2705 curlevel->prev = curlevel->last;
2706 break;
2707
2708 case Scomment:
2709 if (commentstop || boundary_stop) goto done;
2710 startincomment:
2711 /* The (from == BEGV) test was to enter the loop in the middle so
2712 that we find a 2-char comment ender even if we start in the
2713 middle of it. We don't want to do that if we're just at the
2714 beginning of the comment (think of (*) ... (*)). */
2715 found = forw_comment (from, from_byte, end,
2716 state.incomment, state.comstyle,
2717 (from == BEGV || from < state.comstr_start + 3)
2718 ? 0 : prev_from_syntax,
2719 &out_charpos, &out_bytepos, &state.incomment);
2720 from = out_charpos; from_byte = out_bytepos;
2721 /* Beware! prev_from and friends are invalid now.
2722 Luckily, the `done' doesn't use them and the INC_FROM
2723 sets them to a sane value without looking at them. */
2724 if (!found) goto done;
2725 INC_FROM;
2726 state.incomment = 0;
2727 state.comstyle = 0; /* reset the comment style */
2728 if (boundary_stop) goto done;
2729 break;
2730
2731 case Sopen:
2732 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2733 depth++;
2734 /* curlevel++->last ran into compiler bug on Apollo */
2735 curlevel->last = prev_from;
2736 if (++curlevel == endlevel)
2737 curlevel--; /* error ("Nesting too deep for parser"); */
2738 curlevel->prev = -1;
2739 curlevel->last = -1;
2740 if (targetdepth == depth) goto done;
2741 break;
2742
2743 case Sclose:
2744 depth--;
2745 if (depth < mindepth)
2746 mindepth = depth;
2747 if (curlevel != levelstart)
2748 curlevel--;
2749 curlevel->prev = curlevel->last;
2750 if (targetdepth == depth) goto done;
2751 break;
2752
2753 case Sstring:
2754 case Sstring_fence:
2755 state.comstr_start = from - 1;
2756 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2757 curlevel->last = prev_from;
2758 state.instring = (code == Sstring
2759 ? (FETCH_CHAR (prev_from_byte))
2760 : ST_STRING_STYLE);
2761 if (boundary_stop) goto done;
2762 startinstring:
2763 {
2764 nofence = state.instring != ST_STRING_STYLE;
2765
2766 while (1)
2767 {
2768 int c;
2769
2770 if (from >= end) goto done;
2771 c = FETCH_CHAR (from_byte);
2772 /* Some compilers can't handle this inside the switch. */
2773 temp = SYNTAX (c);
2774
2775 /* Check TEMP here so that if the char has
2776 a syntax-table property which says it is NOT
2777 a string character, it does not end the string. */
2778 if (nofence && c == state.instring && temp == Sstring)
2779 break;
2780
2781 switch (temp)
2782 {
2783 case Sstring_fence:
2784 if (!nofence) goto string_end;
2785 break;
2786 case Scharquote:
2787 case Sescape:
2788 INC_FROM;
2789 startquotedinstring:
2790 if (from >= end) goto endquoted;
2791 }
2792 INC_FROM;
2793 }
2794 }
2795 string_end:
2796 state.instring = -1;
2797 curlevel->prev = curlevel->last;
2798 INC_FROM;
2799 if (boundary_stop) goto done;
2800 break;
2801
2802 case Smath:
2803 break;
2804 }
2805 }
2806 goto done;
2807
2808 stop: /* Here if stopping before start of sexp. */
2809 from = prev_from; /* We have just fetched the char that starts it; */
2810 goto done; /* but return the position before it. */
2811
2812 endquoted:
2813 state.quoted = 1;
2814 done:
2815 state.depth = depth;
2816 state.mindepth = mindepth;
2817 state.thislevelstart = curlevel->prev;
2818 state.prevlevelstart
2819 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2820 state.location = from;
2821 state.levelstarts = Qnil;
2822 while (--curlevel >= levelstart)
2823 state.levelstarts = Fcons (make_number (curlevel->last),
2824 state.levelstarts);
2825 immediate_quit = 0;
2826
2827 *stateptr = state;
2828 }
2829
2830 /* This comment supplies the doc string for parse-partial-sexp,
2831 for make-docfile to see. We cannot put this in the real DEFUN
2832 due to limits in the Unix cpp.
2833
2834 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2835 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2836 Parsing stops at TO or when certain criteria are met;\n\
2837 point is set to where parsing stops.\n\
2838 If fifth arg STATE is omitted or nil,\n\
2839 parsing assumes that FROM is the beginning of a function.\n\
2840 Value is a list of ten elements describing final state of parsing:\n\
2841 0. depth in parens.\n\
2842 1. character address of start of innermost containing list; nil if none.\n\
2843 2. character address of start of last complete sexp terminated.\n\
2844 3. non-nil if inside a string.\n\
2845 (it is the character that will terminate the string,\n\
2846 or t if the string should be terminated by a generic string delimiter.)\n\
2847 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2848 else an integer (the current comment nesting).\n\
2849 5. t if following a quote character.\n\
2850 6. the minimum paren-depth encountered during this scan.\n\
2851 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2852 should be terminated by a generic comment delimiter.\n\
2853 8. character address of start of comment or string; nil if not in one.\n\
2854 9. Intermediate data for continuation of parsing (subject to change).\n\
2855 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2856 in parentheses becomes equal to TARGETDEPTH.\n\
2857 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2858 any character that starts a sexp.\n\
2859 Fifth arg STATE is a nine-element list like what this function returns.\n\
2860 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2861 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2862 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2863 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2864 string, or after end of a comment or a string.")
2865 (from, to, targetdepth, stopbefore, state, commentstop)
2866 */
2867
2868 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2869 0 /* See immediately above */)
2870 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2871 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2872 {
2873 struct lisp_parse_state state;
2874 int target;
2875
2876 if (!NILP (targetdepth))
2877 {
2878 CHECK_NUMBER (targetdepth, 3);
2879 target = XINT (targetdepth);
2880 }
2881 else
2882 target = -100000; /* We won't reach this depth */
2883
2884 validate_region (&from, &to);
2885 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2886 XINT (to),
2887 target, !NILP (stopbefore), oldstate,
2888 (NILP (commentstop)
2889 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2890
2891 SET_PT (state.location);
2892
2893 return Fcons (make_number (state.depth),
2894 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2895 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2896 Fcons (state.instring >= 0
2897 ? (state.instring == ST_STRING_STYLE
2898 ? Qt : make_number (state.instring)) : Qnil,
2899 Fcons (state.incomment < 0 ? Qt :
2900 (state.incomment == 0 ? Qnil :
2901 make_number (state.incomment)),
2902 Fcons (state.quoted ? Qt : Qnil,
2903 Fcons (make_number (state.mindepth),
2904 Fcons ((state.comstyle
2905 ? (state.comstyle == ST_COMMENT_STYLE
2906 ? Qsyntax_table : Qt) :
2907 Qnil),
2908 Fcons (((state.incomment
2909 || (state.instring >= 0))
2910 ? make_number (state.comstr_start)
2911 : Qnil),
2912 Fcons (state.levelstarts, Qnil))))))))));
2913 }
2914 \f
2915 void
2916 init_syntax_once ()
2917 {
2918 register int i, c;
2919 Lisp_Object temp;
2920
2921 /* This has to be done here, before we call Fmake_char_table. */
2922 Qsyntax_table = intern ("syntax-table");
2923 staticpro (&Qsyntax_table);
2924
2925 /* Intern this now in case it isn't already done.
2926 Setting this variable twice is harmless.
2927 But don't staticpro it here--that is done in alloc.c. */
2928 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2929
2930 /* Create objects which can be shared among syntax tables. */
2931 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2932 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2933 XVECTOR (Vsyntax_code_object)->contents[i]
2934 = Fcons (make_number (i), Qnil);
2935
2936 /* Now we are ready to set up this property, so we can
2937 create syntax tables. */
2938 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2939
2940 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2941
2942 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2943
2944 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2945 for (i = 'a'; i <= 'z'; i++)
2946 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2947 for (i = 'A'; i <= 'Z'; i++)
2948 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2949 for (i = '0'; i <= '9'; i++)
2950 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2951
2952 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2953 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2954
2955 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2956 Fcons (make_number (Sopen), make_number (')')));
2957 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2958 Fcons (make_number (Sclose), make_number ('(')));
2959 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2960 Fcons (make_number (Sopen), make_number (']')));
2961 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2962 Fcons (make_number (Sclose), make_number ('[')));
2963 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2964 Fcons (make_number (Sopen), make_number ('}')));
2965 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2966 Fcons (make_number (Sclose), make_number ('{')));
2967 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2968 Fcons (make_number ((int) Sstring), Qnil));
2969 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2970 Fcons (make_number ((int) Sescape), Qnil));
2971
2972 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2973 for (i = 0; i < 10; i++)
2974 {
2975 c = "_-+*/&|<>="[i];
2976 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2977 }
2978
2979 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2980 for (i = 0; i < 12; i++)
2981 {
2982 c = ".,;:?!#@~^'`"[i];
2983 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2984 }
2985
2986 /* All multibyte characters have syntax `word' by default. */
2987 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2988 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2989 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
2990 }
2991
2992 void
2993 syms_of_syntax ()
2994 {
2995 Qsyntax_table_p = intern ("syntax-table-p");
2996 staticpro (&Qsyntax_table_p);
2997
2998 staticpro (&Vsyntax_code_object);
2999
3000 Qscan_error = intern ("scan-error");
3001 staticpro (&Qscan_error);
3002 Fput (Qscan_error, Qerror_conditions,
3003 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
3004 Fput (Qscan_error, Qerror_message,
3005 build_string ("Scan error"));
3006
3007 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3008 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
3009
3010 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3011 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
3012 The value of this property should be either a syntax table, or a cons\n\
3013 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
3014 syntax code, MATCHCHAR being nil or the character to match (which is\n\
3015 relevant only for open/close type.");
3016
3017 words_include_escapes = 0;
3018 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3019 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
3020
3021 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3022 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
3023 multibyte_syntax_as_symbol = 0;
3024
3025 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3026 &open_paren_in_column_0_is_defun_start,
3027 "Non-nil means an open paren in column 0 denotes the start of a defun.");
3028 open_paren_in_column_0_is_defun_start = 1;
3029
3030 defsubr (&Ssyntax_table_p);
3031 defsubr (&Ssyntax_table);
3032 defsubr (&Sstandard_syntax_table);
3033 defsubr (&Scopy_syntax_table);
3034 defsubr (&Sset_syntax_table);
3035 defsubr (&Schar_syntax);
3036 defsubr (&Smatching_paren);
3037 defsubr (&Smodify_syntax_entry);
3038 defsubr (&Sdescribe_syntax);
3039
3040 defsubr (&Sforward_word);
3041
3042 defsubr (&Sskip_chars_forward);
3043 defsubr (&Sskip_chars_backward);
3044 defsubr (&Sskip_syntax_forward);
3045 defsubr (&Sskip_syntax_backward);
3046
3047 defsubr (&Sforward_comment);
3048 defsubr (&Sscan_lists);
3049 defsubr (&Sscan_sexps);
3050 defsubr (&Sbackward_prefix_chars);
3051 defsubr (&Sparse_partial_sexp);
3052 }