Merge from emacs-23 branch, up to 2010-05-20T21:33:58Z!juri@jurta.org.
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include <setjmp.h>
25 #include "lisp.h"
26 #include "commands.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "keymap.h"
30 #include "regex.h"
31
32 /* Make syntax table lookup grant data in gl_state. */
33 #define SYNTAX_ENTRY_VIA_PROPERTY
34
35 #include "syntax.h"
36 #include "intervals.h"
37 #include "category.h"
38
39 /* Then there are seven single-bit flags that have the following meanings:
40 1. This character is the first of a two-character comment-start sequence.
41 2. This character is the second of a two-character comment-start sequence.
42 3. This character is the first of a two-character comment-end sequence.
43 4. This character is the second of a two-character comment-end sequence.
44 5. This character is a prefix, for backward-prefix-chars.
45 6. The char is part of a delimiter for comments of style "b".
46 7. This character is part of a nestable comment sequence.
47 8. The char is part of a delimiter for comments of style "c".
48 Note that any two-character sequence whose first character has flag 1
49 and whose second character has flag 2 will be interpreted as a comment start.
50
51 bit 6 and 8 are used to discriminate between different comment styles.
52 Languages such as C++ allow two orthogonal syntax start/end pairs
53 and bit 6 is used to determine whether a comment-end or Scommentend
54 ends style a or b. Comment markers can start style a, b, c, or bc.
55 Style a is always the default.
56 For 2-char comment markers, the style b flag is only looked up on the second
57 char of the comment marker and on the first char of the comment ender.
58 For style c (like to for the nested flag), the flag can be placed on any
59 one of the chars.
60 */
61
62 /* These macros extract specific flags from an integer
63 that holds the syntax code and the flags. */
64
65 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
66
67 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
68
69 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
70
71 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
72
73 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
74
75 #define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
76 #define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
77 /* FLAGS should be the flags of the main char of the comment marker, e.g.
78 the second for comstart and the first for comend. */
79 #define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
80 (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
81 | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
82 | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
83
84 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
85
86 /* These macros extract a particular flag for a given character. */
87
88 #define SYNTAX_COMEND_FIRST(c) \
89 (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
90 #define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
91
92 /* We use these constants in place for comment-style and
93 string-ender-char to distinguish comments/strings started by
94 comment_fence and string_fence codes. */
95
96 #define ST_COMMENT_STYLE (256 + 1)
97 #define ST_STRING_STYLE (256 + 2)
98
99 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
100
101 int words_include_escapes;
102 int parse_sexp_lookup_properties;
103
104 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
105 int multibyte_syntax_as_symbol;
106
107 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
108 if not compiled with GCC. No need to mark it, since it is used
109 only very temporarily. */
110 Lisp_Object syntax_temp;
111
112 /* Non-zero means an open parenthesis in column 0 is always considered
113 to be the start of a defun. Zero means an open parenthesis in
114 column 0 has no special meaning. */
115
116 int open_paren_in_column_0_is_defun_start;
117
118 /* This is the internal form of the parse state used in parse-partial-sexp. */
119
120 struct lisp_parse_state
121 {
122 int depth; /* Depth at end of parsing. */
123 int instring; /* -1 if not within string, else desired terminator. */
124 int incomment; /* -1 if in unnestable comment else comment nesting */
125 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
126 int quoted; /* Nonzero if just after an escape char at end of parsing */
127 int mindepth; /* Minimum depth seen while scanning. */
128 /* Char number of most recent start-of-expression at current level */
129 EMACS_INT thislevelstart;
130 /* Char number of start of containing expression */
131 EMACS_INT prevlevelstart;
132 EMACS_INT location; /* Char number at which parsing stopped. */
133 EMACS_INT comstr_start; /* Position of last comment/string starter. */
134 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
135 of levels (starting from outermost). */
136 };
137 \f
138 /* These variables are a cache for finding the start of a defun.
139 find_start_pos is the place for which the defun start was found.
140 find_start_value is the defun start position found for it.
141 find_start_value_byte is the corresponding byte position.
142 find_start_buffer is the buffer it was found in.
143 find_start_begv is the BEGV value when it was found.
144 find_start_modiff is the value of MODIFF when it was found. */
145
146 static EMACS_INT find_start_pos;
147 static EMACS_INT find_start_value;
148 static EMACS_INT find_start_value_byte;
149 static struct buffer *find_start_buffer;
150 static EMACS_INT find_start_begv;
151 static int find_start_modiff;
152
153
154 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
155 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
156 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
157 static void scan_sexps_forward (struct lisp_parse_state *,
158 EMACS_INT, EMACS_INT, EMACS_INT, int,
159 int, Lisp_Object, int);
160 static int in_classes (int, Lisp_Object);
161 \f
162 /* Whether the syntax of the character C has the prefix flag set. */
163 int syntax_prefix_flag_p (int c)
164 {
165 return SYNTAX_PREFIX (c);
166 }
167
168 struct gl_state_s gl_state; /* Global state of syntax parser. */
169
170 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
171 to scan to property-change. */
172
173 /* Update gl_state to an appropriate interval which contains CHARPOS. The
174 sign of COUNT give the relative position of CHARPOS wrt the previously
175 valid interval. If INIT, only [be]_property fields of gl_state are
176 valid at start, the rest is filled basing on OBJECT.
177
178 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
179 direction than the intervals - or in an interval. We update the
180 current syntax-table basing on the property of this interval, and
181 update the interval to start further than CHARPOS - or be
182 NULL_INTERVAL. We also update lim_property to be the next value of
183 charpos to call this subroutine again - or be before/after the
184 start/end of OBJECT. */
185
186 void
187 update_syntax_table (EMACS_INT charpos, int count, int init,
188 Lisp_Object object)
189 {
190 Lisp_Object tmp_table;
191 int cnt = 0, invalidate = 1;
192 INTERVAL i;
193
194 if (init)
195 {
196 gl_state.old_prop = Qnil;
197 gl_state.start = gl_state.b_property;
198 gl_state.stop = gl_state.e_property;
199 i = interval_of (charpos, object);
200 gl_state.backward_i = gl_state.forward_i = i;
201 invalidate = 0;
202 if (NULL_INTERVAL_P (i))
203 return;
204 /* interval_of updates only ->position of the return value, so
205 update the parents manually to speed up update_interval. */
206 while (!NULL_PARENT (i))
207 {
208 if (AM_RIGHT_CHILD (i))
209 INTERVAL_PARENT (i)->position = i->position
210 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
211 - TOTAL_LENGTH (INTERVAL_PARENT (i))
212 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
213 else
214 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
215 + TOTAL_LENGTH (i);
216 i = INTERVAL_PARENT (i);
217 }
218 i = gl_state.forward_i;
219 gl_state.b_property = i->position - gl_state.offset;
220 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
221 goto update;
222 }
223 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
224
225 /* We are guaranteed to be called with CHARPOS either in i,
226 or further off. */
227 if (NULL_INTERVAL_P (i))
228 error ("Error in syntax_table logic for to-the-end intervals");
229 else if (charpos < i->position) /* Move left. */
230 {
231 if (count > 0)
232 error ("Error in syntax_table logic for intervals <-");
233 /* Update the interval. */
234 i = update_interval (i, charpos);
235 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
236 {
237 invalidate = 0;
238 gl_state.forward_i = i;
239 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
240 }
241 }
242 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
243 {
244 if (count < 0)
245 error ("Error in syntax_table logic for intervals ->");
246 /* Update the interval. */
247 i = update_interval (i, charpos);
248 if (i->position != gl_state.e_property)
249 {
250 invalidate = 0;
251 gl_state.backward_i = i;
252 gl_state.b_property = i->position - gl_state.offset;
253 }
254 }
255
256 update:
257 tmp_table = textget (i->plist, Qsyntax_table);
258
259 if (invalidate)
260 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
261
262 if (invalidate) /* Did not get to adjacent interval. */
263 { /* with the same table => */
264 /* invalidate the old range. */
265 if (count > 0)
266 {
267 gl_state.backward_i = i;
268 gl_state.b_property = i->position - gl_state.offset;
269 }
270 else
271 {
272 gl_state.forward_i = i;
273 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
274 }
275 }
276
277 if (!EQ (tmp_table, gl_state.old_prop))
278 {
279 gl_state.current_syntax_table = tmp_table;
280 gl_state.old_prop = tmp_table;
281 if (EQ (Fsyntax_table_p (tmp_table), Qt))
282 {
283 gl_state.use_global = 0;
284 }
285 else if (CONSP (tmp_table))
286 {
287 gl_state.use_global = 1;
288 gl_state.global_code = tmp_table;
289 }
290 else
291 {
292 gl_state.use_global = 0;
293 gl_state.current_syntax_table = current_buffer->syntax_table;
294 }
295 }
296
297 while (!NULL_INTERVAL_P (i))
298 {
299 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
300 {
301 if (count > 0)
302 {
303 gl_state.e_property = i->position - gl_state.offset;
304 gl_state.forward_i = i;
305 }
306 else
307 {
308 gl_state.b_property
309 = i->position + LENGTH (i) - gl_state.offset;
310 gl_state.backward_i = i;
311 }
312 return;
313 }
314 else if (cnt == INTERVALS_AT_ONCE)
315 {
316 if (count > 0)
317 {
318 gl_state.e_property
319 = i->position + LENGTH (i) - gl_state.offset
320 /* e_property at EOB is not set to ZV but to ZV+1, so that
321 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
322 having to check eob between the two. */
323 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
324 gl_state.forward_i = i;
325 }
326 else
327 {
328 gl_state.b_property = i->position - gl_state.offset;
329 gl_state.backward_i = i;
330 }
331 return;
332 }
333 cnt++;
334 i = count > 0 ? next_interval (i) : previous_interval (i);
335 }
336 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
337 if (count > 0)
338 gl_state.e_property = gl_state.stop;
339 else
340 gl_state.b_property = gl_state.start;
341 }
342 \f
343 /* Returns TRUE if char at CHARPOS is quoted.
344 Global syntax-table data should be set up already to be good at CHARPOS
345 or after. On return global syntax data is good for lookup at CHARPOS. */
346
347 static int
348 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
349 {
350 register enum syntaxcode code;
351 register EMACS_INT beg = BEGV;
352 register int quoted = 0;
353 EMACS_INT orig = charpos;
354
355 while (charpos > beg)
356 {
357 int c;
358 DEC_BOTH (charpos, bytepos);
359
360 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
361 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
362 code = SYNTAX (c);
363 if (! (code == Scharquote || code == Sescape))
364 break;
365
366 quoted = !quoted;
367 }
368
369 UPDATE_SYNTAX_TABLE (orig);
370 return quoted;
371 }
372
373 /* Return the bytepos one character before BYTEPOS.
374 We assume that BYTEPOS is not at the start of the buffer. */
375
376 static INLINE EMACS_INT
377 dec_bytepos (EMACS_INT bytepos)
378 {
379 if (NILP (current_buffer->enable_multibyte_characters))
380 return bytepos - 1;
381
382 DEC_POS (bytepos);
383 return bytepos;
384 }
385 \f
386 /* Return a defun-start position before POS and not too far before.
387 It should be the last one before POS, or nearly the last.
388
389 When open_paren_in_column_0_is_defun_start is nonzero,
390 only the beginning of the buffer is treated as a defun-start.
391
392 We record the information about where the scan started
393 and what its result was, so that another call in the same area
394 can return the same value very quickly.
395
396 There is no promise at which position the global syntax data is
397 valid on return from the subroutine, so the caller should explicitly
398 update the global data. */
399
400 static EMACS_INT
401 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
402 {
403 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
404
405 if (!open_paren_in_column_0_is_defun_start)
406 {
407 find_start_value_byte = BEGV_BYTE;
408 return BEGV;
409 }
410
411 /* Use previous finding, if it's valid and applies to this inquiry. */
412 if (current_buffer == find_start_buffer
413 /* Reuse the defun-start even if POS is a little farther on.
414 POS might be in the next defun, but that's ok.
415 Our value may not be the best possible, but will still be usable. */
416 && pos <= find_start_pos + 1000
417 && pos >= find_start_value
418 && BEGV == find_start_begv
419 && MODIFF == find_start_modiff)
420 return find_start_value;
421
422 /* Back up to start of line. */
423 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
424
425 /* We optimize syntax-table lookup for rare updates. Thus we accept
426 only those `^\s(' which are good in global _and_ text-property
427 syntax-tables. */
428 SETUP_BUFFER_SYNTAX_TABLE ();
429 while (PT > BEGV)
430 {
431 int c;
432
433 /* Open-paren at start of line means we may have found our
434 defun-start. */
435 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
436 if (SYNTAX (c) == Sopen)
437 {
438 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
439 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
440 if (SYNTAX (c) == Sopen)
441 break;
442 /* Now fallback to the default value. */
443 SETUP_BUFFER_SYNTAX_TABLE ();
444 }
445 /* Move to beg of previous line. */
446 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
447 }
448
449 /* Record what we found, for the next try. */
450 find_start_value = PT;
451 find_start_value_byte = PT_BYTE;
452 find_start_buffer = current_buffer;
453 find_start_modiff = MODIFF;
454 find_start_begv = BEGV;
455 find_start_pos = pos;
456
457 TEMP_SET_PT_BOTH (opoint, opoint_byte);
458
459 return find_start_value;
460 }
461 \f
462 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
463
464 static int
465 prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
466 {
467 int c, val;
468
469 DEC_BOTH (pos, pos_byte);
470 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
471 c = FETCH_CHAR (pos_byte);
472 val = SYNTAX_COMEND_FIRST (c);
473 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
474 return val;
475 }
476
477 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
478
479 /* static int
480 * prev_char_comstart_first (pos, pos_byte)
481 * int pos, pos_byte;
482 * {
483 * int c, val;
484 *
485 * DEC_BOTH (pos, pos_byte);
486 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
487 * c = FETCH_CHAR (pos_byte);
488 * val = SYNTAX_COMSTART_FIRST (c);
489 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
490 * return val;
491 * } */
492
493 /* Checks whether charpos FROM is at the end of a comment.
494 FROM_BYTE is the bytepos corresponding to FROM.
495 Do not move back before STOP.
496
497 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
498 return -1 otherwise.
499
500 If successful, store the charpos of the comment's beginning
501 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
502
503 Global syntax data remains valid for backward search starting at
504 the returned value (or at FROM, if the search was not successful). */
505
506 static int
507 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
508 {
509 /* Look back, counting the parity of string-quotes,
510 and recording the comment-starters seen.
511 When we reach a safe place, assume that's not in a string;
512 then step the main scan to the earliest comment-starter seen
513 an even number of string quotes away from the safe place.
514
515 OFROM[I] is position of the earliest comment-starter seen
516 which is I+2X quotes from the comment-end.
517 PARITY is current parity of quotes from the comment end. */
518 int string_style = -1; /* Presumed outside of any string. */
519 int string_lossage = 0;
520 /* Not a real lossage: indicates that we have passed a matching comment
521 starter plus a non-matching comment-ender, meaning that any matching
522 comment-starter we might see later could be a false positive (hidden
523 inside another comment).
524 Test case: { a (* b } c (* d *) */
525 int comment_lossage = 0;
526 EMACS_INT comment_end = from;
527 EMACS_INT comment_end_byte = from_byte;
528 EMACS_INT comstart_pos = 0;
529 EMACS_INT comstart_byte;
530 /* Place where the containing defun starts,
531 or 0 if we didn't come across it yet. */
532 EMACS_INT defun_start = 0;
533 EMACS_INT defun_start_byte = 0;
534 register enum syntaxcode code;
535 int nesting = 1; /* current comment nesting */
536 int c;
537 int syntax = 0;
538
539 /* FIXME: A }} comment-ender style leads to incorrect behavior
540 in the case of {{ c }}} because we ignore the last two chars which are
541 assumed to be comment-enders although they aren't. */
542
543 /* At beginning of range to scan, we're outside of strings;
544 that determines quote parity to the comment-end. */
545 while (from != stop)
546 {
547 EMACS_INT temp_byte;
548 int prev_syntax, com2start, com2end;
549 int comstart;
550
551 /* Move back and examine a character. */
552 DEC_BOTH (from, from_byte);
553 UPDATE_SYNTAX_TABLE_BACKWARD (from);
554
555 prev_syntax = syntax;
556 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
557 syntax = SYNTAX_WITH_FLAGS (c);
558 code = SYNTAX (c);
559
560 /* Check for 2-char comment markers. */
561 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
562 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
563 && (comstyle
564 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
565 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
566 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
567 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
568 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
569 comstart = (com2start || code == Scomment);
570
571 /* Nasty cases with overlapping 2-char comment markers:
572 - snmp-mode: -- c -- foo -- c --
573 --- c --
574 ------ c --
575 - c-mode: *||*
576 |* *|* *|
577 |*| |* |*|
578 /// */
579
580 /* If a 2-char comment sequence partly overlaps with another,
581 we don't try to be clever. E.g. |*| in C, or }% in modes that
582 have %..\n and %{..}%. */
583 if (from > stop && (com2end || comstart))
584 {
585 EMACS_INT next = from, next_byte = from_byte;
586 int next_c, next_syntax;
587 DEC_BOTH (next, next_byte);
588 UPDATE_SYNTAX_TABLE_BACKWARD (next);
589 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
590 next_syntax = SYNTAX_WITH_FLAGS (next_c);
591 if (((comstart || comnested)
592 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
593 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
594 || ((com2end || comnested)
595 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
596 && (comstyle
597 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
598 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
599 goto lossage;
600 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
601 }
602
603 if (com2start && comstart_pos == 0)
604 /* We're looking at a comment starter. But it might be a comment
605 ender as well (see snmp-mode). The first time we see one, we
606 need to consider it as a comment starter,
607 and the subsequent times as a comment ender. */
608 com2end = 0;
609
610 /* Turn a 2-char comment sequences into the appropriate syntax. */
611 if (com2end)
612 code = Sendcomment;
613 else if (com2start)
614 code = Scomment;
615 /* Ignore comment starters of a different style. */
616 else if (code == Scomment
617 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
618 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
619 continue;
620
621 /* Ignore escaped characters, except comment-enders. */
622 if (code != Sendcomment && char_quoted (from, from_byte))
623 continue;
624
625 switch (code)
626 {
627 case Sstring_fence:
628 case Scomment_fence:
629 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
630 case Sstring:
631 /* Track parity of quotes. */
632 if (string_style == -1)
633 /* Entering a string. */
634 string_style = c;
635 else if (string_style == c)
636 /* Leaving the string. */
637 string_style = -1;
638 else
639 /* If we have two kinds of string delimiters.
640 There's no way to grok this scanning backwards. */
641 string_lossage = 1;
642 break;
643
644 case Scomment:
645 /* We've already checked that it is the relevant comstyle. */
646 if (string_style != -1 || comment_lossage || string_lossage)
647 /* There are odd string quotes involved, so let's be careful.
648 Test case in Pascal: " { " a { " } */
649 goto lossage;
650
651 if (!comnested)
652 {
653 /* Record best comment-starter so far. */
654 comstart_pos = from;
655 comstart_byte = from_byte;
656 }
657 else if (--nesting <= 0)
658 /* nested comments have to be balanced, so we don't need to
659 keep looking for earlier ones. We use here the same (slightly
660 incorrect) reasoning as below: since it is followed by uniform
661 paired string quotes, this comment-start has to be outside of
662 strings, else the comment-end itself would be inside a string. */
663 goto done;
664 break;
665
666 case Sendcomment:
667 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
668 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
669 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
670 /* This is the same style of comment ender as ours. */
671 {
672 if (comnested)
673 nesting++;
674 else
675 /* Anything before that can't count because it would match
676 this comment-ender rather than ours. */
677 from = stop; /* Break out of the loop. */
678 }
679 else if (comstart_pos != 0 || c != '\n')
680 /* We're mixing comment styles here, so we'd better be careful.
681 The (comstart_pos != 0 || c != '\n') check is not quite correct
682 (we should just always set comment_lossage), but removing it
683 would imply that any multiline comment in C would go through
684 lossage, which seems overkill.
685 The failure should only happen in the rare cases such as
686 { (* } *) */
687 comment_lossage = 1;
688 break;
689
690 case Sopen:
691 /* Assume a defun-start point is outside of strings. */
692 if (open_paren_in_column_0_is_defun_start
693 && (from == stop
694 || (temp_byte = dec_bytepos (from_byte),
695 FETCH_CHAR (temp_byte) == '\n')))
696 {
697 defun_start = from;
698 defun_start_byte = from_byte;
699 from = stop; /* Break out of the loop. */
700 }
701 break;
702
703 default:
704 break;
705 }
706 }
707
708 if (comstart_pos == 0)
709 {
710 from = comment_end;
711 from_byte = comment_end_byte;
712 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
713 }
714 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
715 or `done'), then we've found the beginning of the non-nested comment. */
716 else if (1) /* !comnested */
717 {
718 from = comstart_pos;
719 from_byte = comstart_byte;
720 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
721 }
722 else
723 {
724 struct lisp_parse_state state;
725 lossage:
726 /* We had two kinds of string delimiters mixed up
727 together. Decode this going forwards.
728 Scan fwd from a known safe place (beginning-of-defun)
729 to the one in question; this records where we
730 last passed a comment starter. */
731 /* If we did not already find the defun start, find it now. */
732 if (defun_start == 0)
733 {
734 defun_start = find_defun_start (comment_end, comment_end_byte);
735 defun_start_byte = find_start_value_byte;
736 }
737 do
738 {
739 scan_sexps_forward (&state,
740 defun_start, defun_start_byte,
741 comment_end, -10000, 0, Qnil, 0);
742 defun_start = comment_end;
743 if (state.incomment == (comnested ? 1 : -1)
744 && state.comstyle == comstyle)
745 from = state.comstr_start;
746 else
747 {
748 from = comment_end;
749 if (state.incomment)
750 /* If comment_end is inside some other comment, maybe ours
751 is nested, so we need to try again from within the
752 surrounding comment. Example: { a (* " *) */
753 {
754 /* FIXME: We should advance by one or two chars. */
755 defun_start = state.comstr_start + 2;
756 defun_start_byte = CHAR_TO_BYTE (defun_start);
757 }
758 }
759 } while (defun_start < comment_end);
760
761 from_byte = CHAR_TO_BYTE (from);
762 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
763 }
764
765 done:
766 *charpos_ptr = from;
767 *bytepos_ptr = from_byte;
768
769 return (from == comment_end) ? -1 : from;
770 }
771 \f
772 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
773 doc: /* Return t if OBJECT is a syntax table.
774 Currently, any char-table counts as a syntax table. */)
775 (Lisp_Object object)
776 {
777 if (CHAR_TABLE_P (object)
778 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
779 return Qt;
780 return Qnil;
781 }
782
783 static void
784 check_syntax_table (Lisp_Object obj)
785 {
786 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
787 Qsyntax_table_p, obj);
788 }
789
790 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
791 doc: /* Return the current syntax table.
792 This is the one specified by the current buffer. */)
793 (void)
794 {
795 return current_buffer->syntax_table;
796 }
797
798 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
799 Sstandard_syntax_table, 0, 0, 0,
800 doc: /* Return the standard syntax table.
801 This is the one used for new buffers. */)
802 (void)
803 {
804 return Vstandard_syntax_table;
805 }
806
807 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
808 doc: /* Construct a new syntax table and return it.
809 It is a copy of the TABLE, which defaults to the standard syntax table. */)
810 (Lisp_Object table)
811 {
812 Lisp_Object copy;
813
814 if (!NILP (table))
815 check_syntax_table (table);
816 else
817 table = Vstandard_syntax_table;
818
819 copy = Fcopy_sequence (table);
820
821 /* Only the standard syntax table should have a default element.
822 Other syntax tables should inherit from parents instead. */
823 XCHAR_TABLE (copy)->defalt = Qnil;
824
825 /* Copied syntax tables should all have parents.
826 If we copied one with no parent, such as the standard syntax table,
827 use the standard syntax table as the copy's parent. */
828 if (NILP (XCHAR_TABLE (copy)->parent))
829 Fset_char_table_parent (copy, Vstandard_syntax_table);
830 return copy;
831 }
832
833 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
834 doc: /* Select a new syntax table for the current buffer.
835 One argument, a syntax table. */)
836 (Lisp_Object table)
837 {
838 int idx;
839 check_syntax_table (table);
840 current_buffer->syntax_table = table;
841 /* Indicate that this buffer now has a specified syntax table. */
842 idx = PER_BUFFER_VAR_IDX (syntax_table);
843 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
844 return table;
845 }
846 \f
847 /* Convert a letter which signifies a syntax code
848 into the code it signifies.
849 This is used by modify-syntax-entry, and other things. */
850
851 unsigned char syntax_spec_code[0400] =
852 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
853 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
854 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
855 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
856 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
857 (char) Smath, 0377, 0377, (char) Squote,
858 (char) Sopen, (char) Sclose, 0377, 0377,
859 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
860 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
861 0377, 0377, 0377, 0377,
862 (char) Scomment, 0377, (char) Sendcomment, 0377,
863 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
864 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
865 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
866 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
867 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
868 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
869 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
870 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
871 };
872
873 /* Indexed by syntax code, give the letter that describes it. */
874
875 char syntax_code_spec[16] =
876 {
877 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
878 '!', '|'
879 };
880
881 /* Indexed by syntax code, give the object (cons of syntax code and
882 nil) to be stored in syntax table. Since these objects can be
883 shared among syntax tables, we generate them in advance. By
884 sharing objects, the function `describe-syntax' can give a more
885 compact listing. */
886 static Lisp_Object Vsyntax_code_object;
887
888 \f
889 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
890 doc: /* Return the syntax code of CHARACTER, described by a character.
891 For example, if CHARACTER is a word constituent, the
892 character `w' (119) is returned.
893 The characters that correspond to various syntax codes
894 are listed in the documentation of `modify-syntax-entry'. */)
895 (Lisp_Object character)
896 {
897 int char_int;
898 CHECK_CHARACTER (character);
899 char_int = XINT (character);
900 SETUP_BUFFER_SYNTAX_TABLE ();
901 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
902 }
903
904 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
905 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
906 (Lisp_Object character)
907 {
908 int char_int, code;
909 CHECK_NUMBER (character);
910 char_int = XINT (character);
911 SETUP_BUFFER_SYNTAX_TABLE ();
912 code = SYNTAX (char_int);
913 if (code == Sopen || code == Sclose)
914 return SYNTAX_MATCH (char_int);
915 return Qnil;
916 }
917
918 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
919 doc: /* Convert a syntax specification STRING into syntax cell form.
920 STRING should be a string as it is allowed as argument of
921 `modify-syntax-entry'. Value is the equivalent cons cell
922 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
923 text property. */)
924 (Lisp_Object string)
925 {
926 register const unsigned char *p;
927 register enum syntaxcode code;
928 int val;
929 Lisp_Object match;
930
931 CHECK_STRING (string);
932
933 p = SDATA (string);
934 code = (enum syntaxcode) syntax_spec_code[*p++];
935 if (((int) code & 0377) == 0377)
936 error ("Invalid syntax description letter: %c", p[-1]);
937
938 if (code == Sinherit)
939 return Qnil;
940
941 if (*p)
942 {
943 int len;
944 int character = STRING_CHAR_AND_LENGTH (p, len);
945 XSETINT (match, character);
946 if (XFASTINT (match) == ' ')
947 match = Qnil;
948 p += len;
949 }
950 else
951 match = Qnil;
952
953 val = (int) code;
954 while (*p)
955 switch (*p++)
956 {
957 case '1':
958 val |= 1 << 16;
959 break;
960
961 case '2':
962 val |= 1 << 17;
963 break;
964
965 case '3':
966 val |= 1 << 18;
967 break;
968
969 case '4':
970 val |= 1 << 19;
971 break;
972
973 case 'p':
974 val |= 1 << 20;
975 break;
976
977 case 'b':
978 val |= 1 << 21;
979 break;
980
981 case 'n':
982 val |= 1 << 22;
983 break;
984
985 case 'c':
986 val |= 1 << 23;
987 break;
988 }
989
990 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
991 return XVECTOR (Vsyntax_code_object)->contents[val];
992 else
993 /* Since we can't use a shared object, let's make a new one. */
994 return Fcons (make_number (val), match);
995 }
996
997 /* I really don't know why this is interactive
998 help-form should at least be made useful whilst reading the second arg. */
999 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1000 "cSet syntax for character: \nsSet syntax for %s to: ",
1001 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1002 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1003 the current buffer's syntax table.
1004 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1005 in the range MIN to MAX are changed.
1006 The first character of NEWENTRY should be one of the following:
1007 Space or - whitespace syntax. w word constituent.
1008 _ symbol constituent. . punctuation.
1009 ( open-parenthesis. ) close-parenthesis.
1010 " string quote. \\ escape.
1011 $ paired delimiter. ' expression quote or prefix operator.
1012 < comment starter. > comment ender.
1013 / character-quote. @ inherit from `standard-syntax-table'.
1014 | generic string fence. ! generic comment fence.
1015
1016 Only single-character comment start and end sequences are represented thus.
1017 Two-character sequences are represented as described below.
1018 The second character of NEWENTRY is the matching parenthesis,
1019 used only if the first character is `(' or `)'.
1020 Any additional characters are flags.
1021 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1022 1 means CHAR is the start of a two-char comment start sequence.
1023 2 means CHAR is the second character of such a sequence.
1024 3 means CHAR is the start of a two-char comment end sequence.
1025 4 means CHAR is the second character of such a sequence.
1026
1027 There can be several orthogonal comment sequences. This is to support
1028 language modes such as C++. By default, all comment sequences are of style
1029 a, but you can set the comment sequence style to b (on the second character
1030 of a comment-start, and the first character of a comment-end sequence) and/or
1031 c (on any of its chars) using this flag:
1032 b means CHAR is part of comment sequence b.
1033 c means CHAR is part of comment sequence c.
1034 n means CHAR is part of a nestable comment sequence.
1035
1036 p means CHAR is a prefix character for `backward-prefix-chars';
1037 such characters are treated as whitespace when they occur
1038 between expressions.
1039 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1040 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1041 {
1042 if (CONSP (c))
1043 {
1044 CHECK_CHARACTER_CAR (c);
1045 CHECK_CHARACTER_CDR (c);
1046 }
1047 else
1048 CHECK_CHARACTER (c);
1049
1050 if (NILP (syntax_table))
1051 syntax_table = current_buffer->syntax_table;
1052 else
1053 check_syntax_table (syntax_table);
1054
1055 newentry = Fstring_to_syntax (newentry);
1056 if (CONSP (c))
1057 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1058 else
1059 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1060
1061 /* We clear the regexp cache, since character classes can now have
1062 different values from those in the compiled regexps.*/
1063 clear_regexp_cache ();
1064
1065 return Qnil;
1066 }
1067 \f
1068 /* Dump syntax table to buffer in human-readable format */
1069
1070 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1071 Sinternal_describe_syntax_value, 1, 1, 0,
1072 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1073 (Lisp_Object syntax)
1074 {
1075 register enum syntaxcode code;
1076 int syntax_code;
1077 char desc, start1, start2, end1, end2, prefix,
1078 comstyleb, comstylec, comnested;
1079 char str[2];
1080 Lisp_Object first, match_lisp, value = syntax;
1081
1082 if (NILP (value))
1083 {
1084 insert_string ("default");
1085 return syntax;
1086 }
1087
1088 if (CHAR_TABLE_P (value))
1089 {
1090 insert_string ("deeper char-table ...");
1091 return syntax;
1092 }
1093
1094 if (!CONSP (value))
1095 {
1096 insert_string ("invalid");
1097 return syntax;
1098 }
1099
1100 first = XCAR (value);
1101 match_lisp = XCDR (value);
1102
1103 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1104 {
1105 insert_string ("invalid");
1106 return syntax;
1107 }
1108
1109 syntax_code = XINT (first);
1110 code = (enum syntaxcode) (syntax_code & 0377);
1111 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1112 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1113 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1114 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1115 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1116 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1117 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1118 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1119
1120 if ((int) code < 0 || (int) code >= (int) Smax)
1121 {
1122 insert_string ("invalid");
1123 return syntax;
1124 }
1125 desc = syntax_code_spec[(int) code];
1126
1127 str[0] = desc, str[1] = 0;
1128 insert (str, 1);
1129
1130 if (NILP (match_lisp))
1131 insert (" ", 1);
1132 else
1133 insert_char (XINT (match_lisp));
1134
1135 if (start1)
1136 insert ("1", 1);
1137 if (start2)
1138 insert ("2", 1);
1139
1140 if (end1)
1141 insert ("3", 1);
1142 if (end2)
1143 insert ("4", 1);
1144
1145 if (prefix)
1146 insert ("p", 1);
1147 if (comstyleb)
1148 insert ("b", 1);
1149 if (comstylec)
1150 insert ("c", 1);
1151 if (comnested)
1152 insert ("n", 1);
1153
1154 insert_string ("\twhich means: ");
1155
1156 switch (SWITCH_ENUM_CAST (code))
1157 {
1158 case Swhitespace:
1159 insert_string ("whitespace"); break;
1160 case Spunct:
1161 insert_string ("punctuation"); break;
1162 case Sword:
1163 insert_string ("word"); break;
1164 case Ssymbol:
1165 insert_string ("symbol"); break;
1166 case Sopen:
1167 insert_string ("open"); break;
1168 case Sclose:
1169 insert_string ("close"); break;
1170 case Squote:
1171 insert_string ("prefix"); break;
1172 case Sstring:
1173 insert_string ("string"); break;
1174 case Smath:
1175 insert_string ("math"); break;
1176 case Sescape:
1177 insert_string ("escape"); break;
1178 case Scharquote:
1179 insert_string ("charquote"); break;
1180 case Scomment:
1181 insert_string ("comment"); break;
1182 case Sendcomment:
1183 insert_string ("endcomment"); break;
1184 case Sinherit:
1185 insert_string ("inherit"); break;
1186 case Scomment_fence:
1187 insert_string ("comment fence"); break;
1188 case Sstring_fence:
1189 insert_string ("string fence"); break;
1190 default:
1191 insert_string ("invalid");
1192 return syntax;
1193 }
1194
1195 if (!NILP (match_lisp))
1196 {
1197 insert_string (", matches ");
1198 insert_char (XINT (match_lisp));
1199 }
1200
1201 if (start1)
1202 insert_string (",\n\t is the first character of a comment-start sequence");
1203 if (start2)
1204 insert_string (",\n\t is the second character of a comment-start sequence");
1205
1206 if (end1)
1207 insert_string (",\n\t is the first character of a comment-end sequence");
1208 if (end2)
1209 insert_string (",\n\t is the second character of a comment-end sequence");
1210 if (comstyleb)
1211 insert_string (" (comment style b)");
1212 if (comstylec)
1213 insert_string (" (comment style c)");
1214 if (comnested)
1215 insert_string (" (nestable)");
1216
1217 if (prefix)
1218 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1219
1220 return syntax;
1221 }
1222 \f
1223 int parse_sexp_ignore_comments;
1224
1225 /* Char-table of functions that find the next or previous word
1226 boundary. */
1227 Lisp_Object Vfind_word_boundary_function_table;
1228
1229 /* Return the position across COUNT words from FROM.
1230 If that many words cannot be found before the end of the buffer, return 0.
1231 COUNT negative means scan backward and stop at word beginning. */
1232
1233 EMACS_INT
1234 scan_words (register EMACS_INT from, register EMACS_INT count)
1235 {
1236 register EMACS_INT beg = BEGV;
1237 register EMACS_INT end = ZV;
1238 register EMACS_INT from_byte = CHAR_TO_BYTE (from);
1239 register enum syntaxcode code;
1240 int ch0, ch1;
1241 Lisp_Object func, script, pos;
1242
1243 immediate_quit = 1;
1244 QUIT;
1245
1246 SETUP_SYNTAX_TABLE (from, count);
1247
1248 while (count > 0)
1249 {
1250 while (1)
1251 {
1252 if (from == end)
1253 {
1254 immediate_quit = 0;
1255 return 0;
1256 }
1257 UPDATE_SYNTAX_TABLE_FORWARD (from);
1258 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1259 code = SYNTAX (ch0);
1260 INC_BOTH (from, from_byte);
1261 if (words_include_escapes
1262 && (code == Sescape || code == Scharquote))
1263 break;
1264 if (code == Sword)
1265 break;
1266 }
1267 /* Now CH0 is a character which begins a word and FROM is the
1268 position of the next character. */
1269 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1270 if (! NILP (Ffboundp (func)))
1271 {
1272 pos = call2 (func, make_number (from - 1), make_number (end));
1273 if (INTEGERP (pos) && XINT (pos) > from)
1274 {
1275 from = XINT (pos);
1276 from_byte = CHAR_TO_BYTE (from);
1277 }
1278 }
1279 else
1280 {
1281 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1282 while (1)
1283 {
1284 if (from == end) break;
1285 UPDATE_SYNTAX_TABLE_FORWARD (from);
1286 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1287 code = SYNTAX (ch1);
1288 if ((code != Sword
1289 && (! words_include_escapes
1290 || (code != Sescape && code != Scharquote)))
1291 || word_boundary_p (ch0, ch1))
1292 break;
1293 INC_BOTH (from, from_byte);
1294 ch0 = ch1;
1295 }
1296 }
1297 count--;
1298 }
1299 while (count < 0)
1300 {
1301 while (1)
1302 {
1303 if (from == beg)
1304 {
1305 immediate_quit = 0;
1306 return 0;
1307 }
1308 DEC_BOTH (from, from_byte);
1309 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1310 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1311 code = SYNTAX (ch1);
1312 if (words_include_escapes
1313 && (code == Sescape || code == Scharquote))
1314 break;
1315 if (code == Sword)
1316 break;
1317 }
1318 /* Now CH1 is a character which ends a word and FROM is the
1319 position of it. */
1320 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1321 if (! NILP (Ffboundp (func)))
1322 {
1323 pos = call2 (func, make_number (from), make_number (beg));
1324 if (INTEGERP (pos) && XINT (pos) < from)
1325 {
1326 from = XINT (pos);
1327 from_byte = CHAR_TO_BYTE (from);
1328 }
1329 }
1330 else
1331 {
1332 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1333 while (1)
1334 {
1335 if (from == beg)
1336 break;
1337 DEC_BOTH (from, from_byte);
1338 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1339 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1340 code = SYNTAX (ch0);
1341 if ((code != Sword
1342 && (! words_include_escapes
1343 || (code != Sescape && code != Scharquote)))
1344 || word_boundary_p (ch0, ch1))
1345 {
1346 INC_BOTH (from, from_byte);
1347 break;
1348 }
1349 ch1 = ch0;
1350 }
1351 }
1352 count++;
1353 }
1354
1355 immediate_quit = 0;
1356
1357 return from;
1358 }
1359
1360 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1361 doc: /* Move point forward ARG words (backward if ARG is negative).
1362 Normally returns t.
1363 If an edge of the buffer or a field boundary is reached, point is left there
1364 and the function returns nil. Field boundaries are not noticed if
1365 `inhibit-field-text-motion' is non-nil. */)
1366 (Lisp_Object arg)
1367 {
1368 Lisp_Object tmp;
1369 int orig_val, val;
1370
1371 if (NILP (arg))
1372 XSETFASTINT (arg, 1);
1373 else
1374 CHECK_NUMBER (arg);
1375
1376 val = orig_val = scan_words (PT, XINT (arg));
1377 if (! orig_val)
1378 val = XINT (arg) > 0 ? ZV : BEGV;
1379
1380 /* Avoid jumping out of an input field. */
1381 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1382 Qt, Qnil, Qnil);
1383 val = XFASTINT (tmp);
1384
1385 SET_PT (val);
1386 return val == orig_val ? Qt : Qnil;
1387 }
1388 \f
1389 Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
1390
1391 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1392 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1393 STRING is like the inside of a `[...]' in a regular expression
1394 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1395 (but not at the end of a range; quoting is never needed there).
1396 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1397 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1398 Char classes, e.g. `[:alpha:]', are supported.
1399
1400 Returns the distance traveled, either zero or positive. */)
1401 (Lisp_Object string, Lisp_Object lim)
1402 {
1403 return skip_chars (1, string, lim, 1);
1404 }
1405
1406 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1407 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1408 See `skip-chars-forward' for details.
1409 Returns the distance traveled, either zero or negative. */)
1410 (Lisp_Object string, Lisp_Object lim)
1411 {
1412 return skip_chars (0, string, lim, 1);
1413 }
1414
1415 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1416 doc: /* Move point forward across chars in specified syntax classes.
1417 SYNTAX is a string of syntax code characters.
1418 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1419 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1420 This function returns the distance traveled, either zero or positive. */)
1421 (Lisp_Object syntax, Lisp_Object lim)
1422 {
1423 return skip_syntaxes (1, syntax, lim);
1424 }
1425
1426 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1427 doc: /* Move point backward across chars in specified syntax classes.
1428 SYNTAX is a string of syntax code characters.
1429 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1430 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1431 This function returns the distance traveled, either zero or negative. */)
1432 (Lisp_Object syntax, Lisp_Object lim)
1433 {
1434 return skip_syntaxes (0, syntax, lim);
1435 }
1436
1437 static Lisp_Object
1438 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1439 {
1440 register unsigned int c;
1441 unsigned char fastmap[0400];
1442 /* Store the ranges of non-ASCII characters. */
1443 int *char_ranges;
1444 int n_char_ranges = 0;
1445 int negate = 0;
1446 register EMACS_INT i, i_byte;
1447 /* Set to 1 if the current buffer is multibyte and the region
1448 contains non-ASCII chars. */
1449 int multibyte;
1450 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1451 chars. */
1452 int string_multibyte;
1453 EMACS_INT size_byte;
1454 const unsigned char *str;
1455 int len;
1456 Lisp_Object iso_classes;
1457
1458 CHECK_STRING (string);
1459 iso_classes = Qnil;
1460
1461 if (NILP (lim))
1462 XSETINT (lim, forwardp ? ZV : BEGV);
1463 else
1464 CHECK_NUMBER_COERCE_MARKER (lim);
1465
1466 /* In any case, don't allow scan outside bounds of buffer. */
1467 if (XINT (lim) > ZV)
1468 XSETFASTINT (lim, ZV);
1469 if (XINT (lim) < BEGV)
1470 XSETFASTINT (lim, BEGV);
1471
1472 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1473 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1474 string_multibyte = SBYTES (string) > SCHARS (string);
1475
1476 memset (fastmap, 0, sizeof fastmap);
1477
1478 str = SDATA (string);
1479 size_byte = SBYTES (string);
1480
1481 i_byte = 0;
1482 if (i_byte < size_byte
1483 && SREF (string, 0) == '^')
1484 {
1485 negate = 1; i_byte++;
1486 }
1487
1488 /* Find the characters specified and set their elements of fastmap.
1489 Handle backslashes and ranges specially.
1490
1491 If STRING contains non-ASCII characters, setup char_ranges for
1492 them and use fastmap only for their leading codes. */
1493
1494 if (! string_multibyte)
1495 {
1496 int string_has_eight_bit = 0;
1497
1498 /* At first setup fastmap. */
1499 while (i_byte < size_byte)
1500 {
1501 c = str[i_byte++];
1502
1503 if (handle_iso_classes && c == '['
1504 && i_byte < size_byte
1505 && str[i_byte] == ':')
1506 {
1507 const unsigned char *class_beg = str + i_byte + 1;
1508 const unsigned char *class_end = class_beg;
1509 const unsigned char *class_limit = str + size_byte - 2;
1510 /* Leave room for the null. */
1511 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1512 re_wctype_t cc;
1513
1514 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1515 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1516
1517 while (class_end < class_limit
1518 && *class_end >= 'a' && *class_end <= 'z')
1519 class_end++;
1520
1521 if (class_end == class_beg
1522 || *class_end != ':' || class_end[1] != ']')
1523 goto not_a_class_name;
1524
1525 memcpy (class_name, class_beg, class_end - class_beg);
1526 class_name[class_end - class_beg] = 0;
1527
1528 cc = re_wctype (class_name);
1529 if (cc == 0)
1530 error ("Invalid ISO C character class");
1531
1532 iso_classes = Fcons (make_number (cc), iso_classes);
1533
1534 i_byte = class_end + 2 - str;
1535 continue;
1536 }
1537
1538 not_a_class_name:
1539 if (c == '\\')
1540 {
1541 if (i_byte == size_byte)
1542 break;
1543
1544 c = str[i_byte++];
1545 }
1546 /* Treat `-' as range character only if another character
1547 follows. */
1548 if (i_byte + 1 < size_byte
1549 && str[i_byte] == '-')
1550 {
1551 unsigned int c2;
1552
1553 /* Skip over the dash. */
1554 i_byte++;
1555
1556 /* Get the end of the range. */
1557 c2 = str[i_byte++];
1558 if (c2 == '\\'
1559 && i_byte < size_byte)
1560 c2 = str[i_byte++];
1561
1562 if (c <= c2)
1563 {
1564 while (c <= c2)
1565 fastmap[c++] = 1;
1566 if (! ASCII_CHAR_P (c2))
1567 string_has_eight_bit = 1;
1568 }
1569 }
1570 else
1571 {
1572 fastmap[c] = 1;
1573 if (! ASCII_CHAR_P (c))
1574 string_has_eight_bit = 1;
1575 }
1576 }
1577
1578 /* If the current range is multibyte and STRING contains
1579 eight-bit chars, arrange fastmap and setup char_ranges for
1580 the corresponding multibyte chars. */
1581 if (multibyte && string_has_eight_bit)
1582 {
1583 unsigned char fastmap2[0400];
1584 int range_start_byte, range_start_char;
1585
1586 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1587 memset (fastmap + 0200, 0, 0200);
1588 /* We are sure that this loop stops. */
1589 for (i = 0200; ! fastmap2[i]; i++);
1590 c = BYTE8_TO_CHAR (i);
1591 fastmap[CHAR_LEADING_CODE (c)] = 1;
1592 range_start_byte = i;
1593 range_start_char = c;
1594 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1595 for (i = 129; i < 0400; i++)
1596 {
1597 c = BYTE8_TO_CHAR (i);
1598 fastmap[CHAR_LEADING_CODE (c)] = 1;
1599 if (i - range_start_byte != c - range_start_char)
1600 {
1601 char_ranges[n_char_ranges++] = range_start_char;
1602 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1603 + range_start_char);
1604 range_start_byte = i;
1605 range_start_char = c;
1606 }
1607 }
1608 char_ranges[n_char_ranges++] = range_start_char;
1609 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1610 + range_start_char);
1611 }
1612 }
1613 else /* STRING is multibyte */
1614 {
1615 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1616
1617 while (i_byte < size_byte)
1618 {
1619 unsigned char leading_code;
1620
1621 leading_code = str[i_byte];
1622 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1623 i_byte += len;
1624
1625 if (handle_iso_classes && c == '['
1626 && i_byte < size_byte
1627 && STRING_CHAR (str + i_byte) == ':')
1628 {
1629 const unsigned char *class_beg = str + i_byte + 1;
1630 const unsigned char *class_end = class_beg;
1631 const unsigned char *class_limit = str + size_byte - 2;
1632 /* Leave room for the null. */
1633 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1634 re_wctype_t cc;
1635
1636 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1637 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1638
1639 while (class_end < class_limit
1640 && *class_end >= 'a' && *class_end <= 'z')
1641 class_end++;
1642
1643 if (class_end == class_beg
1644 || *class_end != ':' || class_end[1] != ']')
1645 goto not_a_class_name_multibyte;
1646
1647 memcpy (class_name, class_beg, class_end - class_beg);
1648 class_name[class_end - class_beg] = 0;
1649
1650 cc = re_wctype (class_name);
1651 if (cc == 0)
1652 error ("Invalid ISO C character class");
1653
1654 iso_classes = Fcons (make_number (cc), iso_classes);
1655
1656 i_byte = class_end + 2 - str;
1657 continue;
1658 }
1659
1660 not_a_class_name_multibyte:
1661 if (c == '\\')
1662 {
1663 if (i_byte == size_byte)
1664 break;
1665
1666 leading_code = str[i_byte];
1667 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1668 i_byte += len;
1669 }
1670 /* Treat `-' as range character only if another character
1671 follows. */
1672 if (i_byte + 1 < size_byte
1673 && str[i_byte] == '-')
1674 {
1675 unsigned int c2;
1676 unsigned char leading_code2;
1677
1678 /* Skip over the dash. */
1679 i_byte++;
1680
1681 /* Get the end of the range. */
1682 leading_code2 = str[i_byte];
1683 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1684 i_byte += len;
1685
1686 if (c2 == '\\'
1687 && i_byte < size_byte)
1688 {
1689 leading_code2 = str[i_byte];
1690 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1691 i_byte += len;
1692 }
1693
1694 if (c > c2)
1695 continue;
1696 if (ASCII_CHAR_P (c))
1697 {
1698 while (c <= c2 && c < 0x80)
1699 fastmap[c++] = 1;
1700 leading_code = CHAR_LEADING_CODE (c);
1701 }
1702 if (! ASCII_CHAR_P (c))
1703 {
1704 while (leading_code <= leading_code2)
1705 fastmap[leading_code++] = 1;
1706 if (c <= c2)
1707 {
1708 char_ranges[n_char_ranges++] = c;
1709 char_ranges[n_char_ranges++] = c2;
1710 }
1711 }
1712 }
1713 else
1714 {
1715 if (ASCII_CHAR_P (c))
1716 fastmap[c] = 1;
1717 else
1718 {
1719 fastmap[leading_code] = 1;
1720 char_ranges[n_char_ranges++] = c;
1721 char_ranges[n_char_ranges++] = c;
1722 }
1723 }
1724 }
1725
1726 /* If the current range is unibyte and STRING contains non-ASCII
1727 chars, arrange fastmap for the corresponding unibyte
1728 chars. */
1729
1730 if (! multibyte && n_char_ranges > 0)
1731 {
1732 memset (fastmap + 0200, 0, 0200);
1733 for (i = 0; i < n_char_ranges; i += 2)
1734 {
1735 int c1 = char_ranges[i];
1736 int c2 = char_ranges[i + 1];
1737
1738 for (; c1 <= c2; c1++)
1739 {
1740 int b = CHAR_TO_BYTE_SAFE (c1);
1741 if (b >= 0)
1742 fastmap[b] = 1;
1743 }
1744 }
1745 }
1746 }
1747
1748 /* If ^ was the first character, complement the fastmap. */
1749 if (negate)
1750 {
1751 if (! multibyte)
1752 for (i = 0; i < sizeof fastmap; i++)
1753 fastmap[i] ^= 1;
1754 else
1755 {
1756 for (i = 0; i < 0200; i++)
1757 fastmap[i] ^= 1;
1758 /* All non-ASCII chars possibly match. */
1759 for (; i < sizeof fastmap; i++)
1760 fastmap[i] = 1;
1761 }
1762 }
1763
1764 {
1765 EMACS_INT start_point = PT;
1766 EMACS_INT pos = PT;
1767 EMACS_INT pos_byte = PT_BYTE;
1768 unsigned char *p = PT_ADDR, *endp, *stop;
1769
1770 if (forwardp)
1771 {
1772 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1773 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1774 }
1775 else
1776 {
1777 endp = CHAR_POS_ADDR (XINT (lim));
1778 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1779 }
1780
1781 immediate_quit = 1;
1782 /* This code may look up syntax tables using macros that rely on the
1783 gl_state object. To make sure this object is not out of date,
1784 let's initialize it manually.
1785 We ignore syntax-table text-properties for now, since that's
1786 what we've done in the past. */
1787 SETUP_BUFFER_SYNTAX_TABLE ();
1788 if (forwardp)
1789 {
1790 if (multibyte)
1791 while (1)
1792 {
1793 int nbytes;
1794
1795 if (p >= stop)
1796 {
1797 if (p >= endp)
1798 break;
1799 p = GAP_END_ADDR;
1800 stop = endp;
1801 }
1802 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1803 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1804 {
1805 if (negate)
1806 break;
1807 else
1808 goto fwd_ok;
1809 }
1810
1811 if (! fastmap[*p])
1812 break;
1813 if (! ASCII_CHAR_P (c))
1814 {
1815 /* As we are looking at a multibyte character, we
1816 must look up the character in the table
1817 CHAR_RANGES. If there's no data in the table,
1818 that character is not what we want to skip. */
1819
1820 /* The following code do the right thing even if
1821 n_char_ranges is zero (i.e. no data in
1822 CHAR_RANGES). */
1823 for (i = 0; i < n_char_ranges; i += 2)
1824 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1825 break;
1826 if (!(negate ^ (i < n_char_ranges)))
1827 break;
1828 }
1829 fwd_ok:
1830 p += nbytes, pos++, pos_byte += nbytes;
1831 }
1832 else
1833 while (1)
1834 {
1835 if (p >= stop)
1836 {
1837 if (p >= endp)
1838 break;
1839 p = GAP_END_ADDR;
1840 stop = endp;
1841 }
1842
1843 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1844 {
1845 if (negate)
1846 break;
1847 else
1848 goto fwd_unibyte_ok;
1849 }
1850
1851 if (!fastmap[*p])
1852 break;
1853 fwd_unibyte_ok:
1854 p++, pos++, pos_byte++;
1855 }
1856 }
1857 else
1858 {
1859 if (multibyte)
1860 while (1)
1861 {
1862 unsigned char *prev_p;
1863
1864 if (p <= stop)
1865 {
1866 if (p <= endp)
1867 break;
1868 p = GPT_ADDR;
1869 stop = endp;
1870 }
1871 prev_p = p;
1872 while (--p >= stop && ! CHAR_HEAD_P (*p));
1873 c = STRING_CHAR (p);
1874
1875 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1876 {
1877 if (negate)
1878 break;
1879 else
1880 goto back_ok;
1881 }
1882
1883 if (! fastmap[*p])
1884 break;
1885 if (! ASCII_CHAR_P (c))
1886 {
1887 /* See the comment in the previous similar code. */
1888 for (i = 0; i < n_char_ranges; i += 2)
1889 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1890 break;
1891 if (!(negate ^ (i < n_char_ranges)))
1892 break;
1893 }
1894 back_ok:
1895 pos--, pos_byte -= prev_p - p;
1896 }
1897 else
1898 while (1)
1899 {
1900 if (p <= stop)
1901 {
1902 if (p <= endp)
1903 break;
1904 p = GPT_ADDR;
1905 stop = endp;
1906 }
1907
1908 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1909 {
1910 if (negate)
1911 break;
1912 else
1913 goto back_unibyte_ok;
1914 }
1915
1916 if (!fastmap[p[-1]])
1917 break;
1918 back_unibyte_ok:
1919 p--, pos--, pos_byte--;
1920 }
1921 }
1922
1923 SET_PT_BOTH (pos, pos_byte);
1924 immediate_quit = 0;
1925
1926 return make_number (PT - start_point);
1927 }
1928 }
1929
1930
1931 static Lisp_Object
1932 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1933 {
1934 register unsigned int c;
1935 unsigned char fastmap[0400];
1936 int negate = 0;
1937 register EMACS_INT i, i_byte;
1938 int multibyte;
1939 EMACS_INT size_byte;
1940 unsigned char *str;
1941
1942 CHECK_STRING (string);
1943
1944 if (NILP (lim))
1945 XSETINT (lim, forwardp ? ZV : BEGV);
1946 else
1947 CHECK_NUMBER_COERCE_MARKER (lim);
1948
1949 /* In any case, don't allow scan outside bounds of buffer. */
1950 if (XINT (lim) > ZV)
1951 XSETFASTINT (lim, ZV);
1952 if (XINT (lim) < BEGV)
1953 XSETFASTINT (lim, BEGV);
1954
1955 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1956 return make_number (0);
1957
1958 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1959 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1960
1961 memset (fastmap, 0, sizeof fastmap);
1962
1963 if (SBYTES (string) > SCHARS (string))
1964 /* As this is very rare case (syntax spec is ASCII only), don't
1965 consider efficiency. */
1966 string = string_make_unibyte (string);
1967
1968 str = SDATA (string);
1969 size_byte = SBYTES (string);
1970
1971 i_byte = 0;
1972 if (i_byte < size_byte
1973 && SREF (string, 0) == '^')
1974 {
1975 negate = 1; i_byte++;
1976 }
1977
1978 /* Find the syntaxes specified and set their elements of fastmap. */
1979
1980 while (i_byte < size_byte)
1981 {
1982 c = str[i_byte++];
1983 fastmap[syntax_spec_code[c]] = 1;
1984 }
1985
1986 /* If ^ was the first character, complement the fastmap. */
1987 if (negate)
1988 for (i = 0; i < sizeof fastmap; i++)
1989 fastmap[i] ^= 1;
1990
1991 {
1992 EMACS_INT start_point = PT;
1993 EMACS_INT pos = PT;
1994 EMACS_INT pos_byte = PT_BYTE;
1995 unsigned char *p = PT_ADDR, *endp, *stop;
1996
1997 if (forwardp)
1998 {
1999 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2000 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2001 }
2002 else
2003 {
2004 endp = CHAR_POS_ADDR (XINT (lim));
2005 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2006 }
2007
2008 immediate_quit = 1;
2009 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2010 if (forwardp)
2011 {
2012 if (multibyte)
2013 {
2014 while (1)
2015 {
2016 int nbytes;
2017
2018 if (p >= stop)
2019 {
2020 if (p >= endp)
2021 break;
2022 p = GAP_END_ADDR;
2023 stop = endp;
2024 }
2025 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2026 if (! fastmap[(int) SYNTAX (c)])
2027 break;
2028 p += nbytes, pos++, pos_byte += nbytes;
2029 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2030 }
2031 }
2032 else
2033 {
2034 while (1)
2035 {
2036 if (p >= stop)
2037 {
2038 if (p >= endp)
2039 break;
2040 p = GAP_END_ADDR;
2041 stop = endp;
2042 }
2043 if (! fastmap[(int) SYNTAX (*p)])
2044 break;
2045 p++, pos++, pos_byte++;
2046 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2047 }
2048 }
2049 }
2050 else
2051 {
2052 if (multibyte)
2053 {
2054 while (1)
2055 {
2056 unsigned char *prev_p;
2057
2058 if (p <= stop)
2059 {
2060 if (p <= endp)
2061 break;
2062 p = GPT_ADDR;
2063 stop = endp;
2064 }
2065 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2066 prev_p = p;
2067 while (--p >= stop && ! CHAR_HEAD_P (*p));
2068 c = STRING_CHAR (p);
2069 if (! fastmap[(int) SYNTAX (c)])
2070 break;
2071 pos--, pos_byte -= prev_p - p;
2072 }
2073 }
2074 else
2075 {
2076 while (1)
2077 {
2078 if (p <= stop)
2079 {
2080 if (p <= endp)
2081 break;
2082 p = GPT_ADDR;
2083 stop = endp;
2084 }
2085 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2086 if (! fastmap[(int) SYNTAX (p[-1])])
2087 break;
2088 p--, pos--, pos_byte--;
2089 }
2090 }
2091 }
2092
2093 SET_PT_BOTH (pos, pos_byte);
2094 immediate_quit = 0;
2095
2096 return make_number (PT - start_point);
2097 }
2098 }
2099
2100 /* Return 1 if character C belongs to one of the ISO classes
2101 in the list ISO_CLASSES. Each class is represented by an
2102 integer which is its type according to re_wctype. */
2103
2104 static int
2105 in_classes (int c, Lisp_Object iso_classes)
2106 {
2107 int fits_class = 0;
2108
2109 while (CONSP (iso_classes))
2110 {
2111 Lisp_Object elt;
2112 elt = XCAR (iso_classes);
2113 iso_classes = XCDR (iso_classes);
2114
2115 if (re_iswctype (c, XFASTINT (elt)))
2116 fits_class = 1;
2117 }
2118
2119 return fits_class;
2120 }
2121 \f
2122 /* Jump over a comment, assuming we are at the beginning of one.
2123 FROM is the current position.
2124 FROM_BYTE is the bytepos corresponding to FROM.
2125 Do not move past STOP (a charpos).
2126 The comment over which we have to jump is of style STYLE
2127 (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2128 NESTING should be positive to indicate the nesting at the beginning
2129 for nested comments and should be zero or negative else.
2130 ST_COMMENT_STYLE cannot be nested.
2131 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2132 (or 0 If the search cannot start in the middle of a two-character).
2133
2134 If successful, return 1 and store the charpos of the comment's end
2135 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2136 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2137 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2138 (as defined for state.incomment) in *INCOMMENT_PTR.
2139
2140 The comment end is the last character of the comment rather than the
2141 character just after the comment.
2142
2143 Global syntax data is assumed to initially be valid for FROM and
2144 remains valid for forward search starting at the returned position. */
2145
2146 static int
2147 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2148 int nesting, int style, int prev_syntax,
2149 EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2150 int *incomment_ptr)
2151 {
2152 register int c, c1;
2153 register enum syntaxcode code;
2154 register int syntax, other_syntax;
2155
2156 if (nesting <= 0) nesting = -1;
2157
2158 /* Enter the loop in the middle so that we find
2159 a 2-char comment ender if we start in the middle of it. */
2160 syntax = prev_syntax;
2161 if (syntax != 0) goto forw_incomment;
2162
2163 while (1)
2164 {
2165 if (from == stop)
2166 {
2167 *incomment_ptr = nesting;
2168 *charpos_ptr = from;
2169 *bytepos_ptr = from_byte;
2170 return 0;
2171 }
2172 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2173 syntax = SYNTAX_WITH_FLAGS (c);
2174 code = syntax & 0xff;
2175 if (code == Sendcomment
2176 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2177 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2178 (nesting > 0 && --nesting == 0) : nesting < 0))
2179 /* we have encountered a comment end of the same style
2180 as the comment sequence which began this comment
2181 section */
2182 break;
2183 if (code == Scomment_fence
2184 && style == ST_COMMENT_STYLE)
2185 /* we have encountered a comment end of the same style
2186 as the comment sequence which began this comment
2187 section. */
2188 break;
2189 if (nesting > 0
2190 && code == Scomment
2191 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2192 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2193 /* we have encountered a nested comment of the same style
2194 as the comment sequence which began this comment section */
2195 nesting++;
2196 INC_BOTH (from, from_byte);
2197 UPDATE_SYNTAX_TABLE_FORWARD (from);
2198
2199 forw_incomment:
2200 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2201 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2202 other_syntax = SYNTAX_WITH_FLAGS (c1),
2203 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2204 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2205 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2206 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2207 ? nesting > 0 : nesting < 0))
2208 {
2209 if (--nesting <= 0)
2210 /* we have encountered a comment end of the same style
2211 as the comment sequence which began this comment
2212 section */
2213 break;
2214 else
2215 {
2216 INC_BOTH (from, from_byte);
2217 UPDATE_SYNTAX_TABLE_FORWARD (from);
2218 }
2219 }
2220 if (nesting > 0
2221 && from < stop
2222 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2223 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2224 other_syntax = SYNTAX_WITH_FLAGS (c1),
2225 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2226 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2227 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2228 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2229 /* we have encountered a nested comment of the same style
2230 as the comment sequence which began this comment
2231 section */
2232 {
2233 INC_BOTH (from, from_byte);
2234 UPDATE_SYNTAX_TABLE_FORWARD (from);
2235 nesting++;
2236 }
2237 }
2238 *charpos_ptr = from;
2239 *bytepos_ptr = from_byte;
2240 return 1;
2241 }
2242
2243 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2244 doc: /*
2245 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2246 Stop scanning if we find something other than a comment or whitespace.
2247 Set point to where scanning stops.
2248 If COUNT comments are found as expected, with nothing except whitespace
2249 between them, return t; otherwise return nil. */)
2250 (Lisp_Object count)
2251 {
2252 register EMACS_INT from;
2253 EMACS_INT from_byte;
2254 register EMACS_INT stop;
2255 register int c, c1;
2256 register enum syntaxcode code;
2257 int comstyle = 0; /* style of comment encountered */
2258 int comnested = 0; /* whether the comment is nestable or not */
2259 int found;
2260 EMACS_INT count1;
2261 EMACS_INT out_charpos, out_bytepos;
2262 int dummy;
2263
2264 CHECK_NUMBER (count);
2265 count1 = XINT (count);
2266 stop = count1 > 0 ? ZV : BEGV;
2267
2268 immediate_quit = 1;
2269 QUIT;
2270
2271 from = PT;
2272 from_byte = PT_BYTE;
2273
2274 SETUP_SYNTAX_TABLE (from, count1);
2275 while (count1 > 0)
2276 {
2277 do
2278 {
2279 int comstart_first, syntax, other_syntax;
2280
2281 if (from == stop)
2282 {
2283 SET_PT_BOTH (from, from_byte);
2284 immediate_quit = 0;
2285 return Qnil;
2286 }
2287 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2288 syntax = SYNTAX_WITH_FLAGS (c);
2289 code = SYNTAX (c);
2290 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2291 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2292 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2293 INC_BOTH (from, from_byte);
2294 UPDATE_SYNTAX_TABLE_FORWARD (from);
2295 if (from < stop && comstart_first
2296 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2297 other_syntax = SYNTAX_WITH_FLAGS (c1),
2298 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2299 {
2300 /* We have encountered a comment start sequence and we
2301 are ignoring all text inside comments. We must record
2302 the comment style this sequence begins so that later,
2303 only a comment end of the same style actually ends
2304 the comment section. */
2305 code = Scomment;
2306 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2307 comnested
2308 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2309 INC_BOTH (from, from_byte);
2310 UPDATE_SYNTAX_TABLE_FORWARD (from);
2311 }
2312 }
2313 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2314
2315 if (code == Scomment_fence)
2316 comstyle = ST_COMMENT_STYLE;
2317 else if (code != Scomment)
2318 {
2319 immediate_quit = 0;
2320 DEC_BOTH (from, from_byte);
2321 SET_PT_BOTH (from, from_byte);
2322 return Qnil;
2323 }
2324 /* We're at the start of a comment. */
2325 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2326 &out_charpos, &out_bytepos, &dummy);
2327 from = out_charpos; from_byte = out_bytepos;
2328 if (!found)
2329 {
2330 immediate_quit = 0;
2331 SET_PT_BOTH (from, from_byte);
2332 return Qnil;
2333 }
2334 INC_BOTH (from, from_byte);
2335 UPDATE_SYNTAX_TABLE_FORWARD (from);
2336 /* We have skipped one comment. */
2337 count1--;
2338 }
2339
2340 while (count1 < 0)
2341 {
2342 while (1)
2343 {
2344 int quoted, syntax;
2345
2346 if (from <= stop)
2347 {
2348 SET_PT_BOTH (BEGV, BEGV_BYTE);
2349 immediate_quit = 0;
2350 return Qnil;
2351 }
2352
2353 DEC_BOTH (from, from_byte);
2354 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2355 quoted = char_quoted (from, from_byte);
2356 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2357 syntax = SYNTAX_WITH_FLAGS (c);
2358 code = SYNTAX (c);
2359 comstyle = 0;
2360 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2361 if (code == Sendcomment)
2362 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2363 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2364 && prev_char_comend_first (from, from_byte)
2365 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2366 {
2367 int other_syntax;
2368 /* We must record the comment style encountered so that
2369 later, we can match only the proper comment begin
2370 sequence of the same style. */
2371 DEC_BOTH (from, from_byte);
2372 code = Sendcomment;
2373 /* Calling char_quoted, above, set up global syntax position
2374 at the new value of FROM. */
2375 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2376 other_syntax = SYNTAX_WITH_FLAGS (c1);
2377 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2378 comnested
2379 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2380 }
2381
2382 if (code == Scomment_fence)
2383 {
2384 /* Skip until first preceding unquoted comment_fence. */
2385 int found = 0;
2386 EMACS_INT ini = from, ini_byte = from_byte;
2387
2388 while (1)
2389 {
2390 DEC_BOTH (from, from_byte);
2391 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2392 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2393 if (SYNTAX (c) == Scomment_fence
2394 && !char_quoted (from, from_byte))
2395 {
2396 found = 1;
2397 break;
2398 }
2399 else if (from == stop)
2400 break;
2401 }
2402 if (found == 0)
2403 {
2404 from = ini; /* Set point to ini + 1. */
2405 from_byte = ini_byte;
2406 goto leave;
2407 }
2408 else
2409 /* We have skipped one comment. */
2410 break;
2411 }
2412 else if (code == Sendcomment)
2413 {
2414 found = back_comment (from, from_byte, stop, comnested, comstyle,
2415 &out_charpos, &out_bytepos);
2416 if (found == -1)
2417 {
2418 if (c == '\n')
2419 /* This end-of-line is not an end-of-comment.
2420 Treat it like a whitespace.
2421 CC-mode (and maybe others) relies on this behavior. */
2422 ;
2423 else
2424 {
2425 /* Failure: we should go back to the end of this
2426 not-quite-endcomment. */
2427 if (SYNTAX (c) != code)
2428 /* It was a two-char Sendcomment. */
2429 INC_BOTH (from, from_byte);
2430 goto leave;
2431 }
2432 }
2433 else
2434 {
2435 /* We have skipped one comment. */
2436 from = out_charpos, from_byte = out_bytepos;
2437 break;
2438 }
2439 }
2440 else if (code != Swhitespace || quoted)
2441 {
2442 leave:
2443 immediate_quit = 0;
2444 INC_BOTH (from, from_byte);
2445 SET_PT_BOTH (from, from_byte);
2446 return Qnil;
2447 }
2448 }
2449
2450 count1++;
2451 }
2452
2453 SET_PT_BOTH (from, from_byte);
2454 immediate_quit = 0;
2455 return Qt;
2456 }
2457 \f
2458 /* Return syntax code of character C if C is an ASCII character
2459 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2460
2461 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2462 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
2463 ? SYNTAX (c) : Ssymbol)
2464
2465 static Lisp_Object
2466 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2467 {
2468 Lisp_Object val;
2469 register EMACS_INT stop = count > 0 ? ZV : BEGV;
2470 register int c, c1;
2471 int stringterm;
2472 int quoted;
2473 int mathexit = 0;
2474 register enum syntaxcode code, temp_code;
2475 int min_depth = depth; /* Err out if depth gets less than this. */
2476 int comstyle = 0; /* style of comment encountered */
2477 int comnested = 0; /* whether the comment is nestable or not */
2478 EMACS_INT temp_pos;
2479 EMACS_INT last_good = from;
2480 int found;
2481 EMACS_INT from_byte;
2482 EMACS_INT out_bytepos, out_charpos;
2483 int temp, dummy;
2484 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2485
2486 if (depth > 0) min_depth = 0;
2487
2488 if (from > ZV) from = ZV;
2489 if (from < BEGV) from = BEGV;
2490
2491 from_byte = CHAR_TO_BYTE (from);
2492
2493 immediate_quit = 1;
2494 QUIT;
2495
2496 SETUP_SYNTAX_TABLE (from, count);
2497 while (count > 0)
2498 {
2499 while (from < stop)
2500 {
2501 int comstart_first, prefix, syntax, other_syntax;
2502 UPDATE_SYNTAX_TABLE_FORWARD (from);
2503 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2504 syntax = SYNTAX_WITH_FLAGS (c);
2505 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2506 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2507 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2508 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2509 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2510 if (depth == min_depth)
2511 last_good = from;
2512 INC_BOTH (from, from_byte);
2513 UPDATE_SYNTAX_TABLE_FORWARD (from);
2514 if (from < stop && comstart_first
2515 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2516 other_syntax = SYNTAX_WITH_FLAGS (c),
2517 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2518 && parse_sexp_ignore_comments)
2519 {
2520 /* we have encountered a comment start sequence and we
2521 are ignoring all text inside comments. We must record
2522 the comment style this sequence begins so that later,
2523 only a comment end of the same style actually ends
2524 the comment section */
2525 code = Scomment;
2526 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2527 comnested
2528 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2529 INC_BOTH (from, from_byte);
2530 UPDATE_SYNTAX_TABLE_FORWARD (from);
2531 }
2532
2533 if (prefix)
2534 continue;
2535
2536 switch (SWITCH_ENUM_CAST (code))
2537 {
2538 case Sescape:
2539 case Scharquote:
2540 if (from == stop)
2541 goto lose;
2542 INC_BOTH (from, from_byte);
2543 /* treat following character as a word constituent */
2544 case Sword:
2545 case Ssymbol:
2546 if (depth || !sexpflag) break;
2547 /* This word counts as a sexp; return at end of it. */
2548 while (from < stop)
2549 {
2550 UPDATE_SYNTAX_TABLE_FORWARD (from);
2551
2552 /* Some compilers can't handle this inside the switch. */
2553 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2554 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2555 switch (temp)
2556 {
2557 case Scharquote:
2558 case Sescape:
2559 INC_BOTH (from, from_byte);
2560 if (from == stop)
2561 goto lose;
2562 break;
2563 case Sword:
2564 case Ssymbol:
2565 case Squote:
2566 break;
2567 default:
2568 goto done;
2569 }
2570 INC_BOTH (from, from_byte);
2571 }
2572 goto done;
2573
2574 case Scomment_fence:
2575 comstyle = ST_COMMENT_STYLE;
2576 /* FALLTHROUGH */
2577 case Scomment:
2578 if (!parse_sexp_ignore_comments) break;
2579 UPDATE_SYNTAX_TABLE_FORWARD (from);
2580 found = forw_comment (from, from_byte, stop,
2581 comnested, comstyle, 0,
2582 &out_charpos, &out_bytepos, &dummy);
2583 from = out_charpos, from_byte = out_bytepos;
2584 if (!found)
2585 {
2586 if (depth == 0)
2587 goto done;
2588 goto lose;
2589 }
2590 INC_BOTH (from, from_byte);
2591 UPDATE_SYNTAX_TABLE_FORWARD (from);
2592 break;
2593
2594 case Smath:
2595 if (!sexpflag)
2596 break;
2597 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2598 {
2599 INC_BOTH (from, from_byte);
2600 }
2601 if (mathexit)
2602 {
2603 mathexit = 0;
2604 goto close1;
2605 }
2606 mathexit = 1;
2607
2608 case Sopen:
2609 if (!++depth) goto done;
2610 break;
2611
2612 case Sclose:
2613 close1:
2614 if (!--depth) goto done;
2615 if (depth < min_depth)
2616 xsignal3 (Qscan_error,
2617 build_string ("Containing expression ends prematurely"),
2618 make_number (last_good), make_number (from));
2619 break;
2620
2621 case Sstring:
2622 case Sstring_fence:
2623 temp_pos = dec_bytepos (from_byte);
2624 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2625 while (1)
2626 {
2627 if (from >= stop)
2628 goto lose;
2629 UPDATE_SYNTAX_TABLE_FORWARD (from);
2630 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2631 if (code == Sstring
2632 ? (c == stringterm
2633 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2634 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2635 break;
2636
2637 /* Some compilers can't handle this inside the switch. */
2638 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2639 switch (temp)
2640 {
2641 case Scharquote:
2642 case Sescape:
2643 INC_BOTH (from, from_byte);
2644 }
2645 INC_BOTH (from, from_byte);
2646 }
2647 INC_BOTH (from, from_byte);
2648 if (!depth && sexpflag) goto done;
2649 break;
2650 default:
2651 /* Ignore whitespace, punctuation, quote, endcomment. */
2652 break;
2653 }
2654 }
2655
2656 /* Reached end of buffer. Error if within object, return nil if between */
2657 if (depth)
2658 goto lose;
2659
2660 immediate_quit = 0;
2661 return Qnil;
2662
2663 /* End of object reached */
2664 done:
2665 count--;
2666 }
2667
2668 while (count < 0)
2669 {
2670 while (from > stop)
2671 {
2672 int syntax;
2673 DEC_BOTH (from, from_byte);
2674 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2675 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2676 syntax= SYNTAX_WITH_FLAGS (c);
2677 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2678 if (depth == min_depth)
2679 last_good = from;
2680 comstyle = 0;
2681 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2682 if (code == Sendcomment)
2683 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2684 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2685 && prev_char_comend_first (from, from_byte)
2686 && parse_sexp_ignore_comments)
2687 {
2688 /* We must record the comment style encountered so that
2689 later, we can match only the proper comment begin
2690 sequence of the same style. */
2691 int c1, other_syntax;
2692 DEC_BOTH (from, from_byte);
2693 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2694 code = Sendcomment;
2695 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2696 other_syntax = SYNTAX_WITH_FLAGS (c1);
2697 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2698 comnested
2699 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2700 }
2701
2702 /* Quoting turns anything except a comment-ender
2703 into a word character. Note that this cannot be true
2704 if we decremented FROM in the if-statement above. */
2705 if (code != Sendcomment && char_quoted (from, from_byte))
2706 {
2707 DEC_BOTH (from, from_byte);
2708 code = Sword;
2709 }
2710 else if (SYNTAX_FLAGS_PREFIX (syntax))
2711 continue;
2712
2713 switch (SWITCH_ENUM_CAST (code))
2714 {
2715 case Sword:
2716 case Ssymbol:
2717 case Sescape:
2718 case Scharquote:
2719 if (depth || !sexpflag) break;
2720 /* This word counts as a sexp; count object finished
2721 after passing it. */
2722 while (from > stop)
2723 {
2724 temp_pos = from_byte;
2725 if (! NILP (current_buffer->enable_multibyte_characters))
2726 DEC_POS (temp_pos);
2727 else
2728 temp_pos--;
2729 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2730 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2731 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2732 /* Don't allow comment-end to be quoted. */
2733 if (temp_code == Sendcomment)
2734 goto done2;
2735 quoted = char_quoted (from - 1, temp_pos);
2736 if (quoted)
2737 {
2738 DEC_BOTH (from, from_byte);
2739 temp_pos = dec_bytepos (temp_pos);
2740 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2741 }
2742 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2743 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2744 if (! (quoted || temp_code == Sword
2745 || temp_code == Ssymbol
2746 || temp_code == Squote))
2747 goto done2;
2748 DEC_BOTH (from, from_byte);
2749 }
2750 goto done2;
2751
2752 case Smath:
2753 if (!sexpflag)
2754 break;
2755 temp_pos = dec_bytepos (from_byte);
2756 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2757 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2758 DEC_BOTH (from, from_byte);
2759 if (mathexit)
2760 {
2761 mathexit = 0;
2762 goto open2;
2763 }
2764 mathexit = 1;
2765
2766 case Sclose:
2767 if (!++depth) goto done2;
2768 break;
2769
2770 case Sopen:
2771 open2:
2772 if (!--depth) goto done2;
2773 if (depth < min_depth)
2774 xsignal3 (Qscan_error,
2775 build_string ("Containing expression ends prematurely"),
2776 make_number (last_good), make_number (from));
2777 break;
2778
2779 case Sendcomment:
2780 if (!parse_sexp_ignore_comments)
2781 break;
2782 found = back_comment (from, from_byte, stop, comnested, comstyle,
2783 &out_charpos, &out_bytepos);
2784 /* FIXME: if found == -1, then it really wasn't a comment-end.
2785 For single-char Sendcomment, we can't do much about it apart
2786 from skipping the char.
2787 For 2-char endcomments, we could try again, taking both
2788 chars as separate entities, but it's a lot of trouble
2789 for very little gain, so we don't bother either. -sm */
2790 if (found != -1)
2791 from = out_charpos, from_byte = out_bytepos;
2792 break;
2793
2794 case Scomment_fence:
2795 case Sstring_fence:
2796 while (1)
2797 {
2798 if (from == stop)
2799 goto lose;
2800 DEC_BOTH (from, from_byte);
2801 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2802 if (!char_quoted (from, from_byte)
2803 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2804 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2805 break;
2806 }
2807 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2808 break;
2809
2810 case Sstring:
2811 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2812 while (1)
2813 {
2814 if (from == stop)
2815 goto lose;
2816 DEC_BOTH (from, from_byte);
2817 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2818 if (!char_quoted (from, from_byte)
2819 && (stringterm
2820 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2821 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2822 break;
2823 }
2824 if (!depth && sexpflag) goto done2;
2825 break;
2826 default:
2827 /* Ignore whitespace, punctuation, quote, endcomment. */
2828 break;
2829 }
2830 }
2831
2832 /* Reached start of buffer. Error if within object, return nil if between */
2833 if (depth)
2834 goto lose;
2835
2836 immediate_quit = 0;
2837 return Qnil;
2838
2839 done2:
2840 count++;
2841 }
2842
2843
2844 immediate_quit = 0;
2845 XSETFASTINT (val, from);
2846 return val;
2847
2848 lose:
2849 xsignal3 (Qscan_error,
2850 build_string ("Unbalanced parentheses"),
2851 make_number (last_good), make_number (from));
2852 }
2853
2854 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2855 doc: /* Scan from character number FROM by COUNT lists.
2856 Returns the character number of the position thus found.
2857
2858 If DEPTH is nonzero, paren depth begins counting from that value,
2859 only places where the depth in parentheses becomes zero
2860 are candidates for stopping; COUNT such places are counted.
2861 Thus, a positive value for DEPTH means go out levels.
2862
2863 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2864
2865 If the beginning or end of (the accessible part of) the buffer is reached
2866 and the depth is wrong, an error is signaled.
2867 If the depth is right but the count is not used up, nil is returned. */)
2868 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2869 {
2870 CHECK_NUMBER (from);
2871 CHECK_NUMBER (count);
2872 CHECK_NUMBER (depth);
2873
2874 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2875 }
2876
2877 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2878 doc: /* Scan from character number FROM by COUNT balanced expressions.
2879 If COUNT is negative, scan backwards.
2880 Returns the character number of the position thus found.
2881
2882 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2883
2884 If the beginning or end of (the accessible part of) the buffer is reached
2885 in the middle of a parenthetical grouping, an error is signaled.
2886 If the beginning or end is reached between groupings
2887 but before count is used up, nil is returned. */)
2888 (Lisp_Object from, Lisp_Object count)
2889 {
2890 CHECK_NUMBER (from);
2891 CHECK_NUMBER (count);
2892
2893 return scan_lists (XINT (from), XINT (count), 0, 1);
2894 }
2895
2896 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2897 0, 0, 0,
2898 doc: /* Move point backward over any number of chars with prefix syntax.
2899 This includes chars with "quote" or "prefix" syntax (' or p). */)
2900 (void)
2901 {
2902 EMACS_INT beg = BEGV;
2903 EMACS_INT opoint = PT;
2904 EMACS_INT opoint_byte = PT_BYTE;
2905 EMACS_INT pos = PT;
2906 EMACS_INT pos_byte = PT_BYTE;
2907 int c;
2908
2909 if (pos <= beg)
2910 {
2911 SET_PT_BOTH (opoint, opoint_byte);
2912
2913 return Qnil;
2914 }
2915
2916 SETUP_SYNTAX_TABLE (pos, -1);
2917
2918 DEC_BOTH (pos, pos_byte);
2919
2920 while (!char_quoted (pos, pos_byte)
2921 /* Previous statement updates syntax table. */
2922 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2923 || SYNTAX_PREFIX (c)))
2924 {
2925 opoint = pos;
2926 opoint_byte = pos_byte;
2927
2928 if (pos + 1 > beg)
2929 DEC_BOTH (pos, pos_byte);
2930 }
2931
2932 SET_PT_BOTH (opoint, opoint_byte);
2933
2934 return Qnil;
2935 }
2936 \f
2937 /* Parse forward from FROM / FROM_BYTE to END,
2938 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2939 and return a description of the state of the parse at END.
2940 If STOPBEFORE is nonzero, stop at the start of an atom.
2941 If COMMENTSTOP is 1, stop at the start of a comment.
2942 If COMMENTSTOP is -1, stop at the start or end of a comment,
2943 after the beginning of a string, or after the end of a string. */
2944
2945 static void
2946 scan_sexps_forward (struct lisp_parse_state *stateptr,
2947 EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2948 int targetdepth, int stopbefore,
2949 Lisp_Object oldstate, int commentstop)
2950 {
2951 struct lisp_parse_state state;
2952
2953 register enum syntaxcode code;
2954 int c1;
2955 int comnested;
2956 struct level { int last, prev; };
2957 struct level levelstart[100];
2958 register struct level *curlevel = levelstart;
2959 struct level *endlevel = levelstart + 100;
2960 register int depth; /* Paren depth of current scanning location.
2961 level - levelstart equals this except
2962 when the depth becomes negative. */
2963 int mindepth; /* Lowest DEPTH value seen. */
2964 int start_quoted = 0; /* Nonzero means starting after a char quote */
2965 Lisp_Object tem;
2966 EMACS_INT prev_from; /* Keep one character before FROM. */
2967 EMACS_INT prev_from_byte;
2968 int prev_from_syntax;
2969 int boundary_stop = commentstop == -1;
2970 int nofence;
2971 int found;
2972 EMACS_INT out_bytepos, out_charpos;
2973 int temp;
2974
2975 prev_from = from;
2976 prev_from_byte = from_byte;
2977 if (from != BEGV)
2978 DEC_BOTH (prev_from, prev_from_byte);
2979
2980 /* Use this macro instead of `from++'. */
2981 #define INC_FROM \
2982 do { prev_from = from; \
2983 prev_from_byte = from_byte; \
2984 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
2985 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2986 INC_BOTH (from, from_byte); \
2987 if (from < end) \
2988 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2989 } while (0)
2990
2991 immediate_quit = 1;
2992 QUIT;
2993
2994 if (NILP (oldstate))
2995 {
2996 depth = 0;
2997 state.instring = -1;
2998 state.incomment = 0;
2999 state.comstyle = 0; /* comment style a by default. */
3000 state.comstr_start = -1; /* no comment/string seen. */
3001 }
3002 else
3003 {
3004 tem = Fcar (oldstate);
3005 if (!NILP (tem))
3006 depth = XINT (tem);
3007 else
3008 depth = 0;
3009
3010 oldstate = Fcdr (oldstate);
3011 oldstate = Fcdr (oldstate);
3012 oldstate = Fcdr (oldstate);
3013 tem = Fcar (oldstate);
3014 /* Check whether we are inside string_fence-style string: */
3015 state.instring = (!NILP (tem)
3016 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
3017 : -1);
3018
3019 oldstate = Fcdr (oldstate);
3020 tem = Fcar (oldstate);
3021 state.incomment = (!NILP (tem)
3022 ? (INTEGERP (tem) ? XINT (tem) : -1)
3023 : 0);
3024
3025 oldstate = Fcdr (oldstate);
3026 tem = Fcar (oldstate);
3027 start_quoted = !NILP (tem);
3028
3029 /* if the eighth element of the list is nil, we are in comment
3030 style a. If it is non-nil, we are in comment style b */
3031 oldstate = Fcdr (oldstate);
3032 oldstate = Fcdr (oldstate);
3033 tem = Fcar (oldstate);
3034 state.comstyle = (NILP (tem)
3035 ? 0
3036 : (EQ (tem, Qsyntax_table)
3037 ? ST_COMMENT_STYLE
3038 : INTEGERP (tem) ? XINT (tem) : 1));
3039
3040 oldstate = Fcdr (oldstate);
3041 tem = Fcar (oldstate);
3042 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
3043 oldstate = Fcdr (oldstate);
3044 tem = Fcar (oldstate);
3045 while (!NILP (tem)) /* >= second enclosing sexps. */
3046 {
3047 /* curlevel++->last ran into compiler bug on Apollo */
3048 curlevel->last = XINT (Fcar (tem));
3049 if (++curlevel == endlevel)
3050 curlevel--; /* error ("Nesting too deep for parser"); */
3051 curlevel->prev = -1;
3052 curlevel->last = -1;
3053 tem = Fcdr (tem);
3054 }
3055 }
3056 state.quoted = 0;
3057 mindepth = depth;
3058
3059 curlevel->prev = -1;
3060 curlevel->last = -1;
3061
3062 SETUP_SYNTAX_TABLE (prev_from, 1);
3063 temp = FETCH_CHAR (prev_from_byte);
3064 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3065 UPDATE_SYNTAX_TABLE_FORWARD (from);
3066
3067 /* Enter the loop at a place appropriate for initial state. */
3068
3069 if (state.incomment)
3070 goto startincomment;
3071 if (state.instring >= 0)
3072 {
3073 nofence = state.instring != ST_STRING_STYLE;
3074 if (start_quoted)
3075 goto startquotedinstring;
3076 goto startinstring;
3077 }
3078 else if (start_quoted)
3079 goto startquoted;
3080
3081 while (from < end)
3082 {
3083 int syntax;
3084 INC_FROM;
3085 code = prev_from_syntax & 0xff;
3086
3087 if (from < end
3088 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3089 && (c1 = FETCH_CHAR (from_byte),
3090 syntax = SYNTAX_WITH_FLAGS (c1),
3091 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3092 /* Duplicate code to avoid a complex if-expression
3093 which causes trouble for the SGI compiler. */
3094 {
3095 /* Record the comment style we have entered so that only
3096 the comment-end sequence of the same style actually
3097 terminates the comment section. */
3098 state.comstyle
3099 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3100 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3101 comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
3102 state.incomment = comnested ? 1 : -1;
3103 state.comstr_start = prev_from;
3104 INC_FROM;
3105 code = Scomment;
3106 }
3107 else if (code == Scomment_fence)
3108 {
3109 /* Record the comment style we have entered so that only
3110 the comment-end sequence of the same style actually
3111 terminates the comment section. */
3112 state.comstyle = ST_COMMENT_STYLE;
3113 state.incomment = -1;
3114 state.comstr_start = prev_from;
3115 code = Scomment;
3116 }
3117 else if (code == Scomment)
3118 {
3119 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3120 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3121 1 : -1);
3122 state.comstr_start = prev_from;
3123 }
3124
3125 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3126 continue;
3127 switch (SWITCH_ENUM_CAST (code))
3128 {
3129 case Sescape:
3130 case Scharquote:
3131 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3132 curlevel->last = prev_from;
3133 startquoted:
3134 if (from == end) goto endquoted;
3135 INC_FROM;
3136 goto symstarted;
3137 /* treat following character as a word constituent */
3138 case Sword:
3139 case Ssymbol:
3140 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3141 curlevel->last = prev_from;
3142 symstarted:
3143 while (from < end)
3144 {
3145 /* Some compilers can't handle this inside the switch. */
3146 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3147 temp = SYNTAX (temp);
3148 switch (temp)
3149 {
3150 case Scharquote:
3151 case Sescape:
3152 INC_FROM;
3153 if (from == end) goto endquoted;
3154 break;
3155 case Sword:
3156 case Ssymbol:
3157 case Squote:
3158 break;
3159 default:
3160 goto symdone;
3161 }
3162 INC_FROM;
3163 }
3164 symdone:
3165 curlevel->prev = curlevel->last;
3166 break;
3167
3168 case Scomment_fence: /* Can't happen because it's handled above. */
3169 case Scomment:
3170 if (commentstop || boundary_stop) goto done;
3171 startincomment:
3172 /* The (from == BEGV) test was to enter the loop in the middle so
3173 that we find a 2-char comment ender even if we start in the
3174 middle of it. We don't want to do that if we're just at the
3175 beginning of the comment (think of (*) ... (*)). */
3176 found = forw_comment (from, from_byte, end,
3177 state.incomment, state.comstyle,
3178 (from == BEGV || from < state.comstr_start + 3)
3179 ? 0 : prev_from_syntax,
3180 &out_charpos, &out_bytepos, &state.incomment);
3181 from = out_charpos; from_byte = out_bytepos;
3182 /* Beware! prev_from and friends are invalid now.
3183 Luckily, the `done' doesn't use them and the INC_FROM
3184 sets them to a sane value without looking at them. */
3185 if (!found) goto done;
3186 INC_FROM;
3187 state.incomment = 0;
3188 state.comstyle = 0; /* reset the comment style */
3189 if (boundary_stop) goto done;
3190 break;
3191
3192 case Sopen:
3193 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3194 depth++;
3195 /* curlevel++->last ran into compiler bug on Apollo */
3196 curlevel->last = prev_from;
3197 if (++curlevel == endlevel)
3198 curlevel--; /* error ("Nesting too deep for parser"); */
3199 curlevel->prev = -1;
3200 curlevel->last = -1;
3201 if (targetdepth == depth) goto done;
3202 break;
3203
3204 case Sclose:
3205 depth--;
3206 if (depth < mindepth)
3207 mindepth = depth;
3208 if (curlevel != levelstart)
3209 curlevel--;
3210 curlevel->prev = curlevel->last;
3211 if (targetdepth == depth) goto done;
3212 break;
3213
3214 case Sstring:
3215 case Sstring_fence:
3216 state.comstr_start = from - 1;
3217 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3218 curlevel->last = prev_from;
3219 state.instring = (code == Sstring
3220 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3221 : ST_STRING_STYLE);
3222 if (boundary_stop) goto done;
3223 startinstring:
3224 {
3225 nofence = state.instring != ST_STRING_STYLE;
3226
3227 while (1)
3228 {
3229 int c;
3230
3231 if (from >= end) goto done;
3232 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3233 /* Some compilers can't handle this inside the switch. */
3234 temp = SYNTAX (c);
3235
3236 /* Check TEMP here so that if the char has
3237 a syntax-table property which says it is NOT
3238 a string character, it does not end the string. */
3239 if (nofence && c == state.instring && temp == Sstring)
3240 break;
3241
3242 switch (temp)
3243 {
3244 case Sstring_fence:
3245 if (!nofence) goto string_end;
3246 break;
3247 case Scharquote:
3248 case Sescape:
3249 INC_FROM;
3250 startquotedinstring:
3251 if (from >= end) goto endquoted;
3252 }
3253 INC_FROM;
3254 }
3255 }
3256 string_end:
3257 state.instring = -1;
3258 curlevel->prev = curlevel->last;
3259 INC_FROM;
3260 if (boundary_stop) goto done;
3261 break;
3262
3263 case Smath:
3264 /* FIXME: We should do something with it. */
3265 break;
3266 default:
3267 /* Ignore whitespace, punctuation, quote, endcomment. */
3268 break;
3269 }
3270 }
3271 goto done;
3272
3273 stop: /* Here if stopping before start of sexp. */
3274 from = prev_from; /* We have just fetched the char that starts it; */
3275 goto done; /* but return the position before it. */
3276
3277 endquoted:
3278 state.quoted = 1;
3279 done:
3280 state.depth = depth;
3281 state.mindepth = mindepth;
3282 state.thislevelstart = curlevel->prev;
3283 state.prevlevelstart
3284 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3285 state.location = from;
3286 state.levelstarts = Qnil;
3287 while (--curlevel >= levelstart)
3288 state.levelstarts = Fcons (make_number (curlevel->last),
3289 state.levelstarts);
3290 immediate_quit = 0;
3291
3292 *stateptr = state;
3293 }
3294
3295 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3296 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3297 Parsing stops at TO or when certain criteria are met;
3298 point is set to where parsing stops.
3299 If fifth arg OLDSTATE is omitted or nil,
3300 parsing assumes that FROM is the beginning of a function.
3301 Value is a list of elements describing final state of parsing:
3302 0. depth in parens.
3303 1. character address of start of innermost containing list; nil if none.
3304 2. character address of start of last complete sexp terminated.
3305 3. non-nil if inside a string.
3306 (it is the character that will terminate the string,
3307 or t if the string should be terminated by a generic string delimiter.)
3308 4. nil if outside a comment, t if inside a non-nestable comment,
3309 else an integer (the current comment nesting).
3310 5. t if following a quote character.
3311 6. the minimum paren-depth encountered during this scan.
3312 7. style of comment, if any.
3313 8. character address of start of comment or string; nil if not in one.
3314 9. Intermediate data for continuation of parsing (subject to change).
3315 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3316 in parentheses becomes equal to TARGETDEPTH.
3317 Fourth arg STOPBEFORE non-nil means stop when come to
3318 any character that starts a sexp.
3319 Fifth arg OLDSTATE is a list like what this function returns.
3320 It is used to initialize the state of the parse. Elements number 1, 2, 6
3321 and 8 are ignored.
3322 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3323 If it is symbol `syntax-table', stop after the start of a comment or a
3324 string, or after end of a comment or a string. */)
3325 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3326 {
3327 struct lisp_parse_state state;
3328 int target;
3329
3330 if (!NILP (targetdepth))
3331 {
3332 CHECK_NUMBER (targetdepth);
3333 target = XINT (targetdepth);
3334 }
3335 else
3336 target = -100000; /* We won't reach this depth */
3337
3338 validate_region (&from, &to);
3339 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3340 XINT (to),
3341 target, !NILP (stopbefore), oldstate,
3342 (NILP (commentstop)
3343 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3344
3345 SET_PT (state.location);
3346
3347 return Fcons (make_number (state.depth),
3348 Fcons (state.prevlevelstart < 0
3349 ? Qnil : make_number (state.prevlevelstart),
3350 Fcons (state.thislevelstart < 0
3351 ? Qnil : make_number (state.thislevelstart),
3352 Fcons (state.instring >= 0
3353 ? (state.instring == ST_STRING_STYLE
3354 ? Qt : make_number (state.instring)) : Qnil,
3355 Fcons (state.incomment < 0 ? Qt :
3356 (state.incomment == 0 ? Qnil :
3357 make_number (state.incomment)),
3358 Fcons (state.quoted ? Qt : Qnil,
3359 Fcons (make_number (state.mindepth),
3360 Fcons ((state.comstyle
3361 ? (state.comstyle == ST_COMMENT_STYLE
3362 ? Qsyntax_table
3363 : make_number (state.comstyle))
3364 : Qnil),
3365 Fcons (((state.incomment
3366 || (state.instring >= 0))
3367 ? make_number (state.comstr_start)
3368 : Qnil),
3369 Fcons (state.levelstarts, Qnil))))))))));
3370 }
3371 \f
3372 void
3373 init_syntax_once (void)
3374 {
3375 register int i, c;
3376 Lisp_Object temp;
3377
3378 /* This has to be done here, before we call Fmake_char_table. */
3379 Qsyntax_table = intern_c_string ("syntax-table");
3380 staticpro (&Qsyntax_table);
3381
3382 /* Intern_C_String this now in case it isn't already done.
3383 Setting this variable twice is harmless.
3384 But don't staticpro it here--that is done in alloc.c. */
3385 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3386
3387 /* Create objects which can be shared among syntax tables. */
3388 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3389 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3390 XVECTOR (Vsyntax_code_object)->contents[i]
3391 = Fcons (make_number (i), Qnil);
3392
3393 /* Now we are ready to set up this property, so we can
3394 create syntax tables. */
3395 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3396
3397 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3398
3399 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3400
3401 /* Control characters should not be whitespace. */
3402 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3403 for (i = 0; i <= ' ' - 1; i++)
3404 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3405 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3406
3407 /* Except that a few really are whitespace. */
3408 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3409 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3410 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3411 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3412 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3413 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3414
3415 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3416 for (i = 'a'; i <= 'z'; i++)
3417 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3418 for (i = 'A'; i <= 'Z'; i++)
3419 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3420 for (i = '0'; i <= '9'; i++)
3421 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3422
3423 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3424 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3425
3426 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3427 Fcons (make_number (Sopen), make_number (')')));
3428 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3429 Fcons (make_number (Sclose), make_number ('(')));
3430 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3431 Fcons (make_number (Sopen), make_number (']')));
3432 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3433 Fcons (make_number (Sclose), make_number ('[')));
3434 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3435 Fcons (make_number (Sopen), make_number ('}')));
3436 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3437 Fcons (make_number (Sclose), make_number ('{')));
3438 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3439 Fcons (make_number ((int) Sstring), Qnil));
3440 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3441 Fcons (make_number ((int) Sescape), Qnil));
3442
3443 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3444 for (i = 0; i < 10; i++)
3445 {
3446 c = "_-+*/&|<>="[i];
3447 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3448 }
3449
3450 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3451 for (i = 0; i < 12; i++)
3452 {
3453 c = ".,;:?!#@~^'`"[i];
3454 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3455 }
3456
3457 /* All multibyte characters have syntax `word' by default. */
3458 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3459 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3460 }
3461
3462 void
3463 syms_of_syntax (void)
3464 {
3465 Qsyntax_table_p = intern_c_string ("syntax-table-p");
3466 staticpro (&Qsyntax_table_p);
3467
3468 staticpro (&Vsyntax_code_object);
3469
3470 staticpro (&gl_state.object);
3471 staticpro (&gl_state.global_code);
3472 staticpro (&gl_state.current_syntax_table);
3473 staticpro (&gl_state.old_prop);
3474
3475 /* Defined in regex.c */
3476 staticpro (&re_match_object);
3477
3478 Qscan_error = intern_c_string ("scan-error");
3479 staticpro (&Qscan_error);
3480 Fput (Qscan_error, Qerror_conditions,
3481 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3482 Fput (Qscan_error, Qerror_message,
3483 make_pure_c_string ("Scan error"));
3484
3485 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3486 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3487
3488 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3489 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3490 Otherwise, that text property is simply ignored.
3491 See the info node `(elisp)Syntax Properties' for a description of the
3492 `syntax-table' property. */);
3493
3494 words_include_escapes = 0;
3495 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3496 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3497
3498 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3499 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3500 multibyte_syntax_as_symbol = 0;
3501
3502 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3503 &open_paren_in_column_0_is_defun_start,
3504 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
3505 open_paren_in_column_0_is_defun_start = 1;
3506
3507
3508 DEFVAR_LISP ("find-word-boundary-function-table",
3509 &Vfind_word_boundary_function_table,
3510 doc: /*
3511 Char table of functions to search for the word boundary.
3512 Each function is called with two arguments; POS and LIMIT.
3513 POS and LIMIT are character positions in the current buffer.
3514
3515 If POS is less than LIMIT, POS is at the first character of a word,
3516 and the return value of a function is a position after the last
3517 character of that word.
3518
3519 If POS is not less than LIMIT, POS is at the last character of a word,
3520 and the return value of a function is a position at the first
3521 character of that word.
3522
3523 In both cases, LIMIT bounds the search. */);
3524 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3525
3526 defsubr (&Ssyntax_table_p);
3527 defsubr (&Ssyntax_table);
3528 defsubr (&Sstandard_syntax_table);
3529 defsubr (&Scopy_syntax_table);
3530 defsubr (&Sset_syntax_table);
3531 defsubr (&Schar_syntax);
3532 defsubr (&Smatching_paren);
3533 defsubr (&Sstring_to_syntax);
3534 defsubr (&Smodify_syntax_entry);
3535 defsubr (&Sinternal_describe_syntax_value);
3536
3537 defsubr (&Sforward_word);
3538
3539 defsubr (&Sskip_chars_forward);
3540 defsubr (&Sskip_chars_backward);
3541 defsubr (&Sskip_syntax_forward);
3542 defsubr (&Sskip_syntax_backward);
3543
3544 defsubr (&Sforward_comment);
3545 defsubr (&Sscan_lists);
3546 defsubr (&Sscan_sexps);
3547 defsubr (&Sbackward_prefix_chars);
3548 defsubr (&Sparse_partial_sexp);
3549 }
3550
3551 /* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3552 (do not change this comment) */