f2451332b195f0452a567ddd10f75082f9fbe84b
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2014 Free
3 Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20
21 #include <config.h>
22
23 #include <sys/types.h>
24
25 #include "lisp.h"
26 #include "commands.h"
27 #include "character.h"
28 #include "buffer.h"
29 #include "keymap.h"
30 #include "regex.h"
31
32 #include "syntax.h"
33 #include "intervals.h"
34 #include "category.h"
35
36 /* Make syntax table lookup grant data in gl_state. */
37 #define SYNTAX(c) syntax_property (c, 1)
38 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
39 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
40
41 /* Eight single-bit flags have the following meanings:
42 1. This character is the first of a two-character comment-start sequence.
43 2. This character is the second of a two-character comment-start sequence.
44 3. This character is the first of a two-character comment-end sequence.
45 4. This character is the second of a two-character comment-end sequence.
46 5. This character is a prefix, for backward-prefix-chars.
47 6. The char is part of a delimiter for comments of style "b".
48 7. This character is part of a nestable comment sequence.
49 8. The char is part of a delimiter for comments of style "c".
50 Note that any two-character sequence whose first character has flag 1
51 and whose second character has flag 2 will be interpreted as a comment start.
52
53 Bits 6 and 8 discriminate among different comment styles.
54 Languages such as C++ allow two orthogonal syntax start/end pairs
55 and bit 6 determines whether a comment-end or Scommentend
56 ends style a or b. Comment markers can start style a, b, c, or bc.
57 Style a is always the default.
58 For 2-char comment markers, the style b flag is looked up only on the second
59 char of the comment marker and on the first char of the comment ender.
60 For style c (like the nested flag), the flag can be placed on any of
61 the chars. */
62
63 /* These functions extract specific flags from an integer
64 that holds the syntax code and the flags. */
65
66 static bool
67 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
68 {
69 return (flags >> 16) & 1;
70 }
71 static bool
72 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
73 {
74 return (flags >> 17) & 1;
75 }
76 static bool
77 SYNTAX_FLAGS_COMEND_FIRST (int flags)
78 {
79 return (flags >> 18) & 1;
80 }
81 static bool
82 SYNTAX_FLAGS_COMEND_SECOND (int flags)
83 {
84 return (flags >> 19) & 1;
85 }
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
88 {
89 return (flags >> 20) & 1;
90 }
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
93 {
94 return (flags >> 21) & 1;
95 }
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
98 {
99 return (flags >> 23) & 1;
100 }
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
103 {
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
105 }
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
108 {
109 return (flags >> 22) & 1;
110 }
111
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
116 {
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
120 }
121
122 /* Extract a particular flag for a given character. */
123
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
126 {
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
128 }
129
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
133
134 enum
135 {
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
138 };
139
140 static Lisp_Object Qsyntax_table_p;
141 static Lisp_Object Qsyntax_table, Qscan_error;
142
143 /* This is the internal form of the parse state used in parse-partial-sexp. */
144
145 struct lisp_parse_state
146 {
147 EMACS_INT depth; /* Depth at end of parsing. */
148 int instring; /* -1 if not within string, else desired terminator. */
149 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
150 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
151 bool quoted; /* True if just after an escape char at end of parsing. */
152 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
153 /* Char number of most recent start-of-expression at current level */
154 ptrdiff_t thislevelstart;
155 /* Char number of start of containing expression */
156 ptrdiff_t prevlevelstart;
157 ptrdiff_t location; /* Char number at which parsing stopped. */
158 ptrdiff_t location_byte; /* Corresponding byte position. */
159 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
160 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
161 of levels (starting from outermost). */
162 };
163 \f
164 /* These variables are a cache for finding the start of a defun.
165 find_start_pos is the place for which the defun start was found.
166 find_start_value is the defun start position found for it.
167 find_start_value_byte is the corresponding byte position.
168 find_start_buffer is the buffer it was found in.
169 find_start_begv is the BEGV value when it was found.
170 find_start_modiff is the value of MODIFF when it was found. */
171
172 static ptrdiff_t find_start_pos;
173 static ptrdiff_t find_start_value;
174 static ptrdiff_t find_start_value_byte;
175 static struct buffer *find_start_buffer;
176 static ptrdiff_t find_start_begv;
177 static EMACS_INT find_start_modiff;
178
179
180 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
181 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
182 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
183 static void scan_sexps_forward (struct lisp_parse_state *,
184 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
185 bool, Lisp_Object, int);
186 static bool in_classes (int, Lisp_Object);
187
188 /* This setter is used only in this file, so it can be private. */
189 static void
190 bset_syntax_table (struct buffer *b, Lisp_Object val)
191 {
192 b->INTERNAL_FIELD (syntax_table) = val;
193 }
194 \f
195 /* Whether the syntax of the character C has the prefix flag set. */
196 bool
197 syntax_prefix_flag_p (int c)
198 {
199 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
200 }
201
202 struct gl_state_s gl_state; /* Global state of syntax parser. */
203
204 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
205 to scan to property-change. */
206
207 /* Set the syntax entry VAL for char C in table TABLE. */
208
209 static void
210 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
211 {
212 CHAR_TABLE_SET (table, c, val);
213 }
214
215 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
216 RANGE is a cons (FROM . TO) specifying the range of characters. */
217
218 static void
219 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
220 Lisp_Object val)
221 {
222 Fset_char_table_range (table, range, val);
223 }
224
225 /* Extract the information from the entry for character C
226 in the current syntax table. */
227
228 static Lisp_Object
229 SYNTAX_MATCH (int c)
230 {
231 Lisp_Object ent = SYNTAX_ENTRY (c);
232 return CONSP (ent) ? XCDR (ent) : Qnil;
233 }
234
235 /* This should be called with FROM at the start of forward
236 search, or after the last position of the backward search. It
237 makes sure that the first char is picked up with correct table, so
238 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
239 call.
240 Sign of COUNT gives the direction of the search.
241 */
242
243 static void
244 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
245 {
246 SETUP_BUFFER_SYNTAX_TABLE ();
247 gl_state.b_property = BEGV;
248 gl_state.e_property = ZV + 1;
249 gl_state.object = Qnil;
250 gl_state.offset = 0;
251 if (parse_sexp_lookup_properties)
252 if (count > 0 || from > BEGV)
253 update_syntax_table (count > 0 ? from : from - 1, count, 1, Qnil);
254 }
255
256 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
257 If it is t (which is only used in fast_c_string_match_ignore_case),
258 ignore properties altogether.
259
260 This is meant for regex.c to use. For buffers, regex.c passes arguments
261 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
262 So if it is a buffer, we set the offset field to BEGV. */
263
264 void
265 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
266 ptrdiff_t from, ptrdiff_t count)
267 {
268 SETUP_BUFFER_SYNTAX_TABLE ();
269 gl_state.object = object;
270 if (BUFFERP (gl_state.object))
271 {
272 struct buffer *buf = XBUFFER (gl_state.object);
273 gl_state.b_property = 1;
274 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
275 gl_state.offset = BUF_BEGV (buf) - 1;
276 }
277 else if (NILP (gl_state.object))
278 {
279 gl_state.b_property = 1;
280 gl_state.e_property = ZV - BEGV + 1;
281 gl_state.offset = BEGV - 1;
282 }
283 else if (EQ (gl_state.object, Qt))
284 {
285 gl_state.b_property = 0;
286 gl_state.e_property = PTRDIFF_MAX;
287 gl_state.offset = 0;
288 }
289 else
290 {
291 gl_state.b_property = 0;
292 gl_state.e_property = 1 + SCHARS (gl_state.object);
293 gl_state.offset = 0;
294 }
295 if (parse_sexp_lookup_properties)
296 update_syntax_table (from + gl_state.offset - (count <= 0),
297 count, 1, gl_state.object);
298 }
299
300 /* Update gl_state to an appropriate interval which contains CHARPOS. The
301 sign of COUNT give the relative position of CHARPOS wrt the previously
302 valid interval. If INIT, only [be]_property fields of gl_state are
303 valid at start, the rest is filled basing on OBJECT.
304
305 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
306 direction than the intervals - or in an interval. We update the
307 current syntax-table basing on the property of this interval, and
308 update the interval to start further than CHARPOS - or be
309 NULL. We also update lim_property to be the next value of
310 charpos to call this subroutine again - or be before/after the
311 start/end of OBJECT. */
312
313 void
314 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
315 Lisp_Object object)
316 {
317 Lisp_Object tmp_table;
318 int cnt = 0;
319 bool invalidate = 1;
320 INTERVAL i;
321
322 if (init)
323 {
324 gl_state.old_prop = Qnil;
325 gl_state.start = gl_state.b_property;
326 gl_state.stop = gl_state.e_property;
327 i = interval_of (charpos, object);
328 gl_state.backward_i = gl_state.forward_i = i;
329 invalidate = 0;
330 if (!i)
331 return;
332 /* interval_of updates only ->position of the return value, so
333 update the parents manually to speed up update_interval. */
334 while (!NULL_PARENT (i))
335 {
336 if (AM_RIGHT_CHILD (i))
337 INTERVAL_PARENT (i)->position = i->position
338 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
339 - TOTAL_LENGTH (INTERVAL_PARENT (i))
340 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
341 else
342 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
343 + TOTAL_LENGTH (i);
344 i = INTERVAL_PARENT (i);
345 }
346 i = gl_state.forward_i;
347 gl_state.b_property = i->position - gl_state.offset;
348 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
349 goto update;
350 }
351 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
352
353 /* We are guaranteed to be called with CHARPOS either in i,
354 or further off. */
355 if (!i)
356 error ("Error in syntax_table logic for to-the-end intervals");
357 else if (charpos < i->position) /* Move left. */
358 {
359 if (count > 0)
360 error ("Error in syntax_table logic for intervals <-");
361 /* Update the interval. */
362 i = update_interval (i, charpos);
363 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
364 {
365 invalidate = 0;
366 gl_state.forward_i = i;
367 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
368 }
369 }
370 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
371 {
372 if (count < 0)
373 error ("Error in syntax_table logic for intervals ->");
374 /* Update the interval. */
375 i = update_interval (i, charpos);
376 if (i->position != gl_state.e_property)
377 {
378 invalidate = 0;
379 gl_state.backward_i = i;
380 gl_state.b_property = i->position - gl_state.offset;
381 }
382 }
383
384 update:
385 tmp_table = textget (i->plist, Qsyntax_table);
386
387 if (invalidate)
388 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
389
390 if (invalidate) /* Did not get to adjacent interval. */
391 { /* with the same table => */
392 /* invalidate the old range. */
393 if (count > 0)
394 {
395 gl_state.backward_i = i;
396 gl_state.b_property = i->position - gl_state.offset;
397 }
398 else
399 {
400 gl_state.forward_i = i;
401 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
402 }
403 }
404
405 if (!EQ (tmp_table, gl_state.old_prop))
406 {
407 gl_state.current_syntax_table = tmp_table;
408 gl_state.old_prop = tmp_table;
409 if (EQ (Fsyntax_table_p (tmp_table), Qt))
410 {
411 gl_state.use_global = 0;
412 }
413 else if (CONSP (tmp_table))
414 {
415 gl_state.use_global = 1;
416 gl_state.global_code = tmp_table;
417 }
418 else
419 {
420 gl_state.use_global = 0;
421 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
422 }
423 }
424
425 while (i)
426 {
427 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
428 {
429 if (count > 0)
430 {
431 gl_state.e_property = i->position - gl_state.offset;
432 gl_state.forward_i = i;
433 }
434 else
435 {
436 gl_state.b_property
437 = i->position + LENGTH (i) - gl_state.offset;
438 gl_state.backward_i = i;
439 }
440 return;
441 }
442 else if (cnt == INTERVALS_AT_ONCE)
443 {
444 if (count > 0)
445 {
446 gl_state.e_property
447 = i->position + LENGTH (i) - gl_state.offset
448 /* e_property at EOB is not set to ZV but to ZV+1, so that
449 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
450 having to check eob between the two. */
451 + (next_interval (i) ? 0 : 1);
452 gl_state.forward_i = i;
453 }
454 else
455 {
456 gl_state.b_property = i->position - gl_state.offset;
457 gl_state.backward_i = i;
458 }
459 return;
460 }
461 cnt++;
462 i = count > 0 ? next_interval (i) : previous_interval (i);
463 }
464 eassert (i == NULL); /* This property goes to the end. */
465 if (count > 0)
466 gl_state.e_property = gl_state.stop;
467 else
468 gl_state.b_property = gl_state.start;
469 }
470 \f
471 /* Returns true if char at CHARPOS is quoted.
472 Global syntax-table data should be set up already to be good at CHARPOS
473 or after. On return global syntax data is good for lookup at CHARPOS. */
474
475 static bool
476 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
477 {
478 enum syntaxcode code;
479 ptrdiff_t beg = BEGV;
480 bool quoted = 0;
481 ptrdiff_t orig = charpos;
482
483 while (charpos > beg)
484 {
485 int c;
486 DEC_BOTH (charpos, bytepos);
487
488 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
489 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
490 code = SYNTAX (c);
491 if (! (code == Scharquote || code == Sescape))
492 break;
493
494 quoted = !quoted;
495 }
496
497 UPDATE_SYNTAX_TABLE (orig);
498 return quoted;
499 }
500
501 /* Return the bytepos one character before BYTEPOS.
502 We assume that BYTEPOS is not at the start of the buffer. */
503
504 static ptrdiff_t
505 dec_bytepos (ptrdiff_t bytepos)
506 {
507 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
508 return bytepos - 1;
509
510 DEC_POS (bytepos);
511 return bytepos;
512 }
513 \f
514 /* Return a defun-start position before POS and not too far before.
515 It should be the last one before POS, or nearly the last.
516
517 When open_paren_in_column_0_is_defun_start is nonzero,
518 only the beginning of the buffer is treated as a defun-start.
519
520 We record the information about where the scan started
521 and what its result was, so that another call in the same area
522 can return the same value very quickly.
523
524 There is no promise at which position the global syntax data is
525 valid on return from the subroutine, so the caller should explicitly
526 update the global data. */
527
528 static ptrdiff_t
529 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
530 {
531 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
532
533 if (!open_paren_in_column_0_is_defun_start)
534 {
535 find_start_value = BEGV;
536 find_start_value_byte = BEGV_BYTE;
537 find_start_buffer = current_buffer;
538 find_start_modiff = MODIFF;
539 find_start_begv = BEGV;
540 find_start_pos = pos;
541 return BEGV;
542 }
543
544 /* Use previous finding, if it's valid and applies to this inquiry. */
545 if (current_buffer == find_start_buffer
546 /* Reuse the defun-start even if POS is a little farther on.
547 POS might be in the next defun, but that's ok.
548 Our value may not be the best possible, but will still be usable. */
549 && pos <= find_start_pos + 1000
550 && pos >= find_start_value
551 && BEGV == find_start_begv
552 && MODIFF == find_start_modiff)
553 return find_start_value;
554
555 /* Back up to start of line. */
556 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
557
558 /* We optimize syntax-table lookup for rare updates. Thus we accept
559 only those `^\s(' which are good in global _and_ text-property
560 syntax-tables. */
561 SETUP_BUFFER_SYNTAX_TABLE ();
562 while (PT > BEGV)
563 {
564 int c;
565
566 /* Open-paren at start of line means we may have found our
567 defun-start. */
568 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
569 if (SYNTAX (c) == Sopen)
570 {
571 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
572 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
573 if (SYNTAX (c) == Sopen)
574 break;
575 /* Now fallback to the default value. */
576 SETUP_BUFFER_SYNTAX_TABLE ();
577 }
578 /* Move to beg of previous line. */
579 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
580 }
581
582 /* Record what we found, for the next try. */
583 find_start_value = PT;
584 find_start_value_byte = PT_BYTE;
585 find_start_buffer = current_buffer;
586 find_start_modiff = MODIFF;
587 find_start_begv = BEGV;
588 find_start_pos = pos;
589
590 TEMP_SET_PT_BOTH (opoint, opoint_byte);
591
592 return find_start_value;
593 }
594 \f
595 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
596
597 static bool
598 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
599 {
600 int c;
601 bool val;
602
603 DEC_BOTH (pos, pos_byte);
604 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
605 c = FETCH_CHAR (pos_byte);
606 val = SYNTAX_COMEND_FIRST (c);
607 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
608 return val;
609 }
610
611 /* Check whether charpos FROM is at the end of a comment.
612 FROM_BYTE is the bytepos corresponding to FROM.
613 Do not move back before STOP.
614
615 Return true if we find a comment ending at FROM/FROM_BYTE.
616
617 If successful, store the charpos of the comment's beginning
618 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
619
620 Global syntax data remains valid for backward search starting at
621 the returned value (or at FROM, if the search was not successful). */
622
623 static bool
624 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
625 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
626 ptrdiff_t *bytepos_ptr)
627 {
628 /* Look back, counting the parity of string-quotes,
629 and recording the comment-starters seen.
630 When we reach a safe place, assume that's not in a string;
631 then step the main scan to the earliest comment-starter seen
632 an even number of string quotes away from the safe place.
633
634 OFROM[I] is position of the earliest comment-starter seen
635 which is I+2X quotes from the comment-end.
636 PARITY is current parity of quotes from the comment end. */
637 int string_style = -1; /* Presumed outside of any string. */
638 bool string_lossage = 0;
639 /* Not a real lossage: indicates that we have passed a matching comment
640 starter plus a non-matching comment-ender, meaning that any matching
641 comment-starter we might see later could be a false positive (hidden
642 inside another comment).
643 Test case: { a (* b } c (* d *) */
644 bool comment_lossage = 0;
645 ptrdiff_t comment_end = from;
646 ptrdiff_t comment_end_byte = from_byte;
647 ptrdiff_t comstart_pos = 0;
648 ptrdiff_t comstart_byte IF_LINT (= 0);
649 /* Place where the containing defun starts,
650 or 0 if we didn't come across it yet. */
651 ptrdiff_t defun_start = 0;
652 ptrdiff_t defun_start_byte = 0;
653 enum syntaxcode code;
654 ptrdiff_t nesting = 1; /* current comment nesting */
655 int c;
656 int syntax = 0;
657
658 /* FIXME: A }} comment-ender style leads to incorrect behavior
659 in the case of {{ c }}} because we ignore the last two chars which are
660 assumed to be comment-enders although they aren't. */
661
662 /* At beginning of range to scan, we're outside of strings;
663 that determines quote parity to the comment-end. */
664 while (from != stop)
665 {
666 ptrdiff_t temp_byte;
667 int prev_syntax;
668 bool com2start, com2end, comstart;
669
670 /* Move back and examine a character. */
671 DEC_BOTH (from, from_byte);
672 UPDATE_SYNTAX_TABLE_BACKWARD (from);
673
674 prev_syntax = syntax;
675 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
676 syntax = SYNTAX_WITH_FLAGS (c);
677 code = SYNTAX (c);
678
679 /* Check for 2-char comment markers. */
680 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
681 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
682 && (comstyle
683 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
684 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
685 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
686 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
687 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
688 comstart = (com2start || code == Scomment);
689
690 /* Nasty cases with overlapping 2-char comment markers:
691 - snmp-mode: -- c -- foo -- c --
692 --- c --
693 ------ c --
694 - c-mode: *||*
695 |* *|* *|
696 |*| |* |*|
697 /// */
698
699 /* If a 2-char comment sequence partly overlaps with another,
700 we don't try to be clever. E.g. |*| in C, or }% in modes that
701 have %..\n and %{..}%. */
702 if (from > stop && (com2end || comstart))
703 {
704 ptrdiff_t next = from, next_byte = from_byte;
705 int next_c, next_syntax;
706 DEC_BOTH (next, next_byte);
707 UPDATE_SYNTAX_TABLE_BACKWARD (next);
708 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
709 next_syntax = SYNTAX_WITH_FLAGS (next_c);
710 if (((comstart || comnested)
711 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
712 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
713 || ((com2end || comnested)
714 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
715 && (comstyle
716 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
717 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
718 goto lossage;
719 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
720 }
721
722 if (com2start && comstart_pos == 0)
723 /* We're looking at a comment starter. But it might be a comment
724 ender as well (see snmp-mode). The first time we see one, we
725 need to consider it as a comment starter,
726 and the subsequent times as a comment ender. */
727 com2end = 0;
728
729 /* Turn a 2-char comment sequences into the appropriate syntax. */
730 if (com2end)
731 code = Sendcomment;
732 else if (com2start)
733 code = Scomment;
734 /* Ignore comment starters of a different style. */
735 else if (code == Scomment
736 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
737 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
738 continue;
739
740 /* Ignore escaped characters, except comment-enders. */
741 if (code != Sendcomment && char_quoted (from, from_byte))
742 continue;
743
744 switch (code)
745 {
746 case Sstring_fence:
747 case Scomment_fence:
748 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
749 case Sstring:
750 /* Track parity of quotes. */
751 if (string_style == -1)
752 /* Entering a string. */
753 string_style = c;
754 else if (string_style == c)
755 /* Leaving the string. */
756 string_style = -1;
757 else
758 /* If we have two kinds of string delimiters.
759 There's no way to grok this scanning backwards. */
760 string_lossage = 1;
761 break;
762
763 case Scomment:
764 /* We've already checked that it is the relevant comstyle. */
765 if (string_style != -1 || comment_lossage || string_lossage)
766 /* There are odd string quotes involved, so let's be careful.
767 Test case in Pascal: " { " a { " } */
768 goto lossage;
769
770 if (!comnested)
771 {
772 /* Record best comment-starter so far. */
773 comstart_pos = from;
774 comstart_byte = from_byte;
775 }
776 else if (--nesting <= 0)
777 /* nested comments have to be balanced, so we don't need to
778 keep looking for earlier ones. We use here the same (slightly
779 incorrect) reasoning as below: since it is followed by uniform
780 paired string quotes, this comment-start has to be outside of
781 strings, else the comment-end itself would be inside a string. */
782 goto done;
783 break;
784
785 case Sendcomment:
786 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
787 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
788 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
789 /* This is the same style of comment ender as ours. */
790 {
791 if (comnested)
792 nesting++;
793 else
794 /* Anything before that can't count because it would match
795 this comment-ender rather than ours. */
796 from = stop; /* Break out of the loop. */
797 }
798 else if (comstart_pos != 0 || c != '\n')
799 /* We're mixing comment styles here, so we'd better be careful.
800 The (comstart_pos != 0 || c != '\n') check is not quite correct
801 (we should just always set comment_lossage), but removing it
802 would imply that any multiline comment in C would go through
803 lossage, which seems overkill.
804 The failure should only happen in the rare cases such as
805 { (* } *) */
806 comment_lossage = 1;
807 break;
808
809 case Sopen:
810 /* Assume a defun-start point is outside of strings. */
811 if (open_paren_in_column_0_is_defun_start
812 && (from == stop
813 || (temp_byte = dec_bytepos (from_byte),
814 FETCH_CHAR (temp_byte) == '\n')))
815 {
816 defun_start = from;
817 defun_start_byte = from_byte;
818 from = stop; /* Break out of the loop. */
819 }
820 break;
821
822 default:
823 break;
824 }
825 }
826
827 if (comstart_pos == 0)
828 {
829 from = comment_end;
830 from_byte = comment_end_byte;
831 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
832 }
833 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
834 or `done'), then we've found the beginning of the non-nested comment. */
835 else if (1) /* !comnested */
836 {
837 from = comstart_pos;
838 from_byte = comstart_byte;
839 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
840 }
841 else
842 {
843 struct lisp_parse_state state;
844 lossage:
845 /* We had two kinds of string delimiters mixed up
846 together. Decode this going forwards.
847 Scan fwd from a known safe place (beginning-of-defun)
848 to the one in question; this records where we
849 last passed a comment starter. */
850 /* If we did not already find the defun start, find it now. */
851 if (defun_start == 0)
852 {
853 defun_start = find_defun_start (comment_end, comment_end_byte);
854 defun_start_byte = find_start_value_byte;
855 }
856 do
857 {
858 scan_sexps_forward (&state,
859 defun_start, defun_start_byte,
860 comment_end, TYPE_MINIMUM (EMACS_INT),
861 0, Qnil, 0);
862 defun_start = comment_end;
863 if (state.incomment == (comnested ? 1 : -1)
864 && state.comstyle == comstyle)
865 from = state.comstr_start;
866 else
867 {
868 from = comment_end;
869 if (state.incomment)
870 /* If comment_end is inside some other comment, maybe ours
871 is nested, so we need to try again from within the
872 surrounding comment. Example: { a (* " *) */
873 {
874 /* FIXME: We should advance by one or two chars. */
875 defun_start = state.comstr_start + 2;
876 defun_start_byte = CHAR_TO_BYTE (defun_start);
877 }
878 }
879 } while (defun_start < comment_end);
880
881 from_byte = CHAR_TO_BYTE (from);
882 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
883 }
884
885 done:
886 *charpos_ptr = from;
887 *bytepos_ptr = from_byte;
888
889 return from != comment_end;
890 }
891 \f
892 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
893 doc: /* Return t if OBJECT is a syntax table.
894 Currently, any char-table counts as a syntax table. */)
895 (Lisp_Object object)
896 {
897 if (CHAR_TABLE_P (object)
898 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
899 return Qt;
900 return Qnil;
901 }
902
903 static void
904 check_syntax_table (Lisp_Object obj)
905 {
906 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
907 Qsyntax_table_p, obj);
908 }
909
910 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
911 doc: /* Return the current syntax table.
912 This is the one specified by the current buffer. */)
913 (void)
914 {
915 return BVAR (current_buffer, syntax_table);
916 }
917
918 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
919 Sstandard_syntax_table, 0, 0, 0,
920 doc: /* Return the standard syntax table.
921 This is the one used for new buffers. */)
922 (void)
923 {
924 return Vstandard_syntax_table;
925 }
926
927 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
928 doc: /* Construct a new syntax table and return it.
929 It is a copy of the TABLE, which defaults to the standard syntax table. */)
930 (Lisp_Object table)
931 {
932 Lisp_Object copy;
933
934 if (!NILP (table))
935 check_syntax_table (table);
936 else
937 table = Vstandard_syntax_table;
938
939 copy = Fcopy_sequence (table);
940
941 /* Only the standard syntax table should have a default element.
942 Other syntax tables should inherit from parents instead. */
943 set_char_table_defalt (copy, Qnil);
944
945 /* Copied syntax tables should all have parents.
946 If we copied one with no parent, such as the standard syntax table,
947 use the standard syntax table as the copy's parent. */
948 if (NILP (XCHAR_TABLE (copy)->parent))
949 Fset_char_table_parent (copy, Vstandard_syntax_table);
950 return copy;
951 }
952
953 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
954 doc: /* Select a new syntax table for the current buffer.
955 One argument, a syntax table. */)
956 (Lisp_Object table)
957 {
958 int idx;
959 check_syntax_table (table);
960 bset_syntax_table (current_buffer, table);
961 /* Indicate that this buffer now has a specified syntax table. */
962 idx = PER_BUFFER_VAR_IDX (syntax_table);
963 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
964 return table;
965 }
966 \f
967 /* Convert a letter which signifies a syntax code
968 into the code it signifies.
969 This is used by modify-syntax-entry, and other things. */
970
971 unsigned char const syntax_spec_code[0400] =
972 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
973 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
974 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
975 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
976 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
977 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
978 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
979 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
980 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
981 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
982 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
983 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
984 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
985 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
986 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
987 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
988 };
989
990 /* Indexed by syntax code, give the letter that describes it. */
991
992 char const syntax_code_spec[16] =
993 {
994 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
995 '!', '|'
996 };
997
998 /* Indexed by syntax code, give the object (cons of syntax code and
999 nil) to be stored in syntax table. Since these objects can be
1000 shared among syntax tables, we generate them in advance. By
1001 sharing objects, the function `describe-syntax' can give a more
1002 compact listing. */
1003 static Lisp_Object Vsyntax_code_object;
1004
1005 \f
1006 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1007 doc: /* Return the syntax code of CHARACTER, described by a character.
1008 For example, if CHARACTER is a word constituent, the
1009 character `w' (119) is returned.
1010 The characters that correspond to various syntax codes
1011 are listed in the documentation of `modify-syntax-entry'. */)
1012 (Lisp_Object character)
1013 {
1014 int char_int;
1015 CHECK_CHARACTER (character);
1016 char_int = XINT (character);
1017 SETUP_BUFFER_SYNTAX_TABLE ();
1018 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1019 }
1020
1021 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1022 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1023 (Lisp_Object character)
1024 {
1025 int char_int;
1026 enum syntaxcode code;
1027 CHECK_CHARACTER (character);
1028 char_int = XINT (character);
1029 SETUP_BUFFER_SYNTAX_TABLE ();
1030 code = SYNTAX (char_int);
1031 if (code == Sopen || code == Sclose)
1032 return SYNTAX_MATCH (char_int);
1033 return Qnil;
1034 }
1035
1036 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1037 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1038 STRING should be a string of the form allowed as argument of
1039 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1040 cons cell \(CODE . MATCHING-CHAR) which can be used, for example, as
1041 the value of a `syntax-table' text property. */)
1042 (Lisp_Object string)
1043 {
1044 const unsigned char *p;
1045 int val;
1046 Lisp_Object match;
1047
1048 CHECK_STRING (string);
1049
1050 p = SDATA (string);
1051 val = syntax_spec_code[*p++];
1052 if (val == 0377)
1053 error ("Invalid syntax description letter: %c", p[-1]);
1054
1055 if (val == Sinherit)
1056 return Qnil;
1057
1058 if (*p)
1059 {
1060 int len;
1061 int character = STRING_CHAR_AND_LENGTH (p, len);
1062 XSETINT (match, character);
1063 if (XFASTINT (match) == ' ')
1064 match = Qnil;
1065 p += len;
1066 }
1067 else
1068 match = Qnil;
1069
1070 while (*p)
1071 switch (*p++)
1072 {
1073 case '1':
1074 val |= 1 << 16;
1075 break;
1076
1077 case '2':
1078 val |= 1 << 17;
1079 break;
1080
1081 case '3':
1082 val |= 1 << 18;
1083 break;
1084
1085 case '4':
1086 val |= 1 << 19;
1087 break;
1088
1089 case 'p':
1090 val |= 1 << 20;
1091 break;
1092
1093 case 'b':
1094 val |= 1 << 21;
1095 break;
1096
1097 case 'n':
1098 val |= 1 << 22;
1099 break;
1100
1101 case 'c':
1102 val |= 1 << 23;
1103 break;
1104 }
1105
1106 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1107 return AREF (Vsyntax_code_object, val);
1108 else
1109 /* Since we can't use a shared object, let's make a new one. */
1110 return Fcons (make_number (val), match);
1111 }
1112
1113 /* I really don't know why this is interactive
1114 help-form should at least be made useful whilst reading the second arg. */
1115 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1116 "cSet syntax for character: \nsSet syntax for %s to: ",
1117 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1118 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1119 the current buffer's syntax table.
1120 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1121 in the range MIN to MAX are changed.
1122 The first character of NEWENTRY should be one of the following:
1123 Space or - whitespace syntax. w word constituent.
1124 _ symbol constituent. . punctuation.
1125 ( open-parenthesis. ) close-parenthesis.
1126 " string quote. \\ escape.
1127 $ paired delimiter. ' expression quote or prefix operator.
1128 < comment starter. > comment ender.
1129 / character-quote. @ inherit from parent table.
1130 | generic string fence. ! generic comment fence.
1131
1132 Only single-character comment start and end sequences are represented thus.
1133 Two-character sequences are represented as described below.
1134 The second character of NEWENTRY is the matching parenthesis,
1135 used only if the first character is `(' or `)'.
1136 Any additional characters are flags.
1137 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1138 1 means CHAR is the start of a two-char comment start sequence.
1139 2 means CHAR is the second character of such a sequence.
1140 3 means CHAR is the start of a two-char comment end sequence.
1141 4 means CHAR is the second character of such a sequence.
1142
1143 There can be several orthogonal comment sequences. This is to support
1144 language modes such as C++. By default, all comment sequences are of style
1145 a, but you can set the comment sequence style to b (on the second character
1146 of a comment-start, and the first character of a comment-end sequence) and/or
1147 c (on any of its chars) using this flag:
1148 b means CHAR is part of comment sequence b.
1149 c means CHAR is part of comment sequence c.
1150 n means CHAR is part of a nestable comment sequence.
1151
1152 p means CHAR is a prefix character for `backward-prefix-chars';
1153 such characters are treated as whitespace when they occur
1154 between expressions.
1155 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1156 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1157 {
1158 if (CONSP (c))
1159 {
1160 CHECK_CHARACTER_CAR (c);
1161 CHECK_CHARACTER_CDR (c);
1162 }
1163 else
1164 CHECK_CHARACTER (c);
1165
1166 if (NILP (syntax_table))
1167 syntax_table = BVAR (current_buffer, syntax_table);
1168 else
1169 check_syntax_table (syntax_table);
1170
1171 newentry = Fstring_to_syntax (newentry);
1172 if (CONSP (c))
1173 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1174 else
1175 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1176
1177 /* We clear the regexp cache, since character classes can now have
1178 different values from those in the compiled regexps.*/
1179 clear_regexp_cache ();
1180
1181 return Qnil;
1182 }
1183 \f
1184 /* Dump syntax table to buffer in human-readable format */
1185
1186 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1187 Sinternal_describe_syntax_value, 1, 1, 0,
1188 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1189 (Lisp_Object syntax)
1190 {
1191 int code, syntax_code;
1192 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1193 char str[2];
1194 Lisp_Object first, match_lisp, value = syntax;
1195
1196 if (NILP (value))
1197 {
1198 insert_string ("default");
1199 return syntax;
1200 }
1201
1202 if (CHAR_TABLE_P (value))
1203 {
1204 insert_string ("deeper char-table ...");
1205 return syntax;
1206 }
1207
1208 if (!CONSP (value))
1209 {
1210 insert_string ("invalid");
1211 return syntax;
1212 }
1213
1214 first = XCAR (value);
1215 match_lisp = XCDR (value);
1216
1217 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1218 {
1219 insert_string ("invalid");
1220 return syntax;
1221 }
1222
1223 syntax_code = XINT (first) & INT_MAX;
1224 code = syntax_code & 0377;
1225 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1226 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1227 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1228 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1229 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1230 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1231 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1232 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1233
1234 if (Smax <= code)
1235 {
1236 insert_string ("invalid");
1237 return syntax;
1238 }
1239
1240 str[0] = syntax_code_spec[code], str[1] = 0;
1241 insert (str, 1);
1242
1243 if (NILP (match_lisp))
1244 insert (" ", 1);
1245 else
1246 insert_char (XINT (match_lisp));
1247
1248 if (start1)
1249 insert ("1", 1);
1250 if (start2)
1251 insert ("2", 1);
1252
1253 if (end1)
1254 insert ("3", 1);
1255 if (end2)
1256 insert ("4", 1);
1257
1258 if (prefix)
1259 insert ("p", 1);
1260 if (comstyleb)
1261 insert ("b", 1);
1262 if (comstylec)
1263 insert ("c", 1);
1264 if (comnested)
1265 insert ("n", 1);
1266
1267 insert_string ("\twhich means: ");
1268
1269 switch (code)
1270 {
1271 case Swhitespace:
1272 insert_string ("whitespace"); break;
1273 case Spunct:
1274 insert_string ("punctuation"); break;
1275 case Sword:
1276 insert_string ("word"); break;
1277 case Ssymbol:
1278 insert_string ("symbol"); break;
1279 case Sopen:
1280 insert_string ("open"); break;
1281 case Sclose:
1282 insert_string ("close"); break;
1283 case Squote:
1284 insert_string ("prefix"); break;
1285 case Sstring:
1286 insert_string ("string"); break;
1287 case Smath:
1288 insert_string ("math"); break;
1289 case Sescape:
1290 insert_string ("escape"); break;
1291 case Scharquote:
1292 insert_string ("charquote"); break;
1293 case Scomment:
1294 insert_string ("comment"); break;
1295 case Sendcomment:
1296 insert_string ("endcomment"); break;
1297 case Sinherit:
1298 insert_string ("inherit"); break;
1299 case Scomment_fence:
1300 insert_string ("comment fence"); break;
1301 case Sstring_fence:
1302 insert_string ("string fence"); break;
1303 default:
1304 insert_string ("invalid");
1305 return syntax;
1306 }
1307
1308 if (!NILP (match_lisp))
1309 {
1310 insert_string (", matches ");
1311 insert_char (XINT (match_lisp));
1312 }
1313
1314 if (start1)
1315 insert_string (",\n\t is the first character of a comment-start sequence");
1316 if (start2)
1317 insert_string (",\n\t is the second character of a comment-start sequence");
1318
1319 if (end1)
1320 insert_string (",\n\t is the first character of a comment-end sequence");
1321 if (end2)
1322 insert_string (",\n\t is the second character of a comment-end sequence");
1323 if (comstyleb)
1324 insert_string (" (comment style b)");
1325 if (comstylec)
1326 insert_string (" (comment style c)");
1327 if (comnested)
1328 insert_string (" (nestable)");
1329
1330 if (prefix)
1331 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1332
1333 return syntax;
1334 }
1335 \f
1336 /* Return the position across COUNT words from FROM.
1337 If that many words cannot be found before the end of the buffer, return 0.
1338 COUNT negative means scan backward and stop at word beginning. */
1339
1340 ptrdiff_t
1341 scan_words (register ptrdiff_t from, register EMACS_INT count)
1342 {
1343 register ptrdiff_t beg = BEGV;
1344 register ptrdiff_t end = ZV;
1345 register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1346 register enum syntaxcode code;
1347 int ch0, ch1;
1348 Lisp_Object func, pos;
1349
1350 immediate_quit = 1;
1351 QUIT;
1352
1353 SETUP_SYNTAX_TABLE (from, count);
1354
1355 while (count > 0)
1356 {
1357 while (1)
1358 {
1359 if (from == end)
1360 {
1361 immediate_quit = 0;
1362 return 0;
1363 }
1364 UPDATE_SYNTAX_TABLE_FORWARD (from);
1365 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1366 code = SYNTAX (ch0);
1367 INC_BOTH (from, from_byte);
1368 if (words_include_escapes
1369 && (code == Sescape || code == Scharquote))
1370 break;
1371 if (code == Sword)
1372 break;
1373 }
1374 /* Now CH0 is a character which begins a word and FROM is the
1375 position of the next character. */
1376 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1377 if (! NILP (Ffboundp (func)))
1378 {
1379 pos = call2 (func, make_number (from - 1), make_number (end));
1380 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1381 {
1382 from = XINT (pos);
1383 from_byte = CHAR_TO_BYTE (from);
1384 }
1385 }
1386 else
1387 {
1388 while (1)
1389 {
1390 if (from == end) break;
1391 UPDATE_SYNTAX_TABLE_FORWARD (from);
1392 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1393 code = SYNTAX (ch1);
1394 if ((code != Sword
1395 && (! words_include_escapes
1396 || (code != Sescape && code != Scharquote)))
1397 || word_boundary_p (ch0, ch1))
1398 break;
1399 INC_BOTH (from, from_byte);
1400 ch0 = ch1;
1401 }
1402 }
1403 count--;
1404 }
1405 while (count < 0)
1406 {
1407 while (1)
1408 {
1409 if (from == beg)
1410 {
1411 immediate_quit = 0;
1412 return 0;
1413 }
1414 DEC_BOTH (from, from_byte);
1415 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1416 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1417 code = SYNTAX (ch1);
1418 if (words_include_escapes
1419 && (code == Sescape || code == Scharquote))
1420 break;
1421 if (code == Sword)
1422 break;
1423 }
1424 /* Now CH1 is a character which ends a word and FROM is the
1425 position of it. */
1426 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1427 if (! NILP (Ffboundp (func)))
1428 {
1429 pos = call2 (func, make_number (from), make_number (beg));
1430 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1431 {
1432 from = XINT (pos);
1433 from_byte = CHAR_TO_BYTE (from);
1434 }
1435 }
1436 else
1437 {
1438 while (1)
1439 {
1440 if (from == beg)
1441 break;
1442 DEC_BOTH (from, from_byte);
1443 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1444 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1445 code = SYNTAX (ch0);
1446 if ((code != Sword
1447 && (! words_include_escapes
1448 || (code != Sescape && code != Scharquote)))
1449 || word_boundary_p (ch0, ch1))
1450 {
1451 INC_BOTH (from, from_byte);
1452 break;
1453 }
1454 ch1 = ch0;
1455 }
1456 }
1457 count++;
1458 }
1459
1460 immediate_quit = 0;
1461
1462 return from;
1463 }
1464
1465 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1466 doc: /* Move point forward ARG words (backward if ARG is negative).
1467 If ARG is omitted or nil, move point forward one word.
1468 Normally returns t.
1469 If an edge of the buffer or a field boundary is reached, point is left there
1470 and the function returns nil. Field boundaries are not noticed if
1471 `inhibit-field-text-motion' is non-nil. */)
1472 (Lisp_Object arg)
1473 {
1474 Lisp_Object tmp;
1475 ptrdiff_t orig_val, val;
1476
1477 if (NILP (arg))
1478 XSETFASTINT (arg, 1);
1479 else
1480 CHECK_NUMBER (arg);
1481
1482 val = orig_val = scan_words (PT, XINT (arg));
1483 if (! orig_val)
1484 val = XINT (arg) > 0 ? ZV : BEGV;
1485
1486 /* Avoid jumping out of an input field. */
1487 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1488 Qnil, Qnil, Qnil);
1489 val = XFASTINT (tmp);
1490
1491 SET_PT (val);
1492 return val == orig_val ? Qt : Qnil;
1493 }
1494 \f
1495 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1496 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1497 STRING is like the inside of a `[...]' in a regular expression
1498 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1499 (but not at the end of a range; quoting is never needed there).
1500 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1501 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1502 Char classes, e.g. `[:alpha:]', are supported.
1503
1504 Returns the distance traveled, either zero or positive. */)
1505 (Lisp_Object string, Lisp_Object lim)
1506 {
1507 return skip_chars (1, string, lim, 1);
1508 }
1509
1510 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1511 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1512 See `skip-chars-forward' for details.
1513 Returns the distance traveled, either zero or negative. */)
1514 (Lisp_Object string, Lisp_Object lim)
1515 {
1516 return skip_chars (0, string, lim, 1);
1517 }
1518
1519 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1520 doc: /* Move point forward across chars in specified syntax classes.
1521 SYNTAX is a string of syntax code characters.
1522 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1523 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1524 This function returns the distance traveled, either zero or positive. */)
1525 (Lisp_Object syntax, Lisp_Object lim)
1526 {
1527 return skip_syntaxes (1, syntax, lim);
1528 }
1529
1530 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1531 doc: /* Move point backward across chars in specified syntax classes.
1532 SYNTAX is a string of syntax code characters.
1533 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1534 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1535 This function returns either zero or a negative number, and the absolute value
1536 of this is the distance traveled. */)
1537 (Lisp_Object syntax, Lisp_Object lim)
1538 {
1539 return skip_syntaxes (0, syntax, lim);
1540 }
1541
1542 static Lisp_Object
1543 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1544 bool handle_iso_classes)
1545 {
1546 int c;
1547 char fastmap[0400];
1548 /* Store the ranges of non-ASCII characters. */
1549 int *char_ranges IF_LINT (= NULL);
1550 int n_char_ranges = 0;
1551 bool negate = 0;
1552 ptrdiff_t i, i_byte;
1553 /* True if the current buffer is multibyte and the region contains
1554 non-ASCII chars. */
1555 bool multibyte;
1556 /* True if STRING is multibyte and it contains non-ASCII chars. */
1557 bool string_multibyte;
1558 ptrdiff_t size_byte;
1559 const unsigned char *str;
1560 int len;
1561 Lisp_Object iso_classes;
1562
1563 CHECK_STRING (string);
1564 iso_classes = Qnil;
1565
1566 if (NILP (lim))
1567 XSETINT (lim, forwardp ? ZV : BEGV);
1568 else
1569 CHECK_NUMBER_COERCE_MARKER (lim);
1570
1571 /* In any case, don't allow scan outside bounds of buffer. */
1572 if (XINT (lim) > ZV)
1573 XSETFASTINT (lim, ZV);
1574 if (XINT (lim) < BEGV)
1575 XSETFASTINT (lim, BEGV);
1576
1577 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1578 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1579 string_multibyte = SBYTES (string) > SCHARS (string);
1580
1581 memset (fastmap, 0, sizeof fastmap);
1582
1583 str = SDATA (string);
1584 size_byte = SBYTES (string);
1585
1586 i_byte = 0;
1587 if (i_byte < size_byte
1588 && SREF (string, 0) == '^')
1589 {
1590 negate = 1; i_byte++;
1591 }
1592
1593 /* Find the characters specified and set their elements of fastmap.
1594 Handle backslashes and ranges specially.
1595
1596 If STRING contains non-ASCII characters, setup char_ranges for
1597 them and use fastmap only for their leading codes. */
1598
1599 if (! string_multibyte)
1600 {
1601 bool string_has_eight_bit = 0;
1602
1603 /* At first setup fastmap. */
1604 while (i_byte < size_byte)
1605 {
1606 c = str[i_byte++];
1607
1608 if (handle_iso_classes && c == '['
1609 && i_byte < size_byte
1610 && str[i_byte] == ':')
1611 {
1612 const unsigned char *class_beg = str + i_byte + 1;
1613 const unsigned char *class_end = class_beg;
1614 const unsigned char *class_limit = str + size_byte - 2;
1615 /* Leave room for the null. */
1616 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1617 re_wctype_t cc;
1618
1619 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1620 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1621
1622 while (class_end < class_limit
1623 && *class_end >= 'a' && *class_end <= 'z')
1624 class_end++;
1625
1626 if (class_end == class_beg
1627 || *class_end != ':' || class_end[1] != ']')
1628 goto not_a_class_name;
1629
1630 memcpy (class_name, class_beg, class_end - class_beg);
1631 class_name[class_end - class_beg] = 0;
1632
1633 cc = re_wctype (class_name);
1634 if (cc == 0)
1635 error ("Invalid ISO C character class");
1636
1637 iso_classes = Fcons (make_number (cc), iso_classes);
1638
1639 i_byte = class_end + 2 - str;
1640 continue;
1641 }
1642
1643 not_a_class_name:
1644 if (c == '\\')
1645 {
1646 if (i_byte == size_byte)
1647 break;
1648
1649 c = str[i_byte++];
1650 }
1651 /* Treat `-' as range character only if another character
1652 follows. */
1653 if (i_byte + 1 < size_byte
1654 && str[i_byte] == '-')
1655 {
1656 int c2;
1657
1658 /* Skip over the dash. */
1659 i_byte++;
1660
1661 /* Get the end of the range. */
1662 c2 = str[i_byte++];
1663 if (c2 == '\\'
1664 && i_byte < size_byte)
1665 c2 = str[i_byte++];
1666
1667 if (c <= c2)
1668 {
1669 int lim2 = c2 + 1;
1670 while (c < lim2)
1671 fastmap[c++] = 1;
1672 if (! ASCII_CHAR_P (c2))
1673 string_has_eight_bit = 1;
1674 }
1675 }
1676 else
1677 {
1678 fastmap[c] = 1;
1679 if (! ASCII_CHAR_P (c))
1680 string_has_eight_bit = 1;
1681 }
1682 }
1683
1684 /* If the current range is multibyte and STRING contains
1685 eight-bit chars, arrange fastmap and setup char_ranges for
1686 the corresponding multibyte chars. */
1687 if (multibyte && string_has_eight_bit)
1688 {
1689 char *p1;
1690 char himap[0200 + 1];
1691 memcpy (himap, fastmap + 0200, 0200);
1692 himap[0200] = 0;
1693 memset (fastmap + 0200, 0, 0200);
1694 char_ranges = alloca (sizeof *char_ranges * 128 * 2);
1695 i = 0;
1696
1697 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1698 {
1699 /* Deduce the next range C..C2 from the next clump of 1s
1700 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1701 order half of the old FASTMAP. */
1702 int c2, leading_code;
1703 i = p1 - himap;
1704 c = BYTE8_TO_CHAR (i + 0200);
1705 i += strlen (p1);
1706 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1707
1708 char_ranges[n_char_ranges++] = c;
1709 char_ranges[n_char_ranges++] = c2;
1710 leading_code = CHAR_LEADING_CODE (c);
1711 memset (fastmap + leading_code, 1,
1712 CHAR_LEADING_CODE (c2) - leading_code + 1);
1713 }
1714 }
1715 }
1716 else /* STRING is multibyte */
1717 {
1718 char_ranges = alloca (sizeof *char_ranges * SCHARS (string) * 2);
1719
1720 while (i_byte < size_byte)
1721 {
1722 int leading_code = str[i_byte];
1723 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1724 i_byte += len;
1725
1726 if (handle_iso_classes && c == '['
1727 && i_byte < size_byte
1728 && STRING_CHAR (str + i_byte) == ':')
1729 {
1730 const unsigned char *class_beg = str + i_byte + 1;
1731 const unsigned char *class_end = class_beg;
1732 const unsigned char *class_limit = str + size_byte - 2;
1733 /* Leave room for the null. */
1734 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1735 re_wctype_t cc;
1736
1737 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1738 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1739
1740 while (class_end < class_limit
1741 && *class_end >= 'a' && *class_end <= 'z')
1742 class_end++;
1743
1744 if (class_end == class_beg
1745 || *class_end != ':' || class_end[1] != ']')
1746 goto not_a_class_name_multibyte;
1747
1748 memcpy (class_name, class_beg, class_end - class_beg);
1749 class_name[class_end - class_beg] = 0;
1750
1751 cc = re_wctype (class_name);
1752 if (cc == 0)
1753 error ("Invalid ISO C character class");
1754
1755 iso_classes = Fcons (make_number (cc), iso_classes);
1756
1757 i_byte = class_end + 2 - str;
1758 continue;
1759 }
1760
1761 not_a_class_name_multibyte:
1762 if (c == '\\')
1763 {
1764 if (i_byte == size_byte)
1765 break;
1766
1767 leading_code = str[i_byte];
1768 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1769 i_byte += len;
1770 }
1771 /* Treat `-' as range character only if another character
1772 follows. */
1773 if (i_byte + 1 < size_byte
1774 && str[i_byte] == '-')
1775 {
1776 int c2, leading_code2;
1777
1778 /* Skip over the dash. */
1779 i_byte++;
1780
1781 /* Get the end of the range. */
1782 leading_code2 = str[i_byte];
1783 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1784 i_byte += len;
1785
1786 if (c2 == '\\'
1787 && i_byte < size_byte)
1788 {
1789 leading_code2 = str[i_byte];
1790 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1791 i_byte += len;
1792 }
1793
1794 if (c > c2)
1795 continue;
1796 if (ASCII_CHAR_P (c))
1797 {
1798 while (c <= c2 && c < 0x80)
1799 fastmap[c++] = 1;
1800 leading_code = CHAR_LEADING_CODE (c);
1801 }
1802 if (! ASCII_CHAR_P (c))
1803 {
1804 int lim2 = leading_code2 + 1;
1805 while (leading_code < lim2)
1806 fastmap[leading_code++] = 1;
1807 if (c <= c2)
1808 {
1809 char_ranges[n_char_ranges++] = c;
1810 char_ranges[n_char_ranges++] = c2;
1811 }
1812 }
1813 }
1814 else
1815 {
1816 if (ASCII_CHAR_P (c))
1817 fastmap[c] = 1;
1818 else
1819 {
1820 fastmap[leading_code] = 1;
1821 char_ranges[n_char_ranges++] = c;
1822 char_ranges[n_char_ranges++] = c;
1823 }
1824 }
1825 }
1826
1827 /* If the current range is unibyte and STRING contains non-ASCII
1828 chars, arrange fastmap for the corresponding unibyte
1829 chars. */
1830
1831 if (! multibyte && n_char_ranges > 0)
1832 {
1833 memset (fastmap + 0200, 0, 0200);
1834 for (i = 0; i < n_char_ranges; i += 2)
1835 {
1836 int c1 = char_ranges[i];
1837 int lim2 = char_ranges[i + 1] + 1;
1838
1839 for (; c1 < lim2; c1++)
1840 {
1841 int b = CHAR_TO_BYTE_SAFE (c1);
1842 if (b >= 0)
1843 fastmap[b] = 1;
1844 }
1845 }
1846 }
1847 }
1848
1849 /* If ^ was the first character, complement the fastmap. */
1850 if (negate)
1851 {
1852 if (! multibyte)
1853 for (i = 0; i < sizeof fastmap; i++)
1854 fastmap[i] ^= 1;
1855 else
1856 {
1857 for (i = 0; i < 0200; i++)
1858 fastmap[i] ^= 1;
1859 /* All non-ASCII chars possibly match. */
1860 for (; i < sizeof fastmap; i++)
1861 fastmap[i] = 1;
1862 }
1863 }
1864
1865 {
1866 ptrdiff_t start_point = PT;
1867 ptrdiff_t pos = PT;
1868 ptrdiff_t pos_byte = PT_BYTE;
1869 unsigned char *p = PT_ADDR, *endp, *stop;
1870
1871 if (forwardp)
1872 {
1873 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1874 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1875 }
1876 else
1877 {
1878 endp = CHAR_POS_ADDR (XINT (lim));
1879 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1880 }
1881
1882 immediate_quit = 1;
1883 /* This code may look up syntax tables using functions that rely on the
1884 gl_state object. To make sure this object is not out of date,
1885 let's initialize it manually.
1886 We ignore syntax-table text-properties for now, since that's
1887 what we've done in the past. */
1888 SETUP_BUFFER_SYNTAX_TABLE ();
1889 if (forwardp)
1890 {
1891 if (multibyte)
1892 while (1)
1893 {
1894 int nbytes;
1895
1896 if (p >= stop)
1897 {
1898 if (p >= endp)
1899 break;
1900 p = GAP_END_ADDR;
1901 stop = endp;
1902 }
1903 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1904 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1905 {
1906 if (negate)
1907 break;
1908 else
1909 goto fwd_ok;
1910 }
1911
1912 if (! fastmap[*p])
1913 break;
1914 if (! ASCII_CHAR_P (c))
1915 {
1916 /* As we are looking at a multibyte character, we
1917 must look up the character in the table
1918 CHAR_RANGES. If there's no data in the table,
1919 that character is not what we want to skip. */
1920
1921 /* The following code do the right thing even if
1922 n_char_ranges is zero (i.e. no data in
1923 CHAR_RANGES). */
1924 for (i = 0; i < n_char_ranges; i += 2)
1925 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1926 break;
1927 if (!(negate ^ (i < n_char_ranges)))
1928 break;
1929 }
1930 fwd_ok:
1931 p += nbytes, pos++, pos_byte += nbytes;
1932 }
1933 else
1934 while (1)
1935 {
1936 if (p >= stop)
1937 {
1938 if (p >= endp)
1939 break;
1940 p = GAP_END_ADDR;
1941 stop = endp;
1942 }
1943
1944 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1945 {
1946 if (negate)
1947 break;
1948 else
1949 goto fwd_unibyte_ok;
1950 }
1951
1952 if (!fastmap[*p])
1953 break;
1954 fwd_unibyte_ok:
1955 p++, pos++, pos_byte++;
1956 }
1957 }
1958 else
1959 {
1960 if (multibyte)
1961 while (1)
1962 {
1963 unsigned char *prev_p;
1964
1965 if (p <= stop)
1966 {
1967 if (p <= endp)
1968 break;
1969 p = GPT_ADDR;
1970 stop = endp;
1971 }
1972 prev_p = p;
1973 while (--p >= stop && ! CHAR_HEAD_P (*p));
1974 c = STRING_CHAR (p);
1975
1976 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1977 {
1978 if (negate)
1979 break;
1980 else
1981 goto back_ok;
1982 }
1983
1984 if (! fastmap[*p])
1985 break;
1986 if (! ASCII_CHAR_P (c))
1987 {
1988 /* See the comment in the previous similar code. */
1989 for (i = 0; i < n_char_ranges; i += 2)
1990 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1991 break;
1992 if (!(negate ^ (i < n_char_ranges)))
1993 break;
1994 }
1995 back_ok:
1996 pos--, pos_byte -= prev_p - p;
1997 }
1998 else
1999 while (1)
2000 {
2001 if (p <= stop)
2002 {
2003 if (p <= endp)
2004 break;
2005 p = GPT_ADDR;
2006 stop = endp;
2007 }
2008
2009 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2010 {
2011 if (negate)
2012 break;
2013 else
2014 goto back_unibyte_ok;
2015 }
2016
2017 if (!fastmap[p[-1]])
2018 break;
2019 back_unibyte_ok:
2020 p--, pos--, pos_byte--;
2021 }
2022 }
2023
2024 SET_PT_BOTH (pos, pos_byte);
2025 immediate_quit = 0;
2026
2027 return make_number (PT - start_point);
2028 }
2029 }
2030
2031
2032 static Lisp_Object
2033 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2034 {
2035 int c;
2036 unsigned char fastmap[0400];
2037 bool negate = 0;
2038 ptrdiff_t i, i_byte;
2039 bool multibyte;
2040 ptrdiff_t size_byte;
2041 unsigned char *str;
2042
2043 CHECK_STRING (string);
2044
2045 if (NILP (lim))
2046 XSETINT (lim, forwardp ? ZV : BEGV);
2047 else
2048 CHECK_NUMBER_COERCE_MARKER (lim);
2049
2050 /* In any case, don't allow scan outside bounds of buffer. */
2051 if (XINT (lim) > ZV)
2052 XSETFASTINT (lim, ZV);
2053 if (XINT (lim) < BEGV)
2054 XSETFASTINT (lim, BEGV);
2055
2056 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2057 return make_number (0);
2058
2059 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2060 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2061
2062 memset (fastmap, 0, sizeof fastmap);
2063
2064 if (SBYTES (string) > SCHARS (string))
2065 /* As this is very rare case (syntax spec is ASCII only), don't
2066 consider efficiency. */
2067 string = string_make_unibyte (string);
2068
2069 str = SDATA (string);
2070 size_byte = SBYTES (string);
2071
2072 i_byte = 0;
2073 if (i_byte < size_byte
2074 && SREF (string, 0) == '^')
2075 {
2076 negate = 1; i_byte++;
2077 }
2078
2079 /* Find the syntaxes specified and set their elements of fastmap. */
2080
2081 while (i_byte < size_byte)
2082 {
2083 c = str[i_byte++];
2084 fastmap[syntax_spec_code[c]] = 1;
2085 }
2086
2087 /* If ^ was the first character, complement the fastmap. */
2088 if (negate)
2089 for (i = 0; i < sizeof fastmap; i++)
2090 fastmap[i] ^= 1;
2091
2092 {
2093 ptrdiff_t start_point = PT;
2094 ptrdiff_t pos = PT;
2095 ptrdiff_t pos_byte = PT_BYTE;
2096 unsigned char *p = PT_ADDR, *endp, *stop;
2097
2098 if (forwardp)
2099 {
2100 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2101 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2102 }
2103 else
2104 {
2105 endp = CHAR_POS_ADDR (XINT (lim));
2106 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2107 }
2108
2109 immediate_quit = 1;
2110 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2111 if (forwardp)
2112 {
2113 if (multibyte)
2114 {
2115 while (1)
2116 {
2117 int nbytes;
2118
2119 if (p >= stop)
2120 {
2121 if (p >= endp)
2122 break;
2123 p = GAP_END_ADDR;
2124 stop = endp;
2125 }
2126 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2127 if (! fastmap[SYNTAX (c)])
2128 break;
2129 p += nbytes, pos++, pos_byte += nbytes;
2130 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2131 }
2132 }
2133 else
2134 {
2135 while (1)
2136 {
2137 if (p >= stop)
2138 {
2139 if (p >= endp)
2140 break;
2141 p = GAP_END_ADDR;
2142 stop = endp;
2143 }
2144 if (! fastmap[SYNTAX (*p)])
2145 break;
2146 p++, pos++, pos_byte++;
2147 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2148 }
2149 }
2150 }
2151 else
2152 {
2153 if (multibyte)
2154 {
2155 while (1)
2156 {
2157 unsigned char *prev_p;
2158
2159 if (p <= stop)
2160 {
2161 if (p <= endp)
2162 break;
2163 p = GPT_ADDR;
2164 stop = endp;
2165 }
2166 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2167 prev_p = p;
2168 while (--p >= stop && ! CHAR_HEAD_P (*p));
2169 c = STRING_CHAR (p);
2170 if (! fastmap[SYNTAX (c)])
2171 break;
2172 pos--, pos_byte -= prev_p - p;
2173 }
2174 }
2175 else
2176 {
2177 while (1)
2178 {
2179 if (p <= stop)
2180 {
2181 if (p <= endp)
2182 break;
2183 p = GPT_ADDR;
2184 stop = endp;
2185 }
2186 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2187 if (! fastmap[SYNTAX (p[-1])])
2188 break;
2189 p--, pos--, pos_byte--;
2190 }
2191 }
2192 }
2193
2194 SET_PT_BOTH (pos, pos_byte);
2195 immediate_quit = 0;
2196
2197 return make_number (PT - start_point);
2198 }
2199 }
2200
2201 /* Return true if character C belongs to one of the ISO classes
2202 in the list ISO_CLASSES. Each class is represented by an
2203 integer which is its type according to re_wctype. */
2204
2205 static bool
2206 in_classes (int c, Lisp_Object iso_classes)
2207 {
2208 bool fits_class = 0;
2209
2210 while (CONSP (iso_classes))
2211 {
2212 Lisp_Object elt;
2213 elt = XCAR (iso_classes);
2214 iso_classes = XCDR (iso_classes);
2215
2216 if (re_iswctype (c, XFASTINT (elt)))
2217 fits_class = 1;
2218 }
2219
2220 return fits_class;
2221 }
2222 \f
2223 /* Jump over a comment, assuming we are at the beginning of one.
2224 FROM is the current position.
2225 FROM_BYTE is the bytepos corresponding to FROM.
2226 Do not move past STOP (a charpos).
2227 The comment over which we have to jump is of style STYLE
2228 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2229 NESTING should be positive to indicate the nesting at the beginning
2230 for nested comments and should be zero or negative else.
2231 ST_COMMENT_STYLE cannot be nested.
2232 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2233 (or 0 If the search cannot start in the middle of a two-character).
2234
2235 If successful, return true and store the charpos of the comment's end
2236 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2237 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2238 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2239 (as defined for state.incomment) in *INCOMMENT_PTR.
2240
2241 The comment end is the last character of the comment rather than the
2242 character just after the comment.
2243
2244 Global syntax data is assumed to initially be valid for FROM and
2245 remains valid for forward search starting at the returned position. */
2246
2247 static bool
2248 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2249 EMACS_INT nesting, int style, int prev_syntax,
2250 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2251 EMACS_INT *incomment_ptr)
2252 {
2253 register int c, c1;
2254 register enum syntaxcode code;
2255 register int syntax, other_syntax;
2256
2257 if (nesting <= 0) nesting = -1;
2258
2259 /* Enter the loop in the middle so that we find
2260 a 2-char comment ender if we start in the middle of it. */
2261 syntax = prev_syntax;
2262 if (syntax != 0) goto forw_incomment;
2263
2264 while (1)
2265 {
2266 if (from == stop)
2267 {
2268 *incomment_ptr = nesting;
2269 *charpos_ptr = from;
2270 *bytepos_ptr = from_byte;
2271 return 0;
2272 }
2273 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2274 syntax = SYNTAX_WITH_FLAGS (c);
2275 code = syntax & 0xff;
2276 if (code == Sendcomment
2277 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2278 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2279 (nesting > 0 && --nesting == 0) : nesting < 0))
2280 /* we have encountered a comment end of the same style
2281 as the comment sequence which began this comment
2282 section */
2283 break;
2284 if (code == Scomment_fence
2285 && style == ST_COMMENT_STYLE)
2286 /* we have encountered a comment end of the same style
2287 as the comment sequence which began this comment
2288 section. */
2289 break;
2290 if (nesting > 0
2291 && code == Scomment
2292 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2293 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2294 /* we have encountered a nested comment of the same style
2295 as the comment sequence which began this comment section */
2296 nesting++;
2297 INC_BOTH (from, from_byte);
2298 UPDATE_SYNTAX_TABLE_FORWARD (from);
2299
2300 forw_incomment:
2301 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2302 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2303 other_syntax = SYNTAX_WITH_FLAGS (c1),
2304 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2305 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2306 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2307 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2308 ? nesting > 0 : nesting < 0))
2309 {
2310 if (--nesting <= 0)
2311 /* we have encountered a comment end of the same style
2312 as the comment sequence which began this comment
2313 section */
2314 break;
2315 else
2316 {
2317 INC_BOTH (from, from_byte);
2318 UPDATE_SYNTAX_TABLE_FORWARD (from);
2319 }
2320 }
2321 if (nesting > 0
2322 && from < stop
2323 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2324 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2325 other_syntax = SYNTAX_WITH_FLAGS (c1),
2326 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2327 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2328 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2329 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2330 /* we have encountered a nested comment of the same style
2331 as the comment sequence which began this comment
2332 section */
2333 {
2334 INC_BOTH (from, from_byte);
2335 UPDATE_SYNTAX_TABLE_FORWARD (from);
2336 nesting++;
2337 }
2338 }
2339 *charpos_ptr = from;
2340 *bytepos_ptr = from_byte;
2341 return 1;
2342 }
2343
2344 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2345 doc: /*
2346 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2347 Stop scanning if we find something other than a comment or whitespace.
2348 Set point to where scanning stops.
2349 If COUNT comments are found as expected, with nothing except whitespace
2350 between them, return t; otherwise return nil. */)
2351 (Lisp_Object count)
2352 {
2353 ptrdiff_t from, from_byte, stop;
2354 int c, c1;
2355 enum syntaxcode code;
2356 int comstyle = 0; /* style of comment encountered */
2357 bool comnested = 0; /* whether the comment is nestable or not */
2358 bool found;
2359 EMACS_INT count1;
2360 ptrdiff_t out_charpos, out_bytepos;
2361 EMACS_INT dummy;
2362
2363 CHECK_NUMBER (count);
2364 count1 = XINT (count);
2365 stop = count1 > 0 ? ZV : BEGV;
2366
2367 immediate_quit = 1;
2368 QUIT;
2369
2370 from = PT;
2371 from_byte = PT_BYTE;
2372
2373 SETUP_SYNTAX_TABLE (from, count1);
2374 while (count1 > 0)
2375 {
2376 do
2377 {
2378 bool comstart_first;
2379 int syntax, other_syntax;
2380
2381 if (from == stop)
2382 {
2383 SET_PT_BOTH (from, from_byte);
2384 immediate_quit = 0;
2385 return Qnil;
2386 }
2387 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2388 syntax = SYNTAX_WITH_FLAGS (c);
2389 code = SYNTAX (c);
2390 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2391 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2392 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2393 INC_BOTH (from, from_byte);
2394 UPDATE_SYNTAX_TABLE_FORWARD (from);
2395 if (from < stop && comstart_first
2396 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2397 other_syntax = SYNTAX_WITH_FLAGS (c1),
2398 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2399 {
2400 /* We have encountered a comment start sequence and we
2401 are ignoring all text inside comments. We must record
2402 the comment style this sequence begins so that later,
2403 only a comment end of the same style actually ends
2404 the comment section. */
2405 code = Scomment;
2406 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2407 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2408 INC_BOTH (from, from_byte);
2409 UPDATE_SYNTAX_TABLE_FORWARD (from);
2410 }
2411 }
2412 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2413
2414 if (code == Scomment_fence)
2415 comstyle = ST_COMMENT_STYLE;
2416 else if (code != Scomment)
2417 {
2418 immediate_quit = 0;
2419 DEC_BOTH (from, from_byte);
2420 SET_PT_BOTH (from, from_byte);
2421 return Qnil;
2422 }
2423 /* We're at the start of a comment. */
2424 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2425 &out_charpos, &out_bytepos, &dummy);
2426 from = out_charpos; from_byte = out_bytepos;
2427 if (!found)
2428 {
2429 immediate_quit = 0;
2430 SET_PT_BOTH (from, from_byte);
2431 return Qnil;
2432 }
2433 INC_BOTH (from, from_byte);
2434 UPDATE_SYNTAX_TABLE_FORWARD (from);
2435 /* We have skipped one comment. */
2436 count1--;
2437 }
2438
2439 while (count1 < 0)
2440 {
2441 while (1)
2442 {
2443 bool quoted;
2444 int syntax;
2445
2446 if (from <= stop)
2447 {
2448 SET_PT_BOTH (BEGV, BEGV_BYTE);
2449 immediate_quit = 0;
2450 return Qnil;
2451 }
2452
2453 DEC_BOTH (from, from_byte);
2454 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2455 quoted = char_quoted (from, from_byte);
2456 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2457 syntax = SYNTAX_WITH_FLAGS (c);
2458 code = SYNTAX (c);
2459 comstyle = 0;
2460 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2461 if (code == Sendcomment)
2462 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2463 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2464 && prev_char_comend_first (from, from_byte)
2465 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2466 {
2467 int other_syntax;
2468 /* We must record the comment style encountered so that
2469 later, we can match only the proper comment begin
2470 sequence of the same style. */
2471 DEC_BOTH (from, from_byte);
2472 code = Sendcomment;
2473 /* Calling char_quoted, above, set up global syntax position
2474 at the new value of FROM. */
2475 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2476 other_syntax = SYNTAX_WITH_FLAGS (c1);
2477 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2478 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2479 }
2480
2481 if (code == Scomment_fence)
2482 {
2483 /* Skip until first preceding unquoted comment_fence. */
2484 bool fence_found = 0;
2485 ptrdiff_t ini = from, ini_byte = from_byte;
2486
2487 while (1)
2488 {
2489 DEC_BOTH (from, from_byte);
2490 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2491 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2492 if (SYNTAX (c) == Scomment_fence
2493 && !char_quoted (from, from_byte))
2494 {
2495 fence_found = 1;
2496 break;
2497 }
2498 else if (from == stop)
2499 break;
2500 }
2501 if (fence_found == 0)
2502 {
2503 from = ini; /* Set point to ini + 1. */
2504 from_byte = ini_byte;
2505 goto leave;
2506 }
2507 else
2508 /* We have skipped one comment. */
2509 break;
2510 }
2511 else if (code == Sendcomment)
2512 {
2513 found = back_comment (from, from_byte, stop, comnested, comstyle,
2514 &out_charpos, &out_bytepos);
2515 if (!found)
2516 {
2517 if (c == '\n')
2518 /* This end-of-line is not an end-of-comment.
2519 Treat it like a whitespace.
2520 CC-mode (and maybe others) relies on this behavior. */
2521 ;
2522 else
2523 {
2524 /* Failure: we should go back to the end of this
2525 not-quite-endcomment. */
2526 if (SYNTAX (c) != code)
2527 /* It was a two-char Sendcomment. */
2528 INC_BOTH (from, from_byte);
2529 goto leave;
2530 }
2531 }
2532 else
2533 {
2534 /* We have skipped one comment. */
2535 from = out_charpos, from_byte = out_bytepos;
2536 break;
2537 }
2538 }
2539 else if (code != Swhitespace || quoted)
2540 {
2541 leave:
2542 immediate_quit = 0;
2543 INC_BOTH (from, from_byte);
2544 SET_PT_BOTH (from, from_byte);
2545 return Qnil;
2546 }
2547 }
2548
2549 count1++;
2550 }
2551
2552 SET_PT_BOTH (from, from_byte);
2553 immediate_quit = 0;
2554 return Qt;
2555 }
2556 \f
2557 /* Return syntax code of character C if C is an ASCII character
2558 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2559
2560 static enum syntaxcode
2561 syntax_multibyte (int c, bool multibyte_symbol_p)
2562 {
2563 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2564 }
2565
2566 static Lisp_Object
2567 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2568 {
2569 Lisp_Object val;
2570 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2571 int c, c1;
2572 int stringterm;
2573 bool quoted;
2574 bool mathexit = 0;
2575 enum syntaxcode code;
2576 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2577 int comstyle = 0; /* style of comment encountered */
2578 bool comnested = 0; /* whether the comment is nestable or not */
2579 ptrdiff_t temp_pos;
2580 EMACS_INT last_good = from;
2581 bool found;
2582 ptrdiff_t from_byte;
2583 ptrdiff_t out_bytepos, out_charpos;
2584 EMACS_INT dummy;
2585 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2586
2587 if (depth > 0) min_depth = 0;
2588
2589 if (from > ZV) from = ZV;
2590 if (from < BEGV) from = BEGV;
2591
2592 from_byte = CHAR_TO_BYTE (from);
2593
2594 immediate_quit = 1;
2595 QUIT;
2596
2597 SETUP_SYNTAX_TABLE (from, count);
2598 while (count > 0)
2599 {
2600 while (from < stop)
2601 {
2602 bool comstart_first, prefix;
2603 int syntax, other_syntax;
2604 UPDATE_SYNTAX_TABLE_FORWARD (from);
2605 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2606 syntax = SYNTAX_WITH_FLAGS (c);
2607 code = syntax_multibyte (c, multibyte_symbol_p);
2608 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2609 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2610 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2611 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2612 if (depth == min_depth)
2613 last_good = from;
2614 INC_BOTH (from, from_byte);
2615 UPDATE_SYNTAX_TABLE_FORWARD (from);
2616 if (from < stop && comstart_first
2617 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2618 other_syntax = SYNTAX_WITH_FLAGS (c),
2619 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2620 && parse_sexp_ignore_comments)
2621 {
2622 /* we have encountered a comment start sequence and we
2623 are ignoring all text inside comments. We must record
2624 the comment style this sequence begins so that later,
2625 only a comment end of the same style actually ends
2626 the comment section */
2627 code = Scomment;
2628 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2629 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2630 INC_BOTH (from, from_byte);
2631 UPDATE_SYNTAX_TABLE_FORWARD (from);
2632 }
2633
2634 if (prefix)
2635 continue;
2636
2637 switch (code)
2638 {
2639 case Sescape:
2640 case Scharquote:
2641 if (from == stop)
2642 goto lose;
2643 INC_BOTH (from, from_byte);
2644 /* treat following character as a word constituent */
2645 case Sword:
2646 case Ssymbol:
2647 if (depth || !sexpflag) break;
2648 /* This word counts as a sexp; return at end of it. */
2649 while (from < stop)
2650 {
2651 UPDATE_SYNTAX_TABLE_FORWARD (from);
2652
2653 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2654 switch (syntax_multibyte (c, multibyte_symbol_p))
2655 {
2656 case Scharquote:
2657 case Sescape:
2658 INC_BOTH (from, from_byte);
2659 if (from == stop)
2660 goto lose;
2661 break;
2662 case Sword:
2663 case Ssymbol:
2664 case Squote:
2665 break;
2666 default:
2667 goto done;
2668 }
2669 INC_BOTH (from, from_byte);
2670 }
2671 goto done;
2672
2673 case Scomment_fence:
2674 comstyle = ST_COMMENT_STYLE;
2675 /* FALLTHROUGH */
2676 case Scomment:
2677 if (!parse_sexp_ignore_comments) break;
2678 UPDATE_SYNTAX_TABLE_FORWARD (from);
2679 found = forw_comment (from, from_byte, stop,
2680 comnested, comstyle, 0,
2681 &out_charpos, &out_bytepos, &dummy);
2682 from = out_charpos, from_byte = out_bytepos;
2683 if (!found)
2684 {
2685 if (depth == 0)
2686 goto done;
2687 goto lose;
2688 }
2689 INC_BOTH (from, from_byte);
2690 UPDATE_SYNTAX_TABLE_FORWARD (from);
2691 break;
2692
2693 case Smath:
2694 if (!sexpflag)
2695 break;
2696 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2697 {
2698 INC_BOTH (from, from_byte);
2699 }
2700 if (mathexit)
2701 {
2702 mathexit = 0;
2703 goto close1;
2704 }
2705 mathexit = 1;
2706
2707 case Sopen:
2708 if (!++depth) goto done;
2709 break;
2710
2711 case Sclose:
2712 close1:
2713 if (!--depth) goto done;
2714 if (depth < min_depth)
2715 xsignal3 (Qscan_error,
2716 build_string ("Containing expression ends prematurely"),
2717 make_number (last_good), make_number (from));
2718 break;
2719
2720 case Sstring:
2721 case Sstring_fence:
2722 temp_pos = dec_bytepos (from_byte);
2723 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2724 while (1)
2725 {
2726 enum syntaxcode c_code;
2727 if (from >= stop)
2728 goto lose;
2729 UPDATE_SYNTAX_TABLE_FORWARD (from);
2730 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2731 c_code = syntax_multibyte (c, multibyte_symbol_p);
2732 if (code == Sstring
2733 ? c == stringterm && c_code == Sstring
2734 : c_code == Sstring_fence)
2735 break;
2736
2737 switch (c_code)
2738 {
2739 case Scharquote:
2740 case Sescape:
2741 INC_BOTH (from, from_byte);
2742 }
2743 INC_BOTH (from, from_byte);
2744 }
2745 INC_BOTH (from, from_byte);
2746 if (!depth && sexpflag) goto done;
2747 break;
2748 default:
2749 /* Ignore whitespace, punctuation, quote, endcomment. */
2750 break;
2751 }
2752 }
2753
2754 /* Reached end of buffer. Error if within object, return nil if between */
2755 if (depth)
2756 goto lose;
2757
2758 immediate_quit = 0;
2759 return Qnil;
2760
2761 /* End of object reached */
2762 done:
2763 count--;
2764 }
2765
2766 while (count < 0)
2767 {
2768 while (from > stop)
2769 {
2770 int syntax;
2771 DEC_BOTH (from, from_byte);
2772 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2773 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2774 syntax= SYNTAX_WITH_FLAGS (c);
2775 code = syntax_multibyte (c, multibyte_symbol_p);
2776 if (depth == min_depth)
2777 last_good = from;
2778 comstyle = 0;
2779 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2780 if (code == Sendcomment)
2781 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2782 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2783 && prev_char_comend_first (from, from_byte)
2784 && parse_sexp_ignore_comments)
2785 {
2786 /* We must record the comment style encountered so that
2787 later, we can match only the proper comment begin
2788 sequence of the same style. */
2789 int c2, other_syntax;
2790 DEC_BOTH (from, from_byte);
2791 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2792 code = Sendcomment;
2793 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2794 other_syntax = SYNTAX_WITH_FLAGS (c2);
2795 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2796 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2797 }
2798
2799 /* Quoting turns anything except a comment-ender
2800 into a word character. Note that this cannot be true
2801 if we decremented FROM in the if-statement above. */
2802 if (code != Sendcomment && char_quoted (from, from_byte))
2803 {
2804 DEC_BOTH (from, from_byte);
2805 code = Sword;
2806 }
2807 else if (SYNTAX_FLAGS_PREFIX (syntax))
2808 continue;
2809
2810 switch (code)
2811 {
2812 case Sword:
2813 case Ssymbol:
2814 case Sescape:
2815 case Scharquote:
2816 if (depth || !sexpflag) break;
2817 /* This word counts as a sexp; count object finished
2818 after passing it. */
2819 while (from > stop)
2820 {
2821 temp_pos = from_byte;
2822 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2823 DEC_POS (temp_pos);
2824 else
2825 temp_pos--;
2826 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2827 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2828 /* Don't allow comment-end to be quoted. */
2829 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2830 goto done2;
2831 quoted = char_quoted (from - 1, temp_pos);
2832 if (quoted)
2833 {
2834 DEC_BOTH (from, from_byte);
2835 temp_pos = dec_bytepos (temp_pos);
2836 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2837 }
2838 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2839 if (! quoted)
2840 switch (syntax_multibyte (c1, multibyte_symbol_p))
2841 {
2842 case Sword: case Ssymbol: case Squote: break;
2843 default: goto done2;
2844 }
2845 DEC_BOTH (from, from_byte);
2846 }
2847 goto done2;
2848
2849 case Smath:
2850 if (!sexpflag)
2851 break;
2852 temp_pos = dec_bytepos (from_byte);
2853 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2854 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2855 DEC_BOTH (from, from_byte);
2856 if (mathexit)
2857 {
2858 mathexit = 0;
2859 goto open2;
2860 }
2861 mathexit = 1;
2862
2863 case Sclose:
2864 if (!++depth) goto done2;
2865 break;
2866
2867 case Sopen:
2868 open2:
2869 if (!--depth) goto done2;
2870 if (depth < min_depth)
2871 xsignal3 (Qscan_error,
2872 build_string ("Containing expression ends prematurely"),
2873 make_number (last_good), make_number (from));
2874 break;
2875
2876 case Sendcomment:
2877 if (!parse_sexp_ignore_comments)
2878 break;
2879 found = back_comment (from, from_byte, stop, comnested, comstyle,
2880 &out_charpos, &out_bytepos);
2881 /* FIXME: if !found, it really wasn't a comment-end.
2882 For single-char Sendcomment, we can't do much about it apart
2883 from skipping the char.
2884 For 2-char endcomments, we could try again, taking both
2885 chars as separate entities, but it's a lot of trouble
2886 for very little gain, so we don't bother either. -sm */
2887 if (found)
2888 from = out_charpos, from_byte = out_bytepos;
2889 break;
2890
2891 case Scomment_fence:
2892 case Sstring_fence:
2893 while (1)
2894 {
2895 if (from == stop)
2896 goto lose;
2897 DEC_BOTH (from, from_byte);
2898 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2899 if (!char_quoted (from, from_byte))
2900 {
2901 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2902 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2903 break;
2904 }
2905 }
2906 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2907 break;
2908
2909 case Sstring:
2910 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2911 while (1)
2912 {
2913 if (from == stop)
2914 goto lose;
2915 DEC_BOTH (from, from_byte);
2916 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2917 if (!char_quoted (from, from_byte))
2918 {
2919 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2920 if (c == stringterm
2921 && (syntax_multibyte (c, multibyte_symbol_p)
2922 == Sstring))
2923 break;
2924 }
2925 }
2926 if (!depth && sexpflag) goto done2;
2927 break;
2928 default:
2929 /* Ignore whitespace, punctuation, quote, endcomment. */
2930 break;
2931 }
2932 }
2933
2934 /* Reached start of buffer. Error if within object, return nil if between */
2935 if (depth)
2936 goto lose;
2937
2938 immediate_quit = 0;
2939 return Qnil;
2940
2941 done2:
2942 count++;
2943 }
2944
2945
2946 immediate_quit = 0;
2947 XSETFASTINT (val, from);
2948 return val;
2949
2950 lose:
2951 xsignal3 (Qscan_error,
2952 build_string ("Unbalanced parentheses"),
2953 make_number (last_good), make_number (from));
2954 }
2955
2956 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2957 doc: /* Scan from character number FROM by COUNT lists.
2958 Scan forward if COUNT is positive, backward if COUNT is negative.
2959 Return the character number of the position thus found.
2960
2961 A \"list", in this context, refers to a balanced parenthetical
2962 grouping, as determined by the syntax table.
2963
2964 If DEPTH is nonzero, treat that as the nesting depth of the starting
2965 point (i.e. the starting point is DEPTH parentheses deep). This
2966 function scans over parentheses until the depth goes to zero COUNT
2967 times. Hence, positive DEPTH moves out that number of levels of
2968 parentheses, while negative DEPTH moves to a deeper level.
2969
2970 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2971
2972 If we reach the beginning or end of the accessible part of the buffer
2973 before we have scanned over COUNT lists, return nil if the depth at
2974 that point is zero, and signal a error if the depth is nonzero. */)
2975 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2976 {
2977 CHECK_NUMBER (from);
2978 CHECK_NUMBER (count);
2979 CHECK_NUMBER (depth);
2980
2981 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2982 }
2983
2984 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2985 doc: /* Scan from character number FROM by COUNT balanced expressions.
2986 If COUNT is negative, scan backwards.
2987 Returns the character number of the position thus found.
2988
2989 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2990
2991 If the beginning or end of (the accessible part of) the buffer is reached
2992 in the middle of a parenthetical grouping, an error is signaled.
2993 If the beginning or end is reached between groupings
2994 but before count is used up, nil is returned. */)
2995 (Lisp_Object from, Lisp_Object count)
2996 {
2997 CHECK_NUMBER (from);
2998 CHECK_NUMBER (count);
2999
3000 return scan_lists (XINT (from), XINT (count), 0, 1);
3001 }
3002
3003 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3004 0, 0, 0,
3005 doc: /* Move point backward over any number of chars with prefix syntax.
3006 This includes chars with "quote" or "prefix" syntax (' or p). */)
3007 (void)
3008 {
3009 ptrdiff_t beg = BEGV;
3010 ptrdiff_t opoint = PT;
3011 ptrdiff_t opoint_byte = PT_BYTE;
3012 ptrdiff_t pos = PT;
3013 ptrdiff_t pos_byte = PT_BYTE;
3014 int c;
3015
3016 if (pos <= beg)
3017 {
3018 SET_PT_BOTH (opoint, opoint_byte);
3019
3020 return Qnil;
3021 }
3022
3023 SETUP_SYNTAX_TABLE (pos, -1);
3024
3025 DEC_BOTH (pos, pos_byte);
3026
3027 while (!char_quoted (pos, pos_byte)
3028 /* Previous statement updates syntax table. */
3029 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3030 || syntax_prefix_flag_p (c)))
3031 {
3032 opoint = pos;
3033 opoint_byte = pos_byte;
3034
3035 if (pos + 1 > beg)
3036 DEC_BOTH (pos, pos_byte);
3037 }
3038
3039 SET_PT_BOTH (opoint, opoint_byte);
3040
3041 return Qnil;
3042 }
3043 \f
3044 /* Parse forward from FROM / FROM_BYTE to END,
3045 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3046 and return a description of the state of the parse at END.
3047 If STOPBEFORE, stop at the start of an atom.
3048 If COMMENTSTOP is 1, stop at the start of a comment.
3049 If COMMENTSTOP is -1, stop at the start or end of a comment,
3050 after the beginning of a string, or after the end of a string. */
3051
3052 static void
3053 scan_sexps_forward (struct lisp_parse_state *stateptr,
3054 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3055 EMACS_INT targetdepth, bool stopbefore,
3056 Lisp_Object oldstate, int commentstop)
3057 {
3058 struct lisp_parse_state state;
3059 enum syntaxcode code;
3060 int c1;
3061 bool comnested;
3062 struct level { ptrdiff_t last, prev; };
3063 struct level levelstart[100];
3064 struct level *curlevel = levelstart;
3065 struct level *endlevel = levelstart + 100;
3066 EMACS_INT depth; /* Paren depth of current scanning location.
3067 level - levelstart equals this except
3068 when the depth becomes negative. */
3069 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3070 bool start_quoted = 0; /* True means starting after a char quote. */
3071 Lisp_Object tem;
3072 ptrdiff_t prev_from; /* Keep one character before FROM. */
3073 ptrdiff_t prev_from_byte;
3074 int prev_from_syntax;
3075 bool boundary_stop = commentstop == -1;
3076 bool nofence;
3077 bool found;
3078 ptrdiff_t out_bytepos, out_charpos;
3079 int temp;
3080
3081 prev_from = from;
3082 prev_from_byte = from_byte;
3083 if (from != BEGV)
3084 DEC_BOTH (prev_from, prev_from_byte);
3085
3086 /* Use this macro instead of `from++'. */
3087 #define INC_FROM \
3088 do { prev_from = from; \
3089 prev_from_byte = from_byte; \
3090 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3091 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3092 INC_BOTH (from, from_byte); \
3093 if (from < end) \
3094 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3095 } while (0)
3096
3097 immediate_quit = 1;
3098 QUIT;
3099
3100 if (NILP (oldstate))
3101 {
3102 depth = 0;
3103 state.instring = -1;
3104 state.incomment = 0;
3105 state.comstyle = 0; /* comment style a by default. */
3106 state.comstr_start = -1; /* no comment/string seen. */
3107 }
3108 else
3109 {
3110 tem = Fcar (oldstate);
3111 if (!NILP (tem))
3112 depth = XINT (tem);
3113 else
3114 depth = 0;
3115
3116 oldstate = Fcdr (oldstate);
3117 oldstate = Fcdr (oldstate);
3118 oldstate = Fcdr (oldstate);
3119 tem = Fcar (oldstate);
3120 /* Check whether we are inside string_fence-style string: */
3121 state.instring = (!NILP (tem)
3122 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3123 : -1);
3124
3125 oldstate = Fcdr (oldstate);
3126 tem = Fcar (oldstate);
3127 state.incomment = (!NILP (tem)
3128 ? (INTEGERP (tem) ? XINT (tem) : -1)
3129 : 0);
3130
3131 oldstate = Fcdr (oldstate);
3132 tem = Fcar (oldstate);
3133 start_quoted = !NILP (tem);
3134
3135 /* if the eighth element of the list is nil, we are in comment
3136 style a. If it is non-nil, we are in comment style b */
3137 oldstate = Fcdr (oldstate);
3138 oldstate = Fcdr (oldstate);
3139 tem = Fcar (oldstate);
3140 state.comstyle = (NILP (tem)
3141 ? 0
3142 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3143 ? XINT (tem)
3144 : ST_COMMENT_STYLE));
3145
3146 oldstate = Fcdr (oldstate);
3147 tem = Fcar (oldstate);
3148 state.comstr_start =
3149 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3150 oldstate = Fcdr (oldstate);
3151 tem = Fcar (oldstate);
3152 while (!NILP (tem)) /* >= second enclosing sexps. */
3153 {
3154 Lisp_Object temhd = Fcar (tem);
3155 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3156 curlevel->last = XINT (temhd);
3157 if (++curlevel == endlevel)
3158 curlevel--; /* error ("Nesting too deep for parser"); */
3159 curlevel->prev = -1;
3160 curlevel->last = -1;
3161 tem = Fcdr (tem);
3162 }
3163 }
3164 state.quoted = 0;
3165 mindepth = depth;
3166
3167 curlevel->prev = -1;
3168 curlevel->last = -1;
3169
3170 SETUP_SYNTAX_TABLE (prev_from, 1);
3171 temp = FETCH_CHAR (prev_from_byte);
3172 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3173 UPDATE_SYNTAX_TABLE_FORWARD (from);
3174
3175 /* Enter the loop at a place appropriate for initial state. */
3176
3177 if (state.incomment)
3178 goto startincomment;
3179 if (state.instring >= 0)
3180 {
3181 nofence = state.instring != ST_STRING_STYLE;
3182 if (start_quoted)
3183 goto startquotedinstring;
3184 goto startinstring;
3185 }
3186 else if (start_quoted)
3187 goto startquoted;
3188
3189 while (from < end)
3190 {
3191 int syntax;
3192 INC_FROM;
3193 code = prev_from_syntax & 0xff;
3194
3195 if (from < end
3196 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3197 && (c1 = FETCH_CHAR (from_byte),
3198 syntax = SYNTAX_WITH_FLAGS (c1),
3199 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3200 /* Duplicate code to avoid a complex if-expression
3201 which causes trouble for the SGI compiler. */
3202 {
3203 /* Record the comment style we have entered so that only
3204 the comment-end sequence of the same style actually
3205 terminates the comment section. */
3206 state.comstyle
3207 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3208 comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3209 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3210 state.incomment = comnested ? 1 : -1;
3211 state.comstr_start = prev_from;
3212 INC_FROM;
3213 code = Scomment;
3214 }
3215 else if (code == Scomment_fence)
3216 {
3217 /* Record the comment style we have entered so that only
3218 the comment-end sequence of the same style actually
3219 terminates the comment section. */
3220 state.comstyle = ST_COMMENT_STYLE;
3221 state.incomment = -1;
3222 state.comstr_start = prev_from;
3223 code = Scomment;
3224 }
3225 else if (code == Scomment)
3226 {
3227 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3228 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3229 1 : -1);
3230 state.comstr_start = prev_from;
3231 }
3232
3233 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3234 continue;
3235 switch (code)
3236 {
3237 case Sescape:
3238 case Scharquote:
3239 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3240 curlevel->last = prev_from;
3241 startquoted:
3242 if (from == end) goto endquoted;
3243 INC_FROM;
3244 goto symstarted;
3245 /* treat following character as a word constituent */
3246 case Sword:
3247 case Ssymbol:
3248 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3249 curlevel->last = prev_from;
3250 symstarted:
3251 while (from < end)
3252 {
3253 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3254 switch (SYNTAX (symchar))
3255 {
3256 case Scharquote:
3257 case Sescape:
3258 INC_FROM;
3259 if (from == end) goto endquoted;
3260 break;
3261 case Sword:
3262 case Ssymbol:
3263 case Squote:
3264 break;
3265 default:
3266 goto symdone;
3267 }
3268 INC_FROM;
3269 }
3270 symdone:
3271 curlevel->prev = curlevel->last;
3272 break;
3273
3274 case Scomment_fence: /* Can't happen because it's handled above. */
3275 case Scomment:
3276 if (commentstop || boundary_stop) goto done;
3277 startincomment:
3278 /* The (from == BEGV) test was to enter the loop in the middle so
3279 that we find a 2-char comment ender even if we start in the
3280 middle of it. We don't want to do that if we're just at the
3281 beginning of the comment (think of (*) ... (*)). */
3282 found = forw_comment (from, from_byte, end,
3283 state.incomment, state.comstyle,
3284 (from == BEGV || from < state.comstr_start + 3)
3285 ? 0 : prev_from_syntax,
3286 &out_charpos, &out_bytepos, &state.incomment);
3287 from = out_charpos; from_byte = out_bytepos;
3288 /* Beware! prev_from and friends are invalid now.
3289 Luckily, the `done' doesn't use them and the INC_FROM
3290 sets them to a sane value without looking at them. */
3291 if (!found) goto done;
3292 INC_FROM;
3293 state.incomment = 0;
3294 state.comstyle = 0; /* reset the comment style */
3295 if (boundary_stop) goto done;
3296 break;
3297
3298 case Sopen:
3299 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3300 depth++;
3301 /* curlevel++->last ran into compiler bug on Apollo */
3302 curlevel->last = prev_from;
3303 if (++curlevel == endlevel)
3304 curlevel--; /* error ("Nesting too deep for parser"); */
3305 curlevel->prev = -1;
3306 curlevel->last = -1;
3307 if (targetdepth == depth) goto done;
3308 break;
3309
3310 case Sclose:
3311 depth--;
3312 if (depth < mindepth)
3313 mindepth = depth;
3314 if (curlevel != levelstart)
3315 curlevel--;
3316 curlevel->prev = curlevel->last;
3317 if (targetdepth == depth) goto done;
3318 break;
3319
3320 case Sstring:
3321 case Sstring_fence:
3322 state.comstr_start = from - 1;
3323 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3324 curlevel->last = prev_from;
3325 state.instring = (code == Sstring
3326 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3327 : ST_STRING_STYLE);
3328 if (boundary_stop) goto done;
3329 startinstring:
3330 {
3331 nofence = state.instring != ST_STRING_STYLE;
3332
3333 while (1)
3334 {
3335 int c;
3336 enum syntaxcode c_code;
3337
3338 if (from >= end) goto done;
3339 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3340 c_code = SYNTAX (c);
3341
3342 /* Check C_CODE here so that if the char has
3343 a syntax-table property which says it is NOT
3344 a string character, it does not end the string. */
3345 if (nofence && c == state.instring && c_code == Sstring)
3346 break;
3347
3348 switch (c_code)
3349 {
3350 case Sstring_fence:
3351 if (!nofence) goto string_end;
3352 break;
3353 case Scharquote:
3354 case Sescape:
3355 INC_FROM;
3356 startquotedinstring:
3357 if (from >= end) goto endquoted;
3358 }
3359 INC_FROM;
3360 }
3361 }
3362 string_end:
3363 state.instring = -1;
3364 curlevel->prev = curlevel->last;
3365 INC_FROM;
3366 if (boundary_stop) goto done;
3367 break;
3368
3369 case Smath:
3370 /* FIXME: We should do something with it. */
3371 break;
3372 default:
3373 /* Ignore whitespace, punctuation, quote, endcomment. */
3374 break;
3375 }
3376 }
3377 goto done;
3378
3379 stop: /* Here if stopping before start of sexp. */
3380 from = prev_from; /* We have just fetched the char that starts it; */
3381 from_byte = prev_from_byte;
3382 goto done; /* but return the position before it. */
3383
3384 endquoted:
3385 state.quoted = 1;
3386 done:
3387 state.depth = depth;
3388 state.mindepth = mindepth;
3389 state.thislevelstart = curlevel->prev;
3390 state.prevlevelstart
3391 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3392 state.location = from;
3393 state.location_byte = from_byte;
3394 state.levelstarts = Qnil;
3395 while (curlevel > levelstart)
3396 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3397 state.levelstarts);
3398 immediate_quit = 0;
3399
3400 *stateptr = state;
3401 }
3402
3403 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3404 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3405 Parsing stops at TO or when certain criteria are met;
3406 point is set to where parsing stops.
3407 If fifth arg OLDSTATE is omitted or nil,
3408 parsing assumes that FROM is the beginning of a function.
3409 Value is a list of elements describing final state of parsing:
3410 0. depth in parens.
3411 1. character address of start of innermost containing list; nil if none.
3412 2. character address of start of last complete sexp terminated.
3413 3. non-nil if inside a string.
3414 (it is the character that will terminate the string,
3415 or t if the string should be terminated by a generic string delimiter.)
3416 4. nil if outside a comment, t if inside a non-nestable comment,
3417 else an integer (the current comment nesting).
3418 5. t if following a quote character.
3419 6. the minimum paren-depth encountered during this scan.
3420 7. style of comment, if any.
3421 8. character address of start of comment or string; nil if not in one.
3422 9. Intermediate data for continuation of parsing (subject to change).
3423 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3424 in parentheses becomes equal to TARGETDEPTH.
3425 Fourth arg STOPBEFORE non-nil means stop when come to
3426 any character that starts a sexp.
3427 Fifth arg OLDSTATE is a list like what this function returns.
3428 It is used to initialize the state of the parse. Elements number 1, 2, 6
3429 are ignored.
3430 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3431 If it is symbol `syntax-table', stop after the start of a comment or a
3432 string, or after end of a comment or a string. */)
3433 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3434 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3435 {
3436 struct lisp_parse_state state;
3437 EMACS_INT target;
3438
3439 if (!NILP (targetdepth))
3440 {
3441 CHECK_NUMBER (targetdepth);
3442 target = XINT (targetdepth);
3443 }
3444 else
3445 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth */
3446
3447 validate_region (&from, &to);
3448 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3449 XINT (to),
3450 target, !NILP (stopbefore), oldstate,
3451 (NILP (commentstop)
3452 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3453
3454 SET_PT_BOTH (state.location, state.location_byte);
3455
3456 return Fcons (make_number (state.depth),
3457 Fcons (state.prevlevelstart < 0
3458 ? Qnil : make_number (state.prevlevelstart),
3459 Fcons (state.thislevelstart < 0
3460 ? Qnil : make_number (state.thislevelstart),
3461 Fcons (state.instring >= 0
3462 ? (state.instring == ST_STRING_STYLE
3463 ? Qt : make_number (state.instring)) : Qnil,
3464 Fcons (state.incomment < 0 ? Qt :
3465 (state.incomment == 0 ? Qnil :
3466 make_number (state.incomment)),
3467 Fcons (state.quoted ? Qt : Qnil,
3468 Fcons (make_number (state.mindepth),
3469 Fcons ((state.comstyle
3470 ? (state.comstyle == ST_COMMENT_STYLE
3471 ? Qsyntax_table
3472 : make_number (state.comstyle))
3473 : Qnil),
3474 Fcons (((state.incomment
3475 || (state.instring >= 0))
3476 ? make_number (state.comstr_start)
3477 : Qnil),
3478 Fcons (state.levelstarts, Qnil))))))))));
3479 }
3480 \f
3481 void
3482 init_syntax_once (void)
3483 {
3484 register int i, c;
3485 Lisp_Object temp;
3486
3487 /* This has to be done here, before we call Fmake_char_table. */
3488 DEFSYM (Qsyntax_table, "syntax-table");
3489
3490 /* This variable is DEFSYMed in alloc.c and not initialized yet, so
3491 intern it here. NOTE: you must guarantee that init_syntax_once
3492 is called before all other users of this variable. */
3493 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3494
3495 /* Create objects which can be shared among syntax tables. */
3496 Vsyntax_code_object = make_uninit_vector (Smax);
3497 for (i = 0; i < Smax; i++)
3498 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3499
3500 /* Now we are ready to set up this property, so we can
3501 create syntax tables. */
3502 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3503
3504 temp = AREF (Vsyntax_code_object, Swhitespace);
3505
3506 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3507
3508 /* Control characters should not be whitespace. */
3509 temp = AREF (Vsyntax_code_object, Spunct);
3510 for (i = 0; i <= ' ' - 1; i++)
3511 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3512 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3513
3514 /* Except that a few really are whitespace. */
3515 temp = AREF (Vsyntax_code_object, Swhitespace);
3516 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3517 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3518 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3519 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3520 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3521
3522 temp = AREF (Vsyntax_code_object, Sword);
3523 for (i = 'a'; i <= 'z'; i++)
3524 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3525 for (i = 'A'; i <= 'Z'; i++)
3526 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3527 for (i = '0'; i <= '9'; i++)
3528 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3529
3530 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3531 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3532
3533 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3534 Fcons (make_number (Sopen), make_number (')')));
3535 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3536 Fcons (make_number (Sclose), make_number ('(')));
3537 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3538 Fcons (make_number (Sopen), make_number (']')));
3539 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3540 Fcons (make_number (Sclose), make_number ('[')));
3541 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3542 Fcons (make_number (Sopen), make_number ('}')));
3543 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3544 Fcons (make_number (Sclose), make_number ('{')));
3545 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3546 Fcons (make_number (Sstring), Qnil));
3547 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3548 Fcons (make_number (Sescape), Qnil));
3549
3550 temp = AREF (Vsyntax_code_object, Ssymbol);
3551 for (i = 0; i < 10; i++)
3552 {
3553 c = "_-+*/&|<>="[i];
3554 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3555 }
3556
3557 temp = AREF (Vsyntax_code_object, Spunct);
3558 for (i = 0; i < 12; i++)
3559 {
3560 c = ".,;:?!#@~^'`"[i];
3561 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3562 }
3563
3564 /* All multibyte characters have syntax `word' by default. */
3565 temp = AREF (Vsyntax_code_object, Sword);
3566 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3567 }
3568
3569 void
3570 syms_of_syntax (void)
3571 {
3572 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3573
3574 staticpro (&Vsyntax_code_object);
3575
3576 staticpro (&gl_state.object);
3577 staticpro (&gl_state.global_code);
3578 staticpro (&gl_state.current_syntax_table);
3579 staticpro (&gl_state.old_prop);
3580
3581 /* Defined in regex.c */
3582 staticpro (&re_match_object);
3583
3584 DEFSYM (Qscan_error, "scan-error");
3585 Fput (Qscan_error, Qerror_conditions,
3586 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3587 Fput (Qscan_error, Qerror_message,
3588 build_pure_c_string ("Scan error"));
3589
3590 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3591 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3592
3593 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3594 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3595 Otherwise, that text property is simply ignored.
3596 See the info node `(elisp)Syntax Properties' for a description of the
3597 `syntax-table' property. */);
3598
3599 words_include_escapes = 0;
3600 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3601 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3602
3603 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3604 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3605 multibyte_syntax_as_symbol = 0;
3606
3607 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3608 open_paren_in_column_0_is_defun_start,
3609 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3610 open_paren_in_column_0_is_defun_start = 1;
3611
3612
3613 DEFVAR_LISP ("find-word-boundary-function-table",
3614 Vfind_word_boundary_function_table,
3615 doc: /*
3616 Char table of functions to search for the word boundary.
3617 Each function is called with two arguments; POS and LIMIT.
3618 POS and LIMIT are character positions in the current buffer.
3619
3620 If POS is less than LIMIT, POS is at the first character of a word,
3621 and the return value of a function is a position after the last
3622 character of that word.
3623
3624 If POS is not less than LIMIT, POS is at the last character of a word,
3625 and the return value of a function is a position at the first
3626 character of that word.
3627
3628 In both cases, LIMIT bounds the search. */);
3629 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3630
3631 defsubr (&Ssyntax_table_p);
3632 defsubr (&Ssyntax_table);
3633 defsubr (&Sstandard_syntax_table);
3634 defsubr (&Scopy_syntax_table);
3635 defsubr (&Sset_syntax_table);
3636 defsubr (&Schar_syntax);
3637 defsubr (&Smatching_paren);
3638 defsubr (&Sstring_to_syntax);
3639 defsubr (&Smodify_syntax_entry);
3640 defsubr (&Sinternal_describe_syntax_value);
3641
3642 defsubr (&Sforward_word);
3643
3644 defsubr (&Sskip_chars_forward);
3645 defsubr (&Sskip_chars_backward);
3646 defsubr (&Sskip_syntax_forward);
3647 defsubr (&Sskip_syntax_backward);
3648
3649 defsubr (&Sforward_comment);
3650 defsubr (&Sscan_lists);
3651 defsubr (&Sscan_sexps);
3652 defsubr (&Sbackward_prefix_chars);
3653 defsubr (&Sparse_partial_sexp);
3654 }