Update copyright year.
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
31
32 #include "syntax.h"
33 #include "intervals.h"
34
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
42
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
44
45 static void scan_sexps_forward ();
46 static int char_quoted ();
47
48 int words_include_escapes;
49 int parse_sexp_lookup_properties;
50
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
55
56 /* This is the internal form of the parse state used in parse-partial-sexp. */
57
58 struct lisp_parse_state
59 {
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
67 int location; /* Char number at which parsing stopped. */
68 int mindepth; /* Minimum depth seen while scanning. */
69 int comstr_start; /* Position just after last comment/string starter. */
70 };
71 \f
72 /* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
78
79 static int find_start_pos;
80 static int find_start_value;
81 static struct buffer *find_start_buffer;
82 static int find_start_begv;
83 static int find_start_modiff;
84 \f
85
86 struct gl_state_s gl_state; /* Global state of syntax parser. */
87
88 INTERVAL interval_of ();
89 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
91
92 /*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
97
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
105
106 void
107 update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
110 {
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
114
115 if (init)
116 {
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
128 }
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
130
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
135 {
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
141 {
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
146 }
147 }
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
149 {
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
155 {
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
160 }
161 }
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
163 {
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
166 }
167
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
170
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
173
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
178 {
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
182 }
183 else
184 {
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
188 }
189 }
190
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
193 if (Fsyntax_table_p (tmp_table) == Qt)
194 {
195 gl_state.use_global = 0;
196 }
197 else if (CONSP (tmp_table))
198 {
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
201 }
202 else
203 {
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
206 }
207
208 while (!NULL_INTERVAL_P (i))
209 {
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
211 {
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
217 }
218 else if (cnt == INTERVALS_AT_ONCE)
219 {
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
225 }
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
228 }
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
235 }
236 else
237 {
238 if (count > 0)
239 {
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
242 }
243 else
244 {
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
247 }
248 }
249 }
250 \f
251 /* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
254
255 static int
256 char_quoted (pos)
257 register int pos;
258 {
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
263
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
269 {
270 temp_pos--, quoted = !quoted;
271 }
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
274 }
275 \f
276 /* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
278 can return the same value right away.
279
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
283
284 static int
285 find_defun_start (pos)
286 int pos;
287 {
288 int tem;
289 int shortage;
290
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
301
302 /* Back up to start of line. */
303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
304
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
310 while (tem > BEGV)
311 {
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
314 {
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
321 }
322 /* Move to beg of previous line. */
323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
324 }
325
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
332
333 return find_start_value;
334 }
335 \f
336 /* Checks whether FROM is the end of comment. Does not try to
337 fallback more than to STOP.
338 Returns -1 if cannot find comment ending at from, otherwise start
339 of comment. Global syntax data remains valid for
340 backward search starting at the returned value (or at FROM, if
341 the search was not successful). */
342
343 static int
344 back_comment (from, stop)
345 int from, stop;
346 {
347 /* Look back, counting the parity of string-quotes,
348 and recording the comment-starters seen.
349 When we reach a safe place, assume that's not in a string;
350 then step the main scan to the earliest comment-starter seen
351 an even number of string quotes away from the safe place.
352
353 OFROM[I] is position of the earliest comment-starter seen
354 which is I+2X quotes from the comment-end.
355 PARITY is current parity of quotes from the comment end. */
356 int parity = 0;
357 int my_stringend = 0;
358 int string_lossage = 0;
359 int comment_end = from;
360 int comstart_pos = 0;
361 int comstart_parity = 0;
362 int scanstart = from - 1;
363 register enum syntaxcode code;
364 int c, comstyle = 0;
365
366 /* At beginning of range to scan, we're outside of strings;
367 that determines quote parity to the comment-end. */
368 while (from != stop)
369 {
370 /* Move back and examine a character. */
371 DEC_POS (from);
372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
373
374 c = FETCH_CHAR (from);
375 code = SYNTAX (c);
376
377 /* If this char is the second of a 2-char comment sequence,
378 back up and give the pair the appropriate syntax. */
379 if (from > stop && SYNTAX_COMEND_SECOND (c)
380 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
381 {
382 code = Sendcomment;
383 DEC_POS (from);
384 /* This is apparently the best we can do: */
385 UPDATE_SYNTAX_TABLE_BACKWARD (from);
386 c = FETCH_CHAR (from);
387 }
388
389 /* If this char starts a 2-char comment start sequence,
390 treat it like a 1-char comment starter. */
391 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
392 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
393 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
394 code = Scomment;
395
396 /* Ignore escaped characters. */
397 if (char_quoted (from))
398 continue;
399
400 /* Track parity of quotes. */
401 if (code == Sstring)
402 {
403 parity ^= 1;
404 if (my_stringend == 0)
405 my_stringend = c;
406 /* If we have two kinds of string delimiters.
407 There's no way to grok this scanning backwards. */
408 else if (my_stringend != c)
409 string_lossage = 1;
410 }
411
412 if (code == Sstring_fence || code == Scomment_fence)
413 {
414 parity ^= 1;
415 if (my_stringend == 0)
416 my_stringend =
417 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
418 /* If we have two kinds of string delimiters.
419 There's no way to grok this scanning backwards. */
420 else if (my_stringend != (code == Sstring_fence
421 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
422 string_lossage = 1;
423 }
424
425 /* Record comment-starters according to that
426 quote-parity to the comment-end. */
427 if (code == Scomment)
428 {
429 comstart_parity = parity;
430 comstart_pos = from;
431 }
432
433 /* If we find another earlier comment-ender,
434 any comment-starts earlier than that don't count
435 (because they go with the earlier comment-ender). */
436 if (code == Sendcomment
437 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
438 break;
439
440 /* Assume a defun-start point is outside of strings. */
441 if (code == Sopen
442 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
443 break;
444 }
445
446 if (comstart_pos == 0)
447 {
448 from = comment_end;
449 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
450 }
451 /* If the earliest comment starter
452 is followed by uniform paired string quotes or none,
453 we know it can't be inside a string
454 since if it were then the comment ender would be inside one.
455 So it does start a comment. Skip back to it. */
456 else if (comstart_parity == 0 && !string_lossage)
457 {
458 from = comstart_pos;
459 /* Globals are correct now. */
460 }
461 else
462 {
463 /* We had two kinds of string delimiters mixed up
464 together. Decode this going forwards.
465 Scan fwd from the previous comment ender
466 to the one in question; this records where we
467 last passed a comment starter. */
468 struct lisp_parse_state state;
469 scan_sexps_forward (&state, find_defun_start (comment_end),
470 comment_end - 1, -10000, 0, Qnil, 0);
471 if (state.incomment)
472 {
473 /* scan_sexps_forward changed the direction of search in
474 global variables, so we need to update it completely. */
475
476 from = state.comstr_start;
477 }
478 else
479 {
480 from = comment_end;
481 }
482 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
483 }
484
485 return from;
486 }
487 \f
488 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
489 "Return t if OBJECT is a syntax table.\n\
490 Currently, any char-table counts as a syntax table.")
491 (object)
492 Lisp_Object object;
493 {
494 if (CHAR_TABLE_P (object)
495 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
496 return Qt;
497 return Qnil;
498 }
499
500 static void
501 check_syntax_table (obj)
502 Lisp_Object obj;
503 {
504 if (!(CHAR_TABLE_P (obj)
505 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
506 wrong_type_argument (Qsyntax_table_p, obj);
507 }
508
509 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
510 "Return the current syntax table.\n\
511 This is the one specified by the current buffer.")
512 ()
513 {
514 return current_buffer->syntax_table;
515 }
516
517 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
518 Sstandard_syntax_table, 0, 0, 0,
519 "Return the standard syntax table.\n\
520 This is the one used for new buffers.")
521 ()
522 {
523 return Vstandard_syntax_table;
524 }
525
526 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
527 "Construct a new syntax table and return it.\n\
528 It is a copy of the TABLE, which defaults to the standard syntax table.")
529 (table)
530 Lisp_Object table;
531 {
532 Lisp_Object copy;
533
534 if (!NILP (table))
535 check_syntax_table (table);
536 else
537 table = Vstandard_syntax_table;
538
539 copy = Fcopy_sequence (table);
540
541 /* Only the standard syntax table should have a default element.
542 Other syntax tables should inherit from parents instead. */
543 XCHAR_TABLE (copy)->defalt = Qnil;
544
545 /* Copied syntax tables should all have parents.
546 If we copied one with no parent, such as the standard syntax table,
547 use the standard syntax table as the copy's parent. */
548 if (NILP (XCHAR_TABLE (copy)->parent))
549 Fset_char_table_parent (copy, Vstandard_syntax_table);
550 return copy;
551 }
552
553 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
554 "Select a new syntax table for the current buffer.\n\
555 One argument, a syntax table.")
556 (table)
557 Lisp_Object table;
558 {
559 check_syntax_table (table);
560 current_buffer->syntax_table = table;
561 /* Indicate that this buffer now has a specified syntax table. */
562 current_buffer->local_var_flags
563 |= XFASTINT (buffer_local_flags.syntax_table);
564 return table;
565 }
566 \f
567 /* Convert a letter which signifies a syntax code
568 into the code it signifies.
569 This is used by modify-syntax-entry, and other things. */
570
571 unsigned char syntax_spec_code[0400] =
572 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
573 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
574 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
575 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
576 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
577 (char) Smath, 0377, 0377, (char) Squote,
578 (char) Sopen, (char) Sclose, 0377, 0377,
579 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
580 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
581 0377, 0377, 0377, 0377,
582 (char) Scomment, 0377, (char) Sendcomment, 0377,
583 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
584 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
585 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
586 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
587 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
588 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
589 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
590 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
591 };
592
593 /* Indexed by syntax code, give the letter that describes it. */
594
595 char syntax_code_spec[16] =
596 {
597 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
598 '!', '|'
599 };
600
601 /* Indexed by syntax code, give the object (cons of syntax code and
602 nil) to be stored in syntax table. Since these objects can be
603 shared among syntax tables, we generate them in advance. By
604 sharing objects, the function `describe-syntax' can give a more
605 compact listing. */
606 static Lisp_Object Vsyntax_code_object;
607
608 \f
609 /* Look up the value for CHARACTER in syntax table TABLE's parent
610 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
611 for CHARACTER. It's actually used only when not compiled with GCC. */
612
613 Lisp_Object
614 syntax_parent_lookup (table, character)
615 Lisp_Object table;
616 int character;
617 {
618 Lisp_Object value;
619
620 while (1)
621 {
622 table = XCHAR_TABLE (table)->parent;
623 if (NILP (table))
624 return Qnil;
625
626 value = XCHAR_TABLE (table)->contents[character];
627 if (!NILP (value))
628 return value;
629 }
630 }
631
632 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
633 "Return the syntax code of CHARACTER, described by a character.\n\
634 For example, if CHARACTER is a word constituent,\n\
635 the character `w' is returned.\n\
636 The characters that correspond to various syntax codes\n\
637 are listed in the documentation of `modify-syntax-entry'.")
638 (character)
639 Lisp_Object character;
640 {
641 int char_int;
642 gl_state.current_syntax_table = current_buffer->syntax_table;
643
644 gl_state.use_global = 0;
645 CHECK_NUMBER (character, 0);
646 char_int = XINT (character);
647 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
648 }
649
650 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
651 "Return the matching parenthesis of CHARACTER, or nil if none.")
652 (character)
653 Lisp_Object character;
654 {
655 int char_int, code;
656 gl_state.current_syntax_table = current_buffer->syntax_table;
657 gl_state.use_global = 0;
658 CHECK_NUMBER (character, 0);
659 char_int = XINT (character);
660 code = SYNTAX (char_int);
661 if (code == Sopen || code == Sclose)
662 return make_number (SYNTAX_MATCH (char_int));
663 return Qnil;
664 }
665
666 /* This comment supplies the doc string for modify-syntax-entry,
667 for make-docfile to see. We cannot put this in the real DEFUN
668 due to limits in the Unix cpp.
669
670 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
671 "Set syntax for character CHAR according to string S.\n\
672 The syntax is changed only for table TABLE, which defaults to\n\
673 the current buffer's syntax table.\n\
674 The first character of S should be one of the following:\n\
675 Space or - whitespace syntax. w word constituent.\n\
676 _ symbol constituent. . punctuation.\n\
677 ( open-parenthesis. ) close-parenthesis.\n\
678 \" string quote. \\ escape.\n\
679 $ paired delimiter. ' expression quote or prefix operator.\n\
680 < comment starter. > comment ender.\n\
681 / character-quote. @ inherit from `standard-syntax-table'.\n\
682 \n\
683 Only single-character comment start and end sequences are represented thus.\n\
684 Two-character sequences are represented as described below.\n\
685 The second character of S is the matching parenthesis,\n\
686 used only if the first character is `(' or `)'.\n\
687 Any additional characters are flags.\n\
688 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
689 1 means CHAR is the start of a two-char comment start sequence.\n\
690 2 means CHAR is the second character of such a sequence.\n\
691 3 means CHAR is the start of a two-char comment end sequence.\n\
692 4 means CHAR is the second character of such a sequence.\n\
693 \n\
694 There can be up to two orthogonal comment sequences. This is to support\n\
695 language modes such as C++. By default, all comment sequences are of style\n\
696 a, but you can set the comment sequence style to b (on the second character\n\
697 of a comment-start, or the first character of a comment-end sequence) using\n\
698 this flag:\n\
699 b means CHAR is part of comment sequence b.\n\
700 \n\
701 p means CHAR is a prefix character for `backward-prefix-chars';\n\
702 such characters are treated as whitespace when they occur\n\
703 between expressions.")
704 (char, s, table)
705 */
706
707 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
708 /* I really don't know why this is interactive
709 help-form should at least be made useful whilst reading the second arg
710 */
711 "cSet syntax for character: \nsSet syntax for %s to: ",
712 0 /* See immediately above */)
713 (c, newentry, syntax_table)
714 Lisp_Object c, newentry, syntax_table;
715 {
716 register unsigned char *p;
717 register enum syntaxcode code;
718 int val;
719 Lisp_Object match;
720
721 CHECK_NUMBER (c, 0);
722 CHECK_STRING (newentry, 1);
723
724 if (NILP (syntax_table))
725 syntax_table = current_buffer->syntax_table;
726 else
727 check_syntax_table (syntax_table);
728
729 p = XSTRING (newentry)->data;
730 code = (enum syntaxcode) syntax_spec_code[*p++];
731 if (((int) code & 0377) == 0377)
732 error ("invalid syntax description letter: %c", c);
733
734 if (code == Sinherit)
735 {
736 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
737 return Qnil;
738 }
739
740 if (*p)
741 {
742 int len;
743 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
744 len);
745 XSETINT (match, character);
746 if (XFASTINT (match) == ' ')
747 match = Qnil;
748 p += len;
749 }
750 else
751 match = Qnil;
752
753 val = (int) code;
754 while (*p)
755 switch (*p++)
756 {
757 case '1':
758 val |= 1 << 16;
759 break;
760
761 case '2':
762 val |= 1 << 17;
763 break;
764
765 case '3':
766 val |= 1 << 18;
767 break;
768
769 case '4':
770 val |= 1 << 19;
771 break;
772
773 case 'p':
774 val |= 1 << 20;
775 break;
776
777 case 'b':
778 val |= 1 << 21;
779 break;
780 }
781
782 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
783 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
784 else
785 /* Since we can't use a shared object, let's make a new one. */
786 newentry = Fcons (make_number (val), match);
787
788 SET_RAW_SYNTAX_ENTRY (syntax_table, c, newentry);
789
790 return Qnil;
791 }
792 \f
793 /* Dump syntax table to buffer in human-readable format */
794
795 static void
796 describe_syntax (value)
797 Lisp_Object value;
798 {
799 register enum syntaxcode code;
800 char desc, match, start1, start2, end1, end2, prefix, comstyle;
801 char str[2];
802 Lisp_Object first, match_lisp;
803
804 Findent_to (make_number (16), make_number (1));
805
806 if (NILP (value))
807 {
808 insert_string ("default\n");
809 return;
810 }
811
812 if (CHAR_TABLE_P (value))
813 {
814 insert_string ("deeper char-table ...\n");
815 return;
816 }
817
818 if (!CONSP (value))
819 {
820 insert_string ("invalid\n");
821 return;
822 }
823
824 first = XCONS (value)->car;
825 match_lisp = XCONS (value)->cdr;
826
827 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
828 {
829 insert_string ("invalid\n");
830 return;
831 }
832
833 code = (enum syntaxcode) (first & 0377);
834 start1 = (XINT (first) >> 16) & 1;
835 start2 = (XINT (first) >> 17) & 1;
836 end1 = (XINT (first) >> 18) & 1;
837 end2 = (XINT (first) >> 19) & 1;
838 prefix = (XINT (first) >> 20) & 1;
839 comstyle = (XINT (first) >> 21) & 1;
840
841 if ((int) code < 0 || (int) code >= (int) Smax)
842 {
843 insert_string ("invalid");
844 return;
845 }
846 desc = syntax_code_spec[(int) code];
847
848 str[0] = desc, str[1] = 0;
849 insert (str, 1);
850
851 if (NILP (match_lisp))
852 insert (" ", 1);
853 else
854 insert_char (XINT (match_lisp));
855
856 if (start1)
857 insert ("1", 1);
858 if (start2)
859 insert ("2", 1);
860
861 if (end1)
862 insert ("3", 1);
863 if (end2)
864 insert ("4", 1);
865
866 if (prefix)
867 insert ("p", 1);
868 if (comstyle)
869 insert ("b", 1);
870
871 insert_string ("\twhich means: ");
872
873 switch (SWITCH_ENUM_CAST (code))
874 {
875 case Swhitespace:
876 insert_string ("whitespace"); break;
877 case Spunct:
878 insert_string ("punctuation"); break;
879 case Sword:
880 insert_string ("word"); break;
881 case Ssymbol:
882 insert_string ("symbol"); break;
883 case Sopen:
884 insert_string ("open"); break;
885 case Sclose:
886 insert_string ("close"); break;
887 case Squote:
888 insert_string ("quote"); break;
889 case Sstring:
890 insert_string ("string"); break;
891 case Smath:
892 insert_string ("math"); break;
893 case Sescape:
894 insert_string ("escape"); break;
895 case Scharquote:
896 insert_string ("charquote"); break;
897 case Scomment:
898 insert_string ("comment"); break;
899 case Sendcomment:
900 insert_string ("endcomment"); break;
901 default:
902 insert_string ("invalid");
903 return;
904 }
905
906 if (!NILP (match_lisp))
907 {
908 insert_string (", matches ");
909 insert_char (XINT (match_lisp));
910 }
911
912 if (start1)
913 insert_string (",\n\t is the first character of a comment-start sequence");
914 if (start2)
915 insert_string (",\n\t is the second character of a comment-start sequence");
916
917 if (end1)
918 insert_string (",\n\t is the first character of a comment-end sequence");
919 if (end2)
920 insert_string (",\n\t is the second character of a comment-end sequence");
921 if (comstyle)
922 insert_string (" (comment style b)");
923
924 if (prefix)
925 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
926
927 insert_string ("\n");
928 }
929
930 static Lisp_Object
931 describe_syntax_1 (vector)
932 Lisp_Object vector;
933 {
934 struct buffer *old = current_buffer;
935 set_buffer_internal (XBUFFER (Vstandard_output));
936 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
937 while (! NILP (XCHAR_TABLE (vector)->parent))
938 {
939 vector = XCHAR_TABLE (vector)->parent;
940 insert_string ("\nThe parent syntax table is:");
941 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
942 }
943
944 call0 (intern ("help-mode"));
945 set_buffer_internal (old);
946 return Qnil;
947 }
948
949 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
950 "Describe the syntax specifications in the syntax table.\n\
951 The descriptions are inserted in a buffer, which is then displayed.")
952 ()
953 {
954 internal_with_output_to_temp_buffer
955 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
956
957 return Qnil;
958 }
959 \f
960 int parse_sexp_ignore_comments;
961
962 /* Return the position across COUNT words from FROM.
963 If that many words cannot be found before the end of the buffer, return 0.
964 COUNT negative means scan backward and stop at word beginning. */
965
966 scan_words (from, count)
967 register int from, count;
968 {
969 register int beg = BEGV;
970 register int end = ZV;
971 register enum syntaxcode code;
972 int ch0, ch1;
973 int temp_pos;
974
975 immediate_quit = 1;
976 QUIT;
977
978 SETUP_SYNTAX_TABLE (from, count);
979
980 while (count > 0)
981 {
982 while (1)
983 {
984 if (from == end)
985 {
986 immediate_quit = 0;
987 return 0;
988 }
989 UPDATE_SYNTAX_TABLE_FORWARD (from);
990 ch0 = FETCH_CHAR (from);
991 code = SYNTAX (ch0);
992 INC_POS (from);
993 if (words_include_escapes
994 && (code == Sescape || code == Scharquote))
995 break;
996 if (code == Sword)
997 break;
998 }
999 /* Now CH0 is a character which begins a word and FROM is the
1000 position of the next character. */
1001 while (1)
1002 {
1003 if (from == end) break;
1004 UPDATE_SYNTAX_TABLE_FORWARD (from);
1005 ch1 = FETCH_CHAR (from);
1006 code = SYNTAX (ch1);
1007 if (!(words_include_escapes
1008 && (code == Sescape || code == Scharquote)))
1009 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1010 break;
1011 INC_POS (from);
1012 ch0 = ch1;
1013 }
1014 count--;
1015 }
1016 while (count < 0)
1017 {
1018 while (1)
1019 {
1020 if (from == beg)
1021 {
1022 immediate_quit = 0;
1023 return 0;
1024 }
1025 DEC_POS (from);
1026 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1027 ch1 = FETCH_CHAR (from);
1028 code = SYNTAX (ch1);
1029 if (words_include_escapes
1030 && (code == Sescape || code == Scharquote))
1031 break;
1032 if (code == Sword)
1033 break;
1034 }
1035 /* Now CH1 is a character which ends a word and FROM is the
1036 position of it. */
1037 while (1)
1038 {
1039 if (from == beg) break;
1040 temp_pos = from;
1041 DEC_POS (temp_pos);
1042 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1043 ch0 = FETCH_CHAR (temp_pos);
1044 code = SYNTAX (ch0);
1045 if (!(words_include_escapes
1046 && (code == Sescape || code == Scharquote)))
1047 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1048 break;
1049 from = temp_pos;
1050 ch1 = ch0;
1051 }
1052 count++;
1053 }
1054
1055 immediate_quit = 0;
1056
1057 return from;
1058 }
1059
1060 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1061 "Move point forward ARG words (backward if ARG is negative).\n\
1062 Normally returns t.\n\
1063 If an edge of the buffer is reached, point is left there\n\
1064 and nil is returned.")
1065 (count)
1066 Lisp_Object count;
1067 {
1068 int val;
1069 CHECK_NUMBER (count, 0);
1070
1071 if (!(val = scan_words (PT, XINT (count))))
1072 {
1073 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1074 return Qnil;
1075 }
1076 SET_PT (val);
1077 return Qt;
1078 }
1079 \f
1080 Lisp_Object skip_chars ();
1081
1082 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1083 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1084 STRING is like the inside of a `[...]' in a regular expression\n\
1085 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1086 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1087 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1088 Returns the distance traveled, either zero or positive.")
1089 (string, lim)
1090 Lisp_Object string, lim;
1091 {
1092 return skip_chars (1, 0, string, lim);
1093 }
1094
1095 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1096 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1097 See `skip-chars-forward' for details.\n\
1098 Returns the distance traveled, either zero or negative.")
1099 (string, lim)
1100 Lisp_Object string, lim;
1101 {
1102 return skip_chars (0, 0, string, lim);
1103 }
1104
1105 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1106 "Move point forward across chars in specified syntax classes.\n\
1107 SYNTAX is a string of syntax code characters.\n\
1108 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1109 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1110 This function returns the distance traveled, either zero or positive.")
1111 (syntax, lim)
1112 Lisp_Object syntax, lim;
1113 {
1114 return skip_chars (1, 1, syntax, lim);
1115 }
1116
1117 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1118 "Move point backward across chars in specified syntax classes.\n\
1119 SYNTAX is a string of syntax code characters.\n\
1120 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1121 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1122 This function returns the distance traveled, either zero or negative.")
1123 (syntax, lim)
1124 Lisp_Object syntax, lim;
1125 {
1126 return skip_chars (0, 1, syntax, lim);
1127 }
1128
1129 Lisp_Object
1130 skip_chars (forwardp, syntaxp, string, lim)
1131 int forwardp, syntaxp;
1132 Lisp_Object string, lim;
1133 {
1134 register unsigned char *p, *pend;
1135 register unsigned int c;
1136 unsigned char fastmap[0400];
1137 int negate = 0;
1138 register int i;
1139
1140 CHECK_STRING (string, 0);
1141
1142 if (NILP (lim))
1143 XSETINT (lim, forwardp ? ZV : BEGV);
1144 else
1145 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1146
1147 /* In any case, don't allow scan outside bounds of buffer. */
1148 /* jla turned this off, for no known reason.
1149 bfox turned the ZV part on, and rms turned the
1150 BEGV part back on. */
1151 if (XINT (lim) > ZV)
1152 XSETFASTINT (lim, ZV);
1153 if (XINT (lim) < BEGV)
1154 XSETFASTINT (lim, BEGV);
1155
1156 p = XSTRING (string)->data;
1157 pend = p + XSTRING (string)->size;
1158 bzero (fastmap, sizeof fastmap);
1159
1160 if (p != pend && *p == '^')
1161 {
1162 negate = 1; p++;
1163 }
1164
1165 /* Find the characters specified and set their elements of fastmap.
1166 If syntaxp, each character counts as itself.
1167 Otherwise, handle backslashes and ranges specially. */
1168
1169 while (p != pend)
1170 {
1171 c = *p++;
1172 if (syntaxp)
1173 fastmap[syntax_spec_code[c]] = 1;
1174 else
1175 {
1176 if (c == '\\')
1177 {
1178 if (p == pend) break;
1179 c = *p++;
1180 }
1181 if (p != pend && *p == '-')
1182 {
1183 p++;
1184 if (p == pend) break;
1185 while (c <= *p)
1186 {
1187 fastmap[c] = 1;
1188 c++;
1189 }
1190 p++;
1191 }
1192 else
1193 fastmap[c] = 1;
1194 }
1195 }
1196
1197 /* If ^ was the first character, complement the fastmap. */
1198
1199 if (negate)
1200 for (i = 0; i < sizeof fastmap; i++)
1201 fastmap[i] ^= 1;
1202
1203 {
1204 int start_point = PT;
1205 int pos = PT;
1206
1207 immediate_quit = 1;
1208 if (syntaxp)
1209 {
1210 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1211 if (forwardp)
1212 {
1213 while (pos < XINT (lim)
1214 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1215 {
1216 pos++;
1217 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1218 }
1219 }
1220 else
1221 {
1222 while (pos > XINT (lim)
1223 && fastmap[(int) SYNTAX (FETCH_CHAR (pos - 1))])
1224 {
1225 pos--;
1226 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1227 }
1228 }
1229 }
1230 else
1231 {
1232 if (forwardp)
1233 {
1234 while (pos < XINT (lim) && fastmap[FETCH_CHAR (pos)])
1235 pos++;
1236 }
1237 else
1238 {
1239 while (pos > XINT (lim) && fastmap[FETCH_CHAR (pos - 1)])
1240 pos--;
1241 }
1242 }
1243 SET_PT (pos);
1244 immediate_quit = 0;
1245
1246 return make_number (PT - start_point);
1247 }
1248 }
1249 \f
1250 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1251 "Move forward across up to N comments. If N is negative, move backward.\n\
1252 Stop scanning if we find something other than a comment or whitespace.\n\
1253 Set point to where scanning stops.\n\
1254 If N comments are found as expected, with nothing except whitespace\n\
1255 between them, return t; otherwise return nil.")
1256 (count)
1257 Lisp_Object count;
1258 {
1259 register int from;
1260 register int stop;
1261 register int c, c1;
1262 register enum syntaxcode code;
1263 int comstyle = 0; /* style of comment encountered */
1264 int found;
1265 int count1;
1266 int temp_pos;
1267
1268 CHECK_NUMBER (count, 0);
1269 count1 = XINT (count);
1270 stop = count1 > 0 ? ZV : BEGV;
1271
1272 immediate_quit = 1;
1273 QUIT;
1274
1275 from = PT;
1276
1277 SETUP_SYNTAX_TABLE (from, count1);
1278 while (count1 > 0)
1279 {
1280 do
1281 {
1282 if (from == stop)
1283 {
1284 SET_PT (from);
1285 immediate_quit = 0;
1286 return Qnil;
1287 }
1288 UPDATE_SYNTAX_TABLE_FORWARD (from);
1289 c = FETCH_CHAR (from);
1290 code = SYNTAX (c);
1291 INC_POS (from);
1292 comstyle = 0;
1293 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1294 && (c1 = FETCH_CHAR (from),
1295 SYNTAX_COMSTART_SECOND (c1)))
1296 {
1297 /* We have encountered a comment start sequence and we
1298 are ignoring all text inside comments. We must record
1299 the comment style this sequence begins so that later,
1300 only a comment end of the same style actually ends
1301 the comment section. */
1302 code = Scomment;
1303 comstyle = SYNTAX_COMMENT_STYLE (c1);
1304 INC_POS (from);
1305 }
1306 }
1307 while (code == Swhitespace || code == Sendcomment);
1308 if (code != Scomment && code != Scomment_fence)
1309 {
1310 immediate_quit = 0;
1311 DEC_POS (from);
1312 SET_PT (from);
1313 return Qnil;
1314 }
1315 /* We're at the start of a comment. */
1316 while (1)
1317 {
1318 if (from == stop)
1319 {
1320 immediate_quit = 0;
1321 SET_PT (from);
1322 return Qnil;
1323 }
1324 UPDATE_SYNTAX_TABLE_FORWARD (from);
1325 c = FETCH_CHAR (from);
1326 INC_POS (from);
1327 if (SYNTAX (c) == Sendcomment
1328 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1329 /* we have encountered a comment end of the same style
1330 as the comment sequence which began this comment
1331 section */
1332 break;
1333 if (SYNTAX (c) == Scomment_fence
1334 && comstyle == ST_COMMENT_STYLE)
1335 /* we have encountered a comment end of the same style
1336 as the comment sequence which began this comment
1337 section. */
1338 break;
1339 if (from < stop && SYNTAX_COMEND_FIRST (c)
1340 && (c1 = FETCH_CHAR (from),
1341 SYNTAX_COMEND_SECOND (c1))
1342 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1343 /* we have encountered a comment end of the same style
1344 as the comment sequence which began this comment
1345 section */
1346 { INC_POS (from); break; }
1347 }
1348 /* We have skipped one comment. */
1349 count1--;
1350 }
1351
1352 while (count1 < 0)
1353 {
1354 while (from > stop)
1355 {
1356 int quoted;
1357
1358 DEC_POS (from);
1359 quoted = char_quoted (from);
1360 if (quoted)
1361 {
1362 DEC_POS (from);
1363 goto leave; /* ????? XXXXX */
1364 }
1365 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1366 c = FETCH_CHAR (from);
1367 code = SYNTAX (c);
1368 comstyle = 0;
1369 if (code == Sendcomment)
1370 comstyle = SYNTAX_COMMENT_STYLE (c);
1371 temp_pos = from;
1372 DEC_POS (temp_pos);
1373 if (from > stop && SYNTAX_COMEND_SECOND (c)
1374 && (c1 = FETCH_CHAR (temp_pos),
1375 SYNTAX_COMEND_FIRST (c1))
1376 && !char_quoted (temp_pos))
1377 {
1378 /* We must record the comment style encountered so that
1379 later, we can match only the proper comment begin
1380 sequence of the same style. */
1381 code = Sendcomment;
1382 comstyle = SYNTAX_COMMENT_STYLE (c1);
1383 from = temp_pos;
1384 }
1385
1386 if (code == Scomment_fence)
1387 {
1388 /* Skip until first preceding unquoted comment_fence. */
1389 int found = 0, ini = from;
1390
1391 while (--from != stop)
1392 {
1393 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1394 c = FETCH_CHAR (from);
1395 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1396 {
1397 found = 1;
1398 break;
1399 }
1400 }
1401 if (found == 0)
1402 {
1403 from = ini; /* Set point to ini + 1. */
1404 goto leave;
1405 }
1406 }
1407 else if (code == Sendcomment)
1408 {
1409 #if 0
1410 if (code != SYNTAX (c))
1411 /* For a two-char comment ender, we can assume
1412 it does end a comment. So scan back in a simple way. */
1413 {
1414 if (from != stop) DEC_POS (from);
1415 while (1)
1416 {
1417 if ((c = FETCH_CHAR (from),
1418 SYNTAX (c) == Scomment)
1419 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1420 break;
1421 if (from == stop)
1422 {
1423 immediate_quit = 0;
1424 SET_PT (from);
1425 return Qnil;
1426 }
1427 DEC_POS (from);
1428 if (SYNTAX_COMSTART_SECOND (c)
1429 && (c1 = FETCH_CHAR (from),
1430 SYNTAX_COMSTART_FIRST (c1))
1431 && SYNTAX_COMMENT_STYLE (c) == comstyle
1432 && !char_quoted (from))
1433 break;
1434 }
1435 break;
1436 }
1437 #endif /* 0 */
1438 found = back_comment (from, stop);
1439 if (found != -1) from = found;
1440 #if 0
1441 /* Look back, counting the parity of string-quotes,
1442 and recording the comment-starters seen.
1443 When we reach a safe place, assume that's not in a string;
1444 then step the main scan to the earliest comment-starter seen
1445 an even number of string quotes away from the safe place.
1446
1447 OFROM[I] is position of the earliest comment-starter seen
1448 which is I+2X quotes from the comment-end.
1449 PARITY is current parity of quotes from the comment end. */
1450 {
1451 int parity = 0;
1452 char my_stringend = 0;
1453 int string_lossage = 0;
1454 int comment_end = from;
1455 int comstart_pos = 0;
1456 int comstart_parity = 0;
1457 int scanstart = from;
1458
1459 DEC_POS (scanstart);
1460 /* At beginning of range to scan, we're outside of strings;
1461 that determines quote parity to the comment-end. */
1462 while (from != stop)
1463 {
1464 /* Move back and examine a character. */
1465 DEC_POS (from);
1466
1467 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1468 c = FETCH_CHAR (from);
1469 code = SYNTAX (c);
1470
1471 /* If this char is the second of a 2-char comment sequence,
1472 back up and give the pair the appropriate syntax. */
1473 temp_pos = from;
1474 DEC_POS (temp_pos);
1475 if (from > stop && SYNTAX_COMEND_SECOND (c)
1476 && (c1 = FETCH_CHAR (temp_pos),
1477 SYNTAX_COMEND_FIRST (c1)))
1478 {
1479 code = Sendcomment;
1480 from = temp_pos;
1481 c = c1;
1482 }
1483
1484 temp_pos = from;
1485 INC_POS (temp_pos);
1486 /* If this char starts a 2-char comment start sequence,
1487 treat it like a 1-char comment starter. */
1488 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1489 && (c1 = FETCH_CHAR (temp_pos),
1490 SYNTAX_COMSTART_SECOND (c1))
1491 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1492 code = Scomment;
1493
1494 /* Ignore escaped characters. */
1495 if (char_quoted (from))
1496 continue;
1497
1498 /* Track parity of quotes. */
1499 if (code == Sstring)
1500 {
1501 parity ^= 1;
1502 if (my_stringend == 0)
1503 my_stringend = c;
1504 /* If we have two kinds of string delimiters.
1505 There's no way to grok this scanning backwards. */
1506 else if (my_stringend != c)
1507 string_lossage = 1;
1508 }
1509
1510 /* Record comment-starters according to that
1511 quote-parity to the comment-end. */
1512 if (code == Scomment)
1513 {
1514 comstart_parity = parity;
1515 comstart_pos = from;
1516 }
1517
1518 /* If we find another earlier comment-ender,
1519 any comment-starts earlier than that don't count
1520 (because they go with the earlier comment-ender). */
1521 if (code == Sendcomment
1522 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1523 break;
1524
1525 /* Assume a defun-start point is outside of strings. */
1526 if (code == Sopen
1527 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1528 break;
1529 }
1530
1531 if (comstart_pos == 0)
1532 from = comment_end;
1533 /* If the earliest comment starter
1534 is followed by uniform paired string quotes or none,
1535 we know it can't be inside a string
1536 since if it were then the comment ender would be inside one.
1537 So it does start a comment. Skip back to it. */
1538 else if (comstart_parity == 0 && !string_lossage)
1539 from = comstart_pos;
1540 else
1541 {
1542 /* We had two kinds of string delimiters mixed up
1543 together. Decode this going forwards.
1544 Scan fwd from the previous comment ender
1545 to the one in question; this records where we
1546 last passed a comment starter. */
1547 struct lisp_parse_state state;
1548 scan_sexps_forward (&state, find_defun_start (comment_end),
1549 comment_end - 1, -10000, 0, Qnil, 0);
1550 if (state.incomment)
1551 from = state.comstr_start;
1552 else
1553 /* We can't grok this as a comment; scan it normally. */
1554 from = comment_end;
1555 }
1556 }
1557 #endif /* 0 */
1558 /* We have skipped one comment. */
1559 break;
1560 }
1561 else if (code != Swhitespace && code != Scomment)
1562 {
1563 leave:
1564 immediate_quit = 0;
1565 INC_POS (from);
1566 SET_PT (from);
1567 return Qnil;
1568 }
1569 }
1570
1571 count1++;
1572 }
1573
1574 SET_PT (from);
1575 immediate_quit = 0;
1576 return Qt;
1577 }
1578 \f
1579 Lisp_Object
1580 scan_lists (from, count, depth, sexpflag)
1581 register int from;
1582 int count, depth, sexpflag;
1583 {
1584 Lisp_Object val;
1585 register int stop = count > 0 ? ZV : BEGV;
1586 register int c, c1;
1587 int stringterm;
1588 int quoted;
1589 int mathexit = 0;
1590 register enum syntaxcode code, temp_code;
1591 int min_depth = depth; /* Err out if depth gets less than this. */
1592 int comstyle = 0; /* style of comment encountered */
1593 int temp_pos;
1594 int last_good = from;
1595 int found;
1596
1597 if (depth > 0) min_depth = 0;
1598
1599 immediate_quit = 1;
1600 QUIT;
1601
1602 SETUP_SYNTAX_TABLE (from, count);
1603 while (count > 0)
1604 {
1605 while (from < stop)
1606 {
1607 UPDATE_SYNTAX_TABLE_FORWARD (from);
1608 c = FETCH_CHAR (from);
1609 code = SYNTAX (c);
1610 if (depth == min_depth)
1611 last_good = from;
1612 INC_POS (from);
1613 UPDATE_SYNTAX_TABLE_FORWARD (from);
1614 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1615 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1616 && parse_sexp_ignore_comments)
1617 {
1618 /* we have encountered a comment start sequence and we
1619 are ignoring all text inside comments. We must record
1620 the comment style this sequence begins so that later,
1621 only a comment end of the same style actually ends
1622 the comment section */
1623 code = Scomment;
1624 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1625 INC_POS (from);
1626 }
1627
1628 UPDATE_SYNTAX_TABLE_FORWARD (from);
1629 if (SYNTAX_PREFIX (c))
1630 continue;
1631
1632 switch (SWITCH_ENUM_CAST (code))
1633 {
1634 case Sescape:
1635 case Scharquote:
1636 if (from == stop) goto lose;
1637 INC_POS (from);
1638 /* treat following character as a word constituent */
1639 case Sword:
1640 case Ssymbol:
1641 if (depth || !sexpflag) break;
1642 /* This word counts as a sexp; return at end of it. */
1643 while (from < stop)
1644 {
1645 UPDATE_SYNTAX_TABLE_FORWARD (from);
1646 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1647 {
1648 case Scharquote:
1649 case Sescape:
1650 INC_POS (from);
1651 if (from == stop) goto lose;
1652 break;
1653 case Sword:
1654 case Ssymbol:
1655 case Squote:
1656 break;
1657 default:
1658 goto done;
1659 }
1660 INC_POS (from);
1661 }
1662 goto done;
1663
1664 case Scomment:
1665 case Scomment_fence:
1666 if (!parse_sexp_ignore_comments) break;
1667 while (1)
1668 {
1669 if (from == stop)
1670 {
1671 if (depth == 0)
1672 goto done;
1673 goto lose;
1674 }
1675 UPDATE_SYNTAX_TABLE_FORWARD (from);
1676 c = FETCH_CHAR (from);
1677 if (code == Scomment
1678 ? (SYNTAX (c) == Sendcomment
1679 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1680 : (SYNTAX (c) == Scomment_fence))
1681 /* we have encountered a comment end of the same style
1682 as the comment sequence which began this comment
1683 section */
1684 break;
1685 INC_POS (from);
1686 if (from < stop && SYNTAX_COMEND_FIRST (c)
1687 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1688 && SYNTAX_COMMENT_STYLE (c) == comstyle
1689 && code == Scomment)
1690 /* we have encountered a comment end of the same style
1691 as the comment sequence which began this comment
1692 section */
1693 { INC_POS (from); break; }
1694 }
1695 break;
1696
1697 case Smath:
1698 if (!sexpflag)
1699 break;
1700 if (from != stop && c == FETCH_CHAR (from))
1701 INC_POS (from);
1702 if (mathexit)
1703 {
1704 mathexit = 0;
1705 goto close1;
1706 }
1707 mathexit = 1;
1708
1709 case Sopen:
1710 if (!++depth) goto done;
1711 break;
1712
1713 case Sclose:
1714 close1:
1715 if (!--depth) goto done;
1716 if (depth < min_depth)
1717 Fsignal (Qscan_error,
1718 Fcons (build_string ("Containing expression ends prematurely"),
1719 Fcons (make_number (last_good),
1720 Fcons (make_number (from), Qnil))));
1721 break;
1722
1723 case Sstring:
1724 case Sstring_fence:
1725 temp_pos = from;
1726 DEC_POS (temp_pos);
1727 stringterm = FETCH_CHAR (temp_pos);
1728 while (1)
1729 {
1730 if (from >= stop) goto lose;
1731 UPDATE_SYNTAX_TABLE_FORWARD (from);
1732 if (code == Sstring
1733 ? (FETCH_CHAR (from) == stringterm)
1734 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1735 break;
1736 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1737 {
1738 case Scharquote:
1739 case Sescape:
1740 INC_POS (from);
1741 }
1742 INC_POS (from);
1743 }
1744 INC_POS (from);
1745 if (!depth && sexpflag) goto done;
1746 break;
1747 }
1748 }
1749
1750 /* Reached end of buffer. Error if within object, return nil if between */
1751 if (depth) goto lose;
1752
1753 immediate_quit = 0;
1754 return Qnil;
1755
1756 /* End of object reached */
1757 done:
1758 count--;
1759 }
1760
1761 while (count < 0)
1762 {
1763 while (from > stop)
1764 {
1765 DEC_POS (from);
1766 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1767 if (quoted = char_quoted (from))
1768 {
1769 DEC_POS (from);
1770 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1771 }
1772 c = FETCH_CHAR (from);
1773 code = SYNTAX (c);
1774 if (depth == min_depth)
1775 last_good = from;
1776 comstyle = 0;
1777 if (code == Sendcomment)
1778 comstyle = SYNTAX_COMMENT_STYLE (c);
1779 temp_pos = from;
1780 DEC_POS (temp_pos);
1781 if (from > stop && SYNTAX_COMEND_SECOND (c)
1782 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1783 && !char_quoted (temp_pos)
1784 && parse_sexp_ignore_comments)
1785 {
1786 /* we must record the comment style encountered so that
1787 later, we can match only the proper comment begin
1788 sequence of the same style */
1789 code = Sendcomment;
1790 comstyle = SYNTAX_COMMENT_STYLE (c1);
1791 from = temp_pos;
1792 }
1793
1794 if (SYNTAX_PREFIX (c))
1795 continue;
1796
1797 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1798 {
1799 case Sword:
1800 case Ssymbol:
1801 if (depth || !sexpflag) break;
1802 /* This word counts as a sexp; count object finished
1803 after passing it. */
1804 while (from > stop)
1805 {
1806 temp_pos = from;
1807 DEC_POS (temp_pos);
1808 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1809 quoted = char_quoted (temp_pos);
1810 if (quoted)
1811 {
1812 from = temp_pos;
1813 DEC_POS (temp_pos);
1814 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1815 }
1816 c1 = FETCH_CHAR (temp_pos);
1817 temp_code = SYNTAX (c1);
1818 if (! (quoted || temp_code == Sword
1819 || temp_code == Ssymbol
1820 || temp_code == Squote))
1821 goto done2;
1822 from = temp_pos;
1823 }
1824 goto done2;
1825
1826 case Smath:
1827 if (!sexpflag)
1828 break;
1829 temp_pos = from;
1830 DEC_POS (temp_pos);
1831 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1832 if (from != stop && c == FETCH_CHAR (temp_pos))
1833 from = temp_pos;
1834 if (mathexit)
1835 {
1836 mathexit = 0;
1837 goto open2;
1838 }
1839 mathexit = 1;
1840
1841 case Sclose:
1842 if (!++depth) goto done2;
1843 break;
1844
1845 case Sopen:
1846 open2:
1847 if (!--depth) goto done2;
1848 if (depth < min_depth)
1849 Fsignal (Qscan_error,
1850 Fcons (build_string ("Containing expression ends prematurely"),
1851 Fcons (make_number (last_good),
1852 Fcons (make_number (from), Qnil))));
1853 break;
1854
1855 case Sendcomment:
1856 if (!parse_sexp_ignore_comments)
1857 break;
1858 #if 0
1859 if (code != SYNTAX (c))
1860 /* For a two-char comment ender, we can assume
1861 it does end a comment. So scan back in a simple way. */
1862 {
1863 if (from != stop) DEC_POS (from);
1864 while (1)
1865 {
1866 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1867 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1868 break;
1869 if (from == stop)
1870 {
1871 if (depth == 0)
1872 goto done2;
1873 goto lose;
1874 }
1875 DEC_POS (from);
1876 if (SYNTAX_COMSTART_SECOND (c)
1877 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1878 && SYNTAX_COMMENT_STYLE (c) == comstyle
1879 && !char_quoted (from))
1880 break;
1881 }
1882 break;
1883 }
1884 #endif /* 0 */
1885 found = back_comment (from, stop);
1886 if (found != -1) from = found;
1887 #if 0
1888 /* Look back, counting the parity of string-quotes,
1889 and recording the comment-starters seen.
1890 When we reach a safe place, assume that's not in a string;
1891 then step the main scan to the earliest comment-starter seen
1892 an even number of string quotes away from the safe place.
1893
1894 OFROM[I] is position of the earliest comment-starter seen
1895 which is I+2X quotes from the comment-end.
1896 PARITY is current parity of quotes from the comment end. */
1897 {
1898 int parity = 0;
1899 char my_stringend = 0;
1900 int string_lossage = 0;
1901 int comment_end = from;
1902 int comstart_pos = 0;
1903 int comstart_parity = 0;
1904 int scanstart = from;
1905
1906 DEC_POS (scanstart);
1907
1908 /* At beginning of range to scan, we're outside of strings;
1909 that determines quote parity to the comment-end. */
1910 while (from != stop)
1911 {
1912 /* Move back and examine a character. */
1913 DEC_POS (from);
1914
1915 c = FETCH_CHAR (from);
1916 code = SYNTAX (c);
1917
1918 /* If this char is the second of a 2-char comment sequence,
1919 back up and give the pair the appropriate syntax. */
1920 temp_pos = from;
1921 DEC_POS (temp_pos);
1922 if (from > stop && SYNTAX_COMEND_SECOND (c)
1923 && (c1 = FETCH_CHAR (temp_pos),
1924 SYNTAX_COMEND_FIRST (c1)))
1925 {
1926 code = Sendcomment;
1927 from = temp_pos;
1928 c = c1;
1929 }
1930
1931 /* If this char starts a 2-char comment start sequence,
1932 treat it like a 1-char comment starter. */
1933 temp_pos = from;
1934 INC_POS (temp_pos);
1935 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1936 && (c1 = FETCH_CHAR (temp_pos),
1937 SYNTAX_COMSTART_SECOND (c1))
1938 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1939 code = Scomment;
1940
1941 /* Ignore escaped characters. */
1942 if (char_quoted (from))
1943 continue;
1944
1945 /* Track parity of quotes. */
1946 if (code == Sstring)
1947 {
1948 parity ^= 1;
1949 if (my_stringend == 0)
1950 my_stringend = c;
1951 /* If we have two kinds of string delimiters.
1952 There's no way to grok this scanning backwards. */
1953 else if (my_stringend != c)
1954 string_lossage = 1;
1955 }
1956
1957 /* Record comment-starters according to that
1958 quote-parity to the comment-end. */
1959 if (code == Scomment)
1960 {
1961 comstart_parity = parity;
1962 comstart_pos = from;
1963 }
1964
1965 /* If we find another earlier comment-ender,
1966 any comment-starts earlier than that don't count
1967 (because they go with the earlier comment-ender). */
1968 if (code == Sendcomment
1969 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1970 break;
1971
1972 /* Assume a defun-start point is outside of strings. */
1973 if (code == Sopen
1974 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1975 break;
1976 }
1977
1978 if (comstart_pos == 0)
1979 from = comment_end;
1980 /* If the earliest comment starter
1981 is followed by uniform paired string quotes or none,
1982 we know it can't be inside a string
1983 since if it were then the comment ender would be inside one.
1984 So it does start a comment. Skip back to it. */
1985 else if (comstart_parity == 0 && !string_lossage)
1986 from = comstart_pos;
1987 else
1988 {
1989 /* We had two kinds of string delimiters mixed up
1990 together. Decode this going forwards.
1991 Scan fwd from the previous comment ender
1992 to the one in question; this records where we
1993 last passed a comment starter. */
1994 struct lisp_parse_state state;
1995 scan_sexps_forward (&state, find_defun_start (comment_end),
1996 comment_end - 1, -10000, 0, Qnil, 0);
1997 if (state.incomment)
1998 from = state.comstr_start;
1999 else
2000 /* We can't grok this as a comment; scan it normally. */
2001 from = comment_end;
2002 }
2003 }
2004 #endif /* 0 */
2005 break;
2006
2007 case Scomment_fence:
2008 case Sstring_fence:
2009 while (1)
2010 {
2011 DEC_POS (from);
2012 if (from == stop) goto lose;
2013 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2014 if (!char_quoted (from)
2015 && SYNTAX (FETCH_CHAR (from)) == code)
2016 break;
2017 }
2018 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2019 break;
2020
2021 case Sstring:
2022 stringterm = FETCH_CHAR (from);
2023 while (1)
2024 {
2025 if (from == stop) goto lose;
2026 temp_pos = from;
2027 DEC_POS (temp_pos);
2028 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
2029 if (!char_quoted (temp_pos)
2030 && stringterm == FETCH_CHAR (temp_pos))
2031 break;
2032 from = temp_pos;
2033 }
2034 DEC_POS (from);
2035 if (!depth && sexpflag) goto done2;
2036 break;
2037 }
2038 }
2039
2040 /* Reached start of buffer. Error if within object, return nil if between */
2041 if (depth) goto lose;
2042
2043 immediate_quit = 0;
2044 return Qnil;
2045
2046 done2:
2047 count++;
2048 }
2049
2050
2051 immediate_quit = 0;
2052 XSETFASTINT (val, from);
2053 return val;
2054
2055 lose:
2056 Fsignal (Qscan_error,
2057 Fcons (build_string ("Unbalanced parentheses"),
2058 Fcons (make_number (last_good),
2059 Fcons (make_number (from), Qnil))));
2060
2061 /* NOTREACHED */
2062 }
2063
2064 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2065 "Scan from character number FROM by COUNT lists.\n\
2066 Returns the character number of the position thus found.\n\
2067 \n\
2068 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2069 only places where the depth in parentheses becomes zero\n\
2070 are candidates for stopping; COUNT such places are counted.\n\
2071 Thus, a positive value for DEPTH means go out levels.\n\
2072 \n\
2073 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2074 \n\
2075 If the beginning or end of (the accessible part of) the buffer is reached\n\
2076 and the depth is wrong, an error is signaled.\n\
2077 If the depth is right but the count is not used up, nil is returned.")
2078 (from, count, depth)
2079 Lisp_Object from, count, depth;
2080 {
2081 CHECK_NUMBER (from, 0);
2082 CHECK_NUMBER (count, 1);
2083 CHECK_NUMBER (depth, 2);
2084
2085 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2086 }
2087
2088 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2089 "Scan from character number FROM by COUNT balanced expressions.\n\
2090 If COUNT is negative, scan backwards.\n\
2091 Returns the character number of the position thus found.\n\
2092 \n\
2093 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2094 \n\
2095 If the beginning or end of (the accessible part of) the buffer is reached\n\
2096 in the middle of a parenthetical grouping, an error is signaled.\n\
2097 If the beginning or end is reached between groupings\n\
2098 but before count is used up, nil is returned.")
2099 (from, count)
2100 Lisp_Object from, count;
2101 {
2102 CHECK_NUMBER (from, 0);
2103 CHECK_NUMBER (count, 1);
2104
2105 return scan_lists (XINT (from), XINT (count), 0, 1);
2106 }
2107
2108 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2109 0, 0, 0,
2110 "Move point backward over any number of chars with prefix syntax.\n\
2111 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2112 ()
2113 {
2114 int beg = BEGV;
2115 int pos = PT;
2116 int c;
2117 int temp_pos = pos;
2118
2119 if (pos > beg)
2120 {
2121 SETUP_SYNTAX_TABLE (pos, -1);
2122 }
2123 DEC_POS (temp_pos);
2124
2125 while (pos > beg && !char_quoted (temp_pos)
2126 /* Previous statement updates syntax table. */
2127 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2128 || SYNTAX_PREFIX (c)))
2129 {
2130 pos = temp_pos;
2131 DEC_POS (temp_pos);
2132 }
2133
2134 SET_PT (pos);
2135
2136 return Qnil;
2137 }
2138 \f
2139 /* Parse forward from FROM to END,
2140 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2141 and return a description of the state of the parse at END.
2142 If STOPBEFORE is nonzero, stop at the start of an atom.
2143 If COMMENTSTOP is nonzero, stop at the start of a comment. */
2144
2145 static void
2146 scan_sexps_forward (stateptr, from, end, targetdepth,
2147 stopbefore, oldstate, commentstop)
2148 struct lisp_parse_state *stateptr;
2149 register int from;
2150 int end, targetdepth, stopbefore;
2151 Lisp_Object oldstate;
2152 int commentstop;
2153 {
2154 struct lisp_parse_state state;
2155
2156 register enum syntaxcode code;
2157 struct level { int last, prev; };
2158 struct level levelstart[100];
2159 register struct level *curlevel = levelstart;
2160 struct level *endlevel = levelstart + 100;
2161 int prev;
2162 register int depth; /* Paren depth of current scanning location.
2163 level - levelstart equals this except
2164 when the depth becomes negative. */
2165 int mindepth; /* Lowest DEPTH value seen. */
2166 int start_quoted = 0; /* Nonzero means starting after a char quote */
2167 Lisp_Object tem;
2168 int prev_from; /* Keep one character before FROM. */
2169 int boundary_stop = commentstop == -1;
2170 int nofence;
2171
2172 prev_from = from;
2173 DEC_POS (prev_from);
2174
2175 /* Use this macro instead of `from++'. */
2176 #define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
2177
2178 immediate_quit = 1;
2179 QUIT;
2180
2181 SETUP_SYNTAX_TABLE (from, 1);
2182
2183 if (NILP (oldstate))
2184 {
2185 depth = 0;
2186 state.instring = -1;
2187 state.incomment = 0;
2188 state.comstyle = 0; /* comment style a by default. */
2189 state.comstr_start = -1; /* no comment/string seen. */
2190 }
2191 else
2192 {
2193 tem = Fcar (oldstate);
2194 if (!NILP (tem))
2195 depth = XINT (tem);
2196 else
2197 depth = 0;
2198
2199 oldstate = Fcdr (oldstate);
2200 oldstate = Fcdr (oldstate);
2201 oldstate = Fcdr (oldstate);
2202 tem = Fcar (oldstate);
2203 /* Check whether we are inside string_fence-style string: */
2204 state.instring = ( !NILP (tem)
2205 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2206 : -1);
2207
2208 oldstate = Fcdr (oldstate);
2209 tem = Fcar (oldstate);
2210 state.incomment = !NILP (tem);
2211
2212 oldstate = Fcdr (oldstate);
2213 tem = Fcar (oldstate);
2214 start_quoted = !NILP (tem);
2215
2216 /* if the eight element of the list is nil, we are in comment
2217 style a. If it is non-nil, we are in comment style b */
2218 oldstate = Fcdr (oldstate);
2219 oldstate = Fcdr (oldstate);
2220 tem = Fcar (oldstate);
2221 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2222 ? ST_COMMENT_STYLE : 1 );
2223
2224 oldstate = Fcdr (oldstate);
2225 tem = Fcar (oldstate);
2226 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2227 }
2228 state.quoted = 0;
2229 mindepth = depth;
2230
2231 curlevel->prev = -1;
2232 curlevel->last = -1;
2233
2234 /* Enter the loop at a place appropriate for initial state. */
2235
2236 if (state.incomment) goto startincomment;
2237 if (state.instring >= 0)
2238 {
2239 nofence = state.instring != ST_STRING_STYLE;
2240 if (start_quoted) goto startquotedinstring;
2241 goto startinstring;
2242 }
2243 if (start_quoted) goto startquoted;
2244
2245 while (from < end)
2246 {
2247 UPDATE_SYNTAX_TABLE_FORWARD (from);
2248 code = SYNTAX (FETCH_CHAR (from));
2249 INC_FROM;
2250 if (code == Scomment)
2251 state.comstr_start = prev_from;
2252
2253 else if (code == Scomment_fence
2254 || (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from))
2255 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))))
2256 {
2257 /* Record the comment style we have entered so that only
2258 the comment-end sequence of the same style actually
2259 terminates the comment section. */
2260 state.comstyle = ( code == Scomment_fence
2261 ? ST_COMMENT_STYLE
2262 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2263 state.comstr_start = prev_from;
2264 if (code != Scomment_fence) INC_FROM;
2265 code = Scomment;
2266 }
2267
2268 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
2269 continue;
2270 switch (SWITCH_ENUM_CAST (code))
2271 {
2272 case Sescape:
2273 case Scharquote:
2274 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2275 curlevel->last = prev_from;
2276 startquoted:
2277 if (from == end) goto endquoted;
2278 INC_FROM;
2279 goto symstarted;
2280 /* treat following character as a word constituent */
2281 case Sword:
2282 case Ssymbol:
2283 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2284 curlevel->last = prev_from;
2285 symstarted:
2286 while (from < end)
2287 {
2288 UPDATE_SYNTAX_TABLE_FORWARD (from);
2289 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
2290 {
2291 case Scharquote:
2292 case Sescape:
2293 INC_FROM;
2294 if (from == end) goto endquoted;
2295 break;
2296 case Sword:
2297 case Ssymbol:
2298 case Squote:
2299 break;
2300 default:
2301 goto symdone;
2302 }
2303 INC_FROM;
2304 }
2305 symdone:
2306 curlevel->prev = curlevel->last;
2307 break;
2308
2309 startincomment:
2310 if (commentstop == 1)
2311 goto done;
2312 if (from != BEGV)
2313 {
2314 /* Enter the loop in the middle so that we find
2315 a 2-char comment ender if we start in the middle of it. */
2316 prev = FETCH_CHAR (prev_from);
2317 goto startincomment_1;
2318 }
2319 /* At beginning of buffer, enter the loop the ordinary way. */
2320 state.incomment = 1;
2321 goto commentloop;
2322
2323 case Scomment:
2324 state.incomment = 1;
2325 if (commentstop || boundary_stop) goto done;
2326 commentloop:
2327 while (1)
2328 {
2329 if (from == end) goto done;
2330 UPDATE_SYNTAX_TABLE_FORWARD (from);
2331 prev = FETCH_CHAR (from);
2332 if (SYNTAX (prev) == Sendcomment
2333 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2334 /* Only terminate the comment section if the endcomment
2335 of the same style as the start sequence has been
2336 encountered. */
2337 break;
2338 if (state.comstyle == ST_COMMENT_STYLE
2339 && SYNTAX (prev) == Scomment_fence)
2340 break;
2341 INC_FROM;
2342 startincomment_1:
2343 if (from < end && SYNTAX_COMEND_FIRST (prev)
2344 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2345 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2346 /* Only terminate the comment section if the end-comment
2347 sequence of the same style as the start sequence has
2348 been encountered. */
2349 { break; }
2350 }
2351 INC_FROM;
2352 state.incomment = 0;
2353 state.comstyle = 0; /* reset the comment style */
2354 if (boundary_stop) goto done;
2355 break;
2356
2357 case Sopen:
2358 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2359 depth++;
2360 /* curlevel++->last ran into compiler bug on Apollo */
2361 curlevel->last = prev_from;
2362 if (++curlevel == endlevel)
2363 error ("Nesting too deep for parser");
2364 curlevel->prev = -1;
2365 curlevel->last = -1;
2366 if (targetdepth == depth) goto done;
2367 break;
2368
2369 case Sclose:
2370 depth--;
2371 if (depth < mindepth)
2372 mindepth = depth;
2373 if (curlevel != levelstart)
2374 curlevel--;
2375 curlevel->prev = curlevel->last;
2376 if (targetdepth == depth) goto done;
2377 break;
2378
2379 case Sstring:
2380 case Sstring_fence:
2381 state.comstr_start = from - 1;
2382 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2383 curlevel->last = prev_from;
2384 state.instring = (code == Sstring
2385 ? (FETCH_CHAR (prev_from))
2386 : ST_STRING_STYLE);
2387 if (boundary_stop) goto done;
2388 startinstring:
2389 {
2390 nofence = state.instring != ST_STRING_STYLE;
2391
2392 while (1)
2393 {
2394 int c;
2395
2396 if (from >= end) goto done;
2397 c = FETCH_CHAR (from);
2398 if (nofence && c == state.instring) break;
2399 UPDATE_SYNTAX_TABLE_FORWARD (from);
2400 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2401 {
2402 case Sstring_fence:
2403 if (!nofence) goto string_end;
2404 break;
2405 case Scharquote:
2406 case Sescape:
2407 INC_FROM;
2408 startquotedinstring:
2409 if (from >= end) goto endquoted;
2410 }
2411 INC_FROM;
2412 }
2413 }
2414 string_end:
2415 state.instring = -1;
2416 curlevel->prev = curlevel->last;
2417 INC_FROM;
2418 if (boundary_stop) goto done;
2419 break;
2420
2421 case Smath:
2422 break;
2423 }
2424 }
2425 goto done;
2426
2427 stop: /* Here if stopping before start of sexp. */
2428 from = prev_from; /* We have just fetched the char that starts it; */
2429 goto done; /* but return the position before it. */
2430
2431 endquoted:
2432 state.quoted = 1;
2433 done:
2434 state.depth = depth;
2435 state.mindepth = mindepth;
2436 state.thislevelstart = curlevel->prev;
2437 state.prevlevelstart
2438 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2439 state.location = from;
2440 immediate_quit = 0;
2441
2442 *stateptr = state;
2443 }
2444
2445 /* This comment supplies the doc string for parse-partial-sexp,
2446 for make-docfile to see. We cannot put this in the real DEFUN
2447 due to limits in the Unix cpp.
2448
2449 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2450 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2451 Parsing stops at TO or when certain criteria are met;\n\
2452 point is set to where parsing stops.\n\
2453 If fifth arg STATE is omitted or nil,\n\
2454 parsing assumes that FROM is the beginning of a function.\n\
2455 Value is a list of nine elements describing final state of parsing:\n\
2456 0. depth in parens.\n\
2457 1. character address of start of innermost containing list; nil if none.\n\
2458 2. character address of start of last complete sexp terminated.\n\
2459 3. non-nil if inside a string.\n\
2460 (it is the character that will terminate the string,\n\
2461 or t if the string should be terminated by an explicit\n\
2462 `syntax-table' property.)\n\
2463 4. t if inside a comment.\n\
2464 5. t if following a quote character.\n\
2465 6. the minimum paren-depth encountered during this scan.\n\
2466 7. t if in a comment of style `b'; `syntax-table' if given by an explicit\n\
2467 `syntax-table' property.\n\
2468 8. character address of start of last comment or string; nil if none.\n\
2469 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2470 in parentheses becomes equal to TARGETDEPTH.\n\
2471 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2472 any character that starts a sexp.\n\
2473 Fifth arg STATE is an eight-list like what this function returns.\n\
2474 It is used to initialize the state of the parse. Its second and third
2475 elements are ignored.
2476 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If\n\
2477 it is `syntax-table', stop after the start of a comment or a string, or\n\
2478 after end of a comment or a string.")
2479 (from, to, targetdepth, stopbefore, state, commentstop)
2480 */
2481
2482 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2483 0 /* See immediately above */)
2484 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2485 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2486 {
2487 struct lisp_parse_state state;
2488 int target;
2489
2490 if (!NILP (targetdepth))
2491 {
2492 CHECK_NUMBER (targetdepth, 3);
2493 target = XINT (targetdepth);
2494 }
2495 else
2496 target = -100000; /* We won't reach this depth */
2497
2498 validate_region (&from, &to);
2499 scan_sexps_forward (&state, XINT (from), XINT (to),
2500 target, !NILP (stopbefore), oldstate,
2501 (NILP (commentstop)
2502 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2503
2504 SET_PT (state.location);
2505
2506 return Fcons (make_number (state.depth),
2507 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2508 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2509 Fcons (state.instring >= 0
2510 ? (state.instring == ST_STRING_STYLE
2511 ? Qt : make_number (state.instring)) : Qnil,
2512 Fcons (state.incomment ? Qt : Qnil,
2513 Fcons (state.quoted ? Qt : Qnil,
2514 Fcons (make_number (state.mindepth),
2515 Fcons (state.comstyle
2516 ? (state.comstyle == ST_COMMENT_STYLE
2517 ? Qsyntax_table : Qt) : Qnil,
2518 Fcons (state.comstr_start != -1 ? make_number (state.comstr_start) : Qnil,
2519 Qnil)))))))));
2520 }
2521 \f
2522 init_syntax_once ()
2523 {
2524 register int i, c;
2525 Lisp_Object temp;
2526
2527 /* This has to be done here, before we call Fmake_char_table. */
2528 Qsyntax_table = intern ("syntax-table");
2529 staticpro (&Qsyntax_table);
2530
2531 /* Intern this now in case it isn't already done.
2532 Setting this variable twice is harmless.
2533 But don't staticpro it here--that is done in alloc.c. */
2534 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2535
2536 /* Create objects which can be shared among syntax tables. */
2537 Vsyntax_code_object = Fmake_vector (13, Qnil);
2538 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2539 XVECTOR (Vsyntax_code_object)->contents[i]
2540 = Fcons (make_number (i), Qnil);
2541
2542 /* Now we are ready to set up this property, so we can
2543 create syntax tables. */
2544 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2545
2546 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2547
2548 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2549
2550 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2551 for (i = 'a'; i <= 'z'; i++)
2552 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2553 for (i = 'A'; i <= 'Z'; i++)
2554 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2555 for (i = '0'; i <= '9'; i++)
2556 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2557
2558 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2559 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2560
2561 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2562 Fcons (make_number (Sopen), make_number (')')));
2563 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2564 Fcons (make_number (Sclose), make_number ('(')));
2565 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2566 Fcons (make_number (Sopen), make_number (']')));
2567 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2568 Fcons (make_number (Sclose), make_number ('[')));
2569 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2570 Fcons (make_number (Sopen), make_number ('}')));
2571 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2572 Fcons (make_number (Sclose), make_number ('{')));
2573 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2574 Fcons (make_number ((int) Sstring), Qnil));
2575 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2576 Fcons (make_number ((int) Sescape), Qnil));
2577
2578 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2579 for (i = 0; i < 10; i++)
2580 {
2581 c = "_-+*/&|<>="[i];
2582 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2583 }
2584
2585 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2586 for (i = 0; i < 12; i++)
2587 {
2588 c = ".,;:?!#@~^'`"[i];
2589 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2590 }
2591 }
2592
2593 syms_of_syntax ()
2594 {
2595 Qsyntax_table_p = intern ("syntax-table-p");
2596 staticpro (&Qsyntax_table_p);
2597
2598 staticpro (&Vsyntax_code_object);
2599
2600 Qscan_error = intern ("scan-error");
2601 staticpro (&Qscan_error);
2602 Fput (Qscan_error, Qerror_conditions,
2603 Fcons (Qerror, Qnil));
2604 Fput (Qscan_error, Qerror_message,
2605 build_string ("Scan error"));
2606
2607 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2608 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2609
2610 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2611 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2612 The value of this property should be either a syntax table, or a cons\n\
2613 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2614 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2615 relevant only for open/close type.");
2616
2617 words_include_escapes = 0;
2618 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2619 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2620
2621 defsubr (&Ssyntax_table_p);
2622 defsubr (&Ssyntax_table);
2623 defsubr (&Sstandard_syntax_table);
2624 defsubr (&Scopy_syntax_table);
2625 defsubr (&Sset_syntax_table);
2626 defsubr (&Schar_syntax);
2627 defsubr (&Smatching_paren);
2628 defsubr (&Smodify_syntax_entry);
2629 defsubr (&Sdescribe_syntax);
2630
2631 defsubr (&Sforward_word);
2632
2633 defsubr (&Sskip_chars_forward);
2634 defsubr (&Sskip_chars_backward);
2635 defsubr (&Sskip_syntax_forward);
2636 defsubr (&Sskip_syntax_backward);
2637
2638 defsubr (&Sforward_comment);
2639 defsubr (&Sscan_lists);
2640 defsubr (&Sscan_sexps);
2641 defsubr (&Sbackward_prefix_chars);
2642 defsubr (&Sparse_partial_sexp);
2643 }